Coverage Report

Created: 2026-05-09 05:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/simd/vstring_function.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#ifdef __AVX2__
21
#include <immintrin.h>
22
#elif defined(__ARM_FEATURE_SVE)
23
#include <arm_sve.h>
24
#endif
25
#include <unistd.h>
26
27
#include <array>
28
#include <cstddef>
29
#include <cstdint>
30
31
#include "core/string_ref.h"
32
#include "util/simd/lower_upper_impl.h"
33
#include "util/sse_util.hpp"
34
35
namespace doris {
36
37
static constexpr std::array<uint8_t, 256> UTF8_BYTE_LENGTH = {
38
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
46
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6};
47
48
10.6M
inline uint8_t get_utf8_byte_length(uint8_t character) {
49
10.6M
    return UTF8_BYTE_LENGTH[character];
50
10.6M
}
51
52
// Copied from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h
53
// The function returns true (1) if all chars passed in src are
54
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
55
0
inline bool validate_ascii_fast(const char* src, size_t len) {
56
0
    size_t i = 0;
57
0
    __m128i has_error = _mm_setzero_si128();
58
0
    if (len >= 16) {
59
0
        for (; i <= len - 16; i += 16) {
60
0
            __m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
61
0
            has_error = _mm_or_si128(has_error, current_bytes);
62
0
        }
63
0
    }
64
0
    int error_mask = _mm_movemask_epi8(has_error);
65
0
66
0
    char tail_has_error = 0;
67
0
    for (; i < len; i++) {
68
0
        tail_has_error |= src[i];
69
0
    }
70
0
    error_mask |= (tail_has_error & 0x80);
71
0
72
0
    return !error_mask;
73
0
}
74
75
#ifdef __AVX2__
76
// The function returns true (1) if all chars passed in src are
77
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
78
32.1k
inline bool validate_ascii_fast_avx(const char* src, size_t len) {
79
32.1k
    size_t i = 0;
80
32.1k
    __m256i has_error = _mm256_setzero_si256();
81
32.1k
    if (len >= 32) {
82
355k
        for (; i <= len - 32; i += 32) {
83
353k
            __m256i current_bytes = _mm256_loadu_si256((const __m256i*)(src + i));
84
353k
            has_error = _mm256_or_si256(has_error, current_bytes);
85
353k
        }
86
1.27k
    }
87
32.1k
    int error_mask = _mm256_movemask_epi8(has_error);
88
89
32.1k
    char tail_has_error = 0;
90
208k
    for (; i < len; i++) {
91
176k
        tail_has_error |= src[i];
92
176k
    }
93
32.1k
    error_mask |= (tail_has_error & 0x80);
94
95
32.1k
    return !error_mask;
96
32.1k
}
97
#elif defined(__ARM_FEATURE_SVE)
98
inline bool validate_ascii_fast_sve(const char* src, size_t len) {
99
    for (size_t i = 0; i < len; i += svcntb()) {
100
        svbool_t pg = svwhilelt_b8(i, len);
101
        svuint8_t v = svld1_u8(pg, reinterpret_cast<const uint8_t*>(src + i));
102
        // Check sign bit set => byte < 0 as int8 => non-ASCII
103
        svbool_t neg = svcmplt_n_s8(pg, svreinterpret_s8(v), 0);
104
        if (svptest_any(pg, neg)) {
105
            return false;
106
        }
107
    }
108
109
    return true;
110
}
111
#endif
112
113
namespace simd {
114
115
class VStringFunctions {
116
public:
117
#if defined(__SSE2__) || defined(__aarch64__)
118
    /// Size in bytes of one 128-bit SIMD register (16 chars).
119
    static constexpr auto REGISTER_SIZE = sizeof(__m128i);
120
#endif
121
122
    template <bool trim_single>
123
    static inline const unsigned char* rtrim(const unsigned char* begin, const unsigned char* end,
124
404
                                             const StringRef& remove_str) {
125
404
        if (remove_str.size == 0) {
126
3
            return end;
127
3
        }
128
401
        const auto* p = end;
129
130
401
        if constexpr (trim_single) {
131
299
            const auto ch = remove_str.data[0];
132
299
#if defined(__AVX2__)
133
299
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
299
            const auto size = end - begin;
135
299
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
299
            const auto spaces = _mm256_set1_epi8(ch);
137
300
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
5
                uint32_t masks = _mm256_movemask_epi8(
139
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
5
                if ((~masks)) {
141
4
                    break;
142
4
                }
143
5
            }
144
299
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
610
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
311
            }
163
299
            return p;
164
299
        }
165
166
0
        const auto remove_size = remove_str.size;
167
401
        const auto* const remove_data = remove_str.data;
168
450
        while (p - begin >= remove_size) {
169
141
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
49
                p -= remove_str.size;
171
92
            } else {
172
92
                break;
173
92
            }
174
141
        }
175
401
        return p;
176
404
    }
_ZN5doris4simd16VStringFunctions5rtrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
124
299
                                             const StringRef& remove_str) {
125
299
        if (remove_str.size == 0) {
126
0
            return end;
127
0
        }
128
299
        const auto* p = end;
129
130
299
        if constexpr (trim_single) {
131
299
            const auto ch = remove_str.data[0];
132
299
#if defined(__AVX2__)
133
299
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
299
            const auto size = end - begin;
135
299
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
299
            const auto spaces = _mm256_set1_epi8(ch);
137
300
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
5
                uint32_t masks = _mm256_movemask_epi8(
139
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
5
                if ((~masks)) {
141
4
                    break;
142
4
                }
143
5
            }
144
299
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
610
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
311
            }
163
299
            return p;
164
299
        }
165
166
0
        const auto remove_size = remove_str.size;
167
299
        const auto* const remove_data = remove_str.data;
168
299
        while (p - begin >= remove_size) {
169
0
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
0
                p -= remove_str.size;
171
0
            } else {
172
0
                break;
173
0
            }
174
0
        }
175
299
        return p;
176
299
    }
_ZN5doris4simd16VStringFunctions5rtrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
124
105
                                             const StringRef& remove_str) {
125
105
        if (remove_str.size == 0) {
126
3
            return end;
127
3
        }
128
102
        const auto* p = end;
129
130
        if constexpr (trim_single) {
131
            const auto ch = remove_str.data[0];
132
#if defined(__AVX2__)
133
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
            const auto size = end - begin;
135
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
            const auto spaces = _mm256_set1_epi8(ch);
137
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
                uint32_t masks = _mm256_movemask_epi8(
139
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
                if ((~masks)) {
141
                    break;
142
                }
143
            }
144
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
            }
163
            return p;
164
        }
165
166
102
        const auto remove_size = remove_str.size;
167
102
        const auto* const remove_data = remove_str.data;
168
151
        while (p - begin >= remove_size) {
169
141
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
49
                p -= remove_str.size;
171
92
            } else {
172
92
                break;
173
92
            }
174
141
        }
175
102
        return p;
176
105
    }
177
178
    template <bool trim_single>
179
    static inline const unsigned char* ltrim(const unsigned char* begin, const unsigned char* end,
180
364
                                             const StringRef& remove_str) {
181
364
        if (remove_str.size == 0) {
182
2
            return begin;
183
2
        }
184
362
        const auto* p = begin;
185
186
362
        if constexpr (trim_single) {
187
243
            const auto ch = remove_str.data[0];
188
243
#if defined(__AVX2__)
189
243
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
243
            const auto size = end - begin;
191
243
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
243
            const auto spaces = _mm256_set1_epi8(ch);
193
243
            for (; p < avx2_end; p += AVX2_BYTES) {
194
6
                uint32_t masks = _mm256_movemask_epi8(
195
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
6
                if ((~masks)) {
197
6
                    break;
198
6
                }
199
6
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
565
            for (; p < end && *p == ch; ++p) {
216
322
            }
217
243
            return p;
218
243
        }
219
220
0
        const auto remove_size = remove_str.size;
221
362
        const auto* const remove_data = remove_str.data;
222
484
        while (end - p >= remove_size) {
223
236
            if (memcmp(p, remove_data, remove_size) == 0) {
224
122
                p += remove_str.size;
225
122
            } else {
226
114
                break;
227
114
            }
228
236
        }
229
362
        return p;
230
364
    }
_ZN5doris4simd16VStringFunctions5ltrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
180
243
                                             const StringRef& remove_str) {
181
243
        if (remove_str.size == 0) {
182
0
            return begin;
183
0
        }
184
243
        const auto* p = begin;
185
186
243
        if constexpr (trim_single) {
187
243
            const auto ch = remove_str.data[0];
188
243
#if defined(__AVX2__)
189
243
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
243
            const auto size = end - begin;
191
243
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
243
            const auto spaces = _mm256_set1_epi8(ch);
193
243
            for (; p < avx2_end; p += AVX2_BYTES) {
194
6
                uint32_t masks = _mm256_movemask_epi8(
195
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
6
                if ((~masks)) {
197
6
                    break;
198
6
                }
199
6
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
565
            for (; p < end && *p == ch; ++p) {
216
322
            }
217
243
            return p;
218
243
        }
219
220
0
        const auto remove_size = remove_str.size;
221
243
        const auto* const remove_data = remove_str.data;
222
243
        while (end - p >= remove_size) {
223
0
            if (memcmp(p, remove_data, remove_size) == 0) {
224
0
                p += remove_str.size;
225
0
            } else {
226
0
                break;
227
0
            }
228
0
        }
229
243
        return p;
230
243
    }
_ZN5doris4simd16VStringFunctions5ltrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
180
121
                                             const StringRef& remove_str) {
181
121
        if (remove_str.size == 0) {
182
2
            return begin;
183
2
        }
184
119
        const auto* p = begin;
185
186
        if constexpr (trim_single) {
187
            const auto ch = remove_str.data[0];
188
#if defined(__AVX2__)
189
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
            const auto size = end - begin;
191
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
            const auto spaces = _mm256_set1_epi8(ch);
193
            for (; p < avx2_end; p += AVX2_BYTES) {
194
                uint32_t masks = _mm256_movemask_epi8(
195
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
                if ((~masks)) {
197
                    break;
198
                }
199
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
            for (; p < end && *p == ch; ++p) {
216
            }
217
            return p;
218
        }
219
220
119
        const auto remove_size = remove_str.size;
221
119
        const auto* const remove_data = remove_str.data;
222
241
        while (end - p >= remove_size) {
223
236
            if (memcmp(p, remove_data, remove_size) == 0) {
224
122
                p += remove_str.size;
225
122
            } else {
226
114
                break;
227
114
            }
228
236
        }
229
119
        return p;
230
121
    }
231
232
    // Iterate a UTF-8 string without exceeding a given length n.
233
    // The function returns two values:
234
    // the first represents the byte length traversed, and the second represents the char length traversed.
235
    static inline std::pair<size_t, size_t> iterate_utf8_with_limit_length(const char* begin,
236
                                                                           const char* end,
237
905
                                                                           size_t n) {
238
905
        const char* p = begin;
239
905
        int char_size = 0;
240
241
905
        size_t i = 0;
242
1.95k
        for (; i < n && p < end; ++i, p += char_size) {
243
1.05k
            char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
244
1.05k
        }
245
246
905
        return {p - begin, i};
247
905
    }
248
249
    // Returns true if every byte of str is 7-bit ASCII, using the AVX2, SVE or SSE helper selected at compile time.
250
    // An empty input returns true.
251
32.1k
    static bool is_ascii(const StringRef& str) {
252
32.1k
#ifdef __AVX2__
253
32.1k
        return validate_ascii_fast_avx(str.data, str.size);
254
#elif defined(__ARM_FEATURE_SVE)
255
        return validate_ascii_fast_sve(str.data, str.size);
256
#endif
257
0
        return validate_ascii_fast(str.data, str.size);
258
32.1k
    }
259
260
152
    static void reverse(const StringRef& str, std::string* dst) {
261
152
        if (is_ascii(str)) {
262
113
            int64_t begin = 0;
263
113
            int64_t end = str.size;
264
113
            int64_t result_end = dst->size() - 1;
265
266
            // Plain element-wise loop over __restrict pointers so the compiler can auto-vectorize it.
267
113
            auto* __restrict l = dst->data();
268
113
            auto* __restrict r = str.data;
269
935
            for (; begin < end; ++begin, --result_end) {
270
822
                l[result_end] = r[begin];
271
822
            }
272
113
        } else {
273
39
            char* dst_data = dst->data();
274
330
            for (size_t i = 0, char_size = 0; i < str.size; i += char_size) {
275
291
                char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.data)[i]];
276
                // The last character may be an illegal (truncated) UTF-8 sequence whose lead byte
277
                // reports a char_size larger than the bytes actually left, which would push offset
278
                // past the end of the buffer. For example, with str.size = 4 and i = 3, a last byte
279
                // with char_size 2 would make str.data + offset exceed the buffer range.
280
291
                size_t offset = i + char_size;
281
291
                if (offset > str.size) {
282
2
                    offset = str.size;
283
2
                }
284
291
                std::copy(str.data + i, str.data + offset, dst_data + str.size - offset);
285
291
            }
286
39
        }
287
152
    }
288
289
1.19k
    static void hex_encode(const unsigned char* src_str, size_t length, char* dst_str) {
290
1.19k
        static constexpr auto hex_table = "0123456789ABCDEF";
291
1.19k
        auto src_str_end = src_str + length;
292
293
1.19k
#if defined(__SSE2__) || defined(__aarch64__)
294
1.19k
        constexpr auto step = sizeof(uint64_t);
295
1.19k
        if (src_str + step < src_str_end) {
296
965
            const auto hex_map = _mm_loadu_si128(reinterpret_cast<const __m128i*>(hex_table));
297
965
            const auto mask_map = _mm_set1_epi8(0x0F);
298
299
1.01k
            do {
300
1.01k
                auto data = _mm_loadu_si64(src_str);
301
1.01k
                auto hex_loc =
302
1.01k
                        _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map);
303
1.01k
                _mm_storeu_si128(reinterpret_cast<__m128i*>(dst_str),
304
1.01k
                                 _mm_shuffle_epi8(hex_map, hex_loc));
305
306
1.01k
                src_str += step;
307
1.01k
                dst_str += step * 2;
308
1.01k
            } while (src_str + step < src_str_end);
309
965
        }
310
1.19k
#endif
311
1.19k
        char res[2];
312
        // hex(str): an input of n bytes produces exactly 2 * n output characters.
313
9.06k
        for (; src_str < src_str_end; src_str += 1, dst_str += 2) {
314
            // low 4 bits
315
7.86k
            *(res + 1) = hex_table[src_str[0] & 0x0F];
316
            // high 4 bits
317
7.86k
            *res = hex_table[(src_str[0] >> 4)];
318
7.86k
            std::copy(res, res + 2, dst_str);
319
7.86k
        }
320
1.19k
    }
321
322
411
    static void to_lower(const uint8_t* src, int64_t len, uint8_t* dst) {
323
411
        if (len <= 0) {
324
8
            return;
325
8
        }
326
403
        LowerUpperImpl<'A', 'Z'> lowerUpper;
327
403
        lowerUpper.transfer(src, src + len, dst);
328
403
    }
329
330
93
    static void to_upper(const uint8_t* src, int64_t len, uint8_t* dst) {
331
93
        if (len <= 0) {
332
5
            return;
333
5
        }
334
88
        LowerUpperImpl<'a', 'z'> lowerUpper;
335
88
        lowerUpper.transfer(src, src + len, dst);
336
88
    }
337
338
201
    static inline size_t get_char_len(const char* src, size_t len, std::vector<size_t>& str_index) {
339
201
        size_t char_len = 0;
340
1.34k
        for (size_t i = 0, char_size = 0; i < len; i += char_size) {
341
1.14k
            char_size = UTF8_BYTE_LENGTH[(unsigned char)src[i]];
342
1.14k
            str_index.push_back(i);
343
1.14k
            ++char_len;
344
1.14k
        }
345
201
        return char_len;
346
201
    }
347
348
    // utf8-encoding:
349
    // - 1-byte: 0xxx_xxxx;
350
    // - 2-byte: 110x_xxxx 10xx_xxxx;
351
    // - 3-byte: 1110_xxxx 10xx_xxxx 10xx_xxxx;
352
    // - 4-byte: 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx.
353
    // Counting UTF-8 chars in a byte string is equivalent to counting the first byte of each char, that
354
    // is to say, counting the bytes which do not match the 10xx_xxxx pattern.
355
    // All of 0xxx_xxxx, 110x_xxxx, 1110_xxxx and 1111_0xxx are greater than 1011_1111 in int8_t arithmetic,
356
    // so just count the bytes greater than 1011_1111 in a byte string as the result of utf8_length.
357
    // get_char_len is used to return the UTF-8 length of a string.
358
    // The return value will never exceed len.
359
    template <typename T>
360
16.9k
    static inline T get_char_len(const char* src, T len) {
361
16.9k
        T char_len = 0;
362
16.9k
        const char* p = src;
363
16.9k
        const char* end = p + len;
364
16.9k
#if defined(__SSE2__) || defined(__aarch64__)
365
16.9k
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
16.9k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
16.9k
        const auto threshold = _mm_set1_epi8(0xBF);
369
78.5k
        for (; p < src_end_sse2; p += bytes_sse2) {
370
61.5k
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
61.5k
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
61.5k
        }
373
16.9k
#endif
374
        // Process the bytes left over at the tail of the string (fewer than bytes_sse2
375
        // after the SIMD loop) one by one.
376
130k
        for (; p < end; ++p) {
377
113k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
113k
        }
379
16.9k
        return char_len;
380
16.9k
    }
_ZN5doris4simd16VStringFunctions12get_char_lenIiEET_PKcS3_
Line
Count
Source
360
15.5k
    static inline T get_char_len(const char* src, T len) {
361
15.5k
        T char_len = 0;
362
15.5k
        const char* p = src;
363
15.5k
        const char* end = p + len;
364
15.5k
#if defined(__SSE2__) || defined(__aarch64__)
365
15.5k
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
15.5k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
15.5k
        const auto threshold = _mm_set1_epi8(0xBF);
369
77.0k
        for (; p < src_end_sse2; p += bytes_sse2) {
370
61.5k
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
61.5k
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
61.5k
        }
373
15.5k
#endif
374
        // Process the bytes left over at the tail of the string (fewer than bytes_sse2
375
        // after the SIMD loop) one by one.
376
121k
        for (; p < end; ++p) {
377
105k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
105k
        }
379
15.5k
        return char_len;
380
15.5k
    }
_ZN5doris4simd16VStringFunctions12get_char_lenImEET_PKcS3_
Line
Count
Source
360
1.48k
    static inline T get_char_len(const char* src, T len) {
361
1.48k
        T char_len = 0;
362
1.48k
        const char* p = src;
363
1.48k
        const char* end = p + len;
364
1.48k
#if defined(__SSE2__) || defined(__aarch64__)
365
1.48k
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
1.48k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
1.48k
        const auto threshold = _mm_set1_epi8(0xBF);
369
1.53k
        for (; p < src_end_sse2; p += bytes_sse2) {
370
45
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
45
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
45
        }
373
1.48k
#endif
374
        // Process the bytes left over at the tail of the string (fewer than bytes_sse2
375
        // after the SIMD loop) one by one.
376
9.55k
        for (; p < end; ++p) {
377
8.06k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
8.06k
        }
379
1.48k
        return char_len;
380
1.48k
    }
381
};
382
} // namespace simd
383
} // namespace doris