Coverage Report

Created: 2026-05-08 23:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/simd/vstring_function.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#ifdef __AVX2__
21
#include <immintrin.h>
22
#elif defined(__ARM_FEATURE_SVE)
23
#include <arm_sve.h>
24
#endif
25
#include <unistd.h>
26
27
#include <array>
28
#include <cstddef>
29
#include <cstdint>
30
31
#include "core/string_ref.h"
32
#include "util/simd/lower_upper_impl.h"
33
#include "util/sse_util.hpp"
34
35
namespace doris {
36
37
static constexpr std::array<uint8_t, 256> UTF8_BYTE_LENGTH = {
38
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
46
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6};
47
48
576k
inline uint8_t get_utf8_byte_length(uint8_t character) {
49
576k
    return UTF8_BYTE_LENGTH[character];
50
576k
}
51
52
// copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h
53
// The function returns true (1) if all chars passed in src are
54
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
55
0
inline bool validate_ascii_fast(const char* src, size_t len) {
56
0
    size_t i = 0;
57
0
    __m128i has_error = _mm_setzero_si128();
58
0
    if (len >= 16) {
59
0
        for (; i <= len - 16; i += 16) {
60
0
            __m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
61
0
            has_error = _mm_or_si128(has_error, current_bytes);
62
0
        }
63
0
    }
64
0
    int error_mask = _mm_movemask_epi8(has_error);
65
0
66
0
    char tail_has_error = 0;
67
0
    for (; i < len; i++) {
68
0
        tail_has_error |= src[i];
69
0
    }
70
0
    error_mask |= (tail_has_error & 0x80);
71
0
72
0
    return !error_mask;
73
0
}
74
75
#ifdef __AVX2__
76
// The function returns true (1) if all chars passed in src are
77
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
78
2.93k
inline bool validate_ascii_fast_avx(const char* src, size_t len) {
79
2.93k
    size_t i = 0;
80
2.93k
    __m256i has_error = _mm256_setzero_si256();
81
2.93k
    if (len >= 32) {
82
31.4k
        for (; i <= len - 32; i += 32) {
83
31.4k
            __m256i current_bytes = _mm256_loadu_si256((const __m256i*)(src + i));
84
31.4k
            has_error = _mm256_or_si256(has_error, current_bytes);
85
31.4k
        }
86
26
    }
87
2.93k
    int error_mask = _mm256_movemask_epi8(has_error);
88
89
2.93k
    char tail_has_error = 0;
90
25.9k
    for (; i < len; i++) {
91
22.9k
        tail_has_error |= src[i];
92
22.9k
    }
93
2.93k
    error_mask |= (tail_has_error & 0x80);
94
95
2.93k
    return !error_mask;
96
2.93k
}
97
#elif defined(__ARM_FEATURE_SVE)
98
inline bool validate_ascii_fast_sve(const char* src, size_t len) {
99
    for (size_t i = 0; i < len; i += svcntb()) {
100
        svbool_t pg = svwhilelt_b8(i, len);
101
        svuint8_t v = svld1_u8(pg, reinterpret_cast<const uint8_t*>(src + i));
102
        // Check sign bit set => byte < 0 as int8 => non-ASCII
103
        svbool_t neg = svcmplt_n_s8(pg, svreinterpret_s8(v), 0);
104
        if (svptest_any(pg, neg)) {
105
            return false;
106
        }
107
    }
108
109
    return true;
110
}
111
#endif
112
113
namespace simd {
114
115
class VStringFunctions {
116
public:
117
#if defined(__SSE2__) || defined(__aarch64__)
118
    /// n equals to 16 chars length
119
    static constexpr auto REGISTER_SIZE = sizeof(__m128i);
120
#endif
121
122
    template <bool trim_single>
123
    static inline const unsigned char* rtrim(const unsigned char* begin, const unsigned char* end,
124
84
                                             const StringRef& remove_str) {
125
84
        if (remove_str.size == 0) {
126
0
            return end;
127
0
        }
128
84
        const auto* p = end;
129
130
84
        if constexpr (trim_single) {
131
82
            const auto ch = remove_str.data[0];
132
82
#if defined(__AVX2__)
133
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
82
            const auto size = end - begin;
135
82
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
82
            const auto spaces = _mm256_set1_epi8(ch);
137
83
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
5
                uint32_t masks = _mm256_movemask_epi8(
139
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
5
                if ((~masks)) {
141
4
                    break;
142
4
                }
143
5
            }
144
82
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
291
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
209
            }
163
82
            return p;
164
82
        }
165
166
0
        const auto remove_size = remove_str.size;
167
84
        const auto* const remove_data = remove_str.data;
168
87
        while (p - begin >= remove_size) {
169
5
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
3
                p -= remove_str.size;
171
3
            } else {
172
2
                break;
173
2
            }
174
5
        }
175
84
        return p;
176
84
    }
_ZN5doris4simd16VStringFunctions5rtrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
124
2
                                             const StringRef& remove_str) {
125
2
        if (remove_str.size == 0) {
126
0
            return end;
127
0
        }
128
2
        const auto* p = end;
129
130
        if constexpr (trim_single) {
131
            const auto ch = remove_str.data[0];
132
#if defined(__AVX2__)
133
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
            const auto size = end - begin;
135
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
            const auto spaces = _mm256_set1_epi8(ch);
137
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
                uint32_t masks = _mm256_movemask_epi8(
139
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
                if ((~masks)) {
141
                    break;
142
                }
143
            }
144
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
            }
163
            return p;
164
        }
165
166
2
        const auto remove_size = remove_str.size;
167
2
        const auto* const remove_data = remove_str.data;
168
5
        while (p - begin >= remove_size) {
169
5
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
3
                p -= remove_str.size;
171
3
            } else {
172
2
                break;
173
2
            }
174
5
        }
175
2
        return p;
176
2
    }
_ZN5doris4simd16VStringFunctions5rtrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
124
82
                                             const StringRef& remove_str) {
125
82
        if (remove_str.size == 0) {
126
0
            return end;
127
0
        }
128
82
        const auto* p = end;
129
130
82
        if constexpr (trim_single) {
131
82
            const auto ch = remove_str.data[0];
132
82
#if defined(__AVX2__)
133
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
134
82
            const auto size = end - begin;
135
82
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
136
82
            const auto spaces = _mm256_set1_epi8(ch);
137
83
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
138
5
                uint32_t masks = _mm256_movemask_epi8(
139
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
140
5
                if ((~masks)) {
141
4
                    break;
142
4
                }
143
5
            }
144
82
            p += AVX2_BYTES;
145
#elif defined(__ARM_FEATURE_SVE)
146
            const auto size = static_cast<size_t>(end - begin);
147
            const size_t vl = svcntb();
148
            if (size >= vl) {
149
                const auto* const sve_begin = end - (size / vl) * vl;
150
                svbool_t pg = svptrue_b8();
151
                for (p = end - vl; p >= sve_begin; p -= vl) {
152
                    svuint8_t v = svld1_u8(pg, p);
153
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
154
                    if (svptest_any(pg, neq)) {
155
                        break;
156
                    }
157
                }
158
                p += vl;
159
            }
160
#endif
161
291
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
162
209
            }
163
82
            return p;
164
82
        }
165
166
0
        const auto remove_size = remove_str.size;
167
82
        const auto* const remove_data = remove_str.data;
168
82
        while (p - begin >= remove_size) {
169
0
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
170
0
                p -= remove_str.size;
171
0
            } else {
172
0
                break;
173
0
            }
174
0
        }
175
82
        return p;
176
82
    }
177
178
    template <bool trim_single>
179
    static inline const unsigned char* ltrim(const unsigned char* begin, const unsigned char* end,
180
84
                                             const StringRef& remove_str) {
181
84
        if (remove_str.size == 0) {
182
0
            return begin;
183
0
        }
184
84
        const auto* p = begin;
185
186
84
        if constexpr (trim_single) {
187
82
            const auto ch = remove_str.data[0];
188
82
#if defined(__AVX2__)
189
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
82
            const auto size = end - begin;
191
82
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
82
            const auto spaces = _mm256_set1_epi8(ch);
193
82
            for (; p < avx2_end; p += AVX2_BYTES) {
194
6
                uint32_t masks = _mm256_movemask_epi8(
195
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
6
                if ((~masks)) {
197
6
                    break;
198
6
                }
199
6
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
311
            for (; p < end && *p == ch; ++p) {
216
229
            }
217
82
            return p;
218
82
        }
219
220
0
        const auto remove_size = remove_str.size;
221
84
        const auto* const remove_data = remove_str.data;
222
87
        while (end - p >= remove_size) {
223
5
            if (memcmp(p, remove_data, remove_size) == 0) {
224
3
                p += remove_str.size;
225
3
            } else {
226
2
                break;
227
2
            }
228
5
        }
229
84
        return p;
230
84
    }
_ZN5doris4simd16VStringFunctions5ltrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
180
2
                                             const StringRef& remove_str) {
181
2
        if (remove_str.size == 0) {
182
0
            return begin;
183
0
        }
184
2
        const auto* p = begin;
185
186
        if constexpr (trim_single) {
187
            const auto ch = remove_str.data[0];
188
#if defined(__AVX2__)
189
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
            const auto size = end - begin;
191
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
            const auto spaces = _mm256_set1_epi8(ch);
193
            for (; p < avx2_end; p += AVX2_BYTES) {
194
                uint32_t masks = _mm256_movemask_epi8(
195
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
                if ((~masks)) {
197
                    break;
198
                }
199
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
            for (; p < end && *p == ch; ++p) {
216
            }
217
            return p;
218
        }
219
220
2
        const auto remove_size = remove_str.size;
221
2
        const auto* const remove_data = remove_str.data;
222
5
        while (end - p >= remove_size) {
223
5
            if (memcmp(p, remove_data, remove_size) == 0) {
224
3
                p += remove_str.size;
225
3
            } else {
226
2
                break;
227
2
            }
228
5
        }
229
2
        return p;
230
2
    }
_ZN5doris4simd16VStringFunctions5ltrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
180
82
                                             const StringRef& remove_str) {
181
82
        if (remove_str.size == 0) {
182
0
            return begin;
183
0
        }
184
82
        const auto* p = begin;
185
186
82
        if constexpr (trim_single) {
187
82
            const auto ch = remove_str.data[0];
188
82
#if defined(__AVX2__)
189
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
190
82
            const auto size = end - begin;
191
82
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
192
82
            const auto spaces = _mm256_set1_epi8(ch);
193
82
            for (; p < avx2_end; p += AVX2_BYTES) {
194
6
                uint32_t masks = _mm256_movemask_epi8(
195
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
196
6
                if ((~masks)) {
197
6
                    break;
198
6
                }
199
6
            }
200
#elif defined(__ARM_FEATURE_SVE)
201
            const auto size = static_cast<size_t>(end - begin);
202
            const size_t vl = svcntb();
203
            if (size >= vl) {
204
                const auto* const sve_end = begin + (size / vl) * vl;
205
                svbool_t pg = svptrue_b8();
206
                for (; p < sve_end; p += vl) {
207
                    svuint8_t v = svld1_u8(pg, p);
208
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
209
                    if (svptest_any(pg, eq)) {
210
                        break;
211
                    }
212
                }
213
            }
214
#endif
215
311
            for (; p < end && *p == ch; ++p) {
216
229
            }
217
82
            return p;
218
82
        }
219
220
0
        const auto remove_size = remove_str.size;
221
82
        const auto* const remove_data = remove_str.data;
222
82
        while (end - p >= remove_size) {
223
0
            if (memcmp(p, remove_data, remove_size) == 0) {
224
0
                p += remove_str.size;
225
0
            } else {
226
0
                break;
227
0
            }
228
0
        }
229
82
        return p;
230
82
    }
231
232
    // Iterate a UTF-8 string without exceeding a given length n.
233
    // The function returns two values:
234
    // the first represents the byte length traversed, and the second represents the char length traversed.
235
    static inline std::pair<size_t, size_t> iterate_utf8_with_limit_length(const char* begin,
236
                                                                           const char* end,
237
426
                                                                           size_t n) {
238
426
        const char* p = begin;
239
426
        int char_size = 0;
240
241
426
        size_t i = 0;
242
693
        for (; i < n && p < end; ++i, p += char_size) {
243
267
            char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
244
267
        }
245
246
426
        return {p - begin, i};
247
426
    }
248
249
    // Gcc will do auto simd in this function
250
    // if input empty, return true
251
2.93k
    static bool is_ascii(const StringRef& str) {
252
2.93k
#ifdef __AVX2__
253
2.93k
        return validate_ascii_fast_avx(str.data, str.size);
254
#elif defined(__ARM_FEATURE_SVE)
255
        return validate_ascii_fast_sve(str.data, str.size);
256
#endif
257
0
        return validate_ascii_fast(str.data, str.size);
258
2.93k
    }
259
260
114
    static void reverse(const StringRef& str, std::string* dst) {
261
114
        if (is_ascii(str)) {
262
79
            int64_t begin = 0;
263
79
            int64_t end = str.size;
264
79
            int64_t result_end = dst->size() - 1;
265
266
            // auto SIMD here
267
79
            auto* __restrict l = dst->data();
268
79
            auto* __restrict r = str.data;
269
781
            for (; begin < end; ++begin, --result_end) {
270
702
                l[result_end] = r[begin];
271
702
            }
272
79
        } else {
273
35
            char* dst_data = dst->data();
274
298
            for (size_t i = 0, char_size = 0; i < str.size; i += char_size) {
275
263
                char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.data)[i]];
276
                // there exists occasion where the last character is an illegal UTF-8 one which returns
277
                // a char_size larger than the actual space, which would cause offset execeeding the buffer range
278
                // for example, consider str.size=4, i = 3, then the last char returns char_size 2, then
279
                // the str.data + offset would exceed the buffer range
280
263
                size_t offset = i + char_size;
281
263
                if (offset > str.size) {
282
1
                    offset = str.size;
283
1
                }
284
263
                std::copy(str.data + i, str.data + offset, dst_data + str.size - offset);
285
263
            }
286
35
        }
287
114
    }
288
289
37
    static void hex_encode(const unsigned char* src_str, size_t length, char* dst_str) {
290
37
        static constexpr auto hex_table = "0123456789ABCDEF";
291
37
        auto src_str_end = src_str + length;
292
293
37
#if defined(__SSE2__) || defined(__aarch64__)
294
37
        constexpr auto step = sizeof(uint64_t);
295
37
        if (src_str + step < src_str_end) {
296
21
            const auto hex_map = _mm_loadu_si128(reinterpret_cast<const __m128i*>(hex_table));
297
21
            const auto mask_map = _mm_set1_epi8(0x0F);
298
299
28
            do {
300
28
                auto data = _mm_loadu_si64(src_str);
301
28
                auto hex_loc =
302
28
                        _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map);
303
28
                _mm_storeu_si128(reinterpret_cast<__m128i*>(dst_str),
304
28
                                 _mm_shuffle_epi8(hex_map, hex_loc));
305
306
28
                src_str += step;
307
28
                dst_str += step * 2;
308
28
            } while (src_str + step < src_str_end);
309
21
        }
310
37
#endif
311
37
        char res[2];
312
        // hex(str) str length is n, result must be 2 * n length
313
212
        for (; src_str < src_str_end; src_str += 1, dst_str += 2) {
314
            // low 4 bits
315
175
            *(res + 1) = hex_table[src_str[0] & 0x0F];
316
            // high 4 bits
317
175
            *res = hex_table[(src_str[0] >> 4)];
318
175
            std::copy(res, res + 2, dst_str);
319
175
        }
320
37
    }
321
322
24
    static void to_lower(const uint8_t* src, int64_t len, uint8_t* dst) {
323
24
        if (len <= 0) {
324
3
            return;
325
3
        }
326
21
        LowerUpperImpl<'A', 'Z'> lowerUpper;
327
21
        lowerUpper.transfer(src, src + len, dst);
328
21
    }
329
330
26
    static void to_upper(const uint8_t* src, int64_t len, uint8_t* dst) {
331
26
        if (len <= 0) {
332
3
            return;
333
3
        }
334
23
        LowerUpperImpl<'a', 'z'> lowerUpper;
335
23
        lowerUpper.transfer(src, src + len, dst);
336
23
    }
337
338
185
    static inline size_t get_char_len(const char* src, size_t len, std::vector<size_t>& str_index) {
339
185
        size_t char_len = 0;
340
1.19k
        for (size_t i = 0, char_size = 0; i < len; i += char_size) {
341
1.01k
            char_size = UTF8_BYTE_LENGTH[(unsigned char)src[i]];
342
1.01k
            str_index.push_back(i);
343
1.01k
            ++char_len;
344
1.01k
        }
345
185
        return char_len;
346
185
    }
347
348
    // utf8-encoding:
349
    // - 1-byte: 0xxx_xxxx;
350
    // - 2-byte: 110x_xxxx 10xx_xxxx;
351
    // - 3-byte: 1110_xxxx 10xx_xxxx 10xx_xxxx;
352
    // - 4-byte: 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx.
353
    // Counting utf8 chars in a byte string is equivalent to counting first byte of utf chars, that
354
    // is to say, counting bytes which do not match 10xx_xxxx pattern.
355
    // All 0xxx_xxxx, 110x_xxxx, 1110_xxxx and 1111_0xxx are greater than 1011_1111 when use int8_t arithmetic,
356
    // so just count bytes greater than 1011_1111 in a byte string as the result of utf8_length.
357
    // get_char_len is used to return the UTF-8 length of a string.
358
    // The return value will never exceed len.
359
    template <typename T>
360
1.13k
    static inline T get_char_len(const char* src, T len) {
361
1.13k
        T char_len = 0;
362
1.13k
        const char* p = src;
363
1.13k
        const char* end = p + len;
364
1.13k
#if defined(__SSE2__) || defined(__aarch64__)
365
1.13k
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
1.13k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
1.13k
        const auto threshold = _mm_set1_epi8(0xBF);
369
1.17k
        for (; p < src_end_sse2; p += bytes_sse2) {
370
39
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
39
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
39
        }
373
1.13k
#endif
374
        // process remaining bytes the number of which not exceed bytes_sse2 at the
375
        // tail of string, one by one.
376
9.11k
        for (; p < end; ++p) {
377
7.98k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
7.98k
        }
379
1.13k
        return char_len;
380
1.13k
    }
_ZN5doris4simd16VStringFunctions12get_char_lenIiEET_PKcS3_
Line
Count
Source
360
560
    static inline T get_char_len(const char* src, T len) {
361
560
        T char_len = 0;
362
560
        const char* p = src;
363
560
        const char* end = p + len;
364
560
#if defined(__SSE2__) || defined(__aarch64__)
365
560
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
560
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
560
        const auto threshold = _mm_set1_epi8(0xBF);
369
599
        for (; p < src_end_sse2; p += bytes_sse2) {
370
39
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
39
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
39
        }
373
560
#endif
374
        // process remaining bytes the number of which not exceed bytes_sse2 at the
375
        // tail of string, one by one.
376
4.46k
        for (; p < end; ++p) {
377
3.90k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
3.90k
        }
379
560
        return char_len;
380
560
    }
_ZN5doris4simd16VStringFunctions12get_char_lenImEET_PKcS3_
Line
Count
Source
360
573
    static inline T get_char_len(const char* src, T len) {
361
573
        T char_len = 0;
362
573
        const char* p = src;
363
573
        const char* end = p + len;
364
573
#if defined(__SSE2__) || defined(__aarch64__)
365
573
        constexpr auto bytes_sse2 = sizeof(__m128i);
366
573
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
367
        // threshold = 1011_1111
368
573
        const auto threshold = _mm_set1_epi8(0xBF);
369
573
        for (; p < src_end_sse2; p += bytes_sse2) {
370
0
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
371
0
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
372
0
        }
373
573
#endif
374
        // process remaining bytes the number of which not exceed bytes_sse2 at the
375
        // tail of string, one by one.
376
4.65k
        for (; p < end; ++p) {
377
4.07k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
378
4.07k
        }
379
573
        return char_len;
380
573
    }
381
};
382
} // namespace simd
383
} // namespace doris