Coverage Report

Created: 2026-03-12 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/simd/vstring_function.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#ifdef __AVX2__
21
#include <immintrin.h>
22
#elif defined(__ARM_FEATURE_SVE)
23
#include <arm_sve.h>
24
#endif
25
#include <unistd.h>
26
27
#include <array>
28
#include <cstddef>
29
#include <cstdint>
30
31
#include "core/string_ref.h"
32
#include "util/simd/lower_upper_impl.h"
33
#include "util/sse_util.hpp"
34
35
namespace doris {
36
37
static constexpr std::array<uint8_t, 256> UTF8_BYTE_LENGTH = {
38
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
46
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6};
47
48
1.37M
inline uint8_t get_utf8_byte_length(uint8_t character) {
49
1.37M
    return UTF8_BYTE_LENGTH[character];
50
1.37M
}
51
52
// copy from https://github.com/lemire/fastvalidate-utf-8/blob/master/include/simdasciicheck.h
53
// The function returns true (1) if all chars passed in src are
54
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
55
0
inline bool validate_ascii_fast(const char* src, size_t len) {
56
0
    size_t i = 0;
57
0
    __m128i has_error = _mm_setzero_si128();
58
0
    if (len >= 16) {
59
0
        for (; i <= len - 16; i += 16) {
60
0
            __m128i current_bytes = _mm_loadu_si128((const __m128i*)(src + i));
61
0
            has_error = _mm_or_si128(has_error, current_bytes);
62
0
        }
63
0
    }
64
0
    int error_mask = _mm_movemask_epi8(has_error);
65
0
66
0
    char tail_has_error = 0;
67
0
    for (; i < len; i++) {
68
0
        tail_has_error |= src[i];
69
0
    }
70
0
    error_mask |= (tail_has_error & 0x80);
71
0
72
0
    return !error_mask;
73
0
}
74
75
#ifdef __AVX2__
76
#include <x86intrin.h>
77
// The function returns true (1) if all chars passed in src are
78
// 7-bit values (0x00..0x7F). Otherwise, it returns false (0).
79
9.70k
inline bool validate_ascii_fast_avx(const char* src, size_t len) {
80
9.70k
    size_t i = 0;
81
9.70k
    __m256i has_error = _mm256_setzero_si256();
82
9.70k
    if (len >= 32) {
83
128k
        for (; i <= len - 32; i += 32) {
84
126k
            __m256i current_bytes = _mm256_loadu_si256((const __m256i*)(src + i));
85
126k
            has_error = _mm256_or_si256(has_error, current_bytes);
86
126k
        }
87
1.38k
    }
88
9.70k
    int error_mask = _mm256_movemask_epi8(has_error);
89
90
9.70k
    char tail_has_error = 0;
91
89.7k
    for (; i < len; i++) {
92
80.0k
        tail_has_error |= src[i];
93
80.0k
    }
94
9.70k
    error_mask |= (tail_has_error & 0x80);
95
96
9.70k
    return !error_mask;
97
9.70k
}
98
#elif defined(__ARM_FEATURE_SVE)
99
inline bool validate_ascii_fast_sve(const char* src, size_t len) {
100
    for (size_t i = 0; i < len; i += svcntb()) {
101
        svbool_t pg = svwhilelt_b8(i, len);
102
        svuint8_t v = svld1_u8(pg, reinterpret_cast<const uint8_t*>(src + i));
103
        // Check sign bit set => byte < 0 as int8 => non-ASCII
104
        svbool_t neg = svcmplt_n_s8(pg, svreinterpret_s8(v), 0);
105
        if (svptest_any(pg, neg)) {
106
            return false;
107
        }
108
    }
109
110
    return true;
111
}
112
#endif
113
114
namespace simd {
115
116
class VStringFunctions {
117
public:
118
#if defined(__SSE2__) || defined(__aarch64__)
119
    /// n equals to 16 chars length
120
    static constexpr auto REGISTER_SIZE = sizeof(__m128i);
121
#endif
122
123
    template <bool trim_single>
124
    static inline const unsigned char* rtrim(const unsigned char* begin, const unsigned char* end,
125
84
                                             const StringRef& remove_str) {
126
84
        if (remove_str.size == 0) {
127
0
            return end;
128
0
        }
129
84
        const auto* p = end;
130
131
84
        if constexpr (trim_single) {
132
82
            const auto ch = remove_str.data[0];
133
82
#if defined(__AVX2__)
134
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
135
82
            const auto size = end - begin;
136
82
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
137
82
            const auto spaces = _mm256_set1_epi8(ch);
138
83
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
139
5
                uint32_t masks = _mm256_movemask_epi8(
140
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
141
5
                if ((~masks)) {
142
4
                    break;
143
4
                }
144
5
            }
145
82
            p += AVX2_BYTES;
146
#elif defined(__ARM_FEATURE_SVE)
147
            const auto size = static_cast<size_t>(end - begin);
148
            const size_t vl = svcntb();
149
            if (size >= vl) {
150
                const auto* const sve_begin = end - (size / vl) * vl;
151
                svbool_t pg = svptrue_b8();
152
                for (p = end - vl; p >= sve_begin; p -= vl) {
153
                    svuint8_t v = svld1_u8(pg, p);
154
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
155
                    if (svptest_any(pg, neq)) {
156
                        break;
157
                    }
158
                }
159
                p += vl;
160
            }
161
#endif
162
291
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
163
209
            }
164
82
            return p;
165
82
        }
166
167
0
        const auto remove_size = remove_str.size;
168
84
        const auto* const remove_data = remove_str.data;
169
87
        while (p - begin >= remove_size) {
170
5
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
171
3
                p -= remove_str.size;
172
3
            } else {
173
2
                break;
174
2
            }
175
5
        }
176
84
        return p;
177
84
    }
_ZN5doris4simd16VStringFunctions5rtrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
125
82
                                             const StringRef& remove_str) {
126
82
        if (remove_str.size == 0) {
127
0
            return end;
128
0
        }
129
82
        const auto* p = end;
130
131
82
        if constexpr (trim_single) {
132
82
            const auto ch = remove_str.data[0];
133
82
#if defined(__AVX2__)
134
82
            constexpr auto AVX2_BYTES = sizeof(__m256i);
135
82
            const auto size = end - begin;
136
82
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
137
82
            const auto spaces = _mm256_set1_epi8(ch);
138
83
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
139
5
                uint32_t masks = _mm256_movemask_epi8(
140
5
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
141
5
                if ((~masks)) {
142
4
                    break;
143
4
                }
144
5
            }
145
82
            p += AVX2_BYTES;
146
#elif defined(__ARM_FEATURE_SVE)
147
            const auto size = static_cast<size_t>(end - begin);
148
            const size_t vl = svcntb();
149
            if (size >= vl) {
150
                const auto* const sve_begin = end - (size / vl) * vl;
151
                svbool_t pg = svptrue_b8();
152
                for (p = end - vl; p >= sve_begin; p -= vl) {
153
                    svuint8_t v = svld1_u8(pg, p);
154
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
155
                    if (svptest_any(pg, neq)) {
156
                        break;
157
                    }
158
                }
159
                p += vl;
160
            }
161
#endif
162
291
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
163
209
            }
164
82
            return p;
165
82
        }
166
167
0
        const auto remove_size = remove_str.size;
168
82
        const auto* const remove_data = remove_str.data;
169
82
        while (p - begin >= remove_size) {
170
0
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
171
0
                p -= remove_str.size;
172
0
            } else {
173
0
                break;
174
0
            }
175
0
        }
176
82
        return p;
177
82
    }
_ZN5doris4simd16VStringFunctions5rtrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
125
2
                                             const StringRef& remove_str) {
126
2
        if (remove_str.size == 0) {
127
0
            return end;
128
0
        }
129
2
        const auto* p = end;
130
131
        if constexpr (trim_single) {
132
            const auto ch = remove_str.data[0];
133
#if defined(__AVX2__)
134
            constexpr auto AVX2_BYTES = sizeof(__m256i);
135
            const auto size = end - begin;
136
            const auto* const avx2_begin = end - size / AVX2_BYTES * AVX2_BYTES;
137
            const auto spaces = _mm256_set1_epi8(ch);
138
            for (p = end - AVX2_BYTES; p >= avx2_begin; p -= AVX2_BYTES) {
139
                uint32_t masks = _mm256_movemask_epi8(
140
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
141
                if ((~masks)) {
142
                    break;
143
                }
144
            }
145
            p += AVX2_BYTES;
146
#elif defined(__ARM_FEATURE_SVE)
147
            const auto size = static_cast<size_t>(end - begin);
148
            const size_t vl = svcntb();
149
            if (size >= vl) {
150
                const auto* const sve_begin = end - (size / vl) * vl;
151
                svbool_t pg = svptrue_b8();
152
                for (p = end - vl; p >= sve_begin; p -= vl) {
153
                    svuint8_t v = svld1_u8(pg, p);
154
                    svbool_t neq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
155
                    if (svptest_any(pg, neq)) {
156
                        break;
157
                    }
158
                }
159
                p += vl;
160
            }
161
#endif
162
            for (; (p - 1) >= begin && *(p - 1) == ch; p--) {
163
            }
164
            return p;
165
        }
166
167
2
        const auto remove_size = remove_str.size;
168
2
        const auto* const remove_data = remove_str.data;
169
5
        while (p - begin >= remove_size) {
170
5
            if (memcmp(p - remove_size, remove_data, remove_size) == 0) {
171
3
                p -= remove_str.size;
172
3
            } else {
173
2
                break;
174
2
            }
175
5
        }
176
2
        return p;
177
2
    }
178
179
    template <bool trim_single>
180
    static inline const unsigned char* ltrim(const unsigned char* begin, const unsigned char* end,
181
104
                                             const StringRef& remove_str) {
182
104
        if (remove_str.size == 0) {
183
0
            return begin;
184
0
        }
185
104
        const auto* p = begin;
186
187
104
        if constexpr (trim_single) {
188
102
            const auto ch = remove_str.data[0];
189
102
#if defined(__AVX2__)
190
102
            constexpr auto AVX2_BYTES = sizeof(__m256i);
191
102
            const auto size = end - begin;
192
102
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
193
102
            const auto spaces = _mm256_set1_epi8(ch);
194
102
            for (; p < avx2_end; p += AVX2_BYTES) {
195
6
                uint32_t masks = _mm256_movemask_epi8(
196
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
197
6
                if ((~masks)) {
198
6
                    break;
199
6
                }
200
6
            }
201
#elif defined(__ARM_FEATURE_SVE)
202
            const auto size = static_cast<size_t>(end - begin);
203
            const size_t vl = svcntb();
204
            if (size >= vl) {
205
                const auto* const sve_end = begin + (size / vl) * vl;
206
                svbool_t pg = svptrue_b8();
207
                for (; p < sve_end; p += vl) {
208
                    svuint8_t v = svld1_u8(pg, p);
209
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
210
                    if (svptest_any(pg, eq)) {
211
                        break;
212
                    }
213
                }
214
            }
215
#endif
216
371
            for (; p < end && *p == ch; ++p) {
217
269
            }
218
102
            return p;
219
102
        }
220
221
0
        const auto remove_size = remove_str.size;
222
104
        const auto* const remove_data = remove_str.data;
223
107
        while (end - p >= remove_size) {
224
5
            if (memcmp(p, remove_data, remove_size) == 0) {
225
3
                p += remove_str.size;
226
3
            } else {
227
2
                break;
228
2
            }
229
5
        }
230
104
        return p;
231
104
    }
_ZN5doris4simd16VStringFunctions5ltrimILb1EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
181
102
                                             const StringRef& remove_str) {
182
102
        if (remove_str.size == 0) {
183
0
            return begin;
184
0
        }
185
102
        const auto* p = begin;
186
187
102
        if constexpr (trim_single) {
188
102
            const auto ch = remove_str.data[0];
189
102
#if defined(__AVX2__)
190
102
            constexpr auto AVX2_BYTES = sizeof(__m256i);
191
102
            const auto size = end - begin;
192
102
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
193
102
            const auto spaces = _mm256_set1_epi8(ch);
194
102
            for (; p < avx2_end; p += AVX2_BYTES) {
195
6
                uint32_t masks = _mm256_movemask_epi8(
196
6
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
197
6
                if ((~masks)) {
198
6
                    break;
199
6
                }
200
6
            }
201
#elif defined(__ARM_FEATURE_SVE)
202
            const auto size = static_cast<size_t>(end - begin);
203
            const size_t vl = svcntb();
204
            if (size >= vl) {
205
                const auto* const sve_end = begin + (size / vl) * vl;
206
                svbool_t pg = svptrue_b8();
207
                for (; p < sve_end; p += vl) {
208
                    svuint8_t v = svld1_u8(pg, p);
209
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
210
                    if (svptest_any(pg, eq)) {
211
                        break;
212
                    }
213
                }
214
            }
215
#endif
216
371
            for (; p < end && *p == ch; ++p) {
217
269
            }
218
102
            return p;
219
102
        }
220
221
0
        const auto remove_size = remove_str.size;
222
102
        const auto* const remove_data = remove_str.data;
223
102
        while (end - p >= remove_size) {
224
0
            if (memcmp(p, remove_data, remove_size) == 0) {
225
0
                p += remove_str.size;
226
0
            } else {
227
0
                break;
228
0
            }
229
0
        }
230
102
        return p;
231
102
    }
_ZN5doris4simd16VStringFunctions5ltrimILb0EEEPKhS4_S4_RKNS_9StringRefE
Line
Count
Source
181
2
                                             const StringRef& remove_str) {
182
2
        if (remove_str.size == 0) {
183
0
            return begin;
184
0
        }
185
2
        const auto* p = begin;
186
187
        if constexpr (trim_single) {
188
            const auto ch = remove_str.data[0];
189
#if defined(__AVX2__)
190
            constexpr auto AVX2_BYTES = sizeof(__m256i);
191
            const auto size = end - begin;
192
            const auto* const avx2_end = begin + size / AVX2_BYTES * AVX2_BYTES;
193
            const auto spaces = _mm256_set1_epi8(ch);
194
            for (; p < avx2_end; p += AVX2_BYTES) {
195
                uint32_t masks = _mm256_movemask_epi8(
196
                        _mm256_cmpeq_epi8(_mm256_loadu_si256((__m256i*)p), spaces));
197
                if ((~masks)) {
198
                    break;
199
                }
200
            }
201
#elif defined(__ARM_FEATURE_SVE)
202
            const auto size = static_cast<size_t>(end - begin);
203
            const size_t vl = svcntb();
204
            if (size >= vl) {
205
                const auto* const sve_end = begin + (size / vl) * vl;
206
                svbool_t pg = svptrue_b8();
207
                for (; p < sve_end; p += vl) {
208
                    svuint8_t v = svld1_u8(pg, p);
209
                    svbool_t eq = svcmpne_n_u8(pg, v, static_cast<uint8_t>(ch));
210
                    if (svptest_any(pg, eq)) {
211
                        break;
212
                    }
213
                }
214
            }
215
#endif
216
            for (; p < end && *p == ch; ++p) {
217
            }
218
            return p;
219
        }
220
221
2
        const auto remove_size = remove_str.size;
222
2
        const auto* const remove_data = remove_str.data;
223
5
        while (end - p >= remove_size) {
224
5
            if (memcmp(p, remove_data, remove_size) == 0) {
225
3
                p += remove_str.size;
226
3
            } else {
227
2
                break;
228
2
            }
229
5
        }
230
2
        return p;
231
2
    }
232
233
    // Iterate a UTF-8 string without exceeding a given length n.
234
    // The function returns two values:
235
    // the first represents the byte length traversed, and the second represents the char length traversed.
236
    static inline std::pair<size_t, size_t> iterate_utf8_with_limit_length(const char* begin,
237
                                                                           const char* end,
238
932
                                                                           size_t n) {
239
932
        const char* p = begin;
240
932
        int char_size = 0;
241
242
932
        size_t i = 0;
243
1.67k
        for (; i < n && p < end; ++i, p += char_size) {
244
745
            char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
245
745
        }
246
247
932
        return {p - begin, i};
248
932
    }
249
250
    // Gcc will do auto simd in this function
251
    // if input empty, return true
252
9.70k
    static bool is_ascii(const StringRef& str) {
253
9.70k
#ifdef __AVX2__
254
9.70k
        return validate_ascii_fast_avx(str.data, str.size);
255
#elif defined(__ARM_FEATURE_SVE)
256
        return validate_ascii_fast_sve(str.data, str.size);
257
#endif
258
0
        return validate_ascii_fast(str.data, str.size);
259
9.70k
    }
260
261
136
    static void reverse(const StringRef& str, std::string* dst) {
262
136
        if (is_ascii(str)) {
263
101
            int64_t begin = 0;
264
101
            int64_t end = str.size;
265
101
            int64_t result_end = dst->size() - 1;
266
267
            // auto SIMD here
268
101
            auto* __restrict l = dst->data();
269
101
            auto* __restrict r = str.data;
270
887
            for (; begin < end; ++begin, --result_end) {
271
786
                l[result_end] = r[begin];
272
786
            }
273
101
        } else {
274
35
            char* dst_data = dst->data();
275
298
            for (size_t i = 0, char_size = 0; i < str.size; i += char_size) {
276
263
                char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.data)[i]];
277
                // there exists occasion where the last character is an illegal UTF-8 one which returns
278
                // a char_size larger than the actual space, which would cause offset execeeding the buffer range
279
                // for example, consider str.size=4, i = 3, then the last char returns char_size 2, then
280
                // the str.data + offset would exceed the buffer range
281
263
                size_t offset = i + char_size;
282
263
                if (offset > str.size) {
283
1
                    offset = str.size;
284
1
                }
285
263
                std::copy(str.data + i, str.data + offset, dst_data + str.size - offset);
286
263
            }
287
35
        }
288
136
    }
289
290
181
    static void hex_encode(const unsigned char* src_str, size_t length, char* dst_str) {
291
181
        static constexpr auto hex_table = "0123456789ABCDEF";
292
181
        auto src_str_end = src_str + length;
293
294
181
#if defined(__SSE2__) || defined(__aarch64__)
295
181
        constexpr auto step = sizeof(uint64_t);
296
181
        if (src_str + step < src_str_end) {
297
69
            const auto hex_map = _mm_loadu_si128(reinterpret_cast<const __m128i*>(hex_table));
298
69
            const auto mask_map = _mm_set1_epi8(0x0F);
299
300
116
            do {
301
116
                auto data = _mm_loadu_si64(src_str);
302
116
                auto hex_loc =
303
116
                        _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map);
304
116
                _mm_storeu_si128(reinterpret_cast<__m128i*>(dst_str),
305
116
                                 _mm_shuffle_epi8(hex_map, hex_loc));
306
307
116
                src_str += step;
308
116
                dst_str += step * 2;
309
116
            } while (src_str + step < src_str_end);
310
69
        }
311
181
#endif
312
181
        char res[2];
313
        // hex(str) str length is n, result must be 2 * n length
314
716
        for (; src_str < src_str_end; src_str += 1, dst_str += 2) {
315
            // low 4 bits
316
535
            *(res + 1) = hex_table[src_str[0] & 0x0F];
317
            // high 4 bits
318
535
            *res = hex_table[(src_str[0] >> 4)];
319
535
            std::copy(res, res + 2, dst_str);
320
535
        }
321
181
    }
322
323
24
    static void to_lower(const uint8_t* src, int64_t len, uint8_t* dst) {
324
24
        if (len <= 0) {
325
3
            return;
326
3
        }
327
21
        LowerUpperImpl<'A', 'Z'> lowerUpper;
328
21
        lowerUpper.transfer(src, src + len, dst);
329
21
    }
330
331
26
    static void to_upper(const uint8_t* src, int64_t len, uint8_t* dst) {
332
26
        if (len <= 0) {
333
3
            return;
334
3
        }
335
23
        LowerUpperImpl<'a', 'z'> lowerUpper;
336
23
        lowerUpper.transfer(src, src + len, dst);
337
23
    }
338
339
893
    static inline size_t get_char_len(const char* src, size_t len, std::vector<size_t>& str_index) {
340
893
        size_t char_len = 0;
341
4.80k
        for (size_t i = 0, char_size = 0; i < len; i += char_size) {
342
3.91k
            char_size = UTF8_BYTE_LENGTH[(unsigned char)src[i]];
343
3.91k
            str_index.push_back(i);
344
3.91k
            ++char_len;
345
3.91k
        }
346
893
        return char_len;
347
893
    }
348
349
    // utf8-encoding:
350
    // - 1-byte: 0xxx_xxxx;
351
    // - 2-byte: 110x_xxxx 10xx_xxxx;
352
    // - 3-byte: 1110_xxxx 10xx_xxxx 10xx_xxxx;
353
    // - 4-byte: 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx.
354
    // Counting utf8 chars in a byte string is equivalent to counting first byte of utf chars, that
355
    // is to say, counting bytes which do not match 10xx_xxxx pattern.
356
    // All 0xxx_xxxx, 110x_xxxx, 1110_xxxx and 1111_0xxx are greater than 1011_1111 when use int8_t arithmetic,
357
    // so just count bytes greater than 1011_1111 in a byte string as the result of utf8_length.
358
    // get_char_len is used to return the UTF-8 length of a string.
359
    // The return value will never exceed len.
360
    template <typename T>
361
8.68k
    static inline T get_char_len(const char* src, T len) {
362
8.68k
        T char_len = 0;
363
8.68k
        const char* p = src;
364
8.68k
        const char* end = p + len;
365
8.68k
#if defined(__SSE2__) || defined(__aarch64__)
366
8.68k
        constexpr auto bytes_sse2 = sizeof(__m128i);
367
8.68k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
368
        // threshold = 1011_1111
369
8.68k
        const auto threshold = _mm_set1_epi8(0xBF);
370
55.8k
        for (; p < src_end_sse2; p += bytes_sse2) {
371
47.1k
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
372
47.1k
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
373
47.1k
        }
374
8.68k
#endif
375
        // process remaining bytes the number of which not exceed bytes_sse2 at the
376
        // tail of string, one by one.
377
66.9k
        for (; p < end; ++p) {
378
58.2k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
379
58.2k
        }
380
8.68k
        return char_len;
381
8.68k
    }
_ZN5doris4simd16VStringFunctions12get_char_lenIiEET_PKcS3_
Line
Count
Source
361
8.55k
    static inline T get_char_len(const char* src, T len) {
362
8.55k
        T char_len = 0;
363
8.55k
        const char* p = src;
364
8.55k
        const char* end = p + len;
365
8.55k
#if defined(__SSE2__) || defined(__aarch64__)
366
8.55k
        constexpr auto bytes_sse2 = sizeof(__m128i);
367
8.55k
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
368
        // threshold = 1011_1111
369
8.55k
        const auto threshold = _mm_set1_epi8(0xBF);
370
55.6k
        for (; p < src_end_sse2; p += bytes_sse2) {
371
47.1k
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
372
47.1k
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
373
47.1k
        }
374
8.55k
#endif
375
        // process remaining bytes the number of which not exceed bytes_sse2 at the
376
        // tail of string, one by one.
377
66.1k
        for (; p < end; ++p) {
378
57.6k
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
379
57.6k
        }
380
8.55k
        return char_len;
381
8.55k
    }
_ZN5doris4simd16VStringFunctions12get_char_lenImEET_PKcS3_
Line
Count
Source
361
131
    static inline T get_char_len(const char* src, T len) {
362
131
        T char_len = 0;
363
131
        const char* p = src;
364
131
        const char* end = p + len;
365
131
#if defined(__SSE2__) || defined(__aarch64__)
366
131
        constexpr auto bytes_sse2 = sizeof(__m128i);
367
131
        const auto src_end_sse2 = p + (len & ~(bytes_sse2 - 1));
368
        // threshold = 1011_1111
369
131
        const auto threshold = _mm_set1_epi8(0xBF);
370
131
        for (; p < src_end_sse2; p += bytes_sse2) {
371
0
            char_len += __builtin_popcount(_mm_movemask_epi8(_mm_cmpgt_epi8(
372
0
                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)), threshold)));
373
0
        }
374
131
#endif
375
        // process remaining bytes the number of which not exceed bytes_sse2 at the
376
        // tail of string, one by one.
377
782
        for (; p < end; ++p) {
378
651
            char_len += static_cast<int8_t>(*p) > static_cast<int8_t>(0xBF);
379
651
        }
380
131
        return char_len;
381
131
    }
382
};
383
} // namespace simd
384
} // namespace doris