Coverage Report

Created: 2026-03-16 13:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/url/find_symbols.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/find_symbols.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <array>
24
#include <cstdint>
25
#include <string>
26
27
#if defined(__SSE4_2__)
28
#include <nmmintrin.h>
29
#endif
30
31
/** find_first_symbols<c1, c2, ...>(begin, end):
32
  *
33
  * Allow to search for next character from the set of 'symbols...' in a string.
34
  * It is similar to 'strpbrk', 'strcspn' (and 'strchr', 'memchr' in the case of one symbol and '\0'),
35
  * but with the following differences:
36
  * - works with any memory ranges, including containing zero bytes;
37
  * - doesn't require terminating zero byte: end of memory range is passed explicitly;
38
  * - if not found, returns pointer to end instead of nullptr;
39
  * - maximum number of symbols to search is 16.
40
  *
41
  * Uses SSE 2 in case of small number of symbols for search and SSE 4.2 in the case of large number of symbols,
42
  *  that have more than 2x performance advantage over trivial loop
43
  *  in the case of parsing tab-separated dump with (probably escaped) string fields.
44
  * In the case of parsing tab separated dump with short strings, there is no performance degradation over trivial loop.
45
  *
46
  * Note: the optimal threshold to choose between SSE 2 and SSE 4.2 may depend on CPU model.
47
  *
48
  * find_last_symbols_or_null<c1, c2, ...>(begin, end):
49
  *
50
  * Allow to search for the last matching character in a string.
51
  * If no such characters, returns nullptr.
52
  */
53
54
struct SearchSymbols {
55
    static constexpr auto BUFFER_SIZE = 16;
56
57
    SearchSymbols() = default;
58
59
0
    explicit SearchSymbols(std::string in) : str(std::move(in)) {
60
0
#if defined(__SSE4_2__)
61
0
        if (str.size() > BUFFER_SIZE) {
62
0
            throw std::runtime_error("SearchSymbols can contain at most " +
63
0
                                     std::to_string(BUFFER_SIZE) + " symbols and " +
64
0
                                     std::to_string(str.size()) + " was provided\n");
65
0
        }
66
0
67
0
        char tmp_safety_buffer[BUFFER_SIZE] = {0};
68
0
69
0
        memcpy(tmp_safety_buffer, str.data(), str.size());
70
0
71
0
        simd_vector = _mm_loadu_si128(reinterpret_cast<const __m128i*>(tmp_safety_buffer));
72
0
#endif
73
0
    }
74
75
#if defined(__SSE4_2__)
76
    __m128i simd_vector;
77
#endif
78
    std::string str;
79
};
80
81
namespace detail {
82
template <char... chars>
83
322
constexpr bool is_in(char x) {
84
322
    return ((x == chars) || ...);
85
322
} // NOLINT(misc-redundant-expression)
_ZN6detail5is_inIJLc46EEEEbc
Line
Count
Source
83
276
constexpr bool is_in(char x) {
84
276
    return ((x == chars) || ...);
85
276
} // NOLINT(misc-redundant-expression)
_ZN6detail5is_inIJLc47EEEEbc
Line
Count
Source
83
46
constexpr bool is_in(char x) {
84
46
    return ((x == chars) || ...);
85
46
} // NOLINT(misc-redundant-expression)
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
Unexecuted instantiation: _ZN6detail5is_inIJLc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEEbc
86
87
0
static bool is_in(char c, const char* symbols, size_t num_chars) {
88
0
    for (size_t i = 0U; i < num_chars; ++i) {
89
0
        if (c == symbols[i]) {
90
0
            return true;
91
0
        }
92
0
    }
93
0
94
0
    return false;
95
0
}
96
97
#if defined(__SSE2__)
98
template <char s0>
99
19
inline __m128i mm_is_in(__m128i bytes) {
100
19
    __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0));
101
19
    return eq0;
102
19
}
_ZN6detail8mm_is_inILc46EEEDv2_xS1_
Line
Count
Source
99
19
inline __m128i mm_is_in(__m128i bytes) {
100
19
    __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0));
101
19
    return eq0;
102
19
}
Unexecuted instantiation: _ZN6detail8mm_is_inILc47EEEDv2_xS1_
103
104
template <char s0, char s1, char... tail>
105
inline __m128i mm_is_in(__m128i bytes) {
106
    __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0));
107
    __m128i eq = mm_is_in<s1, tail...>(bytes);
108
    return _mm_or_si128(eq0, eq);
109
}
110
111
0
inline __m128i mm_is_in(__m128i bytes, const char* symbols, size_t num_chars) {
112
0
    __m128i accumulator = _mm_setzero_si128();
113
0
    for (size_t i = 0; i < num_chars; ++i) {
114
0
        __m128i eq = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(symbols[i]));
115
0
        accumulator = _mm_or_si128(accumulator, eq);
116
0
    }
117
0
118
0
    return accumulator;
119
0
}
120
121
using AlignedArray = std::array<std::aligned_storage_t<sizeof(__m128i), alignof(__m128i)>, 16>;
122
0
inline AlignedArray mm_is_in_prepare(const char* symbols, size_t num_chars) {
123
0
    AlignedArray result {};
124
0
125
0
    for (size_t i = 0; i < num_chars; ++i) {
126
0
        reinterpret_cast<__m128i&>(result[i]) = _mm_set1_epi8(symbols[i]);
127
0
    }
128
0
129
0
    return result;
130
0
}
131
132
0
inline __m128i mm_is_in_execute(__m128i bytes, const AlignedArray& needles) {
133
0
    __m128i accumulator = _mm_setzero_si128();
134
0
135
0
    for (const auto& needle : needles) {
136
0
        __m128i eq = _mm_cmpeq_epi8(bytes, reinterpret_cast<const __m128i&>(needle));
137
0
        accumulator = _mm_or_si128(accumulator, eq);
138
0
    }
139
0
140
0
    return accumulator;
141
0
}
142
#endif
143
144
template <bool positive>
145
322
constexpr bool maybe_negate(bool x) {
146
322
    return x == positive;
147
322
}
_ZN6detail12maybe_negateILb1EEEbb
Line
Count
Source
145
322
constexpr bool maybe_negate(bool x) {
146
322
    return x == positive;
147
322
}
Unexecuted instantiation: _ZN6detail12maybe_negateILb0EEEbb
148
149
template <bool positive>
150
19
constexpr uint16_t maybe_negate(uint16_t x) {
151
    if constexpr (positive)
152
19
        return x;
153
    else
154
        return ~x;
155
19
}
_ZN6detail12maybe_negateILb1EEEtt
Line
Count
Source
150
19
constexpr uint16_t maybe_negate(uint16_t x) {
151
    if constexpr (positive)
152
19
        return x;
153
    else
154
        return ~x;
155
19
}
Unexecuted instantiation: _ZN6detail12maybe_negateILb0EEEtt
156
157
enum class ReturnMode : uint8_t {
158
    End,
159
    Nullptr,
160
};
161
162
template <bool positive, ReturnMode return_mode, char... symbols>
163
80
inline const char* find_first_symbols_sse2(const char* const begin, const char* const end) {
164
80
    const char* pos = begin;
165
166
80
#if defined(__SSE2__)
167
80
    for (; pos + 15 < end; pos += 16) {
168
12
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
169
170
12
        __m128i eq = mm_is_in<symbols...>(bytes);
171
172
12
        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
173
12
        if (bit_mask) return pos + __builtin_ctz(bit_mask);
174
12
    }
175
68
#endif
176
177
340
    for (; pos < end; ++pos)
178
302
        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
179
180
38
    return return_mode == ReturnMode::End ? end : nullptr;
181
68
}
_ZN6detail23find_first_symbols_sse2ILb1ELNS_10ReturnModeE0EJLc46EEEEPKcS3_S3_
Line
Count
Source
163
66
inline const char* find_first_symbols_sse2(const char* const begin, const char* const end) {
164
66
    const char* pos = begin;
165
166
66
#if defined(__SSE2__)
167
66
    for (; pos + 15 < end; pos += 16) {
168
12
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
169
170
12
        __m128i eq = mm_is_in<symbols...>(bytes);
171
172
12
        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
173
12
        if (bit_mask) return pos + __builtin_ctz(bit_mask);
174
12
    }
175
54
#endif
176
177
280
    for (; pos < end; ++pos)
178
256
        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
179
180
24
    return return_mode == ReturnMode::End ? end : nullptr;
181
54
}
_ZN6detail23find_first_symbols_sse2ILb1ELNS_10ReturnModeE0EJLc47EEEEPKcS3_S3_
Line
Count
Source
163
14
inline const char* find_first_symbols_sse2(const char* const begin, const char* const end) {
164
14
    const char* pos = begin;
165
166
14
#if defined(__SSE2__)
167
14
    for (; pos + 15 < end; pos += 16) {
168
0
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
169
170
0
        __m128i eq = mm_is_in<symbols...>(bytes);
171
172
0
        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
173
0
        if (bit_mask) return pos + __builtin_ctz(bit_mask);
174
0
    }
175
14
#endif
176
177
60
    for (; pos < end; ++pos)
178
46
        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
179
180
14
    return return_mode == ReturnMode::End ? end : nullptr;
181
14
}
182
183
template <bool positive, ReturnMode return_mode>
184
inline const char* find_first_symbols_sse2(const char* const begin, const char* const end,
185
0
                                           const char* symbols, size_t num_chars) {
186
0
    const char* pos = begin;
187
0
188
0
#if defined(__SSE2__)
189
0
    const auto needles = mm_is_in_prepare(symbols, num_chars);
190
0
    for (; pos + 15 < end; pos += 16) {
191
0
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
192
0
193
0
        __m128i eq = mm_is_in_execute(bytes, needles);
194
0
195
0
        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
196
0
        if (bit_mask) return pos + __builtin_ctz(bit_mask);
197
0
    }
198
0
#endif
199
0
200
0
    for (; pos < end; ++pos)
201
0
        if (maybe_negate<positive>(is_in(*pos, symbols, num_chars))) return pos;
202
0
203
0
    return return_mode == ReturnMode::End ? end : nullptr;
204
0
}
Unexecuted instantiation: _ZN6detail23find_first_symbols_sse2ILb1ELNS_10ReturnModeE0EEEPKcS3_S3_S3_m
Unexecuted instantiation: _ZN6detail23find_first_symbols_sse2ILb0ELNS_10ReturnModeE0EEEPKcS3_S3_S3_m
Unexecuted instantiation: _ZN6detail23find_first_symbols_sse2ILb1ELNS_10ReturnModeE1EEEPKcS3_S3_S3_m
Unexecuted instantiation: _ZN6detail23find_first_symbols_sse2ILb0ELNS_10ReturnModeE1EEEPKcS3_S3_S3_m
205
206
template <bool positive, ReturnMode return_mode, char... symbols>
207
12
inline const char* find_last_symbols_sse2(const char* const begin, const char* const end) {
208
12
    const char* pos = end;
209
210
12
#if defined(__SSE2__)
211
12
    for (; pos - 16 >= begin;
212
12
         pos -=
213
0
         16) /// Assuming the pointer cannot overflow. Assuming we can compare these pointers.
214
7
    {
215
7
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos - 16));
216
217
7
        __m128i eq = mm_is_in<symbols...>(bytes);
218
219
7
        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
220
7
        if (bit_mask)
221
7
            return pos - 1 -
222
7
                   (__builtin_clz(bit_mask) -
223
7
                    16); /// because __builtin_clz works with mask as uint32.
224
7
    }
225
5
#endif
226
227
5
    --pos;
228
20
    for (; pos >= begin; --pos)
229
20
        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
230
231
0
    return return_mode == ReturnMode::End ? end : nullptr;
232
5
}
233
234
template <bool positive, ReturnMode return_mode, size_t num_chars, char c01, char c02 = 0,
235
          char c03 = 0, char c04 = 0, char c05 = 0, char c06 = 0, char c07 = 0, char c08 = 0,
236
          char c09 = 0, char c10 = 0, char c11 = 0, char c12 = 0, char c13 = 0, char c14 = 0,
237
          char c15 = 0, char c16 = 0>
238
0
inline const char* find_first_symbols_sse42(const char* const begin, const char* const end) {
239
0
    const char* pos = begin;
240
0
241
0
#if defined(__SSE4_2__)
242
0
    constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
243
0
244
0
    __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13,
245
0
                                c14, c15, c16);
246
0
247
0
    for (; pos + 15 < end; pos += 16) {
248
0
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
249
0
250
0
        if constexpr (positive) {
251
0
            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
252
0
                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
253
0
        } else {
254
0
            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
255
0
                return pos +
256
0
                       _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
257
0
        }
258
0
    }
259
0
#endif
260
0
261
0
    for (; pos < end; ++pos)
262
0
        if ((num_chars == 1 && maybe_negate<positive>(is_in<c01>(*pos))) ||
263
0
            (num_chars == 2 && maybe_negate<positive>(is_in<c01, c02>(*pos))) ||
264
0
            (num_chars == 3 && maybe_negate<positive>(is_in<c01, c02, c03>(*pos))) ||
265
0
            (num_chars == 4 && maybe_negate<positive>(is_in<c01, c02, c03, c04>(*pos))) ||
266
0
            (num_chars == 5 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05>(*pos))) ||
267
0
            (num_chars == 6 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06>(*pos))) ||
268
0
            (num_chars == 7 &&
269
0
             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07>(*pos))) ||
270
0
            (num_chars == 8 &&
271
0
             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08>(*pos))) ||
272
0
            (num_chars == 9 &&
273
0
             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09>(*pos))) ||
274
0
            (num_chars == 10 &&
275
0
             maybe_negate<positive>(
276
0
                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10>(*pos))) ||
277
0
            (num_chars == 11 &&
278
0
             maybe_negate<positive>(
279
0
                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11>(*pos))) ||
280
0
            (num_chars == 12 &&
281
0
             maybe_negate<positive>(
282
0
                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12>(*pos))) ||
283
0
            (num_chars == 13 &&
284
0
             maybe_negate<positive>(
285
0
                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13>(
286
0
                             *pos))) ||
287
0
            (num_chars == 14 &&
288
0
             maybe_negate<positive>(
289
0
                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14>(
290
0
                             *pos))) ||
291
0
            (num_chars == 15 &&
292
0
             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11,
293
0
                                          c12, c13, c14, c15>(*pos))) ||
294
0
            (num_chars == 16 &&
295
0
             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11,
296
0
                                          c12, c13, c14, c15, c16>(*pos))))
297
0
            return pos;
298
0
    return return_mode == ReturnMode::End ? end : nullptr;
299
0
}
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb1ELNS_10ReturnModeE0ELm1ELc46ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEPKcS3_S3_
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb1ELNS_10ReturnModeE0ELm1ELc47ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0ELc0EEEPKcS3_S3_
300
301
template <bool positive, ReturnMode return_mode>
302
inline const char* find_first_symbols_sse42(const char* const begin, const char* const end,
303
0
                                            const SearchSymbols& symbols) {
304
0
    const char* pos = begin;
305
0
306
0
    const auto num_chars = symbols.str.size();
307
0
308
0
#if defined(__SSE4_2__)
309
0
    constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
310
0
311
0
    const __m128i set = symbols.simd_vector;
312
0
313
0
    for (; pos + 15 < end; pos += 16) {
314
0
        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
315
0
316
0
        if constexpr (positive) {
317
0
            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
318
0
                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
319
0
        } else {
320
0
            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
321
0
                return pos +
322
0
                       _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
323
0
        }
324
0
    }
325
0
#endif
326
0
327
0
    for (; pos < end; ++pos)
328
0
        if (maybe_negate<positive>(is_in(*pos, symbols.str.data(), num_chars))) return pos;
329
0
330
0
    return return_mode == ReturnMode::End ? end : nullptr;
331
0
}
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb1ELNS_10ReturnModeE0EEEPKcS3_S3_RK13SearchSymbols
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb0ELNS_10ReturnModeE0EEEPKcS3_S3_RK13SearchSymbols
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb1ELNS_10ReturnModeE1EEEPKcS3_S3_RK13SearchSymbols
Unexecuted instantiation: _ZN6detail24find_first_symbols_sse42ILb0ELNS_10ReturnModeE1EEEPKcS3_S3_RK13SearchSymbols
332
333
/// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do.
334
335
template <bool positive, ReturnMode return_mode, char... symbols>
336
inline const char* find_first_symbols_dispatch(const char* begin, const char* end)
337
    requires(0 <= sizeof...(symbols) && sizeof...(symbols) <= 16)
338
80
{
339
80
#if defined(__SSE4_2__)
340
80
    if (sizeof...(symbols) >= 5)
341
0
        return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(
342
0
                begin, end);
343
80
    else
344
80
#endif
345
80
        return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
346
80
}
_ZN6detail27find_first_symbols_dispatchILb1ELNS_10ReturnModeE0EJLc46EEEEPKcS3_S3_QaaleLi0EsZT1_lesZT1_Li16E
Line
Count
Source
338
66
{
339
66
#if defined(__SSE4_2__)
340
66
    if (sizeof...(symbols) >= 5)
341
0
        return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(
342
0
                begin, end);
343
66
    else
344
66
#endif
345
66
        return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
346
66
}
_ZN6detail27find_first_symbols_dispatchILb1ELNS_10ReturnModeE0EJLc47EEEEPKcS3_S3_QaaleLi0EsZT1_lesZT1_Li16E
Line
Count
Source
338
14
{
339
14
#if defined(__SSE4_2__)
340
14
    if (sizeof...(symbols) >= 5)
341
0
        return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(
342
0
                begin, end);
343
14
    else
344
14
#endif
345
14
        return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
346
14
}
347
348
template <bool positive, ReturnMode return_mode>
349
inline const char* find_first_symbols_dispatch(const std::string_view haystack,
350
0
                                               const SearchSymbols& symbols) {
351
0
#if defined(__SSE4_2__)
352
0
    if (symbols.str.size() >= 5)
353
0
        return find_first_symbols_sse42<positive, return_mode>(haystack.begin(), haystack.end(),
354
0
                                                               symbols);
355
0
    else
356
0
#endif
357
0
        return find_first_symbols_sse2<positive, return_mode>(
358
0
                haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
359
0
}
Unexecuted instantiation: _ZN6detail27find_first_symbols_dispatchILb1ELNS_10ReturnModeE0EEEPKcSt17basic_string_viewIcSt11char_traitsIcEERK13SearchSymbols
Unexecuted instantiation: _ZN6detail27find_first_symbols_dispatchILb0ELNS_10ReturnModeE0EEEPKcSt17basic_string_viewIcSt11char_traitsIcEERK13SearchSymbols
Unexecuted instantiation: _ZN6detail27find_first_symbols_dispatchILb1ELNS_10ReturnModeE1EEEPKcSt17basic_string_viewIcSt11char_traitsIcEERK13SearchSymbols
Unexecuted instantiation: _ZN6detail27find_first_symbols_dispatchILb0ELNS_10ReturnModeE1EEEPKcSt17basic_string_viewIcSt11char_traitsIcEERK13SearchSymbols
360
361
} // namespace detail
362
363
template <char... symbols>
364
80
inline const char* find_first_symbols(const char* begin, const char* end) {
365
80
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::End, symbols...>(begin,
366
80
                                                                                              end);
367
80
}
_Z18find_first_symbolsIJLc46EEEPKcS1_S1_
Line
Count
Source
364
66
inline const char* find_first_symbols(const char* begin, const char* end) {
365
66
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::End, symbols...>(begin,
366
66
                                                                                              end);
367
66
}
_Z18find_first_symbolsIJLc47EEEPKcS1_S1_
Line
Count
Source
364
14
inline const char* find_first_symbols(const char* begin, const char* end) {
365
14
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::End, symbols...>(begin,
366
14
                                                                                              end);
367
14
}
368
369
/// Returning non const result for non const arguments.
370
/// It is convenient when you are using this function to iterate through non-const buffer.
371
template <char... symbols>
372
inline char* find_first_symbols(char* begin, char* end) {
373
    return const_cast<char*>(
374
            ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::End, symbols...>(
375
                    begin, end));
376
}
377
378
0
inline const char* find_first_symbols(std::string_view haystack, const SearchSymbols& symbols) {
379
0
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::End>(haystack,
380
0
                                                                                  symbols);
381
0
}
382
383
template <char... symbols>
384
inline const char* find_first_not_symbols(const char* begin, const char* end) {
385
    return ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::End, symbols...>(
386
            begin, end);
387
}
388
389
template <char... symbols>
390
inline char* find_first_not_symbols(char* begin, char* end) {
391
    return const_cast<char*>(
392
            ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::End, symbols...>(
393
                    begin, end));
394
}
395
396
0
inline const char* find_first_not_symbols(std::string_view haystack, const SearchSymbols& symbols) {
397
0
    return ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::End>(haystack,
398
0
                                                                                   symbols);
399
0
}
400
401
template <char... symbols>
402
inline const char* find_first_symbols_or_null(const char* begin, const char* end) {
403
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::Nullptr, symbols...>(
404
            begin, end);
405
}
406
407
template <char... symbols>
408
inline char* find_first_symbols_or_null(char* begin, char* end) {
409
    return const_cast<char*>(
410
            ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::Nullptr, symbols...>(
411
                    begin, end));
412
}
413
414
inline const char* find_first_symbols_or_null(std::string_view haystack,
415
0
                                              const SearchSymbols& symbols) {
416
0
    return ::detail::find_first_symbols_dispatch<true, ::detail::ReturnMode::Nullptr>(haystack,
417
0
                                                                                      symbols);
418
0
}
419
420
template <char... symbols>
421
inline const char* find_first_not_symbols_or_null(const char* begin, const char* end) {
422
    return ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::Nullptr, symbols...>(
423
            begin, end);
424
}
425
426
template <char... symbols>
427
inline char* find_first_not_symbols_or_null(char* begin, char* end) {
428
    return const_cast<char*>(
429
            ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::Nullptr, symbols...>(
430
                    begin, end));
431
}
432
433
inline const char* find_first_not_symbols_or_null(std::string_view haystack,
434
0
                                                  const SearchSymbols& symbols) {
435
0
    return ::detail::find_first_symbols_dispatch<false, ::detail::ReturnMode::Nullptr>(haystack,
436
0
                                                                                       symbols);
437
0
}
438
439
template <char... symbols>
440
12
inline const char* find_last_symbols_or_null(const char* begin, const char* end) {
441
12
    return ::detail::find_last_symbols_sse2<true, ::detail::ReturnMode::Nullptr, symbols...>(begin,
442
12
                                                                                             end);
443
12
}
444
445
template <char... symbols>
446
inline char* find_last_symbols_or_null(char* begin, char* end) {
447
    return const_cast<char*>(
448
            ::detail::find_last_symbols_sse2<true, ::detail::ReturnMode::Nullptr, symbols...>(begin,
449
                                                                                              end));
450
}
451
452
template <char... symbols>
453
inline const char* find_last_not_symbols_or_null(const char* begin, const char* end) {
454
    return ::detail::find_last_symbols_sse2<false, ::detail::ReturnMode::Nullptr, symbols...>(begin,
455
                                                                                              end);
456
}
457
458
template <char... symbols>
459
inline char* find_last_not_symbols_or_null(char* begin, char* end) {
460
    return const_cast<char*>(
461
            ::detail::find_last_symbols_sse2<false, ::detail::ReturnMode::Nullptr, symbols...>(
462
                    begin, end));
463
}
464
465
/// Slightly resembles boost::split. The drawback of boost::split is that it fires a false positive in clang static analyzer.
466
/// See https://github.com/boostorg/algorithm/issues/63
467
/// And https://bugs.llvm.org/show_bug.cgi?id=41141
468
template <char... symbols, typename To>
469
inline To& splitInto(To& to, std::string_view what, bool token_compress = false) {
470
    const char* pos = what.data();
471
    const char* end = pos + what.size();
472
    while (pos < end) {
473
        const char* delimiter_or_end = find_first_symbols<symbols...>(pos, end);
474
475
        if (!token_compress || pos < delimiter_or_end) to.emplace_back(pos, delimiter_or_end - pos);
476
477
        if (delimiter_or_end < end)
478
            pos = delimiter_or_end + 1;
479
        else
480
            pos = delimiter_or_end;
481
    }
482
483
    return to;
484
}