Coverage Report

Created: 2024-11-21 13:15

/var/local/thirdparty/installed/include/roaring/bitset_util.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef BITSET_UTIL_H
2
#define BITSET_UTIL_H
3
4
#include <stdint.h>
5
6
#include <roaring/portability.h>
7
#include <roaring/utilasm.h>
8
9
#if CROARING_IS_X64
10
#ifndef CROARING_COMPILER_SUPPORTS_AVX512
11
#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
12
#endif // CROARING_COMPILER_SUPPORTS_AVX512
13
#endif
14
15
#ifdef __cplusplus
16
extern "C" { namespace roaring { namespace internal {
17
#endif
18
19
/*
 * Set all bits in indexes [start,end) to true.
 *
 * words: bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start: index of the first bit to set (inclusive).
 * end:   index one past the last bit to set (exclusive).
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_set_range(uint64_t *words, uint32_t start,
                                    uint32_t end) {
    if (start >= end) return;
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Bits at or above `start` inside the first word. */
    const uint64_t head_mask = all_ones << (start % 64);
    /* Bits below `end` inside the last word. (0 - end) % 64 is the number of
     * unused high bits; it is 0 when `end` sits on a word boundary. */
    const uint64_t tail_mask = all_ones >> ((UINT32_C(0) - end) % 64);

    if (firstword == endword) {
        /* Whole range is inside one word: intersect both masks. */
        words[firstword] |= head_mask & tail_mask;
        return;
    }
    words[firstword] |= head_mask;
    for (uint32_t w = firstword + 1; w < endword; w++) {
        words[w] = all_ones;
    }
    words[endword] |= tail_mask;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL16bitset_set_rangeEPmjj
38
39
40
/*
 * Count the set bits of the bitset in [start,start+lenminusone].
 *
 * words:       bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start:       index of the first bit inspected.
 * lenminusone: length of the inspected range minus one (both ends inclusive).
 *
 * Returns the sum of roaring_hamming() over the masked words.
 */
static inline int bitset_lenrange_cardinality(const uint64_t *words,
                                              uint32_t start,
                                              uint32_t lenminusone) {
    const uint64_t ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;
    const uint32_t offset = start % 64;

    if (head == tail) {
        /* Keep lenminusone + 1 low bits, then slide them up to `start`. */
        const uint64_t window = (ones >> ((63 - lenminusone) % 64)) << offset;
        return roaring_hamming(words[head] & window);
    }

    /* First word: everything from `start` upwards. */
    int count = roaring_hamming(words[head] & (ones << offset));

    /* Words strictly between the first and the last are counted whole. */
    uint32_t w = head + 1;
    while (w < tail) {
        count += roaring_hamming(words[w]);
        w++;
    }

    /* Last word: drop the high bits that lie past start + lenminusone. */
    const uint64_t tail_mask =
        ones >> ((UINT32_C(0) - start - lenminusone - 1) % 64);
    count += roaring_hamming(words[tail] & tail_mask);
    return count;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL27bitset_lenrange_cardinalityEPKmjj
62
63
/*
 * Check whether no bit of the bitset is set in [start,start+lenminusone].
 *
 * words:       bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start:       index of the first bit inspected.
 * lenminusone: length of the inspected range minus one (both ends inclusive).
 *
 * Returns true when every inspected bit is zero, false otherwise.
 */
static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start,
                                         uint32_t lenminusone) {
    const uint64_t ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;
    const uint32_t offset = start % 64;

    if (head == tail) {
        /* Keep lenminusone + 1 low bits, then slide them up to `start`. */
        const uint64_t window = (ones >> ((63 - lenminusone) % 64)) << offset;
        return (words[head] & window) == 0;
    }

    /* First word: everything from `start` upwards. */
    if ((words[head] & (ones << offset)) != 0) {
        return false;
    }

    /* Words strictly between the first and the last must be entirely zero. */
    for (uint32_t w = head + 1; w < tail; w++) {
        if (words[w] != 0) {
            return false;
        }
    }

    /* Last word: ignore the high bits that lie past start + lenminusone. */
    const uint64_t tail_mask =
        ones >> ((UINT32_C(0) - start - lenminusone - 1) % 64);
    return (words[tail] & tail_mask) == 0;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL21bitset_lenrange_emptyEPKmjj
87
88
89
/*
 * Set all bits in indexes [start,start+lenminusone] to true.
 *
 * words:       bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start:       index of the first bit to set.
 * lenminusone: length of the range minus one (both ends inclusive).
 */
static inline void bitset_set_lenrange(uint64_t *words, uint32_t start,
                                       uint32_t lenminusone) {
    const uint64_t ones = ~UINT64_C(0);
    const uint32_t head = start / 64;
    const uint32_t tail = (start + lenminusone) / 64;
    const uint32_t offset = start % 64;

    if (head == tail) {
        /* Keep lenminusone + 1 low bits, then slide them up to `start`. */
        words[head] |= (ones >> ((63 - lenminusone) % 64)) << offset;
        return;
    }

    /* The pairwise fill below may overwrite the last word, so remember its
     * previous content and merge it back in at the end. */
    const uint64_t saved_tail = words[tail];
    words[head] |= ones << offset;

    uint32_t w = head + 1;
    while (w < tail) {
        /* Two words per step; w + 1 never exceeds `tail`. */
        words[w + 1] = ones;
        words[w] = ones;
        w += 2;
    }

    /* Last word: old bits plus the low bits up to start + lenminusone. */
    const uint64_t tail_mask =
        ones >> ((UINT32_C(0) - start - lenminusone - 1) % 64);
    words[tail] = saved_tail | tail_mask;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL19bitset_set_lenrangeEPmjj
108
109
/*
 * Flip all the bits in indexes [start,end).
 *
 * words: bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start: index of the first bit to flip (inclusive).
 * end:   index one past the last bit to flip (exclusive).
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_flip_range(uint64_t *words, uint32_t start,
                                     uint32_t end) {
    if (start >= end) return;
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Bits at or above `start` inside the first word. */
    const uint64_t head_mask = all_ones << (start % 64);
    /* Bits below `end` inside the last word. (0 - end) % 64 is the number of
     * unused high bits; it is 0 when `end` sits on a word boundary. */
    const uint64_t tail_mask = all_ones >> ((UINT32_C(0) - end) % 64);

    if (firstword == endword) {
        /* Whole range is inside one word: intersect both masks. */
        words[firstword] ^= head_mask & tail_mask;
        return;
    }
    words[firstword] ^= head_mask;
    for (uint32_t w = firstword + 1; w < endword; w++) {
        words[w] = ~words[w];
    }
    words[endword] ^= tail_mask;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL17bitset_flip_rangeEPmjj
123
124
/*
 * Set all bits in indexes [start,end) to false.
 *
 * words: bitset storage; bit k lives in words[k / 64] at position k % 64.
 * start: index of the first bit to clear (inclusive).
 * end:   index one past the last bit to clear (exclusive).
 *
 * An empty or inverted range (start >= end) leaves the bitset untouched.
 */
static inline void bitset_reset_range(uint64_t *words, uint32_t start,
                                      uint32_t end) {
    if (start >= end) return;
    const uint64_t all_ones = ~UINT64_C(0);
    const uint32_t firstword = start / 64;
    const uint32_t endword = (end - 1) / 64;
    /* Bits at or above `start` inside the first word. */
    const uint64_t head_mask = all_ones << (start % 64);
    /* Bits below `end` inside the last word. (0 - end) % 64 is the number of
     * unused high bits; it is 0 when `end` sits on a word boundary. */
    const uint64_t tail_mask = all_ones >> ((UINT32_C(0) - end) % 64);

    if (firstword == endword) {
        /* Whole range is inside one word: intersect both masks. */
        words[firstword] &= ~(head_mask & tail_mask);
        return;
    }
    words[firstword] &= ~head_mask;
    for (uint32_t w = firstword + 1; w < endword; w++) {
        words[w] = UINT64_C(0);
    }
    words[endword] &= ~tail_mask;
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL18bitset_reset_rangeEPmjj
143
144
/*
145
 * Given a bitset containing "length" 64-bit words, write out the position
146
 * of all the set bits to "out", values start at "base".
147
 *
148
 * The "out" pointer should be sufficient to store the actual number of bits
149
 * set.
150
 *
151
 * Returns how many values were actually decoded.
152
 *
153
 * This function should only be expected to be faster than
154
 * bitset_extract_setbits
155
 * when the density of the bitset is high.
156
 *
157
 * This function uses AVX2 decoding.
158
 */
159
size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
160
                                   uint32_t *out, size_t outcapacity,
161
                                   uint32_t base);
162
163
size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, 
164
                                   uint32_t *out, size_t outcapacity, 
165
                                   uint32_t base);
166
/*
167
 * Given a bitset containing "length" 64-bit words, write out the position
168
 * of all the set bits to "out", values start at "base".
169
 *
170
 * The "out" pointer should be sufficient to store the actual number of bits
171
 * set.
172
 *
173
 * Returns how many values were actually decoded.
174
 */
175
size_t bitset_extract_setbits(const uint64_t *words, size_t length,
176
                              uint32_t *out, uint32_t base);
177
178
/*
179
 * Given a bitset containing "length" 64-bit words, write out the position
180
 * of all the set bits to "out" as 16-bit integers, values start at "base" (can
181
 * be set to zero)
182
 *
183
 * The "out" pointer should be sufficient to store the actual number of bits
184
 * set.
185
 *
186
 * Returns how many values were actually decoded.
187
 *
188
 * This function should only be expected to be faster than
189
 * bitset_extract_setbits_uint16
190
 * when the density of the bitset is high.
191
 *
192
 * This function uses SSE decoding.
193
 */
194
size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
195
                                         uint16_t *out, size_t outcapacity,
196
                                         uint16_t base);
197
198
size_t bitset_extract_setbits_avx512_uint16(const uint64_t *words, size_t length,
199
                                         uint16_t *out, size_t outcapacity, 
200
                                         uint16_t base);
201
202
/*
203
 * Given a bitset containing "length" 64-bit words, write out the position
204
 * of all the set bits to "out",  values start at "base"
205
 * (can be set to zero)
206
 *
207
 * The "out" pointer should be sufficient to store the actual number of bits
208
 * set.
209
 *
210
 * Returns how many values were actually decoded.
211
 */
212
size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
213
                                     uint16_t *out, uint16_t base);
214
215
/*
216
 * Given two bitsets containing "length" 64-bit words, write out the position
217
 * of all the common set bits to "out", values start at "base"
218
 * (can be set to zero)
219
 *
220
 * The "out" pointer should be sufficient to store the actual number of bits
221
 * set.
222
 *
223
 * Returns how many values were actually decoded.
224
 */
225
size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1,
226
                                                  const uint64_t * __restrict__ words2,
227
                                                  size_t length, uint16_t *out,
228
                                                  uint16_t base);
229
230
/*
231
 * Given a bitset having cardinality card, set all bit values in the list (there
232
 * are length of them)
233
 * and return the updated cardinality. This evidently assumes that the bitset
234
 * already contained data.
235
 */
236
uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
237
                                  const uint16_t *list, uint64_t length);
238
/*
239
 * Given a bitset, set all bit values in the list (there
240
 * are length of them).
241
 */
242
void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length);
243
244
/*
245
 * Given a bitset having cardinality card, unset all bit values in the list
246
 * (there are length of them)
247
 * and return the updated cardinality. This evidently assumes that the bitset
248
 * already contained data.
249
 */
250
uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
251
                           uint64_t length);
252
253
/*
254
 * Given a bitset having cardinality card, toggle all bit values in the list
255
 * (there are length of them)
256
 * and return the updated cardinality. This evidently assumes that the bitset
257
 * already contained data.
258
 */
259
260
uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
261
                                   const uint16_t *list, uint64_t length);
262
263
void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
264
265
#if CROARING_IS_X64
266
/***
267
 * BEGIN Harley-Seal popcount functions.
268
 */
269
CROARING_TARGET_AVX2
270
/**
 * Compute the population count of a 256-bit word.
 *
 * Each byte of `v` is split into its two nibbles. The low nibble is looked up
 * in a table holding 4 + popcount(nibble), the high nibble in a table holding
 * 4 - popcount(nibble). The sum of absolute differences of the two lookups
 * is therefore popcount(low) + popcount(high), accumulated per 64-bit lane.
 *
 * Returns a vector whose four 64-bit lanes hold the bit counts of the
 * corresponding 64-bit lanes of `v`.
 *
 * This is not especially fast, but it is convenient as part of other functions.
 */
static inline __m256i popcount256(__m256i v) {
    /* 4 + popcount(n) for n = 0x0..0xf, repeated for both 128-bit halves. */
    const __m256i table_plus = _mm256_setr_epi8(
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8);
    /* 4 - popcount(n) for n = 0x0..0xf, repeated for both 128-bit halves. */
    const __m256i table_minus = _mm256_setr_epi8(
        4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0,
        4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);
    const __m256i nibble_mask = _mm256_set1_epi8(0x0f);

    const __m256i low_nibbles = _mm256_and_si256(v, nibble_mask);
    const __m256i high_nibbles =
        _mm256_and_si256(_mm256_srli_epi16(v, 4), nibble_mask);

    const __m256i plus = _mm256_shuffle_epi8(table_plus, low_nibbles);
    const __m256i minus = _mm256_shuffle_epi8(table_minus, high_nibbles);

    /* |(4 + a) - (4 - b)| == a + b, summed over the 8 bytes of each lane. */
    return _mm256_sad_epu8(plus, minus);
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL11popcount256EDv4_x
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL11popcount256EDv4_x
303
CROARING_UNTARGET_AVX2
304
305
CROARING_TARGET_AVX2
306
/**
 * Simple carry-save adder over 256 bits.
 *
 * Treats a, b and c as 256 independent one-bit inputs and adds them:
 *   *l receives the sum bits   (a ^ b ^ c),
 *   *h receives the carry bits ((a & b) | ((a ^ b) & c)).
 *
 * `h` and `l` may alias variables whose values are passed as a, b or c,
 * because the inputs are taken by value before either output is written.
 */
static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
                       __m256i c) {
    const __m256i a_xor_b = _mm256_xor_si256(a, b);
    const __m256i carry_ab = _mm256_and_si256(a, b);
    const __m256i carry_c = _mm256_and_si256(a_xor_b, c);
    *h = _mm256_or_si256(carry_ab, carry_c);
    *l = _mm256_xor_si256(a_xor_b, c);
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL3CSAEPDv4_xS2_S1_S1_S1_
315
CROARING_UNTARGET_AVX2
316
317
CROARING_TARGET_AVX2
318
/**
319
 * Fast Harley-Seal AVX population count function
320
 */
321
inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
322
0
                                                    const uint64_t size) {
323
0
    __m256i total = _mm256_setzero_si256();
324
0
    __m256i ones = _mm256_setzero_si256();
325
0
    __m256i twos = _mm256_setzero_si256();
326
0
    __m256i fours = _mm256_setzero_si256();
327
0
    __m256i eights = _mm256_setzero_si256();
328
0
    __m256i sixteens = _mm256_setzero_si256();
329
0
    __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;
330
0
331
0
    const uint64_t limit = size - size % 16;
332
0
    uint64_t i = 0;
333
0
334
0
    for (; i < limit; i += 16) {
335
0
        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i),
336
0
            _mm256_lddqu_si256(data + i + 1));
337
0
        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2),
338
0
            _mm256_lddqu_si256(data + i + 3));
339
0
        CSA(&foursA, &twos, twos, twosA, twosB);
340
0
        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4),
341
0
            _mm256_lddqu_si256(data + i + 5));
342
0
        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6),
343
0
            _mm256_lddqu_si256(data + i + 7));
344
0
        CSA(&foursB, &twos, twos, twosA, twosB);
345
0
        CSA(&eightsA, &fours, fours, foursA, foursB);
346
0
        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8),
347
0
            _mm256_lddqu_si256(data + i + 9));
348
0
        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10),
349
0
            _mm256_lddqu_si256(data + i + 11));
350
0
        CSA(&foursA, &twos, twos, twosA, twosB);
351
0
        CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12),
352
0
            _mm256_lddqu_si256(data + i + 13));
353
0
        CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14),
354
0
            _mm256_lddqu_si256(data + i + 15));
355
0
        CSA(&foursB, &twos, twos, twosA, twosB);
356
0
        CSA(&eightsB, &fours, fours, foursA, foursB);
357
0
        CSA(&sixteens, &eights, eights, eightsA, eightsB);
358
0
359
0
        total = _mm256_add_epi64(total, popcount256(sixteens));
360
0
    }
361
0
362
0
    total = _mm256_slli_epi64(total, 4);  // * 16
363
0
    total = _mm256_add_epi64(
364
0
        total, _mm256_slli_epi64(popcount256(eights), 3));  // += 8 * ...
365
0
    total = _mm256_add_epi64(
366
0
        total, _mm256_slli_epi64(popcount256(fours), 2));  // += 4 * ...
367
0
    total = _mm256_add_epi64(
368
0
        total, _mm256_slli_epi64(popcount256(twos), 1));  // += 2 * ...
369
0
    total = _mm256_add_epi64(total, popcount256(ones));
370
0
    for (; i < size; i++)
371
0
        total =
372
0
            _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i)));
373
0
374
0
    return (uint64_t)(_mm256_extract_epi64(total, 0)) +
375
0
           (uint64_t)(_mm256_extract_epi64(total, 1)) +
376
0
           (uint64_t)(_mm256_extract_epi64(total, 2)) +
377
0
           (uint64_t)(_mm256_extract_epi64(total, 3));
378
0
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL28avx2_harley_seal_popcount256EPKDv4_xm
379
CROARING_UNTARGET_AVX2
380
381
#define AVXPOPCNTFNC(opname, avx_intrinsic)                                    \
382
    static inline uint64_t avx2_harley_seal_popcount256_##opname(              \
383
0
        const __m256i *data1, const __m256i *data2, const uint64_t size) {     \
384
0
        __m256i total = _mm256_setzero_si256();                                \
385
0
        __m256i ones = _mm256_setzero_si256();                                 \
386
0
        __m256i twos = _mm256_setzero_si256();                                 \
387
0
        __m256i fours = _mm256_setzero_si256();                                \
388
0
        __m256i eights = _mm256_setzero_si256();                               \
389
0
        __m256i sixteens = _mm256_setzero_si256();                             \
390
0
        __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;                \
391
0
        __m256i A1, A2;                                                        \
392
0
        const uint64_t limit = size - size % 16;                               \
393
0
        uint64_t i = 0;                                                        \
394
0
        for (; i < limit; i += 16) {                                           \
395
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
396
0
                               _mm256_lddqu_si256(data2 + i));                 \
397
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1),              \
398
0
                               _mm256_lddqu_si256(data2 + i + 1));             \
399
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
400
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2),              \
401
0
                               _mm256_lddqu_si256(data2 + i + 2));             \
402
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3),              \
403
0
                               _mm256_lddqu_si256(data2 + i + 3));             \
404
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
405
0
            CSA(&foursA, &twos, twos, twosA, twosB);                           \
406
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4),              \
407
0
                               _mm256_lddqu_si256(data2 + i + 4));             \
408
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5),              \
409
0
                               _mm256_lddqu_si256(data2 + i + 5));             \
410
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
411
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6),              \
412
0
                               _mm256_lddqu_si256(data2 + i + 6));             \
413
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7),              \
414
0
                               _mm256_lddqu_si256(data2 + i + 7));             \
415
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
416
0
            CSA(&foursB, &twos, twos, twosA, twosB);                           \
417
0
            CSA(&eightsA, &fours, fours, foursA, foursB);                      \
418
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8),              \
419
0
                               _mm256_lddqu_si256(data2 + i + 8));             \
420
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9),              \
421
0
                               _mm256_lddqu_si256(data2 + i + 9));             \
422
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
423
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10),             \
424
0
                               _mm256_lddqu_si256(data2 + i + 10));            \
425
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11),             \
426
0
                               _mm256_lddqu_si256(data2 + i + 11));            \
427
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
428
0
            CSA(&foursA, &twos, twos, twosA, twosB);                           \
429
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12),             \
430
0
                               _mm256_lddqu_si256(data2 + i + 12));            \
431
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13),             \
432
0
                               _mm256_lddqu_si256(data2 + i + 13));            \
433
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
434
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14),             \
435
0
                               _mm256_lddqu_si256(data2 + i + 14));            \
436
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15),             \
437
0
                               _mm256_lddqu_si256(data2 + i + 15));            \
438
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
439
0
            CSA(&foursB, &twos, twos, twosA, twosB);                           \
440
0
            CSA(&eightsB, &fours, fours, foursA, foursB);                      \
441
0
            CSA(&sixteens, &eights, eights, eightsA, eightsB);                 \
442
0
            total = _mm256_add_epi64(total, popcount256(sixteens));            \
443
0
        }                                                                      \
444
0
        total = _mm256_slli_epi64(total, 4);                                   \
445
0
        total = _mm256_add_epi64(total,                                        \
446
0
                                 _mm256_slli_epi64(popcount256(eights), 3));   \
447
0
        total =                                                                \
448
0
            _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
449
0
        total =                                                                \
450
0
            _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1));  \
451
0
        total = _mm256_add_epi64(total, popcount256(ones));                    \
452
0
        for (; i < size; i++) {                                                \
453
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
454
0
                               _mm256_lddqu_si256(data2 + i));                 \
455
0
            total = _mm256_add_epi64(total, popcount256(A1));                  \
456
0
        }                                                                      \
457
0
        return (uint64_t)(_mm256_extract_epi64(total, 0)) +                    \
458
0
               (uint64_t)(_mm256_extract_epi64(total, 1)) +                    \
459
0
               (uint64_t)(_mm256_extract_epi64(total, 2)) +                    \
460
0
               (uint64_t)(_mm256_extract_epi64(total, 3));                     \
461
0
    }                                                                          \
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL31avx2_harley_seal_popcount256_orEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL34avx2_harley_seal_popcount256_unionEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_andEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL41avx2_harley_seal_popcount256_intersectionEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL32avx2_harley_seal_popcount256_xorEPKDv4_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL35avx2_harley_seal_popcount256_andnotEPKDv4_xS3_m
462
    static inline uint64_t avx2_harley_seal_popcount256andstore_##opname(      \
463
        const __m256i *__restrict__ data1, const __m256i *__restrict__ data2,  \
464
0
        __m256i *__restrict__ out, const uint64_t size) {                      \
465
0
        __m256i total = _mm256_setzero_si256();                                \
466
0
        __m256i ones = _mm256_setzero_si256();                                 \
467
0
        __m256i twos = _mm256_setzero_si256();                                 \
468
0
        __m256i fours = _mm256_setzero_si256();                                \
469
0
        __m256i eights = _mm256_setzero_si256();                               \
470
0
        __m256i sixteens = _mm256_setzero_si256();                             \
471
0
        __m256i twosA, twosB, foursA, foursB, eightsA, eightsB;                \
472
0
        __m256i A1, A2;                                                        \
473
0
        const uint64_t limit = size - size % 16;                               \
474
0
        uint64_t i = 0;                                                        \
475
0
        for (; i < limit; i += 16) {                                           \
476
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
477
0
                               _mm256_lddqu_si256(data2 + i));                 \
478
0
            _mm256_storeu_si256(out + i, A1);                                  \
479
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1),              \
480
0
                               _mm256_lddqu_si256(data2 + i + 1));             \
481
0
            _mm256_storeu_si256(out + i + 1, A2);                              \
482
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
483
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2),              \
484
0
                               _mm256_lddqu_si256(data2 + i + 2));             \
485
0
            _mm256_storeu_si256(out + i + 2, A1);                              \
486
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3),              \
487
0
                               _mm256_lddqu_si256(data2 + i + 3));             \
488
0
            _mm256_storeu_si256(out + i + 3, A2);                              \
489
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
490
0
            CSA(&foursA, &twos, twos, twosA, twosB);                           \
491
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4),              \
492
0
                               _mm256_lddqu_si256(data2 + i + 4));             \
493
0
            _mm256_storeu_si256(out + i + 4, A1);                              \
494
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5),              \
495
0
                               _mm256_lddqu_si256(data2 + i + 5));             \
496
0
            _mm256_storeu_si256(out + i + 5, A2);                              \
497
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
498
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6),              \
499
0
                               _mm256_lddqu_si256(data2 + i + 6));             \
500
0
            _mm256_storeu_si256(out + i + 6, A1);                              \
501
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7),              \
502
0
                               _mm256_lddqu_si256(data2 + i + 7));             \
503
0
            _mm256_storeu_si256(out + i + 7, A2);                              \
504
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
505
0
            CSA(&foursB, &twos, twos, twosA, twosB);                           \
506
0
            CSA(&eightsA, &fours, fours, foursA, foursB);                      \
507
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8),              \
508
0
                               _mm256_lddqu_si256(data2 + i + 8));             \
509
0
            _mm256_storeu_si256(out + i + 8, A1);                              \
510
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9),              \
511
0
                               _mm256_lddqu_si256(data2 + i + 9));             \
512
0
            _mm256_storeu_si256(out + i + 9, A2);                              \
513
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
514
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10),             \
515
0
                               _mm256_lddqu_si256(data2 + i + 10));            \
516
0
            _mm256_storeu_si256(out + i + 10, A1);                             \
517
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11),             \
518
0
                               _mm256_lddqu_si256(data2 + i + 11));            \
519
0
            _mm256_storeu_si256(out + i + 11, A2);                             \
520
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
521
0
            CSA(&foursA, &twos, twos, twosA, twosB);                           \
522
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12),             \
523
0
                               _mm256_lddqu_si256(data2 + i + 12));            \
524
0
            _mm256_storeu_si256(out + i + 12, A1);                             \
525
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13),             \
526
0
                               _mm256_lddqu_si256(data2 + i + 13));            \
527
0
            _mm256_storeu_si256(out + i + 13, A2);                             \
528
0
            CSA(&twosA, &ones, ones, A1, A2);                                  \
529
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14),             \
530
0
                               _mm256_lddqu_si256(data2 + i + 14));            \
531
0
            _mm256_storeu_si256(out + i + 14, A1);                             \
532
0
            A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15),             \
533
0
                               _mm256_lddqu_si256(data2 + i + 15));            \
534
0
            _mm256_storeu_si256(out + i + 15, A2);                             \
535
0
            CSA(&twosB, &ones, ones, A1, A2);                                  \
536
0
            CSA(&foursB, &twos, twos, twosA, twosB);                           \
537
0
            CSA(&eightsB, &fours, fours, foursA, foursB);                      \
538
0
            CSA(&sixteens, &eights, eights, eightsA, eightsB);                 \
539
0
            total = _mm256_add_epi64(total, popcount256(sixteens));            \
540
0
        }                                                                      \
541
0
        total = _mm256_slli_epi64(total, 4);                                   \
542
0
        total = _mm256_add_epi64(total,                                        \
543
0
                                 _mm256_slli_epi64(popcount256(eights), 3));   \
544
0
        total =                                                                \
545
0
            _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \
546
0
        total =                                                                \
547
0
            _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1));  \
548
0
        total = _mm256_add_epi64(total, popcount256(ones));                    \
549
0
        for (; i < size; i++) {                                                \
550
0
            A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i),                  \
551
0
                               _mm256_lddqu_si256(data2 + i));                 \
552
0
            _mm256_storeu_si256(out + i, A1);                                  \
553
0
            total = _mm256_add_epi64(total, popcount256(A1));                  \
554
0
        }                                                                      \
555
0
        return (uint64_t)(_mm256_extract_epi64(total, 0)) +                    \
556
0
               (uint64_t)(_mm256_extract_epi64(total, 1)) +                    \
557
0
               (uint64_t)(_mm256_extract_epi64(total, 2)) +                    \
558
0
               (uint64_t)(_mm256_extract_epi64(total, 3));                     \
559
0
    }
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL39avx2_harley_seal_popcount256andstore_orEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL42avx2_harley_seal_popcount256andstore_unionEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_andEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL49avx2_harley_seal_popcount256andstore_intersectionEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL40avx2_harley_seal_popcount256andstore_xorEPKDv4_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL43avx2_harley_seal_popcount256andstore_andnotEPKDv4_xS3_PS1_m
560
561
CROARING_TARGET_AVX2
562
AVXPOPCNTFNC(or, _mm256_or_si256)
563
CROARING_UNTARGET_AVX2
564
565
CROARING_TARGET_AVX2
566
AVXPOPCNTFNC(union, _mm256_or_si256)
567
CROARING_UNTARGET_AVX2
568
569
CROARING_TARGET_AVX2
570
AVXPOPCNTFNC(and, _mm256_and_si256)
571
CROARING_UNTARGET_AVX2
572
573
CROARING_TARGET_AVX2
574
AVXPOPCNTFNC(intersection, _mm256_and_si256)
575
CROARING_UNTARGET_AVX2
576
577
CROARING_TARGET_AVX2
578
AVXPOPCNTFNC (xor, _mm256_xor_si256)
579
CROARING_UNTARGET_AVX2
580
581
CROARING_TARGET_AVX2
582
AVXPOPCNTFNC(andnot, _mm256_andnot_si256)
583
CROARING_UNTARGET_AVX2
584
585
586
#define VPOPCNT_AND_ADD(ptr, i, accu)   \
587
    const __m512i v##i = _mm512_loadu_si512((const __m512i*)ptr + i);  \
588
    const __m512i p##i = _mm512_popcnt_epi64(v##i);    \
589
    accu = _mm512_add_epi64(accu, p##i);  
590
591
#if CROARING_COMPILER_SUPPORTS_AVX512
592
CROARING_TARGET_AVX512
593
0
static inline uint64_t sum_epu64_256(const __m256i v) {
594
0
595
0
    return (uint64_t)(_mm256_extract_epi64(v, 0))
596
0
         + (uint64_t)(_mm256_extract_epi64(v, 1))
597
0
         + (uint64_t)(_mm256_extract_epi64(v, 2))
598
0
         + (uint64_t)(_mm256_extract_epi64(v, 3));
599
0
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL13sum_epu64_256EDv4_x
600
601
602
0
static inline uint64_t simd_sum_epu64(const __m512i v) {
603
0
604
0
     __m256i lo = _mm512_extracti64x4_epi64(v, 0);
605
0
     __m256i hi = _mm512_extracti64x4_epi64(v, 1);
606
0
607
0
    return sum_epu64_256(lo) + sum_epu64_256(hi);
608
0
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL14simd_sum_epu64EDv8_x
609
610
static inline uint64_t avx512_vpopcount(const __m512i* data, const uint64_t size)
611
0
{
612
0
    const uint64_t limit = size - size % 4;
613
0
    __m512i total = _mm512_setzero_si512();
614
0
    uint64_t i = 0;
615
0
616
0
    for (; i < limit; i += 4)
617
0
    {    
618
0
        VPOPCNT_AND_ADD(data + i, 0, total);
619
0
        VPOPCNT_AND_ADD(data + i, 1, total);
620
0
        VPOPCNT_AND_ADD(data + i, 2, total);
621
0
        VPOPCNT_AND_ADD(data + i, 3, total);
622
0
    }
623
0
    
624
0
    for (; i < size; i++)
625
0
    {
626
0
        total = _mm512_add_epi64(total, _mm512_popcnt_epi64(_mm512_loadu_si512(data + i)));
627
0
    }
628
0
        
629
0
    return simd_sum_epu64(total);
630
0
}
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL16avx512_vpopcountEPKDv8_xm
631
CROARING_UNTARGET_AVX512
632
#endif
633
634
#define AVXPOPCNTFNC512(opname, avx_intrinsic)                                 \
635
    static inline uint64_t avx512_harley_seal_popcount512_##opname(            \
636
0
        const __m512i *data1, const __m512i *data2, const uint64_t size) {     \
637
0
        __m512i total = _mm512_setzero_si512();                                \
638
0
        const uint64_t limit = size - size % 4;                                \
639
0
        uint64_t i = 0;                                                        \
640
0
      for (; i < limit; i += 4) {                                            \
641
0
            __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i),          \
642
0
                                       _mm512_loadu_si512(data2 + i));         \
643
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1));          \
644
0
            __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1),      \
645
0
                                       _mm512_loadu_si512(data2 + i + 1));     \
646
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2));          \
647
0
             __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2),     \
648
0
                                       _mm512_loadu_si512(data2 + i + 2));     \
649
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3));          \
650
0
             __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3),     \
651
0
                                       _mm512_loadu_si512(data2 + i + 3));     \
652
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4));          \
653
0
       }                                                                       \
654
0
       for(; i < size; i++) {                                                  \
655
0
              __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i),         \
656
0
                       _mm512_loadu_si512(data2 + i));                         \
657
0
              total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a));         \
658
0
        }                                                                      \
659
0
        return simd_sum_epu64(total);                                          \
660
0
    }                                                                          \
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL33avx512_harley_seal_popcount512_orEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL36avx512_harley_seal_popcount512_unionEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_andEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL43avx512_harley_seal_popcount512_intersectionEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL34avx512_harley_seal_popcount512_xorEPKDv8_xS3_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL37avx512_harley_seal_popcount512_andnotEPKDv8_xS3_m
661
    static inline uint64_t avx512_harley_seal_popcount512andstore_##opname(    \
662
        const __m512i *__restrict__ data1, const __m512i *__restrict__ data2,  \
663
0
        __m512i *__restrict__ out, const uint64_t size) {                      \
664
0
        __m512i total = _mm512_setzero_si512();                                \
665
0
        const uint64_t limit = size - size % 4;                                \
666
0
        uint64_t i = 0;                                                        \
667
0
      for (; i < limit; i += 4) {                                        \
668
0
            __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i),          \
669
0
                                       _mm512_loadu_si512(data2 + i));         \
670
0
            _mm512_storeu_si512(out + i, a1);                                  \
671
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1));          \
672
0
            __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1),      \
673
0
                                       _mm512_loadu_si512(data2 + i + 1));     \
674
0
            _mm512_storeu_si512(out + i + 1, a2);                              \
675
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2));          \
676
0
             __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2),     \
677
0
                                       _mm512_loadu_si512(data2 + i + 2));     \
678
0
            _mm512_storeu_si512(out + i + 2, a3);                              \
679
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3));          \
680
0
            __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3),      \
681
0
                                       _mm512_loadu_si512(data2 + i + 3));     \
682
0
            _mm512_storeu_si512(out + i + 3, a4);                              \
683
0
            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4));          \
684
0
       }                                                                       \
685
0
       for(; i < size; i++) {                                                  \
686
0
              __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i),         \
687
0
                       _mm512_loadu_si512(data2 + i));                         \
688
0
            _mm512_storeu_si512(out + i, a);                                   \
689
0
         total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a));        \
690
0
        }                                                                      \
691
0
        return simd_sum_epu64(total);                                          \
692
0
    }                                                                          \
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_writer.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: bkd_reader.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: packed_index_tree.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: index_tree.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: legacy_index_tree.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: docids_writer.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL41avx512_harley_seal_popcount512andstore_orEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL44avx512_harley_seal_popcount512andstore_unionEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_andEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL51avx512_harley_seal_popcount512andstore_intersectionEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL42avx512_harley_seal_popcount512andstore_xorEPKDv8_xS3_PS1_m
Unexecuted instantiation: IndexWriter.cpp:_ZN7roaring8internalL45avx512_harley_seal_popcount512andstore_andnotEPKDv8_xS3_PS1_m
693
694
#if CROARING_COMPILER_SUPPORTS_AVX512
695
CROARING_TARGET_AVX512
696
AVXPOPCNTFNC512(or, _mm512_or_si512)
697
AVXPOPCNTFNC512(union, _mm512_or_si512)
698
AVXPOPCNTFNC512(and, _mm512_and_si512)
699
AVXPOPCNTFNC512(intersection, _mm512_and_si512)
700
AVXPOPCNTFNC512(xor, _mm512_xor_si512)
701
AVXPOPCNTFNC512(andnot, _mm512_andnot_si512)
702
CROARING_UNTARGET_AVX512
703
#endif
704
/***
705
 * END Harley-Seal popcount functions.
706
 */
707
708
#endif  // CROARING_IS_X64
709
710
#ifdef __cplusplus
711
} } }  // extern "C" { namespace roaring { namespace internal
712
#endif
713
714
#endif