/root/doris/contrib/faiss/faiss/utils/hamming.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | /* |
9 | | * Implementation of Hamming related functions (distances, smallest distance |
10 | | * selection with regular heap|radix and probabilistic heap|radix. |
11 | | * |
12 | | * IMPLEMENTATION NOTES |
13 | | * Optimal speed is typically obtained for vector sizes of multiples of 64 |
14 | | * bits. |
15 | | * |
16 | | * hamdis_t is used for distances because at this time |
17 | | * it is not clear how we will need to balance |
18 | | * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors) |
19 | | * - memory usage |
20 | | * - cache-misses when dealing with large volumes of data (lower bits is better) |
21 | | * |
22 | | */ |
23 | | |
24 | | #include <faiss/utils/hamming.h> |
25 | | |
26 | | #include <algorithm> |
27 | | #include <cstdio> |
28 | | #include <memory> |
29 | | #include <vector> |
30 | | |
31 | | #include <faiss/impl/AuxIndexStructures.h> |
32 | | #include <faiss/impl/FaissAssert.h> |
33 | | #include <faiss/impl/IDSelector.h> |
34 | | #include <faiss/utils/Heap.h> |
35 | | #include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h> |
36 | | #include <faiss/utils/utils.h> |
37 | | |
38 | | namespace faiss { |
39 | | |
40 | | size_t hamming_batch_size = 65536; |
41 | | |
42 | | template <size_t nbits> |
43 | | void hammings( |
44 | | const uint64_t* __restrict bs1, |
45 | | const uint64_t* __restrict bs2, |
46 | | size_t n1, |
47 | | size_t n2, |
48 | | hamdis_t* __restrict dis) |
49 | | |
50 | 0 | { |
51 | 0 | size_t i, j; |
52 | 0 | const size_t nwords = nbits / 64; |
53 | 0 | for (i = 0; i < n1; i++) { |
54 | 0 | const uint64_t* __restrict bs1_ = bs1 + i * nwords; |
55 | 0 | hamdis_t* __restrict dis_ = dis + i * n2; |
56 | 0 | for (j = 0; j < n2; j++) |
57 | 0 | dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords); |
58 | 0 | } |
59 | 0 | } Unexecuted instantiation: _ZN5faiss8hammingsILm64EEEvPKmS2_mmPi Unexecuted instantiation: _ZN5faiss8hammingsILm128EEEvPKmS2_mmPi Unexecuted instantiation: _ZN5faiss8hammingsILm256EEEvPKmS2_mmPi Unexecuted instantiation: _ZN5faiss8hammingsILm512EEEvPKmS2_mmPi |
60 | | |
61 | | void hammings( |
62 | | const uint64_t* __restrict bs1, |
63 | | const uint64_t* __restrict bs2, |
64 | | size_t n1, |
65 | | size_t n2, |
66 | | size_t nbits, |
67 | 0 | hamdis_t* __restrict dis) { |
68 | 0 | size_t i, j; |
69 | 0 | const size_t nwords = nbits / 64; |
70 | 0 | for (i = 0; i < n1; i++) { |
71 | 0 | const uint64_t* __restrict bs1_ = bs1 + i * nwords; |
72 | 0 | hamdis_t* __restrict dis_ = dis + i * n2; |
73 | 0 | for (j = 0; j < n2; j++) |
74 | 0 | dis_[j] = hamming(bs1_, bs2 + j * nwords, nwords); |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | | /* Count number of matches given a max threshold */ |
79 | | template <size_t nbits> |
80 | | void hamming_count_thres( |
81 | | const uint64_t* __restrict bs1, |
82 | | const uint64_t* __restrict bs2, |
83 | | size_t n1, |
84 | | size_t n2, |
85 | | hamdis_t ht, |
86 | 0 | size_t* __restrict nptr) { |
87 | 0 | const size_t nwords = nbits / 64; |
88 | 0 | size_t i, j, posm = 0; |
89 | 0 | const uint64_t* bs2_ = bs2; |
90 | |
|
91 | 0 | for (i = 0; i < n1; i++) { |
92 | 0 | bs2 = bs2_; |
93 | 0 | for (j = 0; j < n2; j++) { |
94 | | /* collect the match only if this satisfies the threshold */ |
95 | 0 | if (hamming<nbits>(bs1, bs2) <= ht) |
96 | 0 | posm++; |
97 | 0 | bs2 += nwords; |
98 | 0 | } |
99 | 0 | bs1 += nwords; /* next signature */ |
100 | 0 | } |
101 | 0 | *nptr = posm; |
102 | 0 | } Unexecuted instantiation: _ZN5faiss19hamming_count_thresILm64EEEvPKmS2_mmiPm Unexecuted instantiation: _ZN5faiss19hamming_count_thresILm128EEEvPKmS2_mmiPm Unexecuted instantiation: _ZN5faiss19hamming_count_thresILm256EEEvPKmS2_mmiPm Unexecuted instantiation: _ZN5faiss19hamming_count_thresILm512EEEvPKmS2_mmiPm |
103 | | |
104 | | template <size_t nbits> |
105 | | void crosshamming_count_thres( |
106 | | const uint64_t* __restrict dbs, |
107 | | size_t n, |
108 | | int ht, |
109 | 0 | size_t* __restrict nptr) { |
110 | 0 | const size_t nwords = nbits / 64; |
111 | 0 | size_t i, j, posm = 0; |
112 | 0 | const uint64_t* bs1 = dbs; |
113 | 0 | for (i = 0; i < n; i++) { |
114 | 0 | const uint64_t* bs2 = bs1 + 2; |
115 | 0 | for (j = i + 1; j < n; j++) { |
116 | | /* collect the match only if this satisfies the threshold */ |
117 | 0 | if (hamming<nbits>(bs1, bs2) <= ht) |
118 | 0 | posm++; |
119 | 0 | bs2 += nwords; |
120 | 0 | } |
121 | 0 | bs1 += nwords; |
122 | 0 | } |
123 | 0 | *nptr = posm; |
124 | 0 | } Unexecuted instantiation: _ZN5faiss24crosshamming_count_thresILm64EEEvPKmmiPm Unexecuted instantiation: _ZN5faiss24crosshamming_count_thresILm128EEEvPKmmiPm Unexecuted instantiation: _ZN5faiss24crosshamming_count_thresILm256EEEvPKmmiPm Unexecuted instantiation: _ZN5faiss24crosshamming_count_thresILm512EEEvPKmmiPm |
125 | | |
126 | | template <size_t nbits> |
127 | | size_t match_hamming_thres( |
128 | | const uint64_t* __restrict bs1, |
129 | | const uint64_t* __restrict bs2, |
130 | | size_t n1, |
131 | | size_t n2, |
132 | | int ht, |
133 | | int64_t* __restrict idx, |
134 | 0 | hamdis_t* __restrict hams) { |
135 | 0 | const size_t nwords = nbits / 64; |
136 | 0 | size_t i, j, posm = 0; |
137 | 0 | hamdis_t h; |
138 | 0 | const uint64_t* bs2_ = bs2; |
139 | 0 | for (i = 0; i < n1; i++) { |
140 | 0 | bs2 = bs2_; |
141 | 0 | for (j = 0; j < n2; j++) { |
142 | | /* Here perform the real work of computing the distance */ |
143 | 0 | h = hamming<nbits>(bs1, bs2); |
144 | | |
145 | | /* collect the match only if this satisfies the threshold */ |
146 | 0 | if (h <= ht) { |
147 | | /* Enough space to store another match ? */ |
148 | 0 | *idx = i; |
149 | 0 | idx++; |
150 | 0 | *idx = j; |
151 | 0 | idx++; |
152 | 0 | *hams = h; |
153 | 0 | hams++; |
154 | 0 | posm++; |
155 | 0 | } |
156 | 0 | bs2 += nwords; /* next signature */ |
157 | 0 | } |
158 | 0 | bs1 += nwords; |
159 | 0 | } |
160 | 0 | return posm; |
161 | 0 | } Unexecuted instantiation: _ZN5faiss19match_hamming_thresILm64EEEmPKmS2_mmiPlPi Unexecuted instantiation: _ZN5faiss19match_hamming_thresILm128EEEmPKmS2_mmiPlPi Unexecuted instantiation: _ZN5faiss19match_hamming_thresILm256EEEmPKmS2_mmiPlPi Unexecuted instantiation: _ZN5faiss19match_hamming_thresILm512EEEmPKmS2_mmiPlPi |
162 | | |
163 | | namespace { |
164 | | |
165 | | /* Return closest neighbors w.r.t Hamming distance, using a heap. */ |
166 | | template <class HammingComputer> |
167 | | void hammings_knn_hc( |
168 | | int bytes_per_code, |
169 | | int_maxheap_array_t* __restrict ha, |
170 | | const uint8_t* __restrict bs1, |
171 | | const uint8_t* __restrict bs2, |
172 | | size_t n2, |
173 | | bool order = true, |
174 | | bool init_heap = true, |
175 | | ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK, |
176 | 0 | const faiss::IDSelector* sel = nullptr) { |
177 | 0 | size_t k = ha->k; |
178 | 0 | if (init_heap) |
179 | 0 | ha->heapify(); |
180 | |
|
181 | 0 | const size_t block_size = hamming_batch_size; |
182 | 0 | for (size_t j0 = 0; j0 < n2; j0 += block_size) { |
183 | 0 | const size_t j1 = std::min(j0 + block_size, n2); |
184 | 0 | #pragma omp parallel for |
185 | 0 | for (int64_t i = 0; i < ha->nh; i++) { |
186 | 0 | HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code); |
187 | |
|
188 | 0 | const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code; |
189 | 0 | hamdis_t dis; |
190 | 0 | hamdis_t* __restrict bh_val_ = ha->val + i * k; |
191 | 0 | int64_t* __restrict bh_ids_ = ha->ids + i * k; |
192 | | |
193 | | // if larger number of k is required, then ::bs_addn() needs to be |
194 | | // used instead of ::addn() |
195 | 0 | #define HANDLE_APPROX(NB, BD) \ |
196 | 0 | case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \ |
197 | 0 | FAISS_THROW_IF_NOT_FMT( \ |
198 | 0 | k <= NB * BD, \ |
199 | 0 | "The chosen mode (%d) of approximate top-k supports " \ |
200 | 0 | "up to %d values, but %zd is requested.", \ |
201 | 0 | (int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \ |
202 | 0 | NB * BD, \ |
203 | 0 | k); \ |
204 | 0 | HeapWithBucketsForHamming32< \ |
205 | 0 | CMax<hamdis_t, int64_t>, \ |
206 | 0 | NB, \ |
207 | 0 | BD, \ |
208 | 0 | HammingComputer>:: \ |
209 | 0 | addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_, sel); \ |
210 | 0 | break; |
211 | |
|
212 | 0 | switch (approx_topk_mode) { |
213 | 0 | HANDLE_APPROX(8, 3) |
214 | 0 | HANDLE_APPROX(8, 2) |
215 | 0 | HANDLE_APPROX(16, 2) |
216 | 0 | HANDLE_APPROX(32, 2) |
217 | 0 | default: { |
218 | 0 | for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) { |
219 | 0 | if (sel && !sel->is_member(j)) { |
220 | 0 | continue; |
221 | 0 | } |
222 | 0 | dis = hc.hamming(bs2_); |
223 | 0 | if (dis < bh_val_[0]) { |
224 | 0 | faiss::maxheap_replace_top<hamdis_t>( |
225 | 0 | k, bh_val_, bh_ids_, dis, j); |
226 | 0 | } |
227 | 0 | } |
228 | 0 | } break; |
229 | 0 | } |
230 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_16HammingComputer4EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_16HammingComputer8EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer16EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer20EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer32EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer64EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_22HammingComputerDefaultEEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE.omp_outlined_debug__ |
231 | 0 | } |
232 | 0 | if (order) |
233 | 0 | ha->reorder(); |
234 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_16HammingComputer4EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_16HammingComputer8EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer16EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer20EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer32EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_17HammingComputer64EEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_hcINS_22HammingComputerDefaultEEEviPNS_9HeapArrayINS_4CMaxIilEEEEPKhS9_mbb17ApproxTopK_mode_tPKNS_10IDSelectorE |
235 | | |
236 | | /* Return closest neighbors w.r.t Hamming distance, using max count. */ |
237 | | template <class HammingComputer> |
238 | | void hammings_knn_mc( |
239 | | int bytes_per_code, |
240 | | const uint8_t* __restrict a, |
241 | | const uint8_t* __restrict b, |
242 | | size_t na, |
243 | | size_t nb, |
244 | | size_t k, |
245 | | int32_t* __restrict distances, |
246 | | int64_t* __restrict labels, |
247 | 0 | const faiss::IDSelector* sel) { |
248 | 0 | const int nBuckets = bytes_per_code * 8 + 1; |
249 | 0 | std::vector<int> all_counters(na * nBuckets, 0); |
250 | 0 | std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]); |
251 | |
|
252 | 0 | std::vector<HCounterState<HammingComputer>> cs; |
253 | 0 | for (size_t i = 0; i < na; ++i) { |
254 | 0 | cs.push_back(HCounterState<HammingComputer>( |
255 | 0 | all_counters.data() + i * nBuckets, |
256 | 0 | all_ids_per_dis.get() + i * nBuckets * k, |
257 | 0 | a + i * bytes_per_code, |
258 | 0 | 8 * bytes_per_code, |
259 | 0 | k)); |
260 | 0 | } |
261 | |
|
262 | 0 | const size_t block_size = hamming_batch_size; |
263 | 0 | for (size_t j0 = 0; j0 < nb; j0 += block_size) { |
264 | 0 | const size_t j1 = std::min(j0 + block_size, nb); |
265 | 0 | #pragma omp parallel for |
266 | 0 | for (int64_t i = 0; i < na; ++i) { |
267 | 0 | for (size_t j = j0; j < j1; ++j) { |
268 | 0 | if (!sel || sel->is_member(j)) { |
269 | 0 | cs[i].update_counter(b + j * bytes_per_code, j); |
270 | 0 | } |
271 | 0 | } |
272 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_16HammingComputer4EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_16HammingComputer8EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer16EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer20EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer32EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer64EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_22HammingComputerDefaultEEEviPKhS4_mmmPiPlPKNS_10IDSelectorE.omp_outlined_debug__ |
273 | 0 | } |
274 | |
|
275 | 0 | for (size_t i = 0; i < na; ++i) { |
276 | 0 | HCounterState<HammingComputer>& csi = cs[i]; |
277 | |
|
278 | 0 | int nres = 0; |
279 | 0 | for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) { |
280 | 0 | for (int l = 0; l < csi.counters[b_2] && nres < k; l++) { |
281 | 0 | labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l]; |
282 | 0 | distances[i * k + nres] = b_2; |
283 | 0 | nres++; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | while (nres < k) { |
287 | 0 | labels[i * k + nres] = -1; |
288 | 0 | distances[i * k + nres] = std::numeric_limits<int32_t>::max(); |
289 | 0 | ++nres; |
290 | 0 | } |
291 | 0 | } |
292 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_16HammingComputer4EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_16HammingComputer8EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer16EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer20EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer32EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_17HammingComputer64EEEviPKhS4_mmmPiPlPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_115hammings_knn_mcINS_22HammingComputerDefaultEEEviPKhS4_mmmPiPlPKNS_10IDSelectorE |
293 | | |
294 | | template <class HammingComputer> |
295 | | void hamming_range_search( |
296 | | const uint8_t* a, |
297 | | const uint8_t* b, |
298 | | size_t na, |
299 | | size_t nb, |
300 | | int radius, |
301 | | size_t code_size, |
302 | | RangeSearchResult* res, |
303 | 0 | const faiss::IDSelector* sel) { |
304 | 0 | #pragma omp parallel |
305 | 0 | { |
306 | 0 | RangeSearchPartialResult pres(res); |
307 | |
|
308 | 0 | #pragma omp for |
309 | 0 | for (int64_t i = 0; i < na; i++) { |
310 | 0 | HammingComputer hc(a + i * code_size, code_size); |
311 | 0 | const uint8_t* yi = b; |
312 | 0 | RangeQueryResult& qres = pres.new_result(i); |
313 | |
|
314 | 0 | for (size_t j = 0; j < nb; j++) { |
315 | 0 | if (!sel || sel->is_member(j)) { |
316 | 0 | int dis = hc.hamming(yi); |
317 | 0 | if (dis < radius) { |
318 | 0 | qres.add(dis, j); |
319 | 0 | } |
320 | 0 | } |
321 | 0 | yi += code_size; |
322 | 0 | } |
323 | 0 | } |
324 | 0 | pres.finalize(); |
325 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_16HammingComputer4EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_16HammingComputer8EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer16EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer20EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer32EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer64EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_22HammingComputerDefaultEEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE.omp_outlined_debug__ |
326 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_16HammingComputer4EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_16HammingComputer8EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer16EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer20EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer32EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_17HammingComputer64EEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_120hamming_range_searchINS_22HammingComputerDefaultEEEvPKhS4_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorE |
327 | | |
328 | | struct Run_hammings_knn_hc { |
329 | | using T = void; |
330 | | template <class HammingComputer, class... Types> |
331 | 0 | void f(Types... args) { |
332 | 0 | hammings_knn_hc<HammingComputer>(args...); |
333 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_16HammingComputer4EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_16HammingComputer8EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_17HammingComputer16EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_17HammingComputer20EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_17HammingComputer32EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_17HammingComputer64EJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_hc1fINS_22HammingComputerDefaultEJmPNS_9HeapArrayINS_4CMaxIilEEEEPKhSA_mib17ApproxTopK_mode_tPKNS_10IDSelectorEEEEvDpT0_ |
334 | | }; |
335 | | |
336 | | struct Run_hammings_knn_mc { |
337 | | using T = void; |
338 | | template <class HammingComputer, class... Types> |
339 | 0 | void f(Types... args) { |
340 | 0 | hammings_knn_mc<HammingComputer>(args...); |
341 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_16HammingComputer4EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_16HammingComputer8EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_17HammingComputer16EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_17HammingComputer20EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_17HammingComputer32EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_17HammingComputer64EJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_119Run_hammings_knn_mc1fINS_22HammingComputerDefaultEJmPKhS5_mmmPiPlPKNS_10IDSelectorEEEEvDpT0_ |
342 | | }; |
343 | | |
344 | | struct Run_hamming_range_search { |
345 | | using T = void; |
346 | | template <class HammingComputer, class... Types> |
347 | 0 | void f(Types... args) { |
348 | 0 | hamming_range_search<HammingComputer>(args...); |
349 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_16HammingComputer4EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_16HammingComputer8EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_17HammingComputer16EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_17HammingComputer20EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_17HammingComputer32EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_17HammingComputer64EJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ Unexecuted instantiation: hamming.cpp:_ZN5faiss12_GLOBAL__N_124Run_hamming_range_search1fINS_22HammingComputerDefaultEJPKhS5_mmimPNS_17RangeSearchResultEPKNS_10IDSelectorEEEEvDpT0_ |
350 | | }; |
351 | | |
352 | | } // namespace |
353 | | |
354 | | /* Functions to maps vectors to bits. Assume proper allocation done beforehand, |
355 | | meaning that b should be be able to receive as many bits as x may produce. */ |
356 | | |
357 | | /* |
358 | | * dimension 0 corresponds to the least significant bit of b[0], or |
359 | | * equivalently to the lsb of the first byte that is stored. |
360 | | */ |
361 | 0 | void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) { |
362 | 0 | for (int i = 0; i < d; i += 8) { |
363 | 0 | uint8_t w = 0; |
364 | 0 | uint8_t mask = 1; |
365 | 0 | int nj = i + 8 <= d ? 8 : d - i; |
366 | 0 | for (int j = 0; j < nj; j++) { |
367 | 0 | if (x[i + j] >= 0) |
368 | 0 | w |= mask; |
369 | 0 | mask <<= 1; |
370 | 0 | } |
371 | 0 | *b = w; |
372 | 0 | b++; |
373 | 0 | } |
374 | 0 | } |
375 | | |
376 | | /* Same but for n vectors. |
377 | | Ensure that the output b is byte-aligned (pad with 0s). */ |
378 | | void fvecs2bitvecs( |
379 | | const float* __restrict x, |
380 | | uint8_t* __restrict b, |
381 | | size_t d, |
382 | 0 | size_t n) { |
383 | 0 | const int64_t ncodes = ((d + 7) / 8); |
384 | 0 | #pragma omp parallel for if (n > 100000) |
385 | 0 | for (int64_t i = 0; i < n; i++) |
386 | 0 | fvec2bitvec(x + i * d, b + i * ncodes, d); |
387 | 0 | } |
388 | | |
389 | | void bitvecs2fvecs( |
390 | | const uint8_t* __restrict b, |
391 | | float* __restrict x, |
392 | | size_t d, |
393 | 0 | size_t n) { |
394 | 0 | const int64_t ncodes = ((d + 7) / 8); |
395 | 0 | #pragma omp parallel for if (n > 100000) |
396 | 0 | for (int64_t i = 0; i < n; i++) { |
397 | 0 | binary_to_real(d, b + i * ncodes, x + i * d); |
398 | 0 | } |
399 | 0 | } |
400 | | |
401 | | /* Reverse bit (NOT a optimized function, only used for print purpose) */ |
402 | 0 | static uint64_t uint64_reverse_bits(uint64_t b) { |
403 | 0 | int i; |
404 | 0 | uint64_t revb = 0; |
405 | 0 | for (i = 0; i < 64; i++) { |
406 | 0 | revb <<= 1; |
407 | 0 | revb |= b & 1; |
408 | 0 | b >>= 1; |
409 | 0 | } |
410 | 0 | return revb; |
411 | 0 | } |
412 | | |
413 | | /* print the bit vector */ |
414 | 0 | void bitvec_print(const uint8_t* b, size_t d) { |
415 | 0 | size_t i, j; |
416 | 0 | for (i = 0; i < d;) { |
417 | 0 | uint64_t brev = uint64_reverse_bits(*(uint64_t*)b); |
418 | 0 | for (j = 0; j < 64 && i < d; j++, i++) { |
419 | 0 | printf("%d", (int)(brev & 1)); |
420 | 0 | brev >>= 1; |
421 | 0 | } |
422 | 0 | b += 8; |
423 | 0 | printf(" "); |
424 | 0 | } |
425 | 0 | } |
426 | | |
427 | | void bitvec_shuffle( |
428 | | size_t n, |
429 | | size_t da, |
430 | | size_t db, |
431 | | const int* __restrict order, |
432 | | const uint8_t* __restrict a, |
433 | 0 | uint8_t* __restrict b) { |
434 | 0 | for (size_t i = 0; i < db; i++) { |
435 | 0 | FAISS_THROW_IF_NOT(order[i] >= 0 && order[i] < da); |
436 | 0 | } |
437 | 0 | size_t lda = (da + 7) / 8; |
438 | 0 | size_t ldb = (db + 7) / 8; |
439 | |
|
440 | 0 | #pragma omp parallel for if (n > 10000) |
441 | 0 | for (int64_t i = 0; i < n; i++) { |
442 | 0 | const uint8_t* ai = a + i * lda; |
443 | 0 | uint8_t* bi = b + i * ldb; |
444 | 0 | memset(bi, 0, ldb); |
445 | 0 | for (size_t j = 0; j < db; j++) { |
446 | 0 | int o = order[j]; |
447 | 0 | uint8_t the_bit = (ai[o >> 3] >> (o & 7)) & 1; |
448 | 0 | bi[j >> 3] |= the_bit << (j & 7); |
449 | 0 | } |
450 | 0 | } |
451 | 0 | } |
452 | | |
453 | | /*----------------------------------------*/ |
454 | | /* Hamming distance computation and k-nn */ |
455 | | |
456 | 0 | #define C64(x) ((uint64_t*)x) |
457 | | |
458 | | /* Compute a set of Hamming distances */ |
459 | | void hammings( |
460 | | const uint8_t* __restrict a, |
461 | | const uint8_t* __restrict b, |
462 | | size_t na, |
463 | | size_t nb, |
464 | | size_t ncodes, |
465 | 0 | hamdis_t* __restrict dis) { |
466 | 0 | FAISS_THROW_IF_NOT(ncodes % 8 == 0); |
467 | 0 | switch (ncodes) { |
468 | 0 | case 8: |
469 | 0 | faiss::hammings<64>(C64(a), C64(b), na, nb, dis); |
470 | 0 | return; |
471 | 0 | case 16: |
472 | 0 | faiss::hammings<128>(C64(a), C64(b), na, nb, dis); |
473 | 0 | return; |
474 | 0 | case 32: |
475 | 0 | faiss::hammings<256>(C64(a), C64(b), na, nb, dis); |
476 | 0 | return; |
477 | 0 | case 64: |
478 | 0 | faiss::hammings<512>(C64(a), C64(b), na, nb, dis); |
479 | 0 | return; |
480 | 0 | default: |
481 | 0 | faiss::hammings(C64(a), C64(b), na, nb, ncodes * 8, dis); |
482 | 0 | return; |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | | void hammings_knn( |
487 | | int_maxheap_array_t* __restrict ha, |
488 | | const uint8_t* __restrict a, |
489 | | const uint8_t* __restrict b, |
490 | | size_t nb, |
491 | | size_t ncodes, |
492 | 0 | int order) { |
493 | 0 | hammings_knn_hc(ha, a, b, nb, ncodes, order); |
494 | 0 | } |
495 | | |
496 | | void hammings_knn_hc( |
497 | | int_maxheap_array_t* __restrict ha, |
498 | | const uint8_t* __restrict a, |
499 | | const uint8_t* __restrict b, |
500 | | size_t nb, |
501 | | size_t ncodes, |
502 | | int order, |
503 | | ApproxTopK_mode_t approx_topk_mode, |
504 | 0 | const faiss::IDSelector* sel) { |
505 | 0 | Run_hammings_knn_hc r; |
506 | 0 | dispatch_HammingComputer( |
507 | 0 | ncodes, |
508 | 0 | r, |
509 | 0 | ncodes, |
510 | 0 | ha, |
511 | 0 | a, |
512 | 0 | b, |
513 | 0 | nb, |
514 | 0 | order, |
515 | 0 | true, |
516 | 0 | approx_topk_mode, |
517 | 0 | sel); |
518 | 0 | } |
519 | | |
520 | | void hammings_knn_mc( |
521 | | const uint8_t* __restrict a, |
522 | | const uint8_t* __restrict b, |
523 | | size_t na, |
524 | | size_t nb, |
525 | | size_t k, |
526 | | size_t ncodes, |
527 | | int32_t* __restrict distances, |
528 | | int64_t* __restrict labels, |
529 | 0 | const faiss::IDSelector* sel) { |
530 | 0 | Run_hammings_knn_mc r; |
531 | 0 | dispatch_HammingComputer( |
532 | 0 | ncodes, r, ncodes, a, b, na, nb, k, distances, labels, sel); |
533 | 0 | } |
534 | | |
535 | | void hamming_range_search( |
536 | | const uint8_t* a, |
537 | | const uint8_t* b, |
538 | | size_t na, |
539 | | size_t nb, |
540 | | int radius, |
541 | | size_t code_size, |
542 | | RangeSearchResult* result, |
543 | 0 | const faiss::IDSelector* sel) { |
544 | 0 | Run_hamming_range_search r; |
545 | 0 | dispatch_HammingComputer( |
546 | 0 | code_size, r, a, b, na, nb, radius, code_size, result, sel); |
547 | 0 | } |
548 | | |
549 | | /* Count number of matches given a max threshold */ |
550 | | void hamming_count_thres( |
551 | | const uint8_t* bs1, |
552 | | const uint8_t* bs2, |
553 | | size_t n1, |
554 | | size_t n2, |
555 | | hamdis_t ht, |
556 | | size_t ncodes, |
557 | 0 | size_t* nptr) { |
558 | 0 | switch (ncodes) { |
559 | 0 | case 8: |
560 | 0 | faiss::hamming_count_thres<64>( |
561 | 0 | C64(bs1), C64(bs2), n1, n2, ht, nptr); |
562 | 0 | return; |
563 | 0 | case 16: |
564 | 0 | faiss::hamming_count_thres<128>( |
565 | 0 | C64(bs1), C64(bs2), n1, n2, ht, nptr); |
566 | 0 | return; |
567 | 0 | case 32: |
568 | 0 | faiss::hamming_count_thres<256>( |
569 | 0 | C64(bs1), C64(bs2), n1, n2, ht, nptr); |
570 | 0 | return; |
571 | 0 | case 64: |
572 | 0 | faiss::hamming_count_thres<512>( |
573 | 0 | C64(bs1), C64(bs2), n1, n2, ht, nptr); |
574 | 0 | return; |
575 | 0 | default: |
576 | 0 | FAISS_THROW_FMT("not implemented for %zu bits", ncodes); |
577 | 0 | } |
578 | 0 | } |
579 | | |
580 | | /* Count number of cross-matches given a threshold */ |
581 | | void crosshamming_count_thres( |
582 | | const uint8_t* dbs, |
583 | | size_t n, |
584 | | hamdis_t ht, |
585 | | size_t ncodes, |
586 | 0 | size_t* nptr) { |
587 | 0 | switch (ncodes) { |
588 | 0 | case 8: |
589 | 0 | faiss::crosshamming_count_thres<64>(C64(dbs), n, ht, nptr); |
590 | 0 | return; |
591 | 0 | case 16: |
592 | 0 | faiss::crosshamming_count_thres<128>(C64(dbs), n, ht, nptr); |
593 | 0 | return; |
594 | 0 | case 32: |
595 | 0 | faiss::crosshamming_count_thres<256>(C64(dbs), n, ht, nptr); |
596 | 0 | return; |
597 | 0 | case 64: |
598 | 0 | faiss::crosshamming_count_thres<512>(C64(dbs), n, ht, nptr); |
599 | 0 | return; |
600 | 0 | default: |
601 | 0 | FAISS_THROW_FMT("not implemented for %zu bits", ncodes); |
602 | 0 | } |
603 | 0 | } |
604 | | |
605 | | /* Returns all matches given a threshold */ |
606 | | size_t match_hamming_thres( |
607 | | const uint8_t* bs1, |
608 | | const uint8_t* bs2, |
609 | | size_t n1, |
610 | | size_t n2, |
611 | | hamdis_t ht, |
612 | | size_t ncodes, |
613 | | int64_t* idx, |
614 | 0 | hamdis_t* dis) { |
615 | 0 | switch (ncodes) { |
616 | 0 | case 8: |
617 | 0 | return faiss::match_hamming_thres<64>( |
618 | 0 | C64(bs1), C64(bs2), n1, n2, ht, idx, dis); |
619 | 0 | case 16: |
620 | 0 | return faiss::match_hamming_thres<128>( |
621 | 0 | C64(bs1), C64(bs2), n1, n2, ht, idx, dis); |
622 | 0 | case 32: |
623 | 0 | return faiss::match_hamming_thres<256>( |
624 | 0 | C64(bs1), C64(bs2), n1, n2, ht, idx, dis); |
625 | 0 | case 64: |
626 | 0 | return faiss::match_hamming_thres<512>( |
627 | 0 | C64(bs1), C64(bs2), n1, n2, ht, idx, dis); |
628 | 0 | default: |
629 | 0 | FAISS_THROW_FMT("not implemented for %zu bits", ncodes); |
630 | 0 | return 0; |
631 | 0 | } |
632 | 0 | } |
633 | | |
634 | | #undef C64 |
635 | | |
636 | | /************************************* |
637 | | * generalized Hamming distances |
638 | | ************************************/ |
639 | | |
640 | | template <class HammingComputer> |
641 | | static void hamming_dis_inner_loop( |
642 | | const uint8_t* __restrict ca, |
643 | | const uint8_t* __restrict cb, |
644 | | size_t nb, |
645 | | size_t code_size, |
646 | | int k, |
647 | | hamdis_t* __restrict bh_val_, |
648 | 0 | int64_t* __restrict bh_ids_) { |
649 | 0 | HammingComputer hc(ca, code_size); |
650 | |
|
651 | 0 | for (size_t j = 0; j < nb; j++) { |
652 | 0 | int ndiff = hc.hamming(cb); |
653 | 0 | cb += code_size; |
654 | 0 | if (ndiff < bh_val_[0]) { |
655 | 0 | maxheap_replace_top<hamdis_t>(k, bh_val_, bh_ids_, ndiff, j); |
656 | 0 | } |
657 | 0 | } |
658 | 0 | } Unexecuted instantiation: hamming.cpp:_ZN5faissL22hamming_dis_inner_loopINS_19GenHammingComputer8EEEvPKhS3_mmiPiPl Unexecuted instantiation: hamming.cpp:_ZN5faissL22hamming_dis_inner_loopINS_20GenHammingComputer16EEEvPKhS3_mmiPiPl Unexecuted instantiation: hamming.cpp:_ZN5faissL22hamming_dis_inner_loopINS_20GenHammingComputer32EEEvPKhS3_mmiPiPl Unexecuted instantiation: hamming.cpp:_ZN5faissL22hamming_dis_inner_loopINS_20GenHammingComputerM8EEEvPKhS3_mmiPiPl |
659 | | |
660 | | void generalized_hammings_knn_hc( |
661 | | int_maxheap_array_t* __restrict ha, |
662 | | const uint8_t* __restrict a, |
663 | | const uint8_t* __restrict b, |
664 | | size_t nb, |
665 | | size_t code_size, |
666 | 0 | int ordered) { |
667 | 0 | int na = ha->nh; |
668 | 0 | int k = ha->k; |
669 | |
|
670 | 0 | if (ordered) |
671 | 0 | ha->heapify(); |
672 | |
|
673 | 0 | #pragma omp parallel for |
674 | 0 | for (int i = 0; i < na; i++) { |
675 | 0 | const uint8_t* __restrict ca = a + i * code_size; |
676 | 0 | const uint8_t* __restrict cb = b; |
677 | |
|
678 | 0 | hamdis_t* __restrict bh_val_ = ha->val + i * k; |
679 | 0 | int64_t* __restrict bh_ids_ = ha->ids + i * k; |
680 | |
|
681 | 0 | switch (code_size) { |
682 | 0 | case 8: |
683 | 0 | hamming_dis_inner_loop<GenHammingComputer8>( |
684 | 0 | ca, cb, nb, 8, k, bh_val_, bh_ids_); |
685 | 0 | break; |
686 | 0 | case 16: |
687 | 0 | hamming_dis_inner_loop<GenHammingComputer16>( |
688 | 0 | ca, cb, nb, 16, k, bh_val_, bh_ids_); |
689 | 0 | break; |
690 | 0 | case 32: |
691 | 0 | hamming_dis_inner_loop<GenHammingComputer32>( |
692 | 0 | ca, cb, nb, 32, k, bh_val_, bh_ids_); |
693 | 0 | break; |
694 | 0 | default: |
695 | 0 | hamming_dis_inner_loop<GenHammingComputerM8>( |
696 | 0 | ca, cb, nb, code_size, k, bh_val_, bh_ids_); |
697 | 0 | break; |
698 | 0 | } |
699 | 0 | } |
700 | |
|
701 | 0 | if (ordered) |
702 | 0 | ha->reorder(); |
703 | 0 | } |
704 | | |
705 | | void pack_bitstrings( |
706 | | size_t n, |
707 | | size_t M, |
708 | | int nbit, |
709 | | const int32_t* unpacked, |
710 | | uint8_t* packed, |
711 | 0 | size_t code_size) { |
712 | 0 | FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8); |
713 | 0 | #pragma omp parallel for if (n > 1000) |
714 | 0 | for (int64_t i = 0; i < n; i++) { |
715 | 0 | const int32_t* in = unpacked + i * M; |
716 | 0 | uint8_t* out = packed + i * code_size; |
717 | 0 | BitstringWriter wr(out, code_size); |
718 | 0 | for (int j = 0; j < M; j++) { |
719 | 0 | wr.write(in[j], nbit); |
720 | 0 | } |
721 | 0 | } |
722 | 0 | } |
723 | | |
724 | | void pack_bitstrings( |
725 | | size_t n, |
726 | | size_t M, |
727 | | const int32_t* nbit, |
728 | | const int32_t* unpacked, |
729 | | uint8_t* packed, |
730 | 0 | size_t code_size) { |
731 | 0 | int totbit = 0; |
732 | 0 | for (int j = 0; j < M; j++) { |
733 | 0 | totbit += nbit[j]; |
734 | 0 | } |
735 | 0 | FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8); |
736 | 0 | #pragma omp parallel for if (n > 1000) |
737 | 0 | for (int64_t i = 0; i < n; i++) { |
738 | 0 | const int32_t* in = unpacked + i * M; |
739 | 0 | uint8_t* out = packed + i * code_size; |
740 | 0 | BitstringWriter wr(out, code_size); |
741 | 0 | for (int j = 0; j < M; j++) { |
742 | 0 | wr.write(in[j], nbit[j]); |
743 | 0 | } |
744 | 0 | } |
745 | 0 | } |
746 | | |
747 | | void unpack_bitstrings( |
748 | | size_t n, |
749 | | size_t M, |
750 | | int nbit, |
751 | | const uint8_t* packed, |
752 | | size_t code_size, |
753 | 0 | int32_t* unpacked) { |
754 | 0 | FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8); |
755 | 0 | #pragma omp parallel for if (n > 1000) |
756 | 0 | for (int64_t i = 0; i < n; i++) { |
757 | 0 | const uint8_t* in = packed + i * code_size; |
758 | 0 | int32_t* out = unpacked + i * M; |
759 | 0 | BitstringReader rd(in, code_size); |
760 | 0 | for (int j = 0; j < M; j++) { |
761 | 0 | out[j] = rd.read(nbit); |
762 | 0 | } |
763 | 0 | } |
764 | 0 | } |
765 | | |
766 | | void unpack_bitstrings( |
767 | | size_t n, |
768 | | size_t M, |
769 | | const int32_t* nbit, |
770 | | const uint8_t* packed, |
771 | | size_t code_size, |
772 | 0 | int32_t* unpacked) { |
773 | 0 | int totbit = 0; |
774 | 0 | for (int j = 0; j < M; j++) { |
775 | 0 | totbit += nbit[j]; |
776 | 0 | } |
777 | 0 | FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8); |
778 | 0 | #pragma omp parallel for if (n > 1000) |
779 | 0 | for (int64_t i = 0; i < n; i++) { |
780 | 0 | const uint8_t* in = packed + i * code_size; |
781 | 0 | int32_t* out = unpacked + i * M; |
782 | 0 | BitstringReader rd(in, code_size); |
783 | 0 | for (int j = 0; j < M; j++) { |
784 | 0 | out[j] = rd.read(nbit[j]); |
785 | 0 | } |
786 | 0 | } |
787 | 0 | } |
788 | | |
789 | | } // namespace faiss |