/root/doris/contrib/faiss/faiss/Index2Layer.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | // -*- c++ -*- |
9 | | |
10 | | #include <faiss/Index2Layer.h> |
11 | | |
12 | | #include <faiss/impl/platform_macros.h> |
13 | | #include <cassert> |
14 | | #include <cinttypes> |
15 | | #include <cmath> |
16 | | #include <cstdint> |
17 | | #include <cstdio> |
18 | | |
19 | | #ifdef __SSE3__ |
20 | | #include <immintrin.h> |
21 | | #endif |
22 | | |
23 | | #include <algorithm> |
24 | | |
25 | | #include <faiss/IndexIVFPQ.h> |
26 | | |
27 | | #include <faiss/IndexFlat.h> |
28 | | #include <faiss/impl/AuxIndexStructures.h> |
29 | | #include <faiss/impl/FaissAssert.h> |
30 | | #include <faiss/utils/distances.h> |
31 | | #include <faiss/utils/utils.h> |
32 | | |
33 | | namespace faiss { |
34 | | |
35 | | /************************************* |
36 | | * Index2Layer implementation |
37 | | *************************************/ |
38 | | |
39 | | Index2Layer::Index2Layer( |
40 | | Index* quantizer, |
41 | | size_t nlist, |
42 | | int M, |
43 | | int nbit, |
44 | | MetricType metric) |
45 | 0 | : IndexFlatCodes(0, quantizer->d, metric), |
46 | 0 | q1(quantizer, nlist), |
47 | 0 | pq(quantizer->d, M, nbit) { |
48 | 0 | is_trained = false; |
49 | 0 | for (int nbyte = 0; nbyte < 7; nbyte++) { |
50 | 0 | if (((size_t)1 << (8 * nbyte)) >= nlist) { |
51 | 0 | code_size_1 = nbyte; |
52 | 0 | break; |
53 | 0 | } |
54 | 0 | } |
55 | 0 | code_size_2 = pq.code_size; |
56 | 0 | code_size = code_size_1 + code_size_2; |
57 | 0 | } |
58 | | |
59 | 0 | Index2Layer::Index2Layer() { |
60 | 0 | code_size = code_size_1 = code_size_2 = 0; |
61 | 0 | } |
62 | | |
63 | 0 | Index2Layer::~Index2Layer() = default; |
64 | | |
/// Trains the level-1 (coarse) quantizer on x, then trains the level-2
/// product quantizer on the residuals of x w.r.t. the assigned coarse
/// centroids. n: number of training vectors; x: n * d row-major floats.
void Index2Layer::train(idx_t n, const float* x) {
    if (verbose) {
        printf("training level-1 quantizer %" PRId64 " vectors in %dD\n", n, d);
    }

    q1.train_q1(n, x, verbose, metric_type);

    if (verbose) {
        printf("computing residuals\n");
    }

    // remember the input pointer so we can tell below whether a subsampled
    // copy was allocated
    const float* x_in = x;

    // cap the PQ training set size; may return a freshly allocated
    // subsample and update n through the pointer.
    // NOTE(review): the (size_t*)&n cast assumes sizeof(size_t) ==
    // sizeof(idx_t) — true on 64-bit targets; confirm for 32-bit builds.
    x = fvecs_maybe_subsample(
            d,
            (size_t*)&n,
            pq.cp.max_points_per_centroid * pq.ksub,
            x,
            verbose,
            pq.cp.seed);

    // owns the subsample iff one was allocated (i.e. x changed)
    std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);

    std::vector<idx_t> assign(n); // assignment to coarse centroids
    q1.quantizer->assign(n, x, assign.data());
    // residuals w.r.t. the assigned coarse centroids: this is what the
    // level-2 PQ is trained on
    std::vector<float> residuals(n * d);
    for (idx_t i = 0; i < n; i++) {
        q1.quantizer->compute_residual(
                x + i * d, residuals.data() + i * d, assign[i]);
    }

    if (verbose)
        printf("training %zdx%zd product quantizer on %" PRId64
               " vectors in %dD\n",
               pq.M,
               pq.ksub,
               n,
               d);
    pq.verbose = verbose;
    pq.train(n, residuals.data());

    is_trained = true;
}
108 | | |
/// search is not implemented for Index2Layer: it always throws. The class
/// is used as a codec (sa_encode / sa_decode) or converted via
/// transfer_to_IVFPQ for searching.
void Index2Layer::search(
        idx_t /*n*/,
        const float* /*x*/,
        idx_t /*k*/,
        float* /*distances*/,
        idx_t* /*labels*/,
        const SearchParameters* /* params */) const {
    FAISS_THROW_MSG("not implemented");
}
118 | | |
119 | 0 | void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const { |
120 | 0 | FAISS_THROW_IF_NOT(other.nlist == q1.nlist); |
121 | 0 | FAISS_THROW_IF_NOT(other.code_size == code_size_2); |
122 | 0 | FAISS_THROW_IF_NOT(other.ntotal == 0); |
123 | | |
124 | 0 | const uint8_t* rp = codes.data(); |
125 | |
|
126 | 0 | for (idx_t i = 0; i < ntotal; i++) { |
127 | 0 | idx_t key = 0; |
128 | 0 | memcpy(&key, rp, code_size_1); |
129 | 0 | rp += code_size_1; |
130 | 0 | other.invlists->add_entry(key, i, rp); |
131 | 0 | rp += code_size_2; |
132 | 0 | } |
133 | |
|
134 | 0 | other.ntotal = ntotal; |
135 | 0 | } |
136 | | |
137 | | namespace { |
138 | | |
139 | | struct Distance2Level : DistanceComputer { |
140 | | size_t d; |
141 | | const Index2Layer& storage; |
142 | | std::vector<float> buf; |
143 | | const float* q; |
144 | | |
145 | | const float *pq_l1_tab, *pq_l2_tab; |
146 | | |
147 | 0 | explicit Distance2Level(const Index2Layer& storage) : storage(storage) { |
148 | 0 | d = storage.d; |
149 | 0 | FAISS_ASSERT(storage.pq.dsub == 4); |
150 | 0 | pq_l2_tab = storage.pq.centroids.data(); |
151 | 0 | buf.resize(2 * d); |
152 | 0 | } |
153 | | |
154 | 0 | float symmetric_dis(idx_t i, idx_t j) override { |
155 | 0 | storage.reconstruct(i, buf.data()); |
156 | 0 | storage.reconstruct(j, buf.data() + d); |
157 | 0 | return fvec_L2sqr(buf.data() + d, buf.data(), d); |
158 | 0 | } |
159 | | |
160 | 0 | void set_query(const float* x) override { |
161 | 0 | q = x; |
162 | 0 | } |
163 | | }; |
164 | | |
165 | | // well optimized for xNN+PQNN |
166 | | struct DistanceXPQ4 : Distance2Level { |
167 | | int M, k; |
168 | | |
169 | | explicit DistanceXPQ4(const Index2Layer& storage) |
170 | 0 | : Distance2Level(storage) { |
171 | 0 | const IndexFlat* quantizer = |
172 | 0 | dynamic_cast<IndexFlat*>(storage.q1.quantizer); |
173 | |
|
174 | 0 | FAISS_ASSERT(quantizer); |
175 | 0 | M = storage.pq.M; |
176 | 0 | pq_l1_tab = quantizer->get_xb(); |
177 | 0 | } |
178 | | |
179 | 0 | float operator()(idx_t i) override { |
180 | 0 | #ifdef __SSE3__ |
181 | 0 | const uint8_t* code = storage.codes.data() + i * storage.code_size; |
182 | 0 | idx_t key = 0; |
183 | 0 | memcpy(&key, code, storage.code_size_1); |
184 | 0 | code += storage.code_size_1; |
185 | | |
186 | | // walking pointers |
187 | 0 | const float* qa = q; |
188 | 0 | const __m128* l1_t = (const __m128*)(pq_l1_tab + d * key); |
189 | 0 | const __m128* pq_l2_t = (const __m128*)pq_l2_tab; |
190 | 0 | __m128 accu = _mm_setzero_ps(); |
191 | |
|
192 | 0 | for (int m = 0; m < M; m++) { |
193 | 0 | __m128 qi = _mm_loadu_ps(qa); |
194 | 0 | __m128 recons = _mm_add_ps(l1_t[m], pq_l2_t[*code++]); |
195 | 0 | __m128 diff = _mm_sub_ps(qi, recons); |
196 | 0 | accu = _mm_add_ps(accu, _mm_mul_ps(diff, diff)); |
197 | 0 | pq_l2_t += 256; |
198 | 0 | qa += 4; |
199 | 0 | } |
200 | |
|
201 | 0 | accu = _mm_hadd_ps(accu, accu); |
202 | 0 | accu = _mm_hadd_ps(accu, accu); |
203 | 0 | return _mm_cvtss_f32(accu); |
204 | | #else |
205 | | FAISS_THROW_MSG("not implemented for non-x64 platforms"); |
206 | | #endif |
207 | 0 | } |
208 | | }; |
209 | | |
// Specialized distance computer for a 2-sub-quantizer MultiIndexQuantizer
// coarse level + PQ with 4-dim sub-vectors. The coarse key packs two
// sub-keys of mi_nbits bits each; each sub-key selects centroids for half
// of the PQ sub-quantizers.
struct Distance2xXPQ4 : Distance2Level {
    int M_2, mi_nbits; // M_2 = pq.M / 2; mi_nbits = bits per coarse sub-key

    explicit Distance2xXPQ4(const Index2Layer& storage)
            : Distance2Level(storage) {
        const MultiIndexQuantizer* mi =
                dynamic_cast<MultiIndexQuantizer*>(storage.q1.quantizer);

        FAISS_ASSERT(mi);
        FAISS_ASSERT(storage.pq.M % 2 == 0);
        M_2 = storage.pq.M / 2;
        mi_nbits = mi->pq.nbits;
        pq_l1_tab = mi->pq.centroids.data();
    }

    /// distance between the current query and stored vector i
    float operator()(idx_t i) override {
        const uint8_t* code = storage.codes.data() + i * storage.code_size;
        // leading code_size_1 bytes: two packed coarse sub-keys
        int64_t key01 = 0;
        memcpy(&key01, code, storage.code_size_1);
        code += storage.code_size_1;
#ifdef __SSE3__

        // walking pointers
        const float* qa = q;
        const __m128* pq_l1_t = (const __m128*)pq_l1_tab;
        const __m128* pq_l2_t = (const __m128*)pq_l2_tab;
        __m128 accu = _mm_setzero_ps();

        // one iteration per coarse sub-quantizer (low key first)
        for (int mi_m = 0; mi_m < 2; mi_m++) {
            // extract this sub-quantizer's key from the packed key01
            int64_t l1_idx = key01 & (((int64_t)1 << mi_nbits) - 1);
            const __m128* pq_l1 = pq_l1_t + M_2 * l1_idx;

            for (int m = 0; m < M_2; m++) {
                __m128 qi = _mm_loadu_ps(qa);
                // reconstruct 4 dims: coarse centroid slice + PQ entry
                __m128 recons = _mm_add_ps(pq_l1[m], pq_l2_t[*code++]);
                __m128 diff = _mm_sub_ps(qi, recons);
                accu = _mm_add_ps(accu, _mm_mul_ps(diff, diff));
                // stride of 256 __m128 entries per sub-quantizer —
                // presumably assumes 8-bit PQ (ksub == 256); verify
                pq_l2_t += 256;
                qa += 4;
            }
            // advance to the second coarse codebook and consume the key
            pq_l1_t += M_2 << mi_nbits;
            key01 >>= mi_nbits;
        }
        // horizontal sum of the 4 partial squared distances
        accu = _mm_hadd_ps(accu, accu);
        accu = _mm_hadd_ps(accu, accu);
        return _mm_cvtss_f32(accu);
#else
        FAISS_THROW_MSG("not implemented for non-x64 platforms");
#endif
    }
};
262 | | |
263 | | } // namespace |
264 | | |
265 | 0 | DistanceComputer* Index2Layer::get_distance_computer() const { |
266 | 0 | #ifdef __SSE3__ |
267 | 0 | const MultiIndexQuantizer* mi = |
268 | 0 | dynamic_cast<MultiIndexQuantizer*>(q1.quantizer); |
269 | |
|
270 | 0 | if (mi && pq.M % 2 == 0 && pq.dsub == 4) { |
271 | 0 | return new Distance2xXPQ4(*this); |
272 | 0 | } |
273 | | |
274 | 0 | const IndexFlat* fl = dynamic_cast<IndexFlat*>(q1.quantizer); |
275 | |
|
276 | 0 | if (fl && pq.dsub == 4) { |
277 | 0 | return new DistanceXPQ4(*this); |
278 | 0 | } |
279 | 0 | #endif |
280 | | |
281 | 0 | return Index::get_distance_computer(); |
282 | 0 | } |
283 | | |
284 | | /* The standalone codec interface */ |
285 | | |
// Block size (in vectors) used by Index2Layer::sa_encode to chunk its
// input, bounding the size of the temporary residual / list-number buffers.
int index2layer_sa_encode_bs = 32768;
288 | | |
289 | 0 | void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const { |
290 | 0 | FAISS_THROW_IF_NOT(is_trained); |
291 | | |
292 | 0 | idx_t bs = index2layer_sa_encode_bs; |
293 | 0 | if (n > bs) { |
294 | 0 | for (idx_t i0 = 0; i0 < n; i0 += bs) { |
295 | 0 | idx_t i1 = std::min(i0 + bs, n); |
296 | 0 | if (verbose) { |
297 | 0 | printf("Index2Layer::add: adding %" PRId64 ":%" PRId64 |
298 | 0 | " / %" PRId64 "\n", |
299 | 0 | i0, |
300 | 0 | i1, |
301 | 0 | n); |
302 | 0 | } |
303 | 0 | sa_encode(i1 - i0, x + i0 * d, bytes + i0 * code_size); |
304 | 0 | } |
305 | 0 | return; |
306 | 0 | } |
307 | | |
308 | 0 | std::unique_ptr<int64_t[]> list_nos(new int64_t[n]); |
309 | 0 | q1.quantizer->assign(n, x, list_nos.get()); |
310 | 0 | std::vector<float> residuals(n * d); |
311 | 0 | for (idx_t i = 0; i < n; i++) { |
312 | 0 | q1.quantizer->compute_residual( |
313 | 0 | x + i * d, residuals.data() + i * d, list_nos[i]); |
314 | 0 | } |
315 | 0 | pq.compute_codes(residuals.data(), bytes, n); |
316 | |
|
317 | 0 | for (idx_t i = n - 1; i >= 0; i--) { |
318 | 0 | uint8_t* code = bytes + i * code_size; |
319 | 0 | memmove(code + code_size_1, bytes + i * code_size_2, code_size_2); |
320 | 0 | q1.encode_listno(list_nos[i], code); |
321 | 0 | } |
322 | 0 | } |
323 | | |
/// Decodes n codes back to vectors: for each entry, decode the PQ residual
/// and add back the coarse centroid identified by the leading list number.
void Index2Layer::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
#pragma omp parallel
    {
        // per-thread scratch; despite the name it holds the reconstructed
        // coarse centroid, not a residual
        std::vector<float> residual(d);

#pragma omp for
        for (idx_t i = 0; i < n; i++) {
            const uint8_t* code = bytes + i * code_size;
            int64_t list_no = q1.decode_listno(code);
            float* xi = x + i * d;
            // decode the PQ part (the residual) directly into the output...
            pq.decode(code + code_size_1, xi);
            // ...then add back the coarse centroid
            q1.quantizer->reconstruct(list_no, residual.data());
            for (int j = 0; j < d; j++) {
                xi[j] += residual[j];
            }
        }
    }
}
342 | | |
343 | | } // namespace faiss |