Coverage Report

Created: 2025-09-15 16:31

/root/doris/contrib/faiss/faiss/Index2Layer.cpp
All instrumented lines below have an execution count of 0: nothing in this file is exercised by the test run.

  Line|  Count| Source
     1|       | /*
     2|       |  * Copyright (c) Meta Platforms, Inc. and affiliates.
     3|       |  *
     4|       |  * This source code is licensed under the MIT license found in the
     5|       |  * LICENSE file in the root directory of this source tree.
     6|       |  */
     7|       |
     8|       | // -*- c++ -*-
     9|       |
    10|       | #include <faiss/Index2Layer.h>
    11|       |
    12|       | #include <faiss/impl/platform_macros.h>
    13|       | #include <cassert>
    14|       | #include <cinttypes>
    15|       | #include <cmath>
    16|       | #include <cstdint>
    17|       | #include <cstdio>
    18|       |
    19|       | #ifdef __SSE3__
    20|       | #include <immintrin.h>
    21|       | #endif
    22|       |
    23|       | #include <algorithm>
    24|       |
    25|       | #include <faiss/IndexIVFPQ.h>
    26|       |
    27|       | #include <faiss/IndexFlat.h>
    28|       | #include <faiss/impl/AuxIndexStructures.h>
    29|       | #include <faiss/impl/FaissAssert.h>
    30|       | #include <faiss/utils/distances.h>
    31|       | #include <faiss/utils/utils.h>
    32|       |
    33|       | namespace faiss {
    34|       |
    35|       | /*************************************
    36|       |  * Index2Layer implementation
    37|       |  *************************************/
    38|       |
    39|       | Index2Layer::Index2Layer(
    40|       |         Index* quantizer,
    41|       |         size_t nlist,
    42|       |         int M,
    43|       |         int nbit,
    44|       |         MetricType metric)
    45|      0|         : IndexFlatCodes(0, quantizer->d, metric),
    46|      0|           q1(quantizer, nlist),
    47|      0|           pq(quantizer->d, M, nbit) {
    48|      0|     is_trained = false;
    49|      0|     for (int nbyte = 0; nbyte < 7; nbyte++) {
    50|      0|         if (((size_t)1 << (8 * nbyte)) >= nlist) {
    51|      0|             code_size_1 = nbyte;
    52|      0|             break;
    53|      0|         }
    54|      0|     }
    55|      0|     code_size_2 = pq.code_size;
    56|      0|     code_size = code_size_1 + code_size_2;
    57|      0| }
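
Note: the constructor lays out each stored code as a coarse list id followed by a PQ code: code_size_1 is the smallest byte count whose range covers nlist, and code_size_2 is pq.code_size. A minimal standalone sketch of that arithmetic, using assumed values (nlist = 1000, M = 8, nbit = 8; none of these come from the report):

    #include <cstddef>
    #include <cstdio>

    int main() {
        size_t nlist = 1000;    // assumed number of coarse centroids
        size_t M = 8, nbit = 8; // assumed PQ shape: 8 sub-quantizers, 8 bits each
        size_t code_size_1 = 0;
        // same search as the constructor: smallest nbyte with 2^(8*nbyte) >= nlist
        for (int nbyte = 0; nbyte < 7; nbyte++) {
            if (((size_t)1 << (8 * nbyte)) >= nlist) {
                code_size_1 = nbyte;
                break;
            }
        }
        size_t code_size_2 = (M * nbit + 7) / 8; // pq.code_size for this shape
        // prints "code = 2 + 8 = 10 bytes"
        printf("code = %zu + %zu = %zu bytes\n",
               code_size_1, code_size_2, code_size_1 + code_size_2);
        return 0;
    }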
    58|       |
    59|      0| Index2Layer::Index2Layer() {
    60|      0|     code_size = code_size_1 = code_size_2 = 0;
    61|      0| }
    62|       |
    63|      0| Index2Layer::~Index2Layer() = default;
    64|       |
    65|      0| void Index2Layer::train(idx_t n, const float* x) {
    66|      0|     if (verbose) {
    67|      0|         printf("training level-1 quantizer %" PRId64 " vectors in %dD\n", n, d);
    68|      0|     }
    69|       |
    70|      0|     q1.train_q1(n, x, verbose, metric_type);
    71|       |
    72|      0|     if (verbose) {
    73|      0|         printf("computing residuals\n");
    74|      0|     }
    75|       |
    76|      0|     const float* x_in = x;
    77|       |
    78|      0|     x = fvecs_maybe_subsample(
    79|      0|             d,
    80|      0|             (size_t*)&n,
    81|      0|             pq.cp.max_points_per_centroid * pq.ksub,
    82|      0|             x,
    83|      0|             verbose,
    84|      0|             pq.cp.seed);
    85|       |
    86|      0|     std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);
    87|       |
    88|      0|     std::vector<idx_t> assign(n); // assignment to coarse centroids
    89|      0|     q1.quantizer->assign(n, x, assign.data());
    90|      0|     std::vector<float> residuals(n * d);
    91|      0|     for (idx_t i = 0; i < n; i++) {
    92|      0|         q1.quantizer->compute_residual(
    93|      0|                 x + i * d, residuals.data() + i * d, assign[i]);
    94|      0|     }
    95|       |
    96|      0|     if (verbose)
    97|      0|         printf("training %zdx%zd product quantizer on %" PRId64
    98|      0|                " vectors in %dD\n",
    99|      0|                pq.M,
   100|      0|                pq.ksub,
   101|      0|                n,
   102|      0|                d);
   103|      0|     pq.verbose = verbose;
   104|      0|     pq.train(n, residuals.data());
   105|       |
   106|      0|     is_trained = true;
   107|      0| }
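
Note: train() runs in three steps: train the level-1 quantizer (train_q1), compute each training vector's residual against its assigned coarse centroid, then train the product quantizer on those residuals. A minimal usage sketch with a flat L2 coarse quantizer and made-up sizes (nothing here comes from the report):

    #include <faiss/Index2Layer.h>
    #include <faiss/IndexFlat.h>

    #include <random>
    #include <vector>

    int main() {
        int d = 32;          // assumed dimension; d / M == 4 keeps pq.dsub == 4
        size_t nlist = 1000; // assumed number of coarse centroids
        faiss::IndexFlatL2 quantizer(d); // level-1 quantizer, trained by train_q1
        faiss::Index2Layer index(&quantizer, nlist, 8 /*M*/, 8 /*nbit*/);

        // synthetic training data
        std::vector<float> xt(50000 * size_t(d));
        std::mt19937 rng(123);
        std::uniform_real_distribution<float> dis;
        for (float& v : xt) {
            v = dis(rng);
        }

        index.train(50000, xt.data()); // after this, index.is_trained == true
        return 0;
    }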
   108|       |
   109|       | void Index2Layer::search(
   110|       |         idx_t /*n*/,
   111|       |         const float* /*x*/,
   112|       |         idx_t /*k*/,
   113|       |         float* /*distances*/,
   114|       |         idx_t* /*labels*/,
   115|      0|         const SearchParameters* /* params */) const {
   116|      0|     FAISS_THROW_MSG("not implemented");
   117|      0| }
   118|       |
   119|      0| void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const {
   120|      0|     FAISS_THROW_IF_NOT(other.nlist == q1.nlist);
   121|      0|     FAISS_THROW_IF_NOT(other.code_size == code_size_2);
   122|      0|     FAISS_THROW_IF_NOT(other.ntotal == 0);
   123|       |
   124|      0|     const uint8_t* rp = codes.data();
   125|       |
   126|      0|     for (idx_t i = 0; i < ntotal; i++) {
   127|      0|         idx_t key = 0;
   128|      0|         memcpy(&key, rp, code_size_1);
   129|      0|         rp += code_size_1;
   130|      0|         other.invlists->add_entry(key, i, rp);
   131|      0|         rp += code_size_2;
   132|      0|     }
   133|       |
   134|      0|     other.ntotal = ntotal;
   135|      0| }
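
Note: transfer_to_IVFPQ unpacks every stored code into a (list id, PQ code) pair and appends it to the matching inverted list; the target must be empty and agree on nlist and PQ code size. A hedged hand-off sketch that glosses over the rest of the IVFPQ setup (the helper and its preconditions are hypothetical, not from the report):

    #include <faiss/Index2Layer.h>
    #include <faiss/IndexIVFPQ.h>

    // index2: trained and populated; ivfpq: freshly constructed around the
    // same coarse quantizer with identical nlist, M, and nbits.
    void to_ivfpq(const faiss::Index2Layer& index2, faiss::IndexIVFPQ& ivfpq) {
        ivfpq.pq = index2.pq; // reuse the trained product quantizer
        ivfpq.is_trained = true;
        // checks nlist / code_size / emptiness, then fills the inverted lists
        index2.transfer_to_IVFPQ(ivfpq);
    }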
   136|       |
   137|       | namespace {
   138|       |
   139|       | struct Distance2Level : DistanceComputer {
   140|       |     size_t d;
   141|       |     const Index2Layer& storage;
   142|       |     std::vector<float> buf;
   143|       |     const float* q;
   144|       |
   145|       |     const float *pq_l1_tab, *pq_l2_tab;
   146|       |
   147|      0|     explicit Distance2Level(const Index2Layer& storage) : storage(storage) {
   148|      0|         d = storage.d;
   149|      0|         FAISS_ASSERT(storage.pq.dsub == 4);
   150|      0|         pq_l2_tab = storage.pq.centroids.data();
   151|      0|         buf.resize(2 * d);
   152|      0|     }
   153|       |
   154|      0|     float symmetric_dis(idx_t i, idx_t j) override {
   155|      0|         storage.reconstruct(i, buf.data());
   156|      0|         storage.reconstruct(j, buf.data() + d);
   157|      0|         return fvec_L2sqr(buf.data() + d, buf.data(), d);
   158|      0|     }
   159|       |
   160|      0|     void set_query(const float* x) override {
   161|      0|         q = x;
   162|      0|     }
   163|       | };
   164|       |
   165|       | // well optimized for xNN+PQNN
   166|       | struct DistanceXPQ4 : Distance2Level {
   167|       |     int M, k;
   168|       |
   169|       |     explicit DistanceXPQ4(const Index2Layer& storage)
   170|      0|             : Distance2Level(storage) {
   171|      0|         const IndexFlat* quantizer =
   172|      0|                 dynamic_cast<IndexFlat*>(storage.q1.quantizer);
   173|       |
   174|      0|         FAISS_ASSERT(quantizer);
   175|      0|         M = storage.pq.M;
   176|      0|         pq_l1_tab = quantizer->get_xb();
   177|      0|     }
   178|       |
   179|      0|     float operator()(idx_t i) override {
   180|      0| #ifdef __SSE3__
   181|      0|         const uint8_t* code = storage.codes.data() + i * storage.code_size;
   182|      0|         idx_t key = 0;
   183|      0|         memcpy(&key, code, storage.code_size_1);
   184|      0|         code += storage.code_size_1;
   185|       |
   186|       |         // walking pointers
   187|      0|         const float* qa = q;
   188|      0|         const __m128* l1_t = (const __m128*)(pq_l1_tab + d * key);
   189|      0|         const __m128* pq_l2_t = (const __m128*)pq_l2_tab;
   190|      0|         __m128 accu = _mm_setzero_ps();
   191|       |
   192|      0|         for (int m = 0; m < M; m++) {
   193|      0|             __m128 qi = _mm_loadu_ps(qa);
   194|      0|             __m128 recons = _mm_add_ps(l1_t[m], pq_l2_t[*code++]);
   195|      0|             __m128 diff = _mm_sub_ps(qi, recons);
   196|      0|             accu = _mm_add_ps(accu, _mm_mul_ps(diff, diff));
   197|      0|             pq_l2_t += 256;
   198|      0|             qa += 4;
   199|      0|         }
   200|       |
   201|      0|         accu = _mm_hadd_ps(accu, accu);
   202|      0|         accu = _mm_hadd_ps(accu, accu);
   203|      0|         return _mm_cvtss_f32(accu);
   204|       | #else
   205|       |         FAISS_THROW_MSG("not implemented for non-x64 platforms");
   206|       | #endif
   207|      0|     }
   208|       | };
   209|       |
   210|       | // well optimized for 2xNN+PQNN
   211|       | struct Distance2xXPQ4 : Distance2Level {
   212|       |     int M_2, mi_nbits;
   213|       |
   214|       |     explicit Distance2xXPQ4(const Index2Layer& storage)
   215|      0|             : Distance2Level(storage) {
   216|      0|         const MultiIndexQuantizer* mi =
   217|      0|                 dynamic_cast<MultiIndexQuantizer*>(storage.q1.quantizer);
   218|       |
   219|      0|         FAISS_ASSERT(mi);
   220|      0|         FAISS_ASSERT(storage.pq.M % 2 == 0);
   221|      0|         M_2 = storage.pq.M / 2;
   222|      0|         mi_nbits = mi->pq.nbits;
   223|      0|         pq_l1_tab = mi->pq.centroids.data();
   224|      0|     }
   225|       |
   226|      0|     float operator()(idx_t i) override {
   227|      0|         const uint8_t* code = storage.codes.data() + i * storage.code_size;
   228|      0|         int64_t key01 = 0;
   229|      0|         memcpy(&key01, code, storage.code_size_1);
   230|      0|         code += storage.code_size_1;
   231|      0| #ifdef __SSE3__
   232|       |
   233|       |         // walking pointers
   234|      0|         const float* qa = q;
   235|      0|         const __m128* pq_l1_t = (const __m128*)pq_l1_tab;
   236|      0|         const __m128* pq_l2_t = (const __m128*)pq_l2_tab;
   237|      0|         __m128 accu = _mm_setzero_ps();
   238|       |
   239|      0|         for (int mi_m = 0; mi_m < 2; mi_m++) {
   240|      0|             int64_t l1_idx = key01 & (((int64_t)1 << mi_nbits) - 1);
   241|      0|             const __m128* pq_l1 = pq_l1_t + M_2 * l1_idx;
   242|       |
   243|      0|             for (int m = 0; m < M_2; m++) {
   244|      0|                 __m128 qi = _mm_loadu_ps(qa);
   245|      0|                 __m128 recons = _mm_add_ps(pq_l1[m], pq_l2_t[*code++]);
   246|      0|                 __m128 diff = _mm_sub_ps(qi, recons);
   247|      0|                 accu = _mm_add_ps(accu, _mm_mul_ps(diff, diff));
   248|      0|                 pq_l2_t += 256;
   249|      0|                 qa += 4;
   250|      0|             }
   251|      0|             pq_l1_t += M_2 << mi_nbits;
   252|      0|             key01 >>= mi_nbits;
   253|      0|         }
   254|      0|         accu = _mm_hadd_ps(accu, accu);
   255|      0|         accu = _mm_hadd_ps(accu, accu);
   256|      0|         return _mm_cvtss_f32(accu);
   257|       | #else
   258|       |         FAISS_THROW_MSG("not implemented for non-x64 platforms");
   259|       | #endif
   260|      0|     }
   261|       | };
   262|       |
   263|       | } // namespace
   264|       |
   265|      0| DistanceComputer* Index2Layer::get_distance_computer() const {
   266|      0| #ifdef __SSE3__
   267|      0|     const MultiIndexQuantizer* mi =
   268|      0|             dynamic_cast<MultiIndexQuantizer*>(q1.quantizer);
   269|       |
   270|      0|     if (mi && pq.M % 2 == 0 && pq.dsub == 4) {
   271|      0|         return new Distance2xXPQ4(*this);
   272|      0|     }
   273|       |
   274|      0|     const IndexFlat* fl = dynamic_cast<IndexFlat*>(q1.quantizer);
   275|       |
   276|      0|     if (fl && pq.dsub == 4) {
   277|      0|         return new DistanceXPQ4(*this);
   278|      0|     }
   279|      0| #endif
   280|       |
   281|      0|     return Index::get_distance_computer();
   282|      0| }
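
Note: both SSE paths above reconstruct a stored vector four floats at a time as (level-1 centroid + PQ centroid) and accumulate the squared difference against the query, which is why they require pq.dsub == 4 and why pq_l2_t advances by 256 __m128 entries per step (one 256-entry table of 4-float centroids per sub-quantizer). A scalar rendition of DistanceXPQ4::operator(), for illustration only; names mirror the struct's fields and the layouts are as in the file:

    #include <cstdint>
    #include <cstring>

    float xpq4_distance_scalar(
            const float* q,         // query, d floats
            const uint8_t* code,    // one packed code: list id + M PQ bytes
            const float* pq_l1_tab, // IndexFlat centroids, nlist * d floats
            const float* pq_l2_tab, // PQ centroids, M * 256 * 4 floats
            size_t d,
            size_t code_size_1,
            int M) {
        int64_t key = 0;
        memcpy(&key, code, code_size_1); // little-endian list id
        code += code_size_1;

        const float* l1 = pq_l1_tab + d * key; // coarse centroid for this vector
        const float* l2 = pq_l2_tab;
        float accu = 0;
        for (int m = 0; m < M; m++) {
            const float* cent = l2 + 4 * size_t(*code++); // PQ centroid, 4 floats
            for (int j = 0; j < 4; j++) {
                float diff = q[j] - (l1[j] + cent[j]); // reconstruct and compare
                accu += diff * diff;
            }
            q += 4;
            l1 += 4;
            l2 += 256 * 4; // next sub-quantizer's centroid table
        }
        return accu;
    }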
   283|       |
   284|       | /* The standalone codec interface */
   285|       |
   286|       | // block size used in Index2Layer::sa_encode
   287|       | int index2layer_sa_encode_bs = 32768;
   288|       |
   289|      0| void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
   290|      0|     FAISS_THROW_IF_NOT(is_trained);
   291|       |
   292|      0|     idx_t bs = index2layer_sa_encode_bs;
   293|      0|     if (n > bs) {
   294|      0|         for (idx_t i0 = 0; i0 < n; i0 += bs) {
   295|      0|             idx_t i1 = std::min(i0 + bs, n);
   296|      0|             if (verbose) {
   297|      0|                 printf("Index2Layer::add: adding %" PRId64 ":%" PRId64
   298|      0|                        " / %" PRId64 "\n",
   299|      0|                        i0,
   300|      0|                        i1,
   301|      0|                        n);
   302|      0|             }
   303|      0|             sa_encode(i1 - i0, x + i0 * d, bytes + i0 * code_size);
   304|      0|         }
   305|      0|         return;
   306|      0|     }
   307|       |
   308|      0|     std::unique_ptr<int64_t[]> list_nos(new int64_t[n]);
   309|      0|     q1.quantizer->assign(n, x, list_nos.get());
   310|      0|     std::vector<float> residuals(n * d);
   311|      0|     for (idx_t i = 0; i < n; i++) {
   312|      0|         q1.quantizer->compute_residual(
   313|      0|                 x + i * d, residuals.data() + i * d, list_nos[i]);
   314|      0|     }
   315|      0|     pq.compute_codes(residuals.data(), bytes, n);
   316|       |
   317|      0|     for (idx_t i = n - 1; i >= 0; i--) {
   318|      0|         uint8_t* code = bytes + i * code_size;
   319|      0|         memmove(code + code_size_1, bytes + i * code_size_2, code_size_2);
   320|      0|         q1.encode_listno(list_nos[i], code);
   321|      0|     }
   322|      0| }
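
Note: by the time the final loop of sa_encode runs, pq.compute_codes has left the n PQ codes densely packed at the start of the output buffer; the loop then moves each code into its final code_size-byte slot and writes the encoded list number in front of it. Iterating from the last vector down ensures no code is overwritten before it has been moved. A toy model of that repacking with made-up sizes (not the faiss implementation):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
        const size_t n = 3, cs1 = 2, cs2 = 4, cs = cs1 + cs2;
        uint8_t bytes[n * cs] = {0};
        // pretend pq.compute_codes left these PQ codes densely packed
        const uint8_t pq_codes[n * cs2] = {
                0xA0, 0xA1, 0xA2, 0xA3,  // vector 0
                0xB0, 0xB1, 0xB2, 0xB3,  // vector 1
                0xC0, 0xC1, 0xC2, 0xC3}; // vector 2
        memcpy(bytes, pq_codes, sizeof(pq_codes));
        const uint16_t list_nos[n] = {7, 42, 999}; // fake coarse list ids

        for (size_t i = n; i-- > 0;) {
            uint8_t* code = bytes + i * cs;
            memmove(code + cs1, bytes + i * cs2, cs2); // spread the PQ code out
            memcpy(code, &list_nos[i], cs1); // prepend list id (little-endian)
        }
        // prints "07 00 A0 A1 A2 A3 2A 00 B0 B1 B2 B3 E7 03 C0 C1 C2 C3"
        for (size_t i = 0; i < n * cs; i++) {
            printf("%02X ", bytes[i]);
        }
        printf("\n");
        return 0;
    }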
   323|       |
   324|      0| void Index2Layer::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
   325|      0| #pragma omp parallel
   326|      0|     {
   327|      0|         std::vector<float> residual(d);
   328|       |
   329|      0| #pragma omp for
   330|      0|         for (idx_t i = 0; i < n; i++) {
   331|      0|             const uint8_t* code = bytes + i * code_size;
   332|      0|             int64_t list_no = q1.decode_listno(code);
   333|      0|             float* xi = x + i * d;
   334|      0|             pq.decode(code + code_size_1, xi);
   335|      0|             q1.quantizer->reconstruct(list_no, residual.data());
   336|      0|             for (int j = 0; j < d; j++) {
   337|      0|                 xi[j] += residual[j];
   338|      0|             }
   339|      0|         }
   340|      0|     }
   341|      0| }
   342|       |
   343|       | } // namespace faiss
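
Note: sa_encode and sa_decode together form the standalone codec: each vector is stored as [encoded list number | PQ code of the residual], and decoding adds the coarse centroid back to the PQ reconstruction. A round-trip sketch, assuming an index trained as in the earlier sketch:

    #include <faiss/Index2Layer.h>

    #include <cstdint>
    #include <vector>

    // x points to n * index.d floats
    void round_trip(const faiss::Index2Layer& index, const float* x, faiss::idx_t n) {
        std::vector<uint8_t> codes(n * index.sa_code_size());
        index.sa_encode(n, x, codes.data()); // packs [list id | PQ code] per vector

        std::vector<float> recons(n * index.d);
        index.sa_decode(n, codes.data(), recons.data());
        // recons now holds the lossy reconstruction of x
    }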