Coverage Report

Created: 2025-10-24 11:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/IndexIVFPQR.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#include <faiss/IndexIVFPQR.h>
11
12
#include <cinttypes>
13
14
#include <faiss/utils/Heap.h>
15
#include <faiss/utils/distances.h>
16
#include <faiss/utils/utils.h>
17
18
#include <faiss/impl/FaissAssert.h>
19
20
namespace faiss {
21
22
/*****************************************
23
 * IndexIVFPQR implementation
24
 ******************************************/
25
26
IndexIVFPQR::IndexIVFPQR(
27
        Index* quantizer,
28
        size_t d,
29
        size_t nlist,
30
        size_t M,
31
        size_t nbits_per_idx,
32
        size_t M_refine,
33
        size_t nbits_per_idx_refine)
34
0
        : IndexIVFPQ(quantizer, d, nlist, M, nbits_per_idx),
35
0
          refine_pq(d, M_refine, nbits_per_idx_refine),
36
0
          k_factor(4) {
37
0
    by_residual = true;
38
0
    refine_pq.cp.max_points_per_centroid = 1000;
39
0
}
40
41
0
IndexIVFPQR::IndexIVFPQR() : k_factor(1) {
42
0
    by_residual = true;
43
0
    refine_pq.cp.max_points_per_centroid = 1000;
44
0
}
45
46
0
void IndexIVFPQR::reset() {
47
0
    IndexIVFPQ::reset();
48
0
    refine_codes.clear();
49
0
}
50
51
0
void IndexIVFPQR::train_encoder(idx_t n, const float* x, const idx_t* assign) {
52
0
    IndexIVFPQ::train_encoder(n, x, assign);
53
0
    if (verbose) {
54
0
        printf("training %zdx%zd 2nd level PQ quantizer on %" PRId64
55
0
               " %dD-vectors\n",
56
0
               refine_pq.M,
57
0
               refine_pq.ksub,
58
0
               n,
59
0
               d);
60
0
    }
61
0
    refine_pq.cp.verbose = verbose;
62
63
    // 2nd level residual
64
0
    std::vector<float> residual_2(n * d);
65
0
    std::vector<uint8_t> train_codes(pq.code_size * n);
66
0
    pq.compute_codes(x, train_codes.data(), n);
67
68
0
    for (idx_t i = 0; i < n; i++) {
69
0
        const float* xx = x + i * d;
70
0
        float* res = residual_2.data() + i * d;
71
0
        pq.decode(train_codes.data() + i * pq.code_size, res);
72
0
        for (int j = 0; j < d; j++) {
73
0
            res[j] = xx[j] - res[j];
74
0
        }
75
0
    }
76
77
0
    refine_pq.train(n, residual_2.data());
78
0
}
79
80
0
idx_t IndexIVFPQR::train_encoder_num_vectors() const {
81
0
    return std::max(
82
0
            pq.cp.max_points_per_centroid * pq.ksub,
83
0
            refine_pq.cp.max_points_per_centroid * refine_pq.ksub);
84
0
}
85
86
0
void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
87
0
    add_core(n, x, xids, nullptr);
88
0
}
89
90
void IndexIVFPQR::add_core(
91
        idx_t n,
92
        const float* x,
93
        const idx_t* xids,
94
        const idx_t* precomputed_idx,
95
0
        void* /*inverted_list_context*/) {
96
0
    std::unique_ptr<float[]> residual_2(new float[n * d]);
97
98
0
    idx_t n0 = ntotal;
99
100
0
    add_core_o(n, x, xids, residual_2.get(), precomputed_idx);
101
102
0
    refine_codes.resize(ntotal * refine_pq.code_size);
103
104
0
    refine_pq.compute_codes(
105
0
            residual_2.get(), &refine_codes[n0 * refine_pq.code_size], n);
106
0
}
107
0
#define TIC t0 = get_cycles()
108
0
#define TOC get_cycles() - t0
109
110
void IndexIVFPQR::search_preassigned(
111
        idx_t n,
112
        const float* x,
113
        idx_t k,
114
        const idx_t* idx,
115
        const float* L1_dis,
116
        float* distances,
117
        idx_t* labels,
118
        bool store_pairs,
119
        const IVFSearchParameters* params,
120
0
        IndexIVFStats* stats) const {
121
0
    uint64_t t0;
122
0
    TIC;
123
0
    size_t k_coarse = long(k * k_factor);
124
0
    std::unique_ptr<idx_t[]> coarse_labels(new idx_t[k_coarse * n]);
125
0
    {
126
        // query with quantizer levels 1 and 2.
127
0
        std::unique_ptr<float[]> coarse_distances(new float[k_coarse * n]);
128
129
0
        IndexIVFPQ::search_preassigned(
130
0
                n,
131
0
                x,
132
0
                k_coarse,
133
0
                idx,
134
0
                L1_dis,
135
0
                coarse_distances.get(),
136
0
                coarse_labels.get(),
137
0
                true,
138
0
                params);
139
0
    }
140
141
0
    indexIVFPQ_stats.search_cycles += TOC;
142
143
0
    TIC;
144
145
    // 3rd level refinement
146
0
    size_t n_refine = 0;
147
0
#pragma omp parallel reduction(+ : n_refine)
148
0
    {
149
        // tmp buffers
150
0
        std::unique_ptr<float[]> residual_1(new float[2 * d]);
151
0
        float* residual_2 = residual_1.get() + d;
152
0
#pragma omp for
153
0
        for (idx_t i = 0; i < n; i++) {
154
0
            const float* xq = x + i * d;
155
0
            const idx_t* shortlist = coarse_labels.get() + k_coarse * i;
156
0
            float* heap_sim = distances + k * i;
157
0
            idx_t* heap_ids = labels + k * i;
158
0
            maxheap_heapify(k, heap_sim, heap_ids);
159
160
0
            for (int j = 0; j < k_coarse; j++) {
161
0
                idx_t sl = shortlist[j];
162
163
0
                if (sl == -1)
164
0
                    continue;
165
166
0
                int list_no = lo_listno(sl);
167
0
                int ofs = lo_offset(sl);
168
169
0
                assert(list_no >= 0 && list_no < nlist);
170
0
                assert(ofs >= 0 && ofs < invlists->list_size(list_no));
171
172
                // 1st level residual
173
0
                quantizer->compute_residual(xq, residual_1.get(), list_no);
174
175
                // 2nd level residual
176
0
                const uint8_t* l2code = invlists->get_single_code(list_no, ofs);
177
178
0
                pq.decode(l2code, residual_2);
179
0
                for (int l = 0; l < d; l++)
180
0
                    residual_2[l] = residual_1[l] - residual_2[l];
181
182
                // 3rd level residual's approximation
183
0
                idx_t id = invlists->get_single_id(list_no, ofs);
184
0
                assert(0 <= id && id < ntotal);
185
0
                refine_pq.decode(
186
0
                        &refine_codes[id * refine_pq.code_size],
187
0
                        residual_1.get());
188
189
0
                float dis = fvec_L2sqr(residual_1.get(), residual_2, d);
190
191
0
                if (dis < heap_sim[0]) {
192
0
                    idx_t id_or_pair = store_pairs ? sl : id;
193
0
                    maxheap_replace_top(k, heap_sim, heap_ids, dis, id_or_pair);
194
0
                }
195
0
                n_refine++;
196
0
            }
197
0
            maxheap_reorder(k, heap_sim, heap_ids);
198
0
        }
199
0
    }
200
0
    indexIVFPQ_stats.nrefine += n_refine;
201
0
    indexIVFPQ_stats.refine_cycles += TOC;
202
0
}
203
204
void IndexIVFPQR::reconstruct_from_offset(
205
        int64_t list_no,
206
        int64_t offset,
207
0
        float* recons) const {
208
0
    IndexIVFPQ::reconstruct_from_offset(list_no, offset, recons);
209
210
0
    idx_t id = invlists->get_single_id(list_no, offset);
211
0
    assert(0 <= id && id < ntotal);
212
213
0
    std::vector<float> r3(d);
214
0
    refine_pq.decode(&refine_codes[id * refine_pq.code_size], r3.data());
215
0
    for (int i = 0; i < d; ++i) {
216
0
        recons[i] += r3[i];
217
0
    }
218
0
}
219
220
0
void IndexIVFPQR::merge_from(Index& otherIndex, idx_t add_id) {
221
0
    IndexIVFPQR* other = dynamic_cast<IndexIVFPQR*>(&otherIndex);
222
0
    FAISS_THROW_IF_NOT(other);
223
224
0
    IndexIVF::merge_from(otherIndex, add_id);
225
226
0
    refine_codes.insert(
227
0
            refine_codes.end(),
228
0
            other->refine_codes.begin(),
229
0
            other->refine_codes.end());
230
0
    other->refine_codes.clear();
231
0
}
232
233
0
size_t IndexIVFPQR::remove_ids(const IDSelector& /*sel*/) {
234
0
    FAISS_THROW_MSG("not implemented");
235
0
    return 0;
236
0
}
237
238
} // namespace faiss