Coverage Report

Created: 2025-11-19 22:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/IndexIVFSpectralHash.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#include <faiss/IndexIVFSpectralHash.h>
11
12
#include <algorithm>
13
#include <cstdint>
14
#include <memory>
15
16
#include <faiss/IndexLSH.h>
17
#include <faiss/IndexPreTransform.h>
18
#include <faiss/VectorTransform.h>
19
#include <faiss/impl/AuxIndexStructures.h>
20
#include <faiss/impl/FaissAssert.h>
21
#include <faiss/utils/hamming.h>
22
23
namespace faiss {
24
25
IndexIVFSpectralHash::IndexIVFSpectralHash(
26
        Index* quantizer,
27
        size_t d,
28
        size_t nlist,
29
        int nbit,
30
        float period)
31
0
        : IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
32
0
          nbit(nbit),
33
0
          period(period) {
34
0
    RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
35
0
    rr->init(1234);
36
0
    vt = rr;
37
0
    is_trained = false;
38
0
    by_residual = false;
39
0
}
40
41
0
IndexIVFSpectralHash::IndexIVFSpectralHash() : IndexIVF() {
42
0
    by_residual = false;
43
0
}
44
45
0
IndexIVFSpectralHash::~IndexIVFSpectralHash() {
46
0
    if (own_fields) {
47
0
        delete vt;
48
0
    }
49
0
}
50
51
namespace {
52
53
0
float median(size_t n, float* x) {
54
0
    std::sort(x, x + n);
55
0
    if (n % 2 == 1) {
56
0
        return x[n / 2];
57
0
    } else {
58
0
        return (x[n / 2 - 1] + x[n / 2]) / 2;
59
0
    }
60
0
}
61
62
} // namespace
63
64
void IndexIVFSpectralHash::train_encoder(
65
        idx_t n,
66
        const float* x,
67
0
        const idx_t* assign) {
68
0
    if (!vt->is_trained) {
69
0
        vt->train(n, x);
70
0
    }
71
0
    FAISS_THROW_IF_NOT(!by_residual);
72
73
0
    if (threshold_type == Thresh_global) {
74
        // nothing to do
75
0
        return;
76
0
    } else if (
77
0
            threshold_type == Thresh_centroid ||
78
0
            threshold_type == Thresh_centroid_half) {
79
        // convert all centroids with vt
80
0
        std::vector<float> centroids(nlist * d);
81
0
        quantizer->reconstruct_n(0, nlist, centroids.data());
82
0
        trained.resize(nlist * nbit);
83
0
        vt->apply_noalloc(nlist, centroids.data(), trained.data());
84
0
        if (threshold_type == Thresh_centroid_half) {
85
0
            for (size_t i = 0; i < nlist * nbit; i++) {
86
0
                trained[i] -= 0.25 * period;
87
0
            }
88
0
        }
89
0
        return;
90
0
    }
91
    // otherwise train medians
92
93
    // assign
94
0
    std::unique_ptr<idx_t[]> idx(new idx_t[n]);
95
0
    quantizer->assign(n, x, idx.get());
96
97
0
    std::vector<size_t> sizes(nlist + 1);
98
0
    for (size_t i = 0; i < n; i++) {
99
0
        FAISS_THROW_IF_NOT(idx[i] >= 0);
100
0
        sizes[idx[i]]++;
101
0
    }
102
103
0
    size_t ofs = 0;
104
0
    for (int j = 0; j < nlist; j++) {
105
0
        size_t o0 = ofs;
106
0
        ofs += sizes[j];
107
0
        sizes[j] = o0;
108
0
    }
109
110
    // transform
111
0
    std::unique_ptr<float[]> xt(vt->apply(n, x));
112
113
    // transpose + reorder
114
0
    std::unique_ptr<float[]> xo(new float[n * nbit]);
115
116
0
    for (size_t i = 0; i < n; i++) {
117
0
        size_t idest = sizes[idx[i]]++;
118
0
        for (size_t j = 0; j < nbit; j++) {
119
0
            xo[idest + n * j] = xt[i * nbit + j];
120
0
        }
121
0
    }
122
123
0
    trained.resize(n * nbit);
124
    // compute medians
125
0
#pragma omp for
126
0
    for (int i = 0; i < nlist; i++) {
127
0
        size_t i0 = i == 0 ? 0 : sizes[i - 1];
128
0
        size_t i1 = sizes[i];
129
0
        for (int j = 0; j < nbit; j++) {
130
0
            float* xoi = xo.get() + i0 + n * j;
131
0
            if (i0 == i1) { // nothing to train
132
0
                trained[i * nbit + j] = 0.0;
133
0
            } else if (i1 == i0 + 1) {
134
0
                trained[i * nbit + j] = xoi[0];
135
0
            } else {
136
0
                trained[i * nbit + j] = median(i1 - i0, xoi);
137
0
            }
138
0
        }
139
0
    }
140
0
}
141
142
namespace {
143
144
void binarize_with_freq(
145
        size_t nbit,
146
        float freq,
147
        const float* x,
148
        const float* c,
149
0
        uint8_t* codes) {
150
0
    memset(codes, 0, (nbit + 7) / 8);
151
0
    for (size_t i = 0; i < nbit; i++) {
152
0
        float xf = (x[i] - c[i]);
153
0
        int64_t xi = int64_t(floor(xf * freq));
154
0
        int64_t bit = xi & 1;
155
0
        codes[i >> 3] |= bit << (i & 7);
156
0
    }
157
0
}
158
159
} // namespace
160
161
void IndexIVFSpectralHash::encode_vectors(
162
        idx_t n,
163
        const float* x_in,
164
        const idx_t* list_nos,
165
        uint8_t* codes,
166
0
        bool include_listnos) const {
167
0
    FAISS_THROW_IF_NOT(is_trained);
168
0
    FAISS_THROW_IF_NOT(!by_residual);
169
0
    float freq = 2.0 / period;
170
0
    size_t coarse_size = include_listnos ? coarse_code_size() : 0;
171
172
    // transform with vt
173
0
    std::unique_ptr<float[]> x(vt->apply(n, x_in));
174
175
0
    std::vector<float> zero(nbit);
176
177
0
#pragma omp for
178
0
    for (idx_t i = 0; i < n; i++) {
179
0
        int64_t list_no = list_nos[i];
180
0
        uint8_t* code = codes + i * (code_size + coarse_size);
181
182
0
        if (list_no >= 0) {
183
0
            if (coarse_size) {
184
0
                encode_listno(list_no, code);
185
0
            }
186
0
            const float* c;
187
188
0
            if (threshold_type == Thresh_global) {
189
0
                c = zero.data();
190
0
            } else {
191
0
                c = trained.data() + list_no * nbit;
192
0
            }
193
0
            binarize_with_freq(
194
0
                    nbit, freq, x.get() + i * nbit, c, code + coarse_size);
195
0
        } else {
196
0
            memset(code, 0, code_size + coarse_size);
197
0
        }
198
0
    }
199
0
}
200
201
namespace {
202
203
template <class HammingComputer>
204
struct IVFScanner : InvertedListScanner {
205
    // copied from index structure
206
    const IndexIVFSpectralHash* index;
207
    size_t nbit;
208
209
    float period, freq;
210
    std::vector<float> q;
211
    std::vector<float> zero;
212
    std::vector<uint8_t> qcode;
213
    HammingComputer hc;
214
215
    IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
216
0
            : index(index),
217
0
              nbit(index->nbit),
218
0
              period(index->period),
219
0
              freq(2.0 / index->period),
220
0
              q(nbit),
221
0
              zero(nbit),
222
0
              qcode(index->code_size),
223
0
              hc(qcode.data(), index->code_size) {
224
0
        this->store_pairs = store_pairs;
225
0
        this->code_size = index->code_size;
226
0
        this->keep_max = is_similarity_metric(index->metric_type);
227
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EEC2EPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEEC2EPKNS_20IndexIVFSpectralHashEb
228
229
0
    void set_query(const float* query) override {
230
0
        FAISS_THROW_IF_NOT(query);
231
0
        FAISS_THROW_IF_NOT(q.size() == nbit);
232
0
        index->vt->apply_noalloc(1, query, q.data());
233
234
0
        if (index->threshold_type == IndexIVFSpectralHash::Thresh_global) {
235
0
            binarize_with_freq(nbit, freq, q.data(), zero.data(), qcode.data());
236
0
            hc.set(qcode.data(), code_size);
237
0
        }
238
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EE9set_queryEPKf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEE9set_queryEPKf
239
240
0
    void set_list(idx_t list_no, float /*coarse_dis*/) override {
241
0
        this->list_no = list_no;
242
0
        if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
243
0
            const float* c = index->trained.data() + list_no * nbit;
244
0
            binarize_with_freq(nbit, freq, q.data(), c, qcode.data());
245
0
            hc.set(qcode.data(), code_size);
246
0
        }
247
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EE8set_listElf
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEE8set_listElf
248
249
0
    float distance_to_code(const uint8_t* code) const final {
250
0
        return hc.hamming(code);
251
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EE16distance_to_codeEPKh
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEE16distance_to_codeEPKh
252
253
    size_t scan_codes(
254
            size_t list_size,
255
            const uint8_t* codes,
256
            const idx_t* ids,
257
            float* simi,
258
            idx_t* idxi,
259
0
            size_t k) const override {
260
0
        size_t nup = 0;
261
0
        for (size_t j = 0; j < list_size; j++) {
262
0
            float dis = hc.hamming(codes);
263
264
0
            if (dis < simi[0]) {
265
0
                int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
266
0
                maxheap_replace_top(k, simi, idxi, dis, id);
267
0
                nup++;
268
0
            }
269
0
            codes += code_size;
270
0
        }
271
0
        return nup;
272
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EE10scan_codesEmPKhPKlPfPlm
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEE10scan_codesEmPKhPKlPfPlm
273
274
    void scan_codes_range(
275
            size_t list_size,
276
            const uint8_t* codes,
277
            const idx_t* ids,
278
            float radius,
279
0
            RangeQueryResult& res) const override {
280
0
        for (size_t j = 0; j < list_size; j++) {
281
0
            float dis = hc.hamming(codes);
282
0
            if (dis < radius) {
283
0
                int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
284
0
                res.add(dis, id);
285
0
            }
286
0
            codes += code_size;
287
0
        }
288
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer4EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_16HammingComputer8EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer16EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer20EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer32EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_17HammingComputer64EE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZNK5faiss12_GLOBAL__N_110IVFScannerINS_22HammingComputerDefaultEE16scan_codes_rangeEmPKhPKlfRNS_16RangeQueryResultE
289
};
290
291
struct BuildScanner {
292
    using T = InvertedListScanner*;
293
294
    template <class HammingComputer>
295
0
    static T f(const IndexIVFSpectralHash* index, bool store_pairs) {
296
0
        return new IVFScanner<HammingComputer>(index, store_pairs);
297
0
    }
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_16HammingComputer4EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_16HammingComputer8EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_17HammingComputer16EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_17HammingComputer20EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_17HammingComputer32EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_17HammingComputer64EEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
Unexecuted instantiation: IndexIVFSpectralHash.cpp:_ZN5faiss12_GLOBAL__N_112BuildScanner1fINS_22HammingComputerDefaultEEEPNS_19InvertedListScannerEPKNS_20IndexIVFSpectralHashEb
298
};
299
300
} // anonymous namespace
301
302
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
303
        bool store_pairs,
304
        const IDSelector* sel,
305
0
        const IVFSearchParameters*) const {
306
0
    FAISS_THROW_IF_NOT(!sel);
307
0
    BuildScanner bs;
308
0
    return dispatch_HammingComputer(code_size, bs, this, store_pairs);
309
0
}
310
311
0
void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
312
0
    FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
313
0
    FAISS_THROW_IF_NOT(vt_in->d_in == d);
314
0
    if (own_fields) {
315
0
        delete vt;
316
0
    }
317
0
    vt = vt_in;
318
0
    threshold_type = Thresh_global;
319
0
    is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
320
0
            vt->is_trained;
321
0
    own_fields = own;
322
0
}
323
324
/*
325
    Check that the encoder is a single vector transform followed by a LSH
326
    that just does thresholding.
327
    If this is not the case, the linear transform + threhsolds of the IndexLSH
328
    should be merged into the VectorTransform (which is feasible).
329
*/
330
331
0
void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
332
0
    FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
333
0
    auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
334
0
    FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
335
0
    FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
336
0
    FAISS_THROW_IF_NOT(!sub_index->rotate_data);
337
0
    FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
338
0
    replace_vt(encoder->chain[0], own);
339
0
}
340
341
} // namespace faiss