Coverage Report

Created: 2025-08-27 05:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/IndexIVFPQ.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#ifndef FAISS_INDEX_IVFPQ_H
11
#define FAISS_INDEX_IVFPQ_H
12
13
#include <vector>
14
15
#include <faiss/IndexIVF.h>
16
#include <faiss/IndexPQ.h>
17
#include <faiss/impl/platform_macros.h>
18
#include <faiss/utils/AlignedTable.h>
19
20
namespace faiss {
21
22
struct IVFPQSearchParameters : IVFSearchParameters {
23
    size_t scan_table_threshold; ///< use table computation or on-the-fly?
24
    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
25
0
    IVFPQSearchParameters() : scan_table_threshold(0), polysemous_ht(0) {}
26
0
    ~IVFPQSearchParameters() {}
27
};
28
29
FAISS_API extern size_t precomputed_table_max_bytes;
30
31
/** Inverted file with Product Quantizer encoding. Each residual
32
 * vector is encoded as a product quantizer code.
33
 */
34
struct IndexIVFPQ : IndexIVF {
35
    ProductQuantizer pq; ///< produces the codes
36
37
    bool do_polysemous_training; ///< reorder PQ centroids after training?
38
    PolysemousTraining* polysemous_training; ///< if NULL, use default
39
40
    // search-time parameters
41
    size_t scan_table_threshold; ///< use table computation or on-the-fly?
42
    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
43
44
    /** Precompute table that speed up query preprocessing at some
45
     * memory cost (used only for by_residual with L2 metric)
46
     */
47
    int use_precomputed_table;
48
49
    /// if use_precompute_table
50
    /// size nlist * pq.M * pq.ksub
51
    AlignedTable<float> precomputed_table;
52
53
    IndexIVFPQ(
54
            Index* quantizer,
55
            size_t d,
56
            size_t nlist,
57
            size_t M,
58
            size_t nbits_per_idx,
59
            MetricType metric = METRIC_L2);
60
61
    void encode_vectors(
62
            idx_t n,
63
            const float* x,
64
            const idx_t* list_nos,
65
            uint8_t* codes,
66
            bool include_listnos = false) const override;
67
68
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
69
70
    void add_core(
71
            idx_t n,
72
            const float* x,
73
            const idx_t* xids,
74
            const idx_t* precomputed_idx,
75
            void* inverted_list_context = nullptr) override;
76
77
    /// same as add_core, also:
78
    /// - output 2nd level residuals if residuals_2 != NULL
79
    /// - accepts precomputed_idx = nullptr
80
    void add_core_o(
81
            idx_t n,
82
            const float* x,
83
            const idx_t* xids,
84
            float* residuals_2,
85
            const idx_t* precomputed_idx = nullptr,
86
            void* inverted_list_context = nullptr);
87
88
    /// trains the product quantizer
89
    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
90
91
    idx_t train_encoder_num_vectors() const override;
92
93
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
94
            const override;
95
96
    /** Find exact duplicates in the dataset.
97
     *
98
     * the duplicates are returned in pre-allocated arrays (see the
99
     * max sizes).
100
     *
101
     * @param lims   limits between groups of duplicates
102
     *                (max size ntotal / 2 + 1)
103
     * @param ids    ids[lims[i]] : ids[lims[i+1]-1] is a group of
104
     *                duplicates (max size ntotal)
105
     * @return n      number of groups found
106
     */
107
    size_t find_duplicates(idx_t* ids, size_t* lims) const;
108
109
    // map a vector to a binary code knowning the index
110
    void encode(idx_t key, const float* x, uint8_t* code) const;
111
112
    /** Encode multiple vectors
113
     *
114
     * @param n       nb vectors to encode
115
     * @param keys    posting list ids for those vectors (size n)
116
     * @param x       vectors (size n * d)
117
     * @param codes   output codes (size n * code_size)
118
     * @param compute_keys  if false, assume keys are precomputed,
119
     *                      otherwise compute them
120
     */
121
    void encode_multiple(
122
            size_t n,
123
            idx_t* keys,
124
            const float* x,
125
            uint8_t* codes,
126
            bool compute_keys = false) const;
127
128
    /// inverse of encode_multiple
129
    void decode_multiple(
130
            size_t n,
131
            const idx_t* keys,
132
            const uint8_t* xcodes,
133
            float* x) const;
134
135
    InvertedListScanner* get_InvertedListScanner(
136
            bool store_pairs,
137
            const IDSelector* sel,
138
            const IVFSearchParameters* params) const override;
139
140
    /// build precomputed table
141
    void precompute_table();
142
143
    IndexIVFPQ();
144
};
145
146
// block size used in IndexIVFPQ::add_core_o
147
FAISS_API extern int index_ivfpq_add_core_o_bs;
148
149
/** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
150
 *
151
 * @param use_precomputed_table (I/O)
152
 *        =-1: force disable
153
 *        =0: decide heuristically (default: use tables only if they are
154
 *            < precomputed_tables_max_bytes), set use_precomputed_table on
155
 * output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
156
 * specific version for MultiIndexQuantizer (much more compact)
157
 * @param precomputed_table precomputed table to initialize
158
 */
159
160
void initialize_IVFPQ_precomputed_table(
161
        int& use_precomputed_table,
162
        const Index* quantizer,
163
        const ProductQuantizer& pq,
164
        AlignedTable<float>& precomputed_table,
165
        bool by_residual,
166
        bool verbose);
167
168
/// statistics are robust to internal threading, but not if
169
/// IndexIVFPQ::search_preassigned is called by multiple threads
170
struct IndexIVFPQStats {
171
    size_t nrefine; ///< nb of refines (IVFPQR)
172
173
    size_t n_hamming_pass;
174
    ///< nb of passed Hamming distance tests (for polysemous)
175
176
    // timings measured with the CPU RTC on all threads
177
    size_t search_cycles;
178
    size_t refine_cycles; ///< only for IVFPQR
179
180
10
    IndexIVFPQStats() {
181
10
        reset();
182
10
    }
183
    void reset();
184
};
185
186
// global var that collects them all
187
FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
188
189
} // namespace faiss
190
191
#endif