Coverage Report

Created: 2025-11-21 12:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/IndexBinaryIVF.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#ifndef FAISS_INDEX_BINARY_IVF_H
11
#define FAISS_INDEX_BINARY_IVF_H
12
13
#include <vector>
14
15
#include <faiss/Clustering.h>
16
#include <faiss/IndexBinary.h>
17
#include <faiss/IndexIVF.h>
18
#include <faiss/utils/Heap.h>
19
20
namespace faiss {
21
22
struct BinaryInvertedListScanner;
23
24
/** Index based on a inverted file (IVF)
25
 *
26
 * In the inverted file, the quantizer (an IndexBinary instance) provides a
27
 * quantization index for each vector to be added. The quantization
28
 * index maps to a list (aka inverted list or posting list), where the
29
 * id of the vector is stored.
30
 *
31
 * Otherwise the object is similar to the IndexIVF
32
 */
33
struct IndexBinaryIVF : IndexBinary {
34
    /// Access to the actual data
35
    InvertedLists* invlists = nullptr;
36
    bool own_invlists = true;
37
38
    size_t nprobe = 1;    ///< number of probes at query time
39
    size_t max_codes = 0; ///< max nb of codes to visit to do a query
40
41
    /** Select between using a heap or counting to select the k smallest values
42
     * when scanning inverted lists.
43
     */
44
    bool use_heap = true;
45
46
    /** collect computations per batch */
47
    bool per_invlist_search = false;
48
49
    /// map for direct access to the elements. Enables reconstruct().
50
    DirectMap direct_map;
51
52
    /// quantizer that maps vectors to inverted lists
53
    IndexBinary* quantizer = nullptr;
54
55
    /// number of possible key values
56
    size_t nlist = 0;
57
58
    /// whether object owns the quantizer
59
    bool own_fields = false;
60
61
    ClusteringParameters cp; ///< to override default clustering params
62
63
    /// to override index used during clustering
64
    Index* clustering_index = nullptr;
65
66
    /** The Inverted file takes a quantizer (an IndexBinary) on input,
67
     * which implements the function mapping a vector to a list
68
     * identifier. The pointer is borrowed: the quantizer should not
69
     * be deleted while the IndexBinaryIVF is in use.
70
     */
71
    IndexBinaryIVF(IndexBinary* quantizer, size_t d, size_t nlist);
72
73
    IndexBinaryIVF();
74
75
    ~IndexBinaryIVF() override;
76
77
    void reset() override;
78
79
    /// Trains the quantizer
80
    void train(idx_t n, const uint8_t* x) override;
81
82
    void add(idx_t n, const uint8_t* x) override;
83
84
    void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids) override;
85
86
    /** Implementation of vector addition where the vector assignments are
87
     * predefined.
88
     *
89
     * @param precomputed_idx    quantization indices for the input vectors
90
     * (size n)
91
     */
92
    void add_core(
93
            idx_t n,
94
            const uint8_t* x,
95
            const idx_t* xids,
96
            const idx_t* precomputed_idx);
97
98
    /** Search a set of vectors, that are pre-quantized by the IVF
99
     *  quantizer. Fill in the corresponding heaps with the query
100
     *  results. search() calls this.
101
     *
102
     * @param n      nb of vectors to query
103
     * @param x      query vectors, size nx * d
104
     * @param assign coarse quantization indices, size nx * nprobe
105
     * @param centroid_dis
106
     *               distances to coarse centroids, size nx * nprobe
107
     * @param distance
108
     *               output distances, size n * k
109
     * @param labels output labels, size n * k
110
     * @param store_pairs store inv list index + inv list offset
111
     *                     instead in upper/lower 32 bit of result,
112
     *                     instead of ids (used for reranking).
113
     * @param params used to override the object's search parameters
114
     */
115
    void search_preassigned(
116
            idx_t n,
117
            const uint8_t* x,
118
            idx_t k,
119
            const idx_t* assign,
120
            const int32_t* centroid_dis,
121
            int32_t* distances,
122
            idx_t* labels,
123
            bool store_pairs,
124
            const IVFSearchParameters* params = nullptr) const;
125
126
    virtual BinaryInvertedListScanner* get_InvertedListScanner(
127
            bool store_pairs = false) const;
128
129
    /** assign the vectors, then call search_preassign */
130
    void search(
131
            idx_t n,
132
            const uint8_t* x,
133
            idx_t k,
134
            int32_t* distances,
135
            idx_t* labels,
136
            const SearchParameters* params = nullptr) const override;
137
138
    void range_search(
139
            idx_t n,
140
            const uint8_t* x,
141
            int radius,
142
            RangeSearchResult* result,
143
            const SearchParameters* params = nullptr) const override;
144
145
    void range_search_preassigned(
146
            idx_t n,
147
            const uint8_t* x,
148
            int radius,
149
            const idx_t* assign,
150
            const int32_t* centroid_dis,
151
            RangeSearchResult* result) const;
152
153
    void reconstruct(idx_t key, uint8_t* recons) const override;
154
155
    /** Reconstruct a subset of the indexed vectors.
156
     *
157
     * Overrides default implementation to bypass reconstruct() which requires
158
     * direct_map to be maintained.
159
     *
160
     * @param i0     first vector to reconstruct
161
     * @param ni     nb of vectors to reconstruct
162
     * @param recons output array of reconstructed vectors, size ni * d / 8
163
     */
164
    void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const override;
165
166
    /** Similar to search, but also reconstructs the stored vectors (or an
167
     * approximation in the case of lossy coding) for the search results.
168
     *
169
     * Overrides default implementation to avoid having to maintain direct_map
170
     * and instead fetch the code offsets through the `store_pairs` flag in
171
     * search_preassigned().
172
     *
173
     * @param recons      reconstructed vectors size (n, k, d / 8)
174
     */
175
    void search_and_reconstruct(
176
            idx_t n,
177
            const uint8_t* x,
178
            idx_t k,
179
            int32_t* distances,
180
            idx_t* labels,
181
            uint8_t* recons,
182
            const SearchParameters* params = nullptr) const override;
183
184
    /** Reconstruct a vector given the location in terms of (inv list index +
185
     * inv list offset) instead of the id.
186
     *
187
     * Useful for reconstructing when the direct_map is not maintained and
188
     * the inv list offset is computed by search_preassigned() with
189
     * `store_pairs` set.
190
     */
191
    virtual void reconstruct_from_offset(
192
            idx_t list_no,
193
            idx_t offset,
194
            uint8_t* recons) const;
195
196
    /// Dataset manipulation functions
197
    size_t remove_ids(const IDSelector& sel) override;
198
199
    void merge_from(IndexBinary& other, idx_t add_id) override;
200
201
    void check_compatible_for_merge(
202
            const IndexBinary& otherIndex) const override;
203
204
0
    size_t get_list_size(size_t list_no) const {
205
0
        return invlists->list_size(list_no);
206
0
    }
207
208
    /** initialize a direct map
209
     *
210
     * @param new_maintain_direct_map    if true, create a direct map,
211
     *                                   else clear it
212
     */
213
    void make_direct_map(bool new_maintain_direct_map = true);
214
215
    void set_direct_map_type(DirectMap::Type type);
216
217
    void replace_invlists(InvertedLists* il, bool own = false);
218
};
219
220
struct BinaryInvertedListScanner {
221
    /// from now on we handle this query.
222
    virtual void set_query(const uint8_t* query_vector) = 0;
223
224
    /// following codes come from this inverted list
225
    virtual void set_list(idx_t list_no, uint8_t coarse_dis) = 0;
226
227
    /// compute a single query-to-code distance
228
    virtual uint32_t distance_to_code(const uint8_t* code) const = 0;
229
230
    /** compute the distances to codes. (distances, labels) should be
231
     * organized as a min- or max-heap
232
     *
233
     * @param n      number of codes to scan
234
     * @param codes  codes to scan (n * code_size)
235
     * @param ids        corresponding ids (ignored if store_pairs)
236
     * @param distances  heap distances (size k)
237
     * @param labels     heap labels (size k)
238
     * @param k          heap size
239
     */
240
    virtual size_t scan_codes(
241
            size_t n,
242
            const uint8_t* codes,
243
            const idx_t* ids,
244
            int32_t* distances,
245
            idx_t* labels,
246
            size_t k) const = 0;
247
248
    virtual void scan_codes_range(
249
            size_t n,
250
            const uint8_t* codes,
251
            const idx_t* ids,
252
            int radius,
253
            RangeQueryResult& result) const = 0;
254
255
0
    virtual ~BinaryInvertedListScanner() {}
256
};
257
258
} // namespace faiss
259
260
#endif // FAISS_INDEX_BINARY_IVF_H