Coverage Report

Created: 2025-09-12 10:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/Index.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#ifndef FAISS_INDEX_H
11
#define FAISS_INDEX_H
12
13
#include <faiss/MetricType.h>
14
#include <cstdio>
15
#include <sstream>
16
#include <string>
17
#include <typeinfo>
18
19
#define FAISS_VERSION_MAJOR 1
20
#define FAISS_VERSION_MINOR 11
21
#define FAISS_VERSION_PATCH 0
22
23
// Macro to combine the version components into a single string
24
#ifndef FAISS_STRINGIFY
25
0
#define FAISS_STRINGIFY(ARG) #ARG
26
#endif
27
#ifndef FAISS_TOSTRING
28
0
#define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG)
29
#endif
30
#define VERSION_STRING                                          \
31
0
    FAISS_TOSTRING(FAISS_VERSION_MAJOR)                         \
32
0
    "." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \
33
0
            FAISS_VERSION_PATCH)
34
35
/**
36
 * @namespace faiss
37
 *
38
 * Throughout the library, vectors are provided as float * pointers.
39
 * Most algorithms can be optimized when several vectors are processed
40
 * (added/searched) together in a batch. In this case, they are passed
41
 * in as a matrix. When n vectors of size d are provided as float * x,
42
 * component j of vector i is
43
 *
44
 *   x[ i * d + j ]
45
 *
46
 * where 0 <= i < n and 0 <= j < d. In other words, matrices are
47
 * always compact. When specifying the size of the matrix, we call it
48
 * an n*d matrix, which implies a row-major storage.
49
 */
50
51
namespace faiss {
52
53
/// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h
54
/// and impl/DistanceComputer.h
55
struct IDSelector;
56
struct RangeSearchResult;
57
struct DistanceComputer;
58
59
/** Parent class for the optional search paramenters.
60
 *
61
 * Sub-classes with additional search parameters should inherit this class.
62
 * Ownership of the object fields is always to the caller.
63
 */
64
struct SearchParameters {
65
    /// if non-null, only these IDs will be considered during search.
66
    IDSelector* sel = nullptr;
67
    /// make sure we can dynamic_cast this
68
126
    virtual ~SearchParameters() {}
69
};
70
71
/** Abstract structure for an index, supports adding vectors and searching
72
 * them.
73
 *
74
 * All vectors provided at add or search time are 32-bit float arrays,
75
 * although the internal representation may vary.
76
 */
77
struct Index {
78
    using component_t = float;
79
    using distance_t = float;
80
81
    int d;        ///< vector dimension
82
    idx_t ntotal; ///< total nb of indexed vectors
83
    bool verbose; ///< verbosity level
84
85
    /// set if the Index does not require training, or if training is
86
    /// done already
87
    bool is_trained;
88
89
    /// type of metric this index uses for search
90
    MetricType metric_type;
91
    float metric_arg; ///< argument of the metric type
92
93
    explicit Index(idx_t d = 0, MetricType metric = METRIC_L2)
94
374
            : d(d),
95
374
              ntotal(0),
96
374
              verbose(false),
97
374
              is_trained(true),
98
374
              metric_type(metric),
99
374
              metric_arg(0) {}
100
101
    virtual ~Index();
102
103
    /** Perform training on a representative set of vectors
104
     *
105
     * @param n      nb of training vectors
106
     * @param x      training vecors, size n * d
107
     */
108
    virtual void train(idx_t n, const float* x);
109
110
    /** Add n vectors of dimension d to the index.
111
     *
112
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
113
     * This function slices the input vectors in chunks smaller than
114
     * blocksize_add and calls add_core.
115
     * @param n      number of vectors
116
     * @param x      input matrix, size n * d
117
     */
118
    virtual void add(idx_t n, const float* x) = 0;
119
120
    /** Same as add, but stores xids instead of sequential ids.
121
     *
122
     * The default implementation fails with an assertion, as it is
123
     * not supported by all indexes.
124
     *
125
     * @param n         number of vectors
126
     * @param x         input vectors, size n * d
127
     * @param xids      if non-null, ids to store for the vectors (size n)
128
     */
129
    virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
130
131
    /** query n vectors of dimension d to the index.
132
     *
133
     * return at most k vectors. If there are not enough results for a
134
     * query, the result array is padded with -1s.
135
     *
136
     * @param n           number of vectors
137
     * @param x           input vectors to search, size n * d
138
     * @param k           number of extracted vectors
139
     * @param distances   output pairwise distances, size n*k
140
     * @param labels      output labels of the NNs, size n*k
141
     */
142
    virtual void search(
143
            idx_t n,
144
            const float* x,
145
            idx_t k,
146
            float* distances,
147
            idx_t* labels,
148
            const SearchParameters* params = nullptr) const = 0;
149
150
    /** query n vectors of dimension d to the index.
151
     *
152
     * return all vectors with distance < radius. Note that many
153
     * indexes do not implement the range_search (only the k-NN search
154
     * is mandatory).
155
     *
156
     * @param n           number of vectors
157
     * @param x           input vectors to search, size n * d
158
     * @param radius      search radius
159
     * @param result      result table
160
     */
161
    virtual void range_search(
162
            idx_t n,
163
            const float* x,
164
            float radius,
165
            RangeSearchResult* result,
166
            const SearchParameters* params = nullptr) const;
167
168
    /** return the indexes of the k vectors closest to the query x.
169
     *
170
     * This function is identical as search but only return labels of
171
     * neighbors.
172
     * @param n           number of vectors
173
     * @param x           input vectors to search, size n * d
174
     * @param labels      output labels of the NNs, size n*k
175
     * @param k           number of nearest neighbours
176
     */
177
    virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
178
            const;
179
180
    /// removes all elements from the database.
181
    virtual void reset() = 0;
182
183
    /** removes IDs from the index. Not supported by all
184
     * indexes. Returns the number of elements removed.
185
     */
186
    virtual size_t remove_ids(const IDSelector& sel);
187
188
    /** Reconstruct a stored vector (or an approximation if lossy coding)
189
     *
190
     * this function may not be defined for some indexes
191
     * @param key         id of the vector to reconstruct
192
     * @param recons      reconstucted vector (size d)
193
     */
194
    virtual void reconstruct(idx_t key, float* recons) const;
195
196
    /** Reconstruct several stored vectors (or an approximation if lossy
197
     * coding)
198
     *
199
     * this function may not be defined for some indexes
200
     * @param n           number of vectors to reconstruct
201
     * @param keys        ids of the vectors to reconstruct (size n)
202
     * @param recons      reconstucted vector (size n * d)
203
     */
204
    virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
205
            const;
206
207
    /** Reconstruct vectors i0 to i0 + ni - 1
208
     *
209
     * this function may not be defined for some indexes
210
     * @param i0          index of the first vector in the sequence
211
     * @param ni          number of vectors in the sequence
212
     * @param recons      reconstucted vector (size ni * d)
213
     */
214
    virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
215
216
    /** Similar to search, but also reconstructs the stored vectors (or an
217
     * approximation in the case of lossy coding) for the search results.
218
     *
219
     * If there are not enough results for a query, the resulting arrays
220
     * is padded with -1s.
221
     *
222
     * @param n           number of vectors
223
     * @param x           input vectors to search, size n * d
224
     * @param k           number of extracted vectors
225
     * @param distances   output pairwise distances, size n*k
226
     * @param labels      output labels of the NNs, size n*k
227
     * @param recons      reconstructed vectors size (n, k, d)
228
     **/
229
    virtual void search_and_reconstruct(
230
            idx_t n,
231
            const float* x,
232
            idx_t k,
233
            float* distances,
234
            idx_t* labels,
235
            float* recons,
236
            const SearchParameters* params = nullptr) const;
237
238
    /** Computes a residual vector after indexing encoding.
239
     *
240
     * The residual vector is the difference between a vector and the
241
     * reconstruction that can be decoded from its representation in
242
     * the index. The residual can be used for multiple-stage indexing
243
     * methods, like IndexIVF's methods.
244
     *
245
     * @param x           input vector, size d
246
     * @param residual    output residual vector, size d
247
     * @param key         encoded index, as returned by search and assign
248
     */
249
    virtual void compute_residual(const float* x, float* residual, idx_t key)
250
            const;
251
252
    /** Computes a residual vector after indexing encoding (batch form).
253
     * Equivalent to calling compute_residual for each vector.
254
     *
255
     * The residual vector is the difference between a vector and the
256
     * reconstruction that can be decoded from its representation in
257
     * the index. The residual can be used for multiple-stage indexing
258
     * methods, like IndexIVF's methods.
259
     *
260
     * @param n           number of vectors
261
     * @param xs          input vectors, size (n x d)
262
     * @param residuals   output residual vectors, size (n x d)
263
     * @param keys        encoded index, as returned by search and assign
264
     */
265
    virtual void compute_residual_n(
266
            idx_t n,
267
            const float* xs,
268
            float* residuals,
269
            const idx_t* keys) const;
270
271
    /** Get a DistanceComputer (defined in AuxIndexStructures) object
272
     * for this kind of index.
273
     *
274
     * DistanceComputer is implemented for indexes that support random
275
     * access of their vectors.
276
     */
277
    virtual DistanceComputer* get_distance_computer() const;
278
279
    /* The standalone codec interface */
280
281
    /** size of the produced codes in bytes */
282
    virtual size_t sa_code_size() const;
283
284
    /** encode a set of vectors
285
     *
286
     * @param n       number of vectors
287
     * @param x       input vectors, size n * d
288
     * @param bytes   output encoded vectors, size n * sa_code_size()
289
     */
290
    virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
291
292
    /** decode a set of vectors
293
     *
294
     * @param n       number of vectors
295
     * @param bytes   input encoded vectors, size n * sa_code_size()
296
     * @param x       output vectors, size n * d
297
     */
298
    virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
299
300
    /** moves the entries from another dataset to self.
301
     * On output, other is empty.
302
     * add_id is added to all moved ids
303
     * (for sequential ids, this would be this->ntotal) */
304
    virtual void merge_from(Index& otherIndex, idx_t add_id = 0);
305
306
    /** check that the two indexes are compatible (ie, they are
307
     * trained in the same way and have the same
308
     * parameters). Otherwise throw. */
309
    virtual void check_compatible_for_merge(const Index& otherIndex) const;
310
311
    /** Add vectors that are computed with the standalone codec
312
     *
313
     * @param codes  codes to add size n * sa_code_size()
314
     * @param xids   corresponding ids, size n
315
     */
316
    virtual void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
317
};
318
319
} // namespace faiss
320
321
#endif