/root/doris/contrib/faiss/faiss/Index.h

Source
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// -*- c++ -*-

#ifndef FAISS_INDEX_H
#define FAISS_INDEX_H

#include <faiss/MetricType.h>
#include <cstdio>
#include <sstream>
#include <string>
#include <typeinfo>

#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 11
#define FAISS_VERSION_PATCH 0

// Macro to combine the version components into a single string
#ifndef FAISS_STRINGIFY
#define FAISS_STRINGIFY(ARG) #ARG
#endif
#ifndef FAISS_TOSTRING
#define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG)
#endif
#define VERSION_STRING                                          \
    FAISS_TOSTRING(FAISS_VERSION_MAJOR)                         \
    "." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \
            FAISS_VERSION_PATCH)

/**
 * @namespace faiss
 *
 * Throughout the library, vectors are provided as float * pointers.
 * Most algorithms can be optimized when several vectors are processed
 * (added/searched) together in a batch. In this case, they are passed
 * in as a matrix. When n vectors of size d are provided as float * x,
 * component j of vector i is
 *
 *   x[ i * d + j ]
 *
 * where 0 <= i < n and 0 <= j < d. In other words, matrices are
 * always compact. When specifying the size of the matrix, we call it
 * an n*d matrix, which implies a row-major storage.
 */

namespace faiss {

/// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h
/// and impl/DistanceComputer.h
struct IDSelector;
struct RangeSearchResult;
struct DistanceComputer;

/** Parent class for the optional search paramenters.
 *
 * Sub-classes with additional search parameters should inherit this class.
 * Ownership of the object fields is always to the caller.
 */
struct SearchParameters {
    /// if non-null, only these IDs will be considered during search.
    IDSelector* sel = nullptr;
    /// make sure we can dynamic_cast this
    virtual ~SearchParameters() {}
};

/** Abstract structure for an index, supports adding vectors and searching
 * them.
 *
 * All vectors provided at add or search time are 32-bit float arrays,
 * although the internal representation may vary.
 */
struct Index {
    using component_t = float;
    using distance_t = float;

    int d;        ///< vector dimension
    idx_t ntotal; ///< total nb of indexed vectors
    bool verbose; ///< verbosity level

    /// set if the Index does not require training, or if training is
    /// done already
    bool is_trained;

    /// type of metric this index uses for search
    MetricType metric_type;
    float metric_arg; ///< argument of the metric type

    explicit Index(idx_t d = 0, MetricType metric = METRIC_L2)
            : d(d),
              ntotal(0),
              verbose(false),
              is_trained(true),
              metric_type(metric),
              metric_arg(0) {}

    virtual ~Index();

    /** Perform training on a representative set of vectors
     *
     * @param n      nb of training vectors
     * @param x      training vecors, size n * d
     */
    virtual void train(idx_t n, const float* x);

    /** Add n vectors of dimension d to the index.
     *
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
     * This function slices the input vectors in chunks smaller than
     * blocksize_add and calls add_core.
     * @param n      number of vectors
     * @param x      input matrix, size n * d
     */
    virtual void add(idx_t n, const float* x) = 0;

    /** Same as add, but stores xids instead of sequential ids.
     *
     * The default implementation fails with an assertion, as it is
     * not supported by all indexes.
     *
     * @param n         number of vectors
     * @param x         input vectors, size n * d
     * @param xids      if non-null, ids to store for the vectors (size n)
     */
    virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);

    /** query n vectors of dimension d to the index.
     *
     * return at most k vectors. If there are not enough results for a
     * query, the result array is padded with -1s.
     *
     * @param n           number of vectors
     * @param x           input vectors to search, size n * d
     * @param k           number of extracted vectors
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     */
    virtual void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            const SearchParameters* params = nullptr) const = 0;

    /** query n vectors of dimension d to the index.
     *
     * return all vectors with distance < radius. Note that many
     * indexes do not implement the range_search (only the k-NN search
     * is mandatory).
     *
     * @param n           number of vectors
     * @param x           input vectors to search, size n * d
     * @param radius      search radius
     * @param result      result table
     */
    virtual void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result,
            const SearchParameters* params = nullptr) const;

    /** return the indexes of the k vectors closest to the query x.
     *
     * This function is identical as search but only return labels of
     * neighbors.
     * @param n           number of vectors
     * @param x           input vectors to search, size n * d
     * @param labels      output labels of the NNs, size n*k
     * @param k           number of nearest neighbours
     */
    virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
            const;

    /// removes all elements from the database.
    virtual void reset() = 0;

    /** removes IDs from the index. Not supported by all
     * indexes. Returns the number of elements removed.
     */
    virtual size_t remove_ids(const IDSelector& sel);

    /** Reconstruct a stored vector (or an approximation if lossy coding)
     *
     * this function may not be defined for some indexes
     * @param key         id of the vector to reconstruct
     * @param recons      reconstucted vector (size d)
     */
    virtual void reconstruct(idx_t key, float* recons) const;

    /** Reconstruct several stored vectors (or an approximation if lossy
     * coding)
     *
     * this function may not be defined for some indexes
     * @param n           number of vectors to reconstruct
     * @param keys        ids of the vectors to reconstruct (size n)
     * @param recons      reconstucted vector (size n * d)
     */
    virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
            const;

    /** Reconstruct vectors i0 to i0 + ni - 1
     *
     * this function may not be defined for some indexes
     * @param i0          index of the first vector in the sequence
     * @param ni          number of vectors in the sequence
     * @param recons      reconstucted vector (size ni * d)
     */
    virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;

    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * If there are not enough results for a query, the resulting arrays
     * is padded with -1s.
     *
     * @param n           number of vectors
     * @param x           input vectors to search, size n * d
     * @param k           number of extracted vectors
     * @param distances   output pairwise distances, size n*k
     * @param labels      output labels of the NNs, size n*k
     * @param recons      reconstructed vectors size (n, k, d)
     **/
    virtual void search_and_reconstruct(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            float* recons,
            const SearchParameters* params = nullptr) const;

    /** Computes a residual vector after indexing encoding.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param x           input vector, size d
     * @param residual    output residual vector, size d
     * @param key         encoded index, as returned by search and assign
     */
    virtual void compute_residual(const float* x, float* residual, idx_t key)
            const;

    /** Computes a residual vector after indexing encoding (batch form).
     * Equivalent to calling compute_residual for each vector.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param n           number of vectors
     * @param xs          input vectors, size (n x d)
     * @param residuals   output residual vectors, size (n x d)
     * @param keys        encoded index, as returned by search and assign
     */
    virtual void compute_residual_n(
            idx_t n,
            const float* xs,
            float* residuals,
            const idx_t* keys) const;

    /** Get a DistanceComputer (defined in AuxIndexStructures) object
     * for this kind of index.
     *
     * DistanceComputer is implemented for indexes that support random
     * access of their vectors.
     */
    virtual DistanceComputer* get_distance_computer() const;

    /* The standalone codec interface */

    /** size of the produced codes in bytes */
    virtual size_t sa_code_size() const;

    /** encode a set of vectors
     *
     * @param n       number of vectors
     * @param x       input vectors, size n * d
     * @param bytes   output encoded vectors, size n * sa_code_size()
     */
    virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;

    /** decode a set of vectors
     *
     * @param n       number of vectors
     * @param bytes   input encoded vectors, size n * sa_code_size()
     * @param x       output vectors, size n * d
     */
    virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;

    /** moves the entries from another dataset to self.
     * On output, other is empty.
     * add_id is added to all moved ids
     * (for sequential ids, this would be this->ntotal) */
    virtual void merge_from(Index& otherIndex, idx_t add_id = 0);

    /** check that the two indexes are compatible (ie, they are
     * trained in the same way and have the same
     * parameters). Otherwise throw. */
    virtual void check_compatible_for_merge(const Index& otherIndex) const;

    /** Add vectors that are computed with the standalone codec
     *
     * @param codes  codes to add size n * sa_code_size()
     * @param xids   corresponding ids, size n
     */
    virtual void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
};

} // namespace faiss

#endif

Coverage Report

Created: 2025-09-12 10:59

Line	Count	Source
1		/*
2		* Copyright (c) Meta Platforms, Inc. and affiliates.
3		*
4		* This source code is licensed under the MIT license found in the
5		* LICENSE file in the root directory of this source tree.
6		*/
7
8		// -- c++ --
9
10		#ifndef FAISS_INDEX_H
11		#define FAISS_INDEX_H
12
13		#include <faiss/MetricType.h>
14		#include <cstdio>
15		#include <sstream>
16		#include <string>
17		#include <typeinfo>
18
19		#define FAISS_VERSION_MAJOR 1
20		#define FAISS_VERSION_MINOR 11
21		#define FAISS_VERSION_PATCH 0
22
23		// Macro to combine the version components into a single string
24		#ifndef FAISS_STRINGIFY
25	0	#define FAISS_STRINGIFY(ARG) #ARG
26		#endif
27		#ifndef FAISS_TOSTRING
28	0	#define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG)
29		#endif
30		#define VERSION_STRING \
31	0	FAISS_TOSTRING(FAISS_VERSION_MAJOR) \
32	0	"." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \
33	0	FAISS_VERSION_PATCH)
34
35		/**
36		* @namespace faiss
37		*
38		* Throughout the library, vectors are provided as float * pointers.
39		* Most algorithms can be optimized when several vectors are processed
40		* (added/searched) together in a batch. In this case, they are passed
41		* in as a matrix. When n vectors of size d are provided as float * x,
42		* component j of vector i is
43		*
44		* x[ i * d + j ]
45		*
46		* where 0 <= i < n and 0 <= j < d. In other words, matrices are
47		* always compact. When specifying the size of the matrix, we call it
48		* an n*d matrix, which implies a row-major storage.
49		*/
50
51		namespace faiss {
52
53		/// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h
54		/// and impl/DistanceComputer.h
55		struct IDSelector;
56		struct RangeSearchResult;
57		struct DistanceComputer;
58
59		/** Parent class for the optional search paramenters.
60		*
61		* Sub-classes with additional search parameters should inherit this class.
62		* Ownership of the object fields is always to the caller.
63		*/
64		struct SearchParameters {
65		/// if non-null, only these IDs will be considered during search.
66		IDSelector* sel = nullptr;
67		/// make sure we can dynamic_cast this
68	126	virtual ~SearchParameters() {}
69		};
70
71		/** Abstract structure for an index, supports adding vectors and searching
72		* them.
73		*
74		* All vectors provided at add or search time are 32-bit float arrays,
75		* although the internal representation may vary.
76		*/
77		struct Index {
78		using component_t = float;
79		using distance_t = float;
80
81		int d; ///< vector dimension
82		idx_t ntotal; ///< total nb of indexed vectors
83		bool verbose; ///< verbosity level
84
85		/// set if the Index does not require training, or if training is
86		/// done already
87		bool is_trained;
88
89		/// type of metric this index uses for search
90		MetricType metric_type;
91		float metric_arg; ///< argument of the metric type
92
93		explicit Index(idx_t d = 0, MetricType metric = METRIC_L2)
94	374	: d(d),
95	374	ntotal(0),
96	374	verbose(false),
97	374	is_trained(true),
98	374	metric_type(metric),
99	374	metric_arg(0) {}
100
101		virtual ~Index();
102
103		/** Perform training on a representative set of vectors
104		*
105		* @param n nb of training vectors
106		* @param x training vecors, size n * d
107		*/
108		virtual void train(idx_t n, const float* x);
109
110		/** Add n vectors of dimension d to the index.
111		*
112		* Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
113		* This function slices the input vectors in chunks smaller than
114		* blocksize_add and calls add_core.
115		* @param n number of vectors
116		* @param x input matrix, size n * d
117		*/
118		virtual void add(idx_t n, const float* x) = 0;
119
120		/** Same as add, but stores xids instead of sequential ids.
121		*
122		* The default implementation fails with an assertion, as it is
123		* not supported by all indexes.
124		*
125		* @param n number of vectors
126		* @param x input vectors, size n * d
127		* @param xids if non-null, ids to store for the vectors (size n)
128		*/
129		virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
130
131		/** query n vectors of dimension d to the index.
132		*
133		* return at most k vectors. If there are not enough results for a
134		* query, the result array is padded with -1s.
135		*
136		* @param n number of vectors
137		* @param x input vectors to search, size n * d
138		* @param k number of extracted vectors
139		* @param distances output pairwise distances, size n*k
140		* @param labels output labels of the NNs, size n*k
141		*/
142		virtual void search(
143		idx_t n,
144		const float* x,
145		idx_t k,
146		float* distances,
147		idx_t* labels,
148		const SearchParameters* params = nullptr) const = 0;
149
150		/** query n vectors of dimension d to the index.
151		*
152		* return all vectors with distance < radius. Note that many
153		* indexes do not implement the range_search (only the k-NN search
154		* is mandatory).
155		*
156		* @param n number of vectors
157		* @param x input vectors to search, size n * d
158		* @param radius search radius
159		* @param result result table
160		*/
161		virtual void range_search(
162		idx_t n,
163		const float* x,
164		float radius,
165		RangeSearchResult* result,
166		const SearchParameters* params = nullptr) const;
167
168		/** return the indexes of the k vectors closest to the query x.
169		*
170		* This function is identical as search but only return labels of
171		* neighbors.
172		* @param n number of vectors
173		* @param x input vectors to search, size n * d
174		* @param labels output labels of the NNs, size n*k
175		* @param k number of nearest neighbours
176		*/
177		virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
178		const;
179
180		/// removes all elements from the database.
181		virtual void reset() = 0;
182
183		/** removes IDs from the index. Not supported by all
184		* indexes. Returns the number of elements removed.
185		*/
186		virtual size_t remove_ids(const IDSelector& sel);
187
188		/** Reconstruct a stored vector (or an approximation if lossy coding)
189		*
190		* this function may not be defined for some indexes
191		* @param key id of the vector to reconstruct
192		* @param recons reconstucted vector (size d)
193		*/
194		virtual void reconstruct(idx_t key, float* recons) const;
195
196		/** Reconstruct several stored vectors (or an approximation if lossy
197		* coding)
198		*
199		* this function may not be defined for some indexes
200		* @param n number of vectors to reconstruct
201		* @param keys ids of the vectors to reconstruct (size n)
202		* @param recons reconstucted vector (size n * d)
203		*/
204		virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
205		const;
206
207		/** Reconstruct vectors i0 to i0 + ni - 1
208		*
209		* this function may not be defined for some indexes
210		* @param i0 index of the first vector in the sequence
211		* @param ni number of vectors in the sequence
212		* @param recons reconstucted vector (size ni * d)
213		*/
214		virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
215
216		/** Similar to search, but also reconstructs the stored vectors (or an
217		* approximation in the case of lossy coding) for the search results.
218		*
219		* If there are not enough results for a query, the resulting arrays
220		* is padded with -1s.
221		*
222		* @param n number of vectors
223		* @param x input vectors to search, size n * d
224		* @param k number of extracted vectors
225		* @param distances output pairwise distances, size n*k
226		* @param labels output labels of the NNs, size n*k
227		* @param recons reconstructed vectors size (n, k, d)
228		**/
229		virtual void search_and_reconstruct(
230		idx_t n,
231		const float* x,
232		idx_t k,
233		float* distances,
234		idx_t* labels,
235		float* recons,
236		const SearchParameters* params = nullptr) const;
237
238		/** Computes a residual vector after indexing encoding.
239		*
240		* The residual vector is the difference between a vector and the
241		* reconstruction that can be decoded from its representation in
242		* the index. The residual can be used for multiple-stage indexing
243		* methods, like IndexIVF's methods.
244		*
245		* @param x input vector, size d
246		* @param residual output residual vector, size d
247		* @param key encoded index, as returned by search and assign
248		*/
249		virtual void compute_residual(const float* x, float* residual, idx_t key)
250		const;
251
252		/** Computes a residual vector after indexing encoding (batch form).
253		* Equivalent to calling compute_residual for each vector.
254		*
255		* The residual vector is the difference between a vector and the
256		* reconstruction that can be decoded from its representation in
257		* the index. The residual can be used for multiple-stage indexing
258		* methods, like IndexIVF's methods.
259		*
260		* @param n number of vectors
261		* @param xs input vectors, size (n x d)
262		* @param residuals output residual vectors, size (n x d)
263		* @param keys encoded index, as returned by search and assign
264		*/
265		virtual void compute_residual_n(
266		idx_t n,
267		const float* xs,
268		float* residuals,
269		const idx_t* keys) const;
270
271		/** Get a DistanceComputer (defined in AuxIndexStructures) object
272		* for this kind of index.
273		*
274		* DistanceComputer is implemented for indexes that support random
275		* access of their vectors.
276		*/
277		virtual DistanceComputer* get_distance_computer() const;
278
279		/* The standalone codec interface */
280
281		/** size of the produced codes in bytes */
282		virtual size_t sa_code_size() const;
283
284		/** encode a set of vectors
285		*
286		* @param n number of vectors
287		* @param x input vectors, size n * d
288		* @param bytes output encoded vectors, size n * sa_code_size()
289		*/
290		virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
291
292		/** decode a set of vectors
293		*
294		* @param n number of vectors
295		* @param bytes input encoded vectors, size n * sa_code_size()
296		* @param x output vectors, size n * d
297		*/
298		virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
299
300		/** moves the entries from another dataset to self.
301		* On output, other is empty.
302		* add_id is added to all moved ids
303		* (for sequential ids, this would be this->ntotal) */
304		virtual void merge_from(Index& otherIndex, idx_t add_id = 0);
305
306		/** check that the two indexes are compatible (ie, they are
307		* trained in the same way and have the same
308		* parameters). Otherwise throw. */
309		virtual void check_compatible_for_merge(const Index& otherIndex) const;
310
311		/** Add vectors that are computed with the standalone codec
312		*
313		* @param codes codes to add size n * sa_code_size()
314		* @param xids corresponding ids, size n
315		*/
316		virtual void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
317		};
318
319		} // namespace faiss
320
321		#endif