/root/doris/contrib/faiss/faiss/Index.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | // -*- c++ -*- |
9 | | |
10 | | #ifndef FAISS_INDEX_H |
11 | | #define FAISS_INDEX_H |
12 | | |
13 | | #include <faiss/MetricType.h> |
14 | | #include <cstdio> |
15 | | #include <sstream> |
16 | | #include <string> |
17 | | #include <typeinfo> |
18 | | |
19 | | #define FAISS_VERSION_MAJOR 1 |
20 | | #define FAISS_VERSION_MINOR 11 |
21 | | #define FAISS_VERSION_PATCH 0 |
22 | | |
23 | | // Macro to combine the version components into a single string |
24 | | #ifndef FAISS_STRINGIFY |
25 | 0 | #define FAISS_STRINGIFY(ARG) #ARG |
26 | | #endif |
27 | | #ifndef FAISS_TOSTRING |
28 | 0 | #define FAISS_TOSTRING(ARG) FAISS_STRINGIFY(ARG) |
29 | | #endif |
30 | | #define VERSION_STRING \ |
31 | 0 | FAISS_TOSTRING(FAISS_VERSION_MAJOR) \ |
32 | 0 | "." FAISS_TOSTRING(FAISS_VERSION_MINOR) "." FAISS_TOSTRING( \ |
33 | 0 | FAISS_VERSION_PATCH) |
34 | | |
35 | | /** |
36 | | * @namespace faiss |
37 | | * |
38 | | * Throughout the library, vectors are provided as float * pointers. |
39 | | * Most algorithms can be optimized when several vectors are processed |
40 | | * (added/searched) together in a batch. In this case, they are passed |
41 | | * in as a matrix. When n vectors of size d are provided as float * x, |
42 | | * component j of vector i is |
43 | | * |
44 | | * x[ i * d + j ] |
45 | | * |
46 | | * where 0 <= i < n and 0 <= j < d. In other words, matrices are |
47 | | * always compact. When specifying the size of the matrix, we call it |
48 | | * an n*d matrix, which implies a row-major storage. |
49 | | */ |
50 | | |
51 | | namespace faiss { |
52 | | |
53 | | /// Forward declarations see impl/AuxIndexStructures.h, impl/IDSelector.h |
54 | | /// and impl/DistanceComputer.h |
55 | | struct IDSelector; |
56 | | struct RangeSearchResult; |
57 | | struct DistanceComputer; |
58 | | |
59 | | /** Parent class for the optional search paramenters. |
60 | | * |
61 | | * Sub-classes with additional search parameters should inherit this class. |
62 | | * Ownership of the object fields is always to the caller. |
63 | | */ |
64 | | struct SearchParameters { |
65 | | /// if non-null, only these IDs will be considered during search. |
66 | | IDSelector* sel = nullptr; |
67 | | /// make sure we can dynamic_cast this |
68 | 126 | virtual ~SearchParameters() {} |
69 | | }; |
70 | | |
71 | | /** Abstract structure for an index, supports adding vectors and searching |
72 | | * them. |
73 | | * |
74 | | * All vectors provided at add or search time are 32-bit float arrays, |
75 | | * although the internal representation may vary. |
76 | | */ |
77 | | struct Index { |
78 | | using component_t = float; |
79 | | using distance_t = float; |
80 | | |
81 | | int d; ///< vector dimension |
82 | | idx_t ntotal; ///< total nb of indexed vectors |
83 | | bool verbose; ///< verbosity level |
84 | | |
85 | | /// set if the Index does not require training, or if training is |
86 | | /// done already |
87 | | bool is_trained; |
88 | | |
89 | | /// type of metric this index uses for search |
90 | | MetricType metric_type; |
91 | | float metric_arg; ///< argument of the metric type |
92 | | |
93 | | explicit Index(idx_t d = 0, MetricType metric = METRIC_L2) |
94 | 374 | : d(d), |
95 | 374 | ntotal(0), |
96 | 374 | verbose(false), |
97 | 374 | is_trained(true), |
98 | 374 | metric_type(metric), |
99 | 374 | metric_arg(0) {} |
100 | | |
101 | | virtual ~Index(); |
102 | | |
103 | | /** Perform training on a representative set of vectors |
104 | | * |
105 | | * @param n nb of training vectors |
106 | | * @param x training vecors, size n * d |
107 | | */ |
108 | | virtual void train(idx_t n, const float* x); |
109 | | |
110 | | /** Add n vectors of dimension d to the index. |
111 | | * |
112 | | * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1 |
113 | | * This function slices the input vectors in chunks smaller than |
114 | | * blocksize_add and calls add_core. |
115 | | * @param n number of vectors |
116 | | * @param x input matrix, size n * d |
117 | | */ |
118 | | virtual void add(idx_t n, const float* x) = 0; |
119 | | |
120 | | /** Same as add, but stores xids instead of sequential ids. |
121 | | * |
122 | | * The default implementation fails with an assertion, as it is |
123 | | * not supported by all indexes. |
124 | | * |
125 | | * @param n number of vectors |
126 | | * @param x input vectors, size n * d |
127 | | * @param xids if non-null, ids to store for the vectors (size n) |
128 | | */ |
129 | | virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids); |
130 | | |
131 | | /** query n vectors of dimension d to the index. |
132 | | * |
133 | | * return at most k vectors. If there are not enough results for a |
134 | | * query, the result array is padded with -1s. |
135 | | * |
136 | | * @param n number of vectors |
137 | | * @param x input vectors to search, size n * d |
138 | | * @param k number of extracted vectors |
139 | | * @param distances output pairwise distances, size n*k |
140 | | * @param labels output labels of the NNs, size n*k |
141 | | */ |
142 | | virtual void search( |
143 | | idx_t n, |
144 | | const float* x, |
145 | | idx_t k, |
146 | | float* distances, |
147 | | idx_t* labels, |
148 | | const SearchParameters* params = nullptr) const = 0; |
149 | | |
150 | | /** query n vectors of dimension d to the index. |
151 | | * |
152 | | * return all vectors with distance < radius. Note that many |
153 | | * indexes do not implement the range_search (only the k-NN search |
154 | | * is mandatory). |
155 | | * |
156 | | * @param n number of vectors |
157 | | * @param x input vectors to search, size n * d |
158 | | * @param radius search radius |
159 | | * @param result result table |
160 | | */ |
161 | | virtual void range_search( |
162 | | idx_t n, |
163 | | const float* x, |
164 | | float radius, |
165 | | RangeSearchResult* result, |
166 | | const SearchParameters* params = nullptr) const; |
167 | | |
168 | | /** return the indexes of the k vectors closest to the query x. |
169 | | * |
170 | | * This function is identical as search but only return labels of |
171 | | * neighbors. |
172 | | * @param n number of vectors |
173 | | * @param x input vectors to search, size n * d |
174 | | * @param labels output labels of the NNs, size n*k |
175 | | * @param k number of nearest neighbours |
176 | | */ |
177 | | virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1) |
178 | | const; |
179 | | |
180 | | /// removes all elements from the database. |
181 | | virtual void reset() = 0; |
182 | | |
183 | | /** removes IDs from the index. Not supported by all |
184 | | * indexes. Returns the number of elements removed. |
185 | | */ |
186 | | virtual size_t remove_ids(const IDSelector& sel); |
187 | | |
188 | | /** Reconstruct a stored vector (or an approximation if lossy coding) |
189 | | * |
190 | | * this function may not be defined for some indexes |
191 | | * @param key id of the vector to reconstruct |
192 | | * @param recons reconstucted vector (size d) |
193 | | */ |
194 | | virtual void reconstruct(idx_t key, float* recons) const; |
195 | | |
196 | | /** Reconstruct several stored vectors (or an approximation if lossy |
197 | | * coding) |
198 | | * |
199 | | * this function may not be defined for some indexes |
200 | | * @param n number of vectors to reconstruct |
201 | | * @param keys ids of the vectors to reconstruct (size n) |
202 | | * @param recons reconstucted vector (size n * d) |
203 | | */ |
204 | | virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons) |
205 | | const; |
206 | | |
207 | | /** Reconstruct vectors i0 to i0 + ni - 1 |
208 | | * |
209 | | * this function may not be defined for some indexes |
210 | | * @param i0 index of the first vector in the sequence |
211 | | * @param ni number of vectors in the sequence |
212 | | * @param recons reconstucted vector (size ni * d) |
213 | | */ |
214 | | virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const; |
215 | | |
216 | | /** Similar to search, but also reconstructs the stored vectors (or an |
217 | | * approximation in the case of lossy coding) for the search results. |
218 | | * |
219 | | * If there are not enough results for a query, the resulting arrays |
220 | | * is padded with -1s. |
221 | | * |
222 | | * @param n number of vectors |
223 | | * @param x input vectors to search, size n * d |
224 | | * @param k number of extracted vectors |
225 | | * @param distances output pairwise distances, size n*k |
226 | | * @param labels output labels of the NNs, size n*k |
227 | | * @param recons reconstructed vectors size (n, k, d) |
228 | | **/ |
229 | | virtual void search_and_reconstruct( |
230 | | idx_t n, |
231 | | const float* x, |
232 | | idx_t k, |
233 | | float* distances, |
234 | | idx_t* labels, |
235 | | float* recons, |
236 | | const SearchParameters* params = nullptr) const; |
237 | | |
238 | | /** Computes a residual vector after indexing encoding. |
239 | | * |
240 | | * The residual vector is the difference between a vector and the |
241 | | * reconstruction that can be decoded from its representation in |
242 | | * the index. The residual can be used for multiple-stage indexing |
243 | | * methods, like IndexIVF's methods. |
244 | | * |
245 | | * @param x input vector, size d |
246 | | * @param residual output residual vector, size d |
247 | | * @param key encoded index, as returned by search and assign |
248 | | */ |
249 | | virtual void compute_residual(const float* x, float* residual, idx_t key) |
250 | | const; |
251 | | |
252 | | /** Computes a residual vector after indexing encoding (batch form). |
253 | | * Equivalent to calling compute_residual for each vector. |
254 | | * |
255 | | * The residual vector is the difference between a vector and the |
256 | | * reconstruction that can be decoded from its representation in |
257 | | * the index. The residual can be used for multiple-stage indexing |
258 | | * methods, like IndexIVF's methods. |
259 | | * |
260 | | * @param n number of vectors |
261 | | * @param xs input vectors, size (n x d) |
262 | | * @param residuals output residual vectors, size (n x d) |
263 | | * @param keys encoded index, as returned by search and assign |
264 | | */ |
265 | | virtual void compute_residual_n( |
266 | | idx_t n, |
267 | | const float* xs, |
268 | | float* residuals, |
269 | | const idx_t* keys) const; |
270 | | |
271 | | /** Get a DistanceComputer (defined in AuxIndexStructures) object |
272 | | * for this kind of index. |
273 | | * |
274 | | * DistanceComputer is implemented for indexes that support random |
275 | | * access of their vectors. |
276 | | */ |
277 | | virtual DistanceComputer* get_distance_computer() const; |
278 | | |
279 | | /* The standalone codec interface */ |
280 | | |
281 | | /** size of the produced codes in bytes */ |
282 | | virtual size_t sa_code_size() const; |
283 | | |
284 | | /** encode a set of vectors |
285 | | * |
286 | | * @param n number of vectors |
287 | | * @param x input vectors, size n * d |
288 | | * @param bytes output encoded vectors, size n * sa_code_size() |
289 | | */ |
290 | | virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const; |
291 | | |
292 | | /** decode a set of vectors |
293 | | * |
294 | | * @param n number of vectors |
295 | | * @param bytes input encoded vectors, size n * sa_code_size() |
296 | | * @param x output vectors, size n * d |
297 | | */ |
298 | | virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const; |
299 | | |
300 | | /** moves the entries from another dataset to self. |
301 | | * On output, other is empty. |
302 | | * add_id is added to all moved ids |
303 | | * (for sequential ids, this would be this->ntotal) */ |
304 | | virtual void merge_from(Index& otherIndex, idx_t add_id = 0); |
305 | | |
306 | | /** check that the two indexes are compatible (ie, they are |
307 | | * trained in the same way and have the same |
308 | | * parameters). Otherwise throw. */ |
309 | | virtual void check_compatible_for_merge(const Index& otherIndex) const; |
310 | | |
311 | | /** Add vectors that are computed with the standalone codec |
312 | | * |
313 | | * @param codes codes to add size n * sa_code_size() |
314 | | * @param xids corresponding ids, size n |
315 | | */ |
316 | | virtual void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids); |
317 | | }; |
318 | | |
319 | | } // namespace faiss |
320 | | |
321 | | #endif |