Coverage Report

Created: 2026-03-25 10:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
contrib/faiss/faiss/VectorTransform.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#ifndef FAISS_VECTOR_TRANSFORM_H
11
#define FAISS_VECTOR_TRANSFORM_H
12
13
/** Defines a few objects that apply transformations to a set of
14
 * vectors Often these are pre-processing steps.
15
 */
16
17
#include <stdint.h>
18
#include <vector>
19
20
#include <faiss/Index.h>
21
22
namespace faiss {
23
24
/** Any transformation applied on a set of vectors */
25
struct VectorTransform {
26
    int d_in;  ///! input dimension
27
    int d_out; ///! output dimension
28
29
    explicit VectorTransform(int d_in = 0, int d_out = 0)
30
0
            : d_in(d_in), d_out(d_out), is_trained(true) {}
31
32
    /// set if the VectorTransform does not require training, or if
33
    /// training is done already
34
    bool is_trained;
35
36
    /** Perform training on a representative set of vectors. Does
37
     * nothing by default.
38
     *
39
     * @param n      nb of training vectors
40
     * @param x      training vecors, size n * d
41
     */
42
    virtual void train(idx_t n, const float* x);
43
44
    /** apply the transformation and return the result in an allocated pointer
45
     * @param     n number of vectors to transform
46
     * @param     x input vectors, size n * d_in
47
     * @return    output vectors, size n * d_out
48
     */
49
    float* apply(idx_t n, const float* x) const;
50
51
    /** apply the transformation and return the result in a provided matrix
52
     * @param     n number of vectors to transform
53
     * @param     x input vectors, size n * d_in
54
     * @param    xt output vectors, size n * d_out
55
     */
56
    virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
57
58
    /// reverse transformation. May not be implemented or may return
59
    /// approximate result
60
    virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
61
62
    // check that the two transforms are identical (to merge indexes)
63
    virtual void check_identical(const VectorTransform& other) const = 0;
64
65
0
    virtual ~VectorTransform() {}
66
};
67
68
/** Generic linear transformation, with bias term applied on output
69
 * y = A * x + b
70
 */
71
struct LinearTransform : VectorTransform {
72
    bool have_bias; ///! whether to use the bias term
73
74
    /// check if matrix A is orthonormal (enables reverse_transform)
75
    bool is_orthonormal;
76
77
    /// Transformation matrix, size d_out * d_in
78
    std::vector<float> A;
79
80
    /// bias vector, size d_out
81
    std::vector<float> b;
82
83
    /// both d_in > d_out and d_out < d_in are supported
84
    explicit LinearTransform(
85
            int d_in = 0,
86
            int d_out = 0,
87
            bool have_bias = false);
88
89
    /// same as apply, but result is pre-allocated
90
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
91
92
    /// compute x = A^T * (x - b)
93
    /// is reverse transform if A has orthonormal lines
94
    void transform_transpose(idx_t n, const float* y, float* x) const;
95
96
    /// works only if is_orthonormal
97
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
98
99
    /// compute A^T * A to set the is_orthonormal flag
100
    void set_is_orthonormal();
101
102
    bool verbose;
103
    void print_if_verbose(
104
            const char* name,
105
            const std::vector<double>& mat,
106
            int n,
107
            int d) const;
108
109
    void check_identical(const VectorTransform& other) const override;
110
111
0
    ~LinearTransform() override {}
112
};
113
114
/// Randomly rotate a set of vectors
115
struct RandomRotationMatrix : LinearTransform {
116
    /// both d_in > d_out and d_out < d_in are supported
117
    RandomRotationMatrix(int d_in, int d_out)
118
0
            : LinearTransform(d_in, d_out, false) {}
119
120
    /// must be called before the transform is used
121
    void init(int seed);
122
123
    // initializes with an arbitrary seed
124
    void train(idx_t n, const float* x) override;
125
126
0
    RandomRotationMatrix() {}
127
};
128
129
/** Applies a principal component analysis on a set of vectors,
130
 *  with optionally whitening and random rotation. */
131
struct PCAMatrix : LinearTransform {
132
    /** after transformation the components are multiplied by
133
     * eigenvalues^eigen_power
134
     *
135
     * =0: no whitening
136
     * =-0.5: full whitening
137
     */
138
    float eigen_power;
139
140
    /// value added to eigenvalues to avoid division by 0 when whitening
141
    float epsilon;
142
143
    /// random rotation after PCA
144
    bool random_rotation;
145
146
    /// ratio between # training vectors and dimension
147
    size_t max_points_per_d;
148
149
    /// try to distribute output eigenvectors in this many bins
150
    int balanced_bins;
151
152
    /// Mean, size d_in
153
    std::vector<float> mean;
154
155
    /// eigenvalues of covariance matrix (= squared singular values)
156
    std::vector<float> eigenvalues;
157
158
    /// PCA matrix, size d_in * d_in
159
    std::vector<float> PCAMat;
160
161
    // the final matrix is computed after random rotation and/or whitening
162
    explicit PCAMatrix(
163
            int d_in = 0,
164
            int d_out = 0,
165
            float eigen_power = 0,
166
            bool random_rotation = false);
167
168
    /// train on n vectors. If n < d_in then the eigenvector matrix
169
    /// will be completed with 0s
170
    void train(idx_t n, const float* x) override;
171
172
    /// copy pre-trained PCA matrix
173
    void copy_from(const PCAMatrix& other);
174
175
    /// called after mean, PCAMat and eigenvalues are computed
176
    void prepare_Ab();
177
};
178
179
/** ITQ implementation from
180
 *
181
 *     Iterative quantization: A procrustean approach to learning binary codes
182
 *     for large-scale image retrieval,
183
 *
184
 * Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
185
 * PAMI'12.
186
 */
187
188
struct ITQMatrix : LinearTransform {
189
    int max_iter;
190
    int seed;
191
192
    // force initialization of the rotation (for debugging)
193
    std::vector<double> init_rotation;
194
195
    explicit ITQMatrix(int d = 0);
196
197
    void train(idx_t n, const float* x) override;
198
};
199
200
/** The full ITQ transform, including normalizations and PCA transformation
201
 */
202
struct ITQTransform : VectorTransform {
203
    std::vector<float> mean;
204
    bool do_pca;
205
    ITQMatrix itq;
206
207
    /// max training points per dimension
208
    int max_train_per_dim;
209
210
    // concatenation of PCA + ITQ transformation
211
    LinearTransform pca_then_itq;
212
213
    explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
214
215
    void train(idx_t n, const float* x) override;
216
217
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
218
219
    void check_identical(const VectorTransform& other) const override;
220
};
221
222
struct ProductQuantizer;
223
224
/** Applies a rotation to align the dimensions with a PQ to minimize
225
 *  the reconstruction error. Can be used before an IndexPQ or an
226
 *  IndexIVFPQ. The method is the non-parametric version described in:
227
 *
228
 * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
229
 * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
230
 *
231
 */
232
struct OPQMatrix : LinearTransform {
233
    int M;               ///< nb of subquantizers
234
    int niter = 50;      ///< Number of outer training iterations
235
    int niter_pq = 4;    ///< Number of training iterations for the PQ
236
    int niter_pq_0 = 40; ///< same, for the first outer iteration
237
238
    /// if there are too many training points, resample
239
    size_t max_train_points = 256 * 256;
240
    bool verbose = false;
241
242
    /// if non-NULL, use this product quantizer for training
243
    /// should be constructed with (d_out, M, _)
244
    ProductQuantizer* pq = nullptr;
245
246
    /// if d2 != -1, output vectors of this dimension
247
    explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
248
249
    void train(idx_t n, const float* x) override;
250
};
251
252
/** remap dimensions for intput vectors, possibly inserting 0s
253
 * strictly speaking this is also a linear transform but we don't want
254
 * to compute it with matrix multiplies */
255
struct RemapDimensionsTransform : VectorTransform {
256
    /// map from output dimension to input, size d_out
257
    /// -1 -> set output to 0
258
    std::vector<int> map;
259
260
    RemapDimensionsTransform(int d_in, int d_out, const int* map);
261
262
    /// remap input to output, skipping or inserting dimensions as needed
263
    /// if uniform: distribute dimensions uniformly
264
    /// otherwise just take the d_out first ones.
265
    RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
266
267
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
268
269
    /// reverse transform correct only when the mapping is a permutation
270
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
271
272
0
    RemapDimensionsTransform() {}
273
274
    void check_identical(const VectorTransform& other) const override;
275
};
276
277
/** per-vector normalization */
278
struct NormalizationTransform : VectorTransform {
279
    float norm;
280
281
    explicit NormalizationTransform(int d, float norm = 2.0);
282
    NormalizationTransform();
283
284
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
285
286
    /// Identity transform since norm is not revertible
287
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
288
289
    void check_identical(const VectorTransform& other) const override;
290
};
291
292
/** Subtract the mean of each component from the vectors. */
293
struct CenteringTransform : VectorTransform {
294
    /// Mean, size d_in = d_out
295
    std::vector<float> mean;
296
297
    explicit CenteringTransform(int d = 0);
298
299
    /// train on n vectors.
300
    void train(idx_t n, const float* x) override;
301
302
    /// subtract the mean
303
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
304
305
    /// add the mean
306
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
307
308
    void check_identical(const VectorTransform& other) const override;
309
};
310
311
} // namespace faiss
312
313
#endif