contrib/faiss/faiss/impl/lattice_Zn.h

Source
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// -*- c++ -*-
#ifndef FAISS_LATTICE_ZN_H
#define FAISS_LATTICE_ZN_H

#include <stddef.h>
#include <stdint.h>
#include <vector>

namespace faiss {

/** returns the nearest vertex in the sphere to a query. Returns only
 * the coordinates, not an id.
 *
 * Algorithm: all points are derived from a one atom vector up to a
 * permutation and sign changes. The search function finds the most
 * appropriate atom and transformation.
 */
struct ZnSphereSearch {
    int dimS, r2;
    int natom;

    /// size dim * ntatom
    std::vector<float> voc;

    ZnSphereSearch(int dim, int r2);

    /// find nearest centroid. x does not need to be normalized
    float search(const float* x, float* c) const;

    /// full call. Requires externally-allocated temp space
    float search(
            const float* x,
            float* c,
            float* tmp,   // size 2 *dim
            int* tmp_int, // size dim
            int* ibest_out = nullptr) const;

    // multi-threaded
    void search_multi(int n, const float* x, float* c_out, float* dp_out);
};

/***************************************************************************
 * Support ids as well.
 *
 * Limitations: ids are limited to 64 bit
 ***************************************************************************/

struct EnumeratedVectors {
    /// size of the collection
    uint64_t nv;
    int dim;

    explicit EnumeratedVectors(int dim) : nv(0), dim(dim) {}

    /// encode a vector from a collection
    virtual uint64_t encode(const float* x) const = 0;

    /// decode it
    virtual void decode(uint64_t code, float* c) const = 0;

    // call encode on nc vectors
    void encode_multi(size_t nc, const float* c, uint64_t* codes) const;

    // call decode on nc codes
    void decode_multi(size_t nc, const uint64_t* codes, float* c) const;

    // find the nearest neighbor of each xq
    // (decodes and computes distances)
    void find_nn(
            size_t n,
            const uint64_t* codes,
            size_t nq,
            const float* xq,
            int64_t* idx,
            float* dis);

    virtual ~EnumeratedVectors() {}
};

struct Repeat {
    float val;
    int n;
};

/** Repeats: used to encode a vector that has n occurrences of
 *  val. Encodes the signs and permutation of the vector. Useful for
 *  atoms.
 */
struct Repeats {
    int dim;
    std::vector<Repeat> repeats;

    // initialize from a template of the atom.
    Repeats(int dim = 0, const float* c = nullptr);

    // count number of possible codes for this atom
    uint64_t count() const;

    uint64_t encode(const float* c) const;

    void decode(uint64_t code, float* c) const;
};

/** codec that can return ids for the encoded vectors
 *
 * uses the ZnSphereSearch to encode the vector by encoding the
 * permutation and signs. Depends on ZnSphereSearch because it uses
 * the atom numbers */
struct ZnSphereCodec : ZnSphereSearch, EnumeratedVectors {
    struct CodeSegment : Repeats {
        explicit CodeSegment(const Repeats& r) : Repeats(r) {}
        uint64_t c0; // first code assigned to segment
        int signbits;
    };

    std::vector<CodeSegment> code_segments;
    uint64_t nv;
    size_t code_size;

    ZnSphereCodec(int dim, int r2);

    uint64_t search_and_encode(const float* x) const;

    void decode(uint64_t code, float* c) const override;

    /// takes vectors that do not need to be centroids
    uint64_t encode(const float* x) const override;
};

/** recursive sphere codec
 *
 * Uses a recursive decomposition on the dimensions to encode
 * centroids found by the ZnSphereSearch. The codes are *not*
 * compatible with the ones of ZnSpehreCodec
 */
struct ZnSphereCodecRec : EnumeratedVectors {
    int r2;

    int log2_dim;
    int code_size;

    ZnSphereCodecRec(int dim, int r2);

    uint64_t encode_centroid(const float* c) const;

    void decode(uint64_t code, float* c) const override;

    /// vectors need to be centroids (does not work on arbitrary
    /// vectors)
    uint64_t encode(const float* x) const override;

    std::vector<uint64_t> all_nv;
    std::vector<uint64_t> all_nv_cum;

    int decode_cache_ld;
    std::vector<std::vector<float>> decode_cache;

    // nb of vectors in the sphere in dim 2^ld with r2 radius
    uint64_t get_nv(int ld, int r2a) const;

    // cumulative version
    uint64_t get_nv_cum(int ld, int r2t, int r2a) const;
    void set_nv_cum(int ld, int r2t, int r2a, uint64_t v);
};

/** Codec that uses the recursive codec if dim is a power of 2 and
 * the regular one otherwise */
struct ZnSphereCodecAlt : ZnSphereCodec {
    bool use_rec;
    ZnSphereCodecRec znc_rec;

    ZnSphereCodecAlt(int dim, int r2);

    uint64_t encode(const float* x) const override;

    void decode(uint64_t code, float* c) const override;
};

} // namespace faiss

#endif

Line	Count	Source
1		/*
2		* Copyright (c) Meta Platforms, Inc. and affiliates.
3		*
4		* This source code is licensed under the MIT license found in the
5		* LICENSE file in the root directory of this source tree.
6		*/
7
8		// -- c++ --
9		#ifndef FAISS_LATTICE_ZN_H
10		#define FAISS_LATTICE_ZN_H
11
12		#include <stddef.h>
13		#include <stdint.h>
14		#include <vector>
15
16		namespace faiss {
17
18		/** returns the nearest vertex in the sphere to a query. Returns only
19		* the coordinates, not an id.
20		*
21		* Algorithm: all points are derived from a one atom vector up to a
22		* permutation and sign changes. The search function finds the most
23		* appropriate atom and transformation.
24		*/
25		struct ZnSphereSearch {
26		int dimS, r2;
27		int natom;
28
29		/// size dim * ntatom
30		std::vector<float> voc;
31
32		ZnSphereSearch(int dim, int r2);
33
34		/// find nearest centroid. x does not need to be normalized
35		float search(const float* x, float* c) const;
36
37		/// full call. Requires externally-allocated temp space
38		float search(
39		const float* x,
40		float* c,
41		float* tmp, // size 2 *dim
42		int* tmp_int, // size dim
43		int* ibest_out = nullptr) const;
44
45		// multi-threaded
46		void search_multi(int n, const float* x, float* c_out, float* dp_out);
47		};
48
49		/***************************************************************************
50		* Support ids as well.
51		*
52		* Limitations: ids are limited to 64 bit
53		***************************************************************************/
54
55		struct EnumeratedVectors {
56		/// size of the collection
57		uint64_t nv;
58		int dim;
59
60	0	explicit EnumeratedVectors(int dim) : nv(0), dim(dim) {}
61
62		/// encode a vector from a collection
63		virtual uint64_t encode(const float* x) const = 0;
64
65		/// decode it
66		virtual void decode(uint64_t code, float* c) const = 0;
67
68		// call encode on nc vectors
69		void encode_multi(size_t nc, const float* c, uint64_t* codes) const;
70
71		// call decode on nc codes
72		void decode_multi(size_t nc, const uint64_t* codes, float* c) const;
73
74		// find the nearest neighbor of each xq
75		// (decodes and computes distances)
76		void find_nn(
77		size_t n,
78		const uint64_t* codes,
79		size_t nq,
80		const float* xq,
81		int64_t* idx,
82		float* dis);
83
84	0	virtual ~EnumeratedVectors() {}
85		};
86
87		struct Repeat {
88		float val;
89		int n;
90		};
91
92		/** Repeats: used to encode a vector that has n occurrences of
93		* val. Encodes the signs and permutation of the vector. Useful for
94		* atoms.
95		*/
96		struct Repeats {
97		int dim;
98		std::vector<Repeat> repeats;
99
100		// initialize from a template of the atom.
101		Repeats(int dim = 0, const float* c = nullptr);
102
103		// count number of possible codes for this atom
104		uint64_t count() const;
105
106		uint64_t encode(const float* c) const;
107
108		void decode(uint64_t code, float* c) const;
109		};
110
111		/** codec that can return ids for the encoded vectors
112		*
113		* uses the ZnSphereSearch to encode the vector by encoding the
114		* permutation and signs. Depends on ZnSphereSearch because it uses
115		* the atom numbers */
116		struct ZnSphereCodec : ZnSphereSearch, EnumeratedVectors {
117		struct CodeSegment : Repeats {
118	0	explicit CodeSegment(const Repeats& r) : Repeats(r) {}
119		uint64_t c0; // first code assigned to segment
120		int signbits;
121		};
122
123		std::vector<CodeSegment> code_segments;
124		uint64_t nv;
125		size_t code_size;
126
127		ZnSphereCodec(int dim, int r2);
128
129		uint64_t search_and_encode(const float* x) const;
130
131		void decode(uint64_t code, float* c) const override;
132
133		/// takes vectors that do not need to be centroids
134		uint64_t encode(const float* x) const override;
135		};
136
137		/** recursive sphere codec
138		*
139		* Uses a recursive decomposition on the dimensions to encode
140		* centroids found by the ZnSphereSearch. The codes are not
141		* compatible with the ones of ZnSpehreCodec
142		*/
143		struct ZnSphereCodecRec : EnumeratedVectors {
144		int r2;
145
146		int log2_dim;
147		int code_size;
148
149		ZnSphereCodecRec(int dim, int r2);
150
151		uint64_t encode_centroid(const float* c) const;
152
153		void decode(uint64_t code, float* c) const override;
154
155		/// vectors need to be centroids (does not work on arbitrary
156		/// vectors)
157		uint64_t encode(const float* x) const override;
158
159		std::vector<uint64_t> all_nv;
160		std::vector<uint64_t> all_nv_cum;
161
162		int decode_cache_ld;
163		std::vector<std::vector<float>> decode_cache;
164
165		// nb of vectors in the sphere in dim 2^ld with r2 radius
166		uint64_t get_nv(int ld, int r2a) const;
167
168		// cumulative version
169		uint64_t get_nv_cum(int ld, int r2t, int r2a) const;
170		void set_nv_cum(int ld, int r2t, int r2a, uint64_t v);
171		};
172
173		/** Codec that uses the recursive codec if dim is a power of 2 and
174		* the regular one otherwise */
175		struct ZnSphereCodecAlt : ZnSphereCodec {
176		bool use_rec;
177		ZnSphereCodecRec znc_rec;
178
179		ZnSphereCodecAlt(int dim, int r2);
180
181		uint64_t encode(const float* x) const override;
182
183		void decode(uint64_t code, float* c) const override;
184		};
185
186		} // namespace faiss
187
188		#endif

Coverage Report

Created: 2026-03-13 03:47