/root/doris/contrib/faiss/faiss/utils/utils.h

Source
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// -*- c++ -*-

/*
 *  A few utilitary functions for similarity search:
 * - optimized exhaustive distance and knn search functions
 * - some functions reimplemented from torch for speed
 */

#ifndef FAISS_utils_h
#define FAISS_utils_h

#include <stdint.h>
#include <set>
#include <string>
#include <vector>

#include <faiss/impl/platform_macros.h>
#include <faiss/utils/Heap.h>

namespace faiss {

/****************************************************************************
 * Get compile specific variables
 ***************************************************************************/

/// get compile options
std::string get_compile_options();

/**************************************************
 * Get some stats about the system
 **************************************************/

// Expose Faiss version as a string
std::string get_version();

/// ms elapsed since some arbitrary epoch
double getmillisecs();

/// get current RSS usage in kB
size_t get_mem_usage_kb();

uint64_t get_cycles();

/***************************************************************************
 * Misc  matrix and vector manipulation functions
 ***************************************************************************/

/* perform a reflection (not an efficient implementation, just for test ) */
void reflection(const float* u, float* x, size_t n, size_t d, size_t nu);

/** compute the Q of the QR decomposition for m > n
 * @param a   size n * m: input matrix and output Q
 */
void matrix_qr(int m, int n, float* a);

/** distances are supposed to be sorted. Sorts indices with same distance*/
void ranklist_handle_ties(int k, int64_t* idx, const float* dis);

/** count the number of common elements between v1 and v2
 * algorithm = sorting + bissection to avoid double-counting duplicates
 */
size_t ranklist_intersection_size(
        size_t k1,
        const int64_t* v1,
        size_t k2,
        const int64_t* v2);

/** merge a result table into another one
 *
 * @param I0, D0       first result table, size (n, k)
 * @param I1, D1       second result table, size (n, k)
 * @param keep_min     if true, keep min values, otherwise keep max
 * @param translation  add this value to all I1's indexes
 * @return             nb of values that were taken from the second table
 */
size_t merge_result_table_with(
        size_t n,
        size_t k,
        int64_t* I0,
        float* D0,
        const int64_t* I1,
        const float* D1,
        bool keep_min = true,
        int64_t translation = 0);

/// a balanced assignment has a IF of 1, a completely unbalanced assignment has
/// an IF = k.
double imbalance_factor(int64_t n, int k, const int64_t* assign);

/// same, takes a histogram as input
double imbalance_factor(int k, const int64_t* hist);

/// compute histogram on v
int ivec_hist(size_t n, const int* v, int vmax, int* hist);

/** Compute histogram of bits on a code array
 *
 * @param codes   size(n, nbits / 8)
 * @param hist    size(nbits): nb of 1s in the array of codes
 */
void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist);

/// compute a checksum on a table.
uint64_t ivec_checksum(size_t n, const int32_t* a);

/// compute a checksum on a table.
uint64_t bvec_checksum(size_t n, const uint8_t* a);

/** compute checksums for the rows of a matrix
 *
 * @param n   number of rows
 * @param d   size per row
 * @param a   matrix to handle, size n * d
 * @param cs  output checksums, size n
 */
void bvecs_checksum(size_t n, size_t d, const uint8_t* a, uint64_t* cs);

/** random subsamples a set of vectors if there are too many of them
 *
 * @param d      dimension of the vectors
 * @param n      on input: nb of input vectors, output: nb of output vectors
 * @param nmax   max nb of vectors to keep
 * @param x      input array, size *n-by-d
 * @param seed   random seed to use for sampling
 * @return       x or an array allocated with new [] with *n vectors
 */
const float* fvecs_maybe_subsample(
        size_t d,
        size_t* n,
        size_t nmax,
        const float* x,
        bool verbose = false,
        int64_t seed = 1234);

/** Convert binary vector to +1/-1 valued float vector.
 *
 * @param d      dimension of the vector (multiple of 8)
 * @param x_in   input binary vector (uint8_t table of size d / 8)
 * @param x_out  output float vector (float table of size d)
 */
void binary_to_real(size_t d, const uint8_t* x_in, float* x_out);

/** Convert float vector to binary vector. Components > 0 are converted to 1,
 * others to 0.
 *
 * @param d      dimension of the vector (multiple of 8)
 * @param x_in   input float vector (float table of size d)
 * @param x_out  output binary vector (uint8_t table of size d / 8)
 */
void real_to_binary(size_t d, const float* x_in, uint8_t* x_out);

/** A reasonable hashing function */
uint64_t hash_bytes(const uint8_t* bytes, int64_t n);

/** Whether OpenMP annotations were respected. */
bool check_openmp();

/** This class is used to combine range and knn search results
 * in contrib.exhaustive_search.range_search_gpu */

template <typename T>
struct CombinerRangeKNN {
    int64_t nq;    /// nb of queries
    size_t k;      /// number of neighbors for the knn search part
    T r2;          /// range search radius
    bool keep_max; /// whether to keep max values instead of min.

    CombinerRangeKNN(int64_t nq, size_t k, T r2, bool keep_max)
            : nq(nq), k(k), r2(r2), keep_max(keep_max) {}

    /// Knn search results
    const int64_t* I = nullptr; /// size nq * k
    const T* D = nullptr;       /// size nq * k

    /// optional: range search results (ignored if mask is NULL)
    const bool* mask =
            nullptr; /// mask for where knn results are valid, size nq
    // range search results for remaining entries nrange = sum(mask)
    const int64_t* lim_remain = nullptr; /// size nrange + 1
    const T* D_remain = nullptr;         /// size lim_remain[nrange]
    const int64_t* I_remain = nullptr;   /// size lim_remain[nrange]

    const int64_t* L_res = nullptr; /// size nq + 1
    // Phase 1: compute sizes into limits array (of size nq + 1)
    void compute_sizes(int64_t* L_res);

    /// Phase 2: caller allocates D_res and I_res (size L_res[nq])
    /// Phase 3: fill in D_res and I_res
    void write_result(T* D_res, int64_t* I_res);
};

struct CodeSet {
    size_t d;
    std::set<std::vector<uint8_t>> s;

    explicit CodeSet(size_t d) : d(d) {}
    void insert(size_t n, const uint8_t* codes, bool* inserted);
};

} // namespace faiss

#endif /* FAISS_utils_h */

Coverage Report

Created: 2026-01-08 00:11

Line	Count	Source
1		/*
2		* Copyright (c) Meta Platforms, Inc. and affiliates.
3		*
4		* This source code is licensed under the MIT license found in the
5		* LICENSE file in the root directory of this source tree.
6		*/
7
8		// -- c++ --
9
10		/*
11		* A few utilitary functions for similarity search:
12		* - optimized exhaustive distance and knn search functions
13		* - some functions reimplemented from torch for speed
14		*/
15
16		#ifndef FAISS_utils_h
17		#define FAISS_utils_h
18
19		#include <stdint.h>
20		#include <set>
21		#include <string>
22		#include <vector>
23
24		#include <faiss/impl/platform_macros.h>
25		#include <faiss/utils/Heap.h>
26
27		namespace faiss {
28
29		/****************************************************************************
30		* Get compile specific variables
31		***************************************************************************/
32
33		/// get compile options
34		std::string get_compile_options();
35
36		/**************************************************
37		* Get some stats about the system
38		**************************************************/
39
40		// Expose Faiss version as a string
41		std::string get_version();
42
43		/// ms elapsed since some arbitrary epoch
44		double getmillisecs();
45
46		/// get current RSS usage in kB
47		size_t get_mem_usage_kb();
48
49		uint64_t get_cycles();
50
51		/***************************************************************************
52		* Misc matrix and vector manipulation functions
53		***************************************************************************/
54
55		/* perform a reflection (not an efficient implementation, just for test ) */
56		void reflection(const float* u, float* x, size_t n, size_t d, size_t nu);
57
58		/** compute the Q of the QR decomposition for m > n
59		* @param a size n * m: input matrix and output Q
60		*/
61		void matrix_qr(int m, int n, float* a);
62
63		/** distances are supposed to be sorted. Sorts indices with same distance*/
64		void ranklist_handle_ties(int k, int64_t* idx, const float* dis);
65
66		/** count the number of common elements between v1 and v2
67		* algorithm = sorting + bissection to avoid double-counting duplicates
68		*/
69		size_t ranklist_intersection_size(
70		size_t k1,
71		const int64_t* v1,
72		size_t k2,
73		const int64_t* v2);
74
75		/** merge a result table into another one
76		*
77		* @param I0, D0 first result table, size (n, k)
78		* @param I1, D1 second result table, size (n, k)
79		* @param keep_min if true, keep min values, otherwise keep max
80		* @param translation add this value to all I1's indexes
81		* @return nb of values that were taken from the second table
82		*/
83		size_t merge_result_table_with(
84		size_t n,
85		size_t k,
86		int64_t* I0,
87		float* D0,
88		const int64_t* I1,
89		const float* D1,
90		bool keep_min = true,
91		int64_t translation = 0);
92
93		/// a balanced assignment has a IF of 1, a completely unbalanced assignment has
94		/// an IF = k.
95		double imbalance_factor(int64_t n, int k, const int64_t* assign);
96
97		/// same, takes a histogram as input
98		double imbalance_factor(int k, const int64_t* hist);
99
100		/// compute histogram on v
101		int ivec_hist(size_t n, const int* v, int vmax, int* hist);
102
103		/** Compute histogram of bits on a code array
104		*
105		* @param codes size(n, nbits / 8)
106		* @param hist size(nbits): nb of 1s in the array of codes
107		*/
108		void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist);
109
110		/// compute a checksum on a table.
111		uint64_t ivec_checksum(size_t n, const int32_t* a);
112
113		/// compute a checksum on a table.
114		uint64_t bvec_checksum(size_t n, const uint8_t* a);
115
116		/** compute checksums for the rows of a matrix
117		*
118		* @param n number of rows
119		* @param d size per row
120		* @param a matrix to handle, size n * d
121		* @param cs output checksums, size n
122		*/
123		void bvecs_checksum(size_t n, size_t d, const uint8_t* a, uint64_t* cs);
124
125		/** random subsamples a set of vectors if there are too many of them
126		*
127		* @param d dimension of the vectors
128		* @param n on input: nb of input vectors, output: nb of output vectors
129		* @param nmax max nb of vectors to keep
130		* @param x input array, size *n-by-d
131		* @param seed random seed to use for sampling
132		* @return x or an array allocated with new [] with *n vectors
133		*/
134		const float* fvecs_maybe_subsample(
135		size_t d,
136		size_t* n,
137		size_t nmax,
138		const float* x,
139		bool verbose = false,
140		int64_t seed = 1234);
141
142		/** Convert binary vector to +1/-1 valued float vector.
143		*
144		* @param d dimension of the vector (multiple of 8)
145		* @param x_in input binary vector (uint8_t table of size d / 8)
146		* @param x_out output float vector (float table of size d)
147		*/
148		void binary_to_real(size_t d, const uint8_t* x_in, float* x_out);
149
150		/** Convert float vector to binary vector. Components > 0 are converted to 1,
151		* others to 0.
152		*
153		* @param d dimension of the vector (multiple of 8)
154		* @param x_in input float vector (float table of size d)
155		* @param x_out output binary vector (uint8_t table of size d / 8)
156		*/
157		void real_to_binary(size_t d, const float* x_in, uint8_t* x_out);
158
159		/** A reasonable hashing function */
160		uint64_t hash_bytes(const uint8_t* bytes, int64_t n);
161
162		/** Whether OpenMP annotations were respected. */
163		bool check_openmp();
164
165		/** This class is used to combine range and knn search results
166		* in contrib.exhaustive_search.range_search_gpu */
167
168		template <typename T>
169		struct CombinerRangeKNN {
170		int64_t nq; /// nb of queries
171		size_t k; /// number of neighbors for the knn search part
172		T r2; /// range search radius
173		bool keep_max; /// whether to keep max values instead of min.
174
175		CombinerRangeKNN(int64_t nq, size_t k, T r2, bool keep_max)
176	0	: nq(nq), k(k), r2(r2), keep_max(keep_max) {} Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIfEC2Elmfb Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIsEC2Elmsb
177
178		/// Knn search results
179		const int64_t* I = nullptr; /// size nq * k
180		const T* D = nullptr; /// size nq * k
181
182		/// optional: range search results (ignored if mask is NULL)
183		const bool* mask =
184		nullptr; /// mask for where knn results are valid, size nq
185		// range search results for remaining entries nrange = sum(mask)
186		const int64_t* lim_remain = nullptr; /// size nrange + 1
187		const T* D_remain = nullptr; /// size lim_remain[nrange]
188		const int64_t* I_remain = nullptr; /// size lim_remain[nrange]
189
190		const int64_t* L_res = nullptr; /// size nq + 1
191		// Phase 1: compute sizes into limits array (of size nq + 1)
192		void compute_sizes(int64_t* L_res);
193
194		/// Phase 2: caller allocates D_res and I_res (size L_res[nq])
195		/// Phase 3: fill in D_res and I_res
196		void write_result(T* D_res, int64_t* I_res);
197		};
198
199		struct CodeSet {
200		size_t d;
201		std::set<std::vector<uint8_t>> s;
202
203	0	explicit CodeSet(size_t d) : d(d) {}
204		void insert(size_t n, const uint8_t* codes, bool* inserted);
205		};
206
207		} // namespace faiss
208
209		#endif /* FAISS_utils_h */