/root/doris/contrib/faiss/faiss/impl/NSG.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | #include <memory> |
11 | | #include <mutex> |
12 | | #include <vector> |
13 | | |
14 | | #include <omp.h> |
15 | | |
16 | | #include <faiss/Index.h> |
17 | | #include <faiss/impl/AuxIndexStructures.h> |
18 | | #include <faiss/impl/FaissAssert.h> |
19 | | #include <faiss/utils/Heap.h> |
20 | | #include <faiss/utils/random.h> |
21 | | |
22 | | namespace faiss { |
23 | | |
24 | | /** Implementation of the Navigating Spreading-out Graph (NSG) |
25 | | * datastructure. |
26 | | * |
27 | | * Fast Approximate Nearest Neighbor Search With The |
28 | | * Navigating Spreading-out Graph |
29 | | * |
30 | | * Cong Fu, Chao Xiang, Changxu Wang, Deng Cai, VLDB 2019 |
31 | | * |
32 | | * This implementation is heavily influenced by the NSG |
33 | | * implementation by ZJULearning Group |
34 | | * (https://github.com/zjulearning/nsg) |
35 | | * |
36 | | * The NSG object stores only the neighbor link structure, see |
37 | | * IndexNSG.h for the full index object. |
38 | | */ |
39 | | |
40 | | struct DistanceComputer; // from AuxIndexStructures |
41 | | |
42 | | namespace nsg { |
43 | | |
44 | | struct Neighbor; |
45 | | struct Node; |
46 | | |
47 | | /*********************************************************** |
48 | | * Graph structure to store a graph. |
49 | | * |
50 | | * It is represented by an adjacency matrix `data`, where |
51 | | * data[i, j] is the j-th neighbor of node i. |
52 | | ***********************************************************/ |
53 | | |
54 | | template <class node_t> |
55 | | struct Graph { |
56 | | node_t* data; ///< the flattened adjacency matrix, size N-by-K |
57 | | int K; ///< nb of neighbors per node |
58 | | int N; ///< total nb of nodes |
59 | | bool own_fields; ///< the underlying data owned by itself or not |
60 | | |
61 | | // construct from a known graph |
62 | | Graph(node_t* data, int N, int K) |
63 | 0 | : data(data), K(K), N(N), own_fields(false) {} |
64 | | |
65 | | // construct an empty graph |
66 | | // NOTE: the newly allocated data needs to be destroyed at destruction time |
67 | 0 | Graph(int N, int K) : K(K), N(N), own_fields(true) { |
68 | 0 | data = new node_t[N * K]; |
69 | 0 | } Unexecuted instantiation: _ZN5faiss3nsg5GraphIiEC2Eii Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEEC2Eii |
70 | | |
71 | | // copy constructor |
72 | 0 | Graph(const Graph& g) : Graph(g.N, g.K) { |
73 | 0 | memcpy(data, g.data, N * K * sizeof(node_t)); |
74 | 0 | } |
75 | | |
76 | | // release the allocated memory if needed |
77 | 0 | virtual ~Graph() { |
78 | 0 | if (own_fields) { |
79 | 0 | delete[] data; |
80 | 0 | } |
81 | 0 | } Unexecuted instantiation: _ZN5faiss3nsg5GraphIiED2Ev Unexecuted instantiation: _ZN5faiss3nsg5GraphIlED2Ev Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEED2Ev |
82 | | |
83 | | // access the j-th neighbor of node i |
84 | 0 | inline node_t at(int i, int j) const { |
85 | 0 | return data[i * K + j]; |
86 | 0 | } |
87 | | |
88 | | // access the j-th neighbor of node i by reference |
89 | 0 | inline node_t& at(int i, int j) { |
90 | 0 | return data[i * K + j]; |
91 | 0 | } Unexecuted instantiation: _ZN5faiss3nsg5GraphIiE2atEii Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEE2atEii |
92 | | |
93 | | // get all neighbors of node i (used during search only) |
94 | 0 | virtual size_t get_neighbors(int i, node_t* neighbors) const { |
95 | 0 | for (int j = 0; j < K; j++) { |
96 | 0 | if (data[i * K + j] < 0) { |
97 | 0 | return j; |
98 | 0 | } |
99 | 0 | neighbors[j] = data[i * K + j]; |
100 | 0 | } |
101 | 0 | return K; |
102 | 0 | } Unexecuted instantiation: _ZNK5faiss3nsg5GraphIiE13get_neighborsEiPi Unexecuted instantiation: _ZNK5faiss3nsg5GraphIlE13get_neighborsEiPl Unexecuted instantiation: _ZNK5faiss3nsg5GraphINS0_4NodeEE13get_neighborsEiPS2_ |
103 | | }; |
104 | | |
105 | | DistanceComputer* storage_distance_computer(const Index* storage); |
106 | | |
107 | | } // namespace nsg |
108 | | |
109 | | struct NSG { |
110 | | /// internal storage of vectors (32 bits: this is expensive) |
111 | | using storage_idx_t = int32_t; |
112 | | using Node = nsg::Node; |
113 | | using Neighbor = nsg::Neighbor; |
114 | | |
115 | | int ntotal = 0; ///< nb of nodes |
116 | | |
117 | | // construction-time parameters |
118 | | int R; ///< nb of neighbors per node |
119 | | int L; ///< length of the search path at construction time |
120 | | int C; ///< candidate pool size at construction time |
121 | | |
122 | | // search-time parameters |
123 | | int search_L = 16; ///< length of the search path |
124 | | |
125 | | int enterpoint; ///< enterpoint |
126 | | |
127 | | std::shared_ptr<nsg::Graph<int32_t>> final_graph; ///< NSG graph structure |
128 | | |
129 | | bool is_built = false; ///< NSG is built or not |
130 | | |
131 | | RandomGenerator rng; ///< random generator |
132 | | |
133 | | explicit NSG(int R = 32); |
134 | | |
135 | | // build NSG from a KNN graph |
136 | | void build( |
137 | | Index* storage, |
138 | | idx_t n, |
139 | | const nsg::Graph<idx_t>& knn_graph, |
140 | | bool verbose); |
141 | | |
142 | | // reset the graph |
143 | | void reset(); |
144 | | |
145 | | // search interface |
146 | | void search( |
147 | | DistanceComputer& dis, |
148 | | int k, |
149 | | idx_t* I, |
150 | | float* D, |
151 | | VisitedTable& vt) const; |
152 | | |
153 | | // Compute the center point |
154 | | void init_graph(Index* storage, const nsg::Graph<idx_t>& knn_graph); |
155 | | |
156 | | // Search on a built graph. |
157 | | // If collect_fullset is true, the visited nodes will be |
158 | | // collected in `fullset`. |
159 | | template <bool collect_fullset, class index_t> |
160 | | void search_on_graph( |
161 | | const nsg::Graph<index_t>& graph, |
162 | | DistanceComputer& dis, |
163 | | VisitedTable& vt, |
164 | | int ep, |
165 | | int pool_size, |
166 | | std::vector<Neighbor>& retset, |
167 | | std::vector<Node>& fullset) const; |
168 | | |
169 | | // Add reverse links |
170 | | void add_reverse_links( |
171 | | int q, |
172 | | std::vector<std::mutex>& locks, |
173 | | DistanceComputer& dis, |
174 | | nsg::Graph<Node>& graph); |
175 | | |
176 | | void sync_prune( |
177 | | int q, |
178 | | std::vector<Node>& pool, |
179 | | DistanceComputer& dis, |
180 | | VisitedTable& vt, |
181 | | const nsg::Graph<idx_t>& knn_graph, |
182 | | nsg::Graph<Node>& graph); |
183 | | |
184 | | void link( |
185 | | Index* storage, |
186 | | const nsg::Graph<idx_t>& knn_graph, |
187 | | nsg::Graph<Node>& graph, |
188 | | bool verbose); |
189 | | |
190 | | // make NSG be fully connected |
191 | | int tree_grow(Index* storage, std::vector<int>& degrees); |
192 | | |
193 | | // count the size of the connected component |
194 | | // using depth first search start by root |
195 | | int dfs(VisitedTable& vt, int root, int cnt) const; |
196 | | |
197 | | // attach one unlinked node |
198 | | int attach_unlinked( |
199 | | Index* storage, |
200 | | VisitedTable& vt, |
201 | | VisitedTable& vt2, |
202 | | std::vector<int>& degrees); |
203 | | |
204 | | // check the integrity of the NSG built |
205 | | void check_graph() const; |
206 | | }; |
207 | | |
208 | | } // namespace faiss |