/root/doris/contrib/faiss/faiss/impl/NNDescent.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | // -*- c++ -*- |
9 | | |
10 | | #pragma once |
11 | | |
12 | | #include <algorithm> |
13 | | #include <mutex> |
14 | | #include <queue> |
15 | | #include <random> |
16 | | #include <unordered_set> |
17 | | #include <vector> |
18 | | |
19 | | #include <omp.h> |
20 | | |
21 | | #include <faiss/Index.h> |
22 | | #include <faiss/impl/FaissAssert.h> |
23 | | #include <faiss/impl/platform_macros.h> |
24 | | #include <faiss/utils/Heap.h> |
25 | | #include <faiss/utils/random.h> |
26 | | |
27 | | namespace faiss { |
28 | | |
29 | | /** Implementation of NNDescent which is one of the most popular |
30 | | * KNN graph building algorithms |
31 | | * |
32 | | * Efficient K-Nearest Neighbor Graph Construction for Generic |
33 | | * Similarity Measures |
34 | | * |
35 | | * Dong, Wei, Charikar Moses, and Kai Li, WWW 2011 |
36 | | * |
37 | | * This implmentation is heavily influenced by the efanna |
38 | | * implementation by Cong Fu and the KGraph library by Wei Dong |
39 | | * (https://github.com/ZJULearning/efanna_graph) |
40 | | * (https://github.com/aaalgo/kgraph) |
41 | | * |
42 | | * The NNDescent object stores only the neighbor link structure, |
43 | | * see IndexNNDescent.h for the full index object. |
44 | | */ |
45 | | |
46 | | struct VisitedTable; |
47 | | struct DistanceComputer; |
48 | | |
49 | | namespace nndescent { |
50 | | |
51 | | struct Neighbor { |
52 | | int id; |
53 | | float distance; |
54 | | bool flag; |
55 | | |
56 | | Neighbor() = default; |
57 | | Neighbor(int id, float distance, bool f) |
58 | 0 | : id(id), distance(distance), flag(f) {} |
59 | | |
60 | 0 | inline bool operator<(const Neighbor& other) const { |
61 | 0 | return distance < other.distance; |
62 | 0 | } |
63 | | }; |
64 | | |
65 | | struct Nhood { |
66 | | std::mutex lock; |
67 | | std::vector<Neighbor> pool; // candidate pool (a max heap) |
68 | | int M; // number of new neighbors to be operated |
69 | | |
70 | | std::vector<int> nn_old; // old neighbors |
71 | | std::vector<int> nn_new; // new neighbors |
72 | | std::vector<int> rnn_old; // reverse old neighbors |
73 | | std::vector<int> rnn_new; // reverse new neighbors |
74 | | |
75 | | Nhood() = default; |
76 | | |
77 | | Nhood(int l, int s, std::mt19937& rng, int N); |
78 | | |
79 | | Nhood& operator=(const Nhood& other); |
80 | | |
81 | | Nhood(const Nhood& other); |
82 | | |
83 | | void insert(int id, float dist); |
84 | | |
85 | | template <typename C> |
86 | | void join(C callback) const; |
87 | | }; |
88 | | |
89 | | } // namespace nndescent |
90 | | |
91 | | struct NNDescent { |
92 | | using storage_idx_t = int; |
93 | | |
94 | | using KNNGraph = std::vector<nndescent::Nhood>; |
95 | | |
96 | | explicit NNDescent(const int d, const int K); |
97 | | |
98 | | ~NNDescent(); |
99 | | |
100 | | void build(DistanceComputer& qdis, const int n, bool verbose); |
101 | | |
102 | | void search( |
103 | | DistanceComputer& qdis, |
104 | | const int topk, |
105 | | idx_t* indices, |
106 | | float* dists, |
107 | | VisitedTable& vt) const; |
108 | | |
109 | | void reset(); |
110 | | |
111 | | /// Initialize the KNN graph randomly |
112 | | void init_graph(DistanceComputer& qdis); |
113 | | |
114 | | /// Perform NNDescent algorithm |
115 | | void nndescent(DistanceComputer& qdis, bool verbose); |
116 | | |
117 | | /// Perform local join on each node |
118 | | void join(DistanceComputer& qdis); |
119 | | |
120 | | /// Sample new neighbors for each node to peform local join later |
121 | | void update(); |
122 | | |
123 | | /// Sample a small number of points to evaluate the quality of KNNG built |
124 | | void generate_eval_set( |
125 | | DistanceComputer& qdis, |
126 | | std::vector<int>& c, |
127 | | std::vector<std::vector<int>>& v, |
128 | | int N); |
129 | | |
130 | | /// Evaluate the quality of KNNG built |
131 | | float eval_recall( |
132 | | std::vector<int>& ctrl_points, |
133 | | std::vector<std::vector<int>>& acc_eval_set); |
134 | | |
135 | | bool has_built = false; |
136 | | |
137 | | int S = 10; // number of sample neighbors to be updated for each node |
138 | | int R = 100; // size of reverse links, 0 means the reverse links will not be |
139 | | // used |
140 | | int iter = 10; // number of iterations to iterate over |
141 | | int search_L = 0; // size of candidate pool in searching |
142 | | int random_seed = 2021; // random seed for generators |
143 | | |
144 | | int K; // K in KNN graph |
145 | | int d; // dimensions |
146 | | int L; // size of the candidate pool in building |
147 | | |
148 | | int ntotal = 0; |
149 | | |
150 | | KNNGraph graph; |
151 | | std::vector<int> final_graph; |
152 | | }; |
153 | | |
154 | | } // namespace faiss |