Coverage Report

Created: 2025-11-17 18:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/impl/NSG.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#pragma once
9
10
#include <memory>
11
#include <mutex>
12
#include <vector>
13
14
#include <omp.h>
15
16
#include <faiss/Index.h>
17
#include <faiss/impl/AuxIndexStructures.h>
18
#include <faiss/impl/FaissAssert.h>
19
#include <faiss/utils/Heap.h>
20
#include <faiss/utils/random.h>
21
22
namespace faiss {
23
24
/** Implementation of the Navigating Spreading-out Graph (NSG)
25
 * datastructure.
26
 *
27
 * Fast Approximate Nearest Neighbor Search With The
28
 * Navigating Spreading-out Graph
29
 *
30
 *  Cong Fu, Chao Xiang, Changxu Wang, Deng Cai, VLDB 2019
31
 *
32
 * This implementation is heavily influenced by the NSG
33
 * implementation by ZJULearning Group
34
 * (https://github.com/zjulearning/nsg)
35
 *
36
 * The NSG object stores only the neighbor link structure, see
37
 * IndexNSG.h for the full index object.
38
 */
39
40
struct DistanceComputer; // from AuxIndexStructures
41
42
namespace nsg {
43
44
struct Neighbor;
45
struct Node;
46
47
/***********************************************************
48
 * Graph structure to store a graph.
49
 *
50
 * It is represented by an adjacency matrix `data`, where
51
 * data[i, j] is the j-th neighbor of node i.
52
 ***********************************************************/
53
54
template <class node_t>
55
struct Graph {
56
    node_t* data;    ///< the flattened adjacency matrix, size N-by-K
57
    int K;           ///< nb of neighbors per node
58
    int N;           ///< total nb of nodes
59
    bool own_fields; ///< the underlying data owned by itself or not
60
61
    // construct from a known graph
62
    Graph(node_t* data, int N, int K)
63
0
            : data(data), K(K), N(N), own_fields(false) {}
64
65
    // construct an empty graph
66
    // NOTE: the newly allocated data needs to be destroyed at destruction time
67
0
    Graph(int N, int K) : K(K), N(N), own_fields(true) {
68
0
        data = new node_t[N * K];
69
0
    }
Unexecuted instantiation: _ZN5faiss3nsg5GraphIiEC2Eii
Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEEC2Eii
70
71
    // copy constructor
72
0
    Graph(const Graph& g) : Graph(g.N, g.K) {
73
0
        memcpy(data, g.data, N * K * sizeof(node_t));
74
0
    }
75
76
    // release the allocated memory if needed
77
0
    virtual ~Graph() {
78
0
        if (own_fields) {
79
0
            delete[] data;
80
0
        }
81
0
    }
Unexecuted instantiation: _ZN5faiss3nsg5GraphIiED2Ev
Unexecuted instantiation: _ZN5faiss3nsg5GraphIlED2Ev
Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEED2Ev
82
83
    // access the j-th neighbor of node i
84
0
    inline node_t at(int i, int j) const {
85
0
        return data[i * K + j];
86
0
    }
87
88
    // access the j-th neighbor of node i by reference
89
0
    inline node_t& at(int i, int j) {
90
0
        return data[i * K + j];
91
0
    }
Unexecuted instantiation: _ZN5faiss3nsg5GraphIiE2atEii
Unexecuted instantiation: _ZN5faiss3nsg5GraphINS0_4NodeEE2atEii
92
93
    // get all neighbors of node i (used during search only)
94
0
    virtual size_t get_neighbors(int i, node_t* neighbors) const {
95
0
        for (int j = 0; j < K; j++) {
96
0
            if (data[i * K + j] < 0) {
97
0
                return j;
98
0
            }
99
0
            neighbors[j] = data[i * K + j];
100
0
        }
101
0
        return K;
102
0
    }
Unexecuted instantiation: _ZNK5faiss3nsg5GraphIiE13get_neighborsEiPi
Unexecuted instantiation: _ZNK5faiss3nsg5GraphIlE13get_neighborsEiPl
Unexecuted instantiation: _ZNK5faiss3nsg5GraphINS0_4NodeEE13get_neighborsEiPS2_
103
};
104
105
DistanceComputer* storage_distance_computer(const Index* storage);
106
107
} // namespace nsg
108
109
struct NSG {
110
    /// internal storage of vectors (32 bits: this is expensive)
111
    using storage_idx_t = int32_t;
112
    using Node = nsg::Node;
113
    using Neighbor = nsg::Neighbor;
114
115
    int ntotal = 0; ///< nb of nodes
116
117
    // construction-time parameters
118
    int R; ///< nb of neighbors per node
119
    int L; ///< length of the search path at construction time
120
    int C; ///< candidate pool size at construction time
121
122
    // search-time parameters
123
    int search_L = 16; ///< length of the search path
124
125
    int enterpoint; ///< enterpoint
126
127
    std::shared_ptr<nsg::Graph<int32_t>> final_graph; ///< NSG graph structure
128
129
    bool is_built = false; ///< NSG is built or not
130
131
    RandomGenerator rng; ///< random generator
132
133
    explicit NSG(int R = 32);
134
135
    // build NSG from a KNN graph
136
    void build(
137
            Index* storage,
138
            idx_t n,
139
            const nsg::Graph<idx_t>& knn_graph,
140
            bool verbose);
141
142
    // reset the graph
143
    void reset();
144
145
    // search interface
146
    void search(
147
            DistanceComputer& dis,
148
            int k,
149
            idx_t* I,
150
            float* D,
151
            VisitedTable& vt) const;
152
153
    // Compute the center point
154
    void init_graph(Index* storage, const nsg::Graph<idx_t>& knn_graph);
155
156
    // Search on a built graph.
157
    // If collect_fullset is true, the visited nodes will be
158
    // collected in `fullset`.
159
    template <bool collect_fullset, class index_t>
160
    void search_on_graph(
161
            const nsg::Graph<index_t>& graph,
162
            DistanceComputer& dis,
163
            VisitedTable& vt,
164
            int ep,
165
            int pool_size,
166
            std::vector<Neighbor>& retset,
167
            std::vector<Node>& fullset) const;
168
169
    // Add reverse links
170
    void add_reverse_links(
171
            int q,
172
            std::vector<std::mutex>& locks,
173
            DistanceComputer& dis,
174
            nsg::Graph<Node>& graph);
175
176
    void sync_prune(
177
            int q,
178
            std::vector<Node>& pool,
179
            DistanceComputer& dis,
180
            VisitedTable& vt,
181
            const nsg::Graph<idx_t>& knn_graph,
182
            nsg::Graph<Node>& graph);
183
184
    void link(
185
            Index* storage,
186
            const nsg::Graph<idx_t>& knn_graph,
187
            nsg::Graph<Node>& graph,
188
            bool verbose);
189
190
    // make NSG be fully connected
191
    int tree_grow(Index* storage, std::vector<int>& degrees);
192
193
    // count the size of the connected component
194
    // using depth first search start by root
195
    int dfs(VisitedTable& vt, int root, int cnt) const;
196
197
    // attach one unlinked node
198
    int attach_unlinked(
199
            Index* storage,
200
            VisitedTable& vt,
201
            VisitedTable& vt2,
202
            std::vector<int>& degrees);
203
204
    // check the integrity of the NSG built
205
    void check_graph() const;
206
};
207
208
} // namespace faiss