Coverage Report

Created: 2025-09-22 13:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/impl/NNDescent.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#pragma once
11
12
#include <algorithm>
13
#include <mutex>
14
#include <queue>
15
#include <random>
16
#include <unordered_set>
17
#include <vector>
18
19
#include <omp.h>
20
21
#include <faiss/Index.h>
22
#include <faiss/impl/FaissAssert.h>
23
#include <faiss/impl/platform_macros.h>
24
#include <faiss/utils/Heap.h>
25
#include <faiss/utils/random.h>
26
27
namespace faiss {
28
29
/** Implementation of NNDescent which is one of the most popular
30
 *  KNN graph building algorithms
31
 *
32
 * Efficient K-Nearest Neighbor Graph Construction for Generic
33
 * Similarity Measures
34
 *
35
 *  Dong, Wei, Charikar Moses, and Kai Li, WWW 2011
36
 *
37
 * This implmentation is heavily influenced by the efanna
38
 * implementation by Cong Fu and the KGraph library by Wei Dong
39
 * (https://github.com/ZJULearning/efanna_graph)
40
 * (https://github.com/aaalgo/kgraph)
41
 *
42
 * The NNDescent object stores only the neighbor link structure,
43
 * see IndexNNDescent.h for the full index object.
44
 */
45
46
struct VisitedTable;
47
struct DistanceComputer;
48
49
namespace nndescent {
50
51
struct Neighbor {
52
    int id;
53
    float distance;
54
    bool flag;
55
56
    Neighbor() = default;
57
    Neighbor(int id, float distance, bool f)
58
0
            : id(id), distance(distance), flag(f) {}
59
60
0
    inline bool operator<(const Neighbor& other) const {
61
0
        return distance < other.distance;
62
0
    }
63
};
64
65
struct Nhood {
66
    std::mutex lock;
67
    std::vector<Neighbor> pool; // candidate pool (a max heap)
68
    int M;                      // number of new neighbors to be operated
69
70
    std::vector<int> nn_old;  // old neighbors
71
    std::vector<int> nn_new;  // new neighbors
72
    std::vector<int> rnn_old; // reverse old neighbors
73
    std::vector<int> rnn_new; // reverse new neighbors
74
75
    Nhood() = default;
76
77
    Nhood(int l, int s, std::mt19937& rng, int N);
78
79
    Nhood& operator=(const Nhood& other);
80
81
    Nhood(const Nhood& other);
82
83
    void insert(int id, float dist);
84
85
    template <typename C>
86
    void join(C callback) const;
87
};
88
89
} // namespace nndescent
90
91
struct NNDescent {
92
    using storage_idx_t = int;
93
94
    using KNNGraph = std::vector<nndescent::Nhood>;
95
96
    explicit NNDescent(const int d, const int K);
97
98
    ~NNDescent();
99
100
    void build(DistanceComputer& qdis, const int n, bool verbose);
101
102
    void search(
103
            DistanceComputer& qdis,
104
            const int topk,
105
            idx_t* indices,
106
            float* dists,
107
            VisitedTable& vt) const;
108
109
    void reset();
110
111
    /// Initialize the KNN graph randomly
112
    void init_graph(DistanceComputer& qdis);
113
114
    /// Perform NNDescent algorithm
115
    void nndescent(DistanceComputer& qdis, bool verbose);
116
117
    /// Perform local join on each node
118
    void join(DistanceComputer& qdis);
119
120
    /// Sample new neighbors for each node to peform local join later
121
    void update();
122
123
    /// Sample a small number of points to evaluate the quality of KNNG built
124
    void generate_eval_set(
125
            DistanceComputer& qdis,
126
            std::vector<int>& c,
127
            std::vector<std::vector<int>>& v,
128
            int N);
129
130
    /// Evaluate the quality of KNNG built
131
    float eval_recall(
132
            std::vector<int>& ctrl_points,
133
            std::vector<std::vector<int>>& acc_eval_set);
134
135
    bool has_built = false;
136
137
    int S = 10;  // number of sample neighbors to be updated for each node
138
    int R = 100; // size of reverse links, 0 means the reverse links will not be
139
                 // used
140
    int iter = 10;          // number of iterations to iterate over
141
    int search_L = 0;       // size of candidate pool in searching
142
    int random_seed = 2021; // random seed for generators
143
144
    int K; // K in KNN graph
145
    int d; // dimensions
146
    int L; // size of the candidate pool in building
147
148
    int ntotal = 0;
149
150
    KNNGraph graph;
151
    std::vector<int> final_graph;
152
};
153
154
} // namespace faiss