Coverage Report

Created: 2026-03-25 19:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
contrib/faiss/faiss/impl/IDSelector.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#pragma once
9
10
#include <unordered_set>
11
#include <vector>
12
13
#include <faiss/MetricType.h>
14
15
/** IDSelector is intended to define a subset of vectors to handle (for removal
16
 * or as subset to search) */
17
18
namespace faiss {
19
20
/** Encapsulates a set of ids to handle. */
21
struct IDSelector {
22
    virtual bool is_member(idx_t id) const = 0;
23
97
    virtual ~IDSelector() {}
24
};
25
26
/** ids between [imin, imax) */
27
struct IDSelectorRange : IDSelector {
28
    idx_t imin, imax;
29
30
    /// Assume that the ids to handle are sorted. In some cases this can speed
31
    /// up processing
32
    bool assume_sorted;
33
34
    IDSelectorRange(idx_t imin, idx_t imax, bool assume_sorted = false);
35
36
    bool is_member(idx_t id) const final;
37
38
    /// for sorted ids, find the range of list indices where the valid ids are
39
    /// stored
40
    void find_sorted_ids_bounds(
41
            size_t list_size,
42
            const idx_t* ids,
43
            size_t* jmin,
44
            size_t* jmax) const;
45
46
0
    ~IDSelectorRange() override {}
47
};
48
49
/** Simple array of elements
50
 *
51
 * is_member calls are very inefficient, but some operations can use the ids
52
 * directly.
53
 */
54
struct IDSelectorArray : IDSelector {
55
    size_t n;
56
    const idx_t* ids;
57
58
    /** Construct with an array of ids to process
59
     *
60
     * @param n number of ids to store
61
     * @param ids elements to store. The pointer should remain valid during
62
     *            IDSelectorArray's lifetime
63
     */
64
    IDSelectorArray(size_t n, const idx_t* ids);
65
    bool is_member(idx_t id) const final;
66
0
    ~IDSelectorArray() override {}
67
};
68
69
/** Ids from a set.
70
 *
71
 * Repetitions of ids in the indices set passed to the constructor does not hurt
72
 * performance.
73
 *
74
 * The hash function used for the bloom filter and GCC's implementation of
75
 * unordered_set are just the least significant bits of the id. This works fine
76
 * for random ids or ids in sequences but will produce many hash collisions if
77
 * lsb's are always the same
78
 */
79
struct IDSelectorBatch : IDSelector {
80
    std::unordered_set<idx_t> set;
81
82
    // Bloom filter to avoid accessing the unordered set if it is unlikely
83
    // to be true
84
    std::vector<uint8_t> bloom;
85
    int nbits;
86
    idx_t mask;
87
88
    /** Construct with an array of ids to process
89
     *
90
     * @param n number of ids to store
91
     * @param ids elements to store. The pointer can be released after
92
     *            construction
93
     */
94
    IDSelectorBatch(size_t n, const idx_t* indices);
95
    bool is_member(idx_t id) const final;
96
0
    ~IDSelectorBatch() override {}
97
};
98
99
/** One bit per element. Constructed with a bitmap, size ceil(n / 8).
100
 */
101
struct IDSelectorBitmap : IDSelector {
102
    size_t n;
103
    const uint8_t* bitmap;
104
105
    /** Construct with a binary mask
106
     *
107
     * @param n size of the bitmap array
108
     * @param bitmap id will be selected iff id / 8 < n and bit number
109
     *               (i%8) of bitmap[floor(i / 8)] is 1.
110
     */
111
    IDSelectorBitmap(size_t n, const uint8_t* bitmap);
112
    bool is_member(idx_t id) const final;
113
0
    ~IDSelectorBitmap() override {}
114
};
115
116
/** reverts the membership test of another selector */
117
struct IDSelectorNot : IDSelector {
118
    const IDSelector* sel;
119
0
    IDSelectorNot(const IDSelector* sel) : sel(sel) {}
120
0
    bool is_member(idx_t id) const final {
121
0
        return !sel->is_member(id);
122
0
    }
123
0
    virtual ~IDSelectorNot() {}
124
};
125
126
/// selects all entries (useful for benchmarking)
127
struct IDSelectorAll : IDSelector {
128
0
    bool is_member(idx_t id) const final {
129
0
        return true;
130
0
    }
131
0
    virtual ~IDSelectorAll() {}
132
};
133
134
/// does an AND operation on the the two given IDSelector's is_membership
135
/// results.
136
struct IDSelectorAnd : IDSelector {
137
    const IDSelector* lhs;
138
    const IDSelector* rhs;
139
    IDSelectorAnd(const IDSelector* lhs, const IDSelector* rhs)
140
0
            : lhs(lhs), rhs(rhs) {}
141
0
    bool is_member(idx_t id) const final {
142
0
        return lhs->is_member(id) && rhs->is_member(id);
143
0
    }
144
0
    virtual ~IDSelectorAnd() {}
145
};
146
147
/// does an OR operation on the the two given IDSelector's is_membership
148
/// results.
149
struct IDSelectorOr : IDSelector {
150
    const IDSelector* lhs;
151
    const IDSelector* rhs;
152
    IDSelectorOr(const IDSelector* lhs, const IDSelector* rhs)
153
0
            : lhs(lhs), rhs(rhs) {}
154
0
    bool is_member(idx_t id) const final {
155
0
        return lhs->is_member(id) || rhs->is_member(id);
156
0
    }
157
0
    virtual ~IDSelectorOr() {}
158
};
159
160
/// does an XOR operation on the the two given IDSelector's is_membership
161
/// results.
162
struct IDSelectorXOr : IDSelector {
163
    const IDSelector* lhs;
164
    const IDSelector* rhs;
165
    IDSelectorXOr(const IDSelector* lhs, const IDSelector* rhs)
166
0
            : lhs(lhs), rhs(rhs) {}
167
0
    bool is_member(idx_t id) const final {
168
0
        return lhs->is_member(id) ^ rhs->is_member(id);
169
0
    }
170
0
    virtual ~IDSelectorXOr() {}
171
};
172
173
} // namespace faiss