contrib/faiss/faiss/impl/IDSelector.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | #include <unordered_set> |
11 | | #include <vector> |
12 | | |
13 | | #include <faiss/MetricType.h> |
14 | | |
15 | | /** IDSelector is intended to define a subset of vectors to handle (for removal |
16 | | * or as subset to search) */ |
17 | | |
18 | | namespace faiss { |
19 | | |
20 | | /** Encapsulates a set of ids to handle. */ |
21 | | struct IDSelector { |
22 | | virtual bool is_member(idx_t id) const = 0; |
23 | 97 | virtual ~IDSelector() {} |
24 | | }; |
25 | | |
26 | | /** ids between [imin, imax) */ |
27 | | struct IDSelectorRange : IDSelector { |
28 | | idx_t imin, imax; |
29 | | |
30 | | /// Assume that the ids to handle are sorted. In some cases this can speed |
31 | | /// up processing |
32 | | bool assume_sorted; |
33 | | |
34 | | IDSelectorRange(idx_t imin, idx_t imax, bool assume_sorted = false); |
35 | | |
36 | | bool is_member(idx_t id) const final; |
37 | | |
38 | | /// for sorted ids, find the range of list indices where the valid ids are |
39 | | /// stored |
40 | | void find_sorted_ids_bounds( |
41 | | size_t list_size, |
42 | | const idx_t* ids, |
43 | | size_t* jmin, |
44 | | size_t* jmax) const; |
45 | | |
46 | 0 | ~IDSelectorRange() override {} |
47 | | }; |
48 | | |
49 | | /** Simple array of elements |
50 | | * |
51 | | * is_member calls are very inefficient, but some operations can use the ids |
52 | | * directly. |
53 | | */ |
54 | | struct IDSelectorArray : IDSelector { |
55 | | size_t n; |
56 | | const idx_t* ids; |
57 | | |
58 | | /** Construct with an array of ids to process |
59 | | * |
60 | | * @param n number of ids to store |
61 | | * @param ids elements to store. The pointer should remain valid during |
62 | | * IDSelectorArray's lifetime |
63 | | */ |
64 | | IDSelectorArray(size_t n, const idx_t* ids); |
65 | | bool is_member(idx_t id) const final; |
66 | 0 | ~IDSelectorArray() override {} |
67 | | }; |
68 | | |
69 | | /** Ids from a set. |
70 | | * |
71 | | * Repetitions of ids in the indices set passed to the constructor does not hurt |
72 | | * performance. |
73 | | * |
74 | | * The hash function used for the bloom filter and GCC's implementation of |
75 | | * unordered_set are just the least significant bits of the id. This works fine |
76 | | * for random ids or ids in sequences but will produce many hash collisions if |
77 | | * lsb's are always the same |
78 | | */ |
79 | | struct IDSelectorBatch : IDSelector { |
80 | | std::unordered_set<idx_t> set; |
81 | | |
82 | | // Bloom filter to avoid accessing the unordered set if it is unlikely |
83 | | // to be true |
84 | | std::vector<uint8_t> bloom; |
85 | | int nbits; |
86 | | idx_t mask; |
87 | | |
88 | | /** Construct with an array of ids to process |
89 | | * |
90 | | * @param n number of ids to store |
91 | | * @param ids elements to store. The pointer can be released after |
92 | | * construction |
93 | | */ |
94 | | IDSelectorBatch(size_t n, const idx_t* indices); |
95 | | bool is_member(idx_t id) const final; |
96 | 0 | ~IDSelectorBatch() override {} |
97 | | }; |
98 | | |
99 | | /** One bit per element. Constructed with a bitmap, size ceil(n / 8). |
100 | | */ |
101 | | struct IDSelectorBitmap : IDSelector { |
102 | | size_t n; |
103 | | const uint8_t* bitmap; |
104 | | |
105 | | /** Construct with a binary mask |
106 | | * |
107 | | * @param n size of the bitmap array |
108 | | * @param bitmap id will be selected iff id / 8 < n and bit number |
109 | | * (i%8) of bitmap[floor(i / 8)] is 1. |
110 | | */ |
111 | | IDSelectorBitmap(size_t n, const uint8_t* bitmap); |
112 | | bool is_member(idx_t id) const final; |
113 | 0 | ~IDSelectorBitmap() override {} |
114 | | }; |
115 | | |
116 | | /** reverts the membership test of another selector */ |
117 | | struct IDSelectorNot : IDSelector { |
118 | | const IDSelector* sel; |
119 | 0 | IDSelectorNot(const IDSelector* sel) : sel(sel) {} |
120 | 0 | bool is_member(idx_t id) const final { |
121 | 0 | return !sel->is_member(id); |
122 | 0 | } |
123 | 0 | virtual ~IDSelectorNot() {} |
124 | | }; |
125 | | |
126 | | /// selects all entries (useful for benchmarking) |
127 | | struct IDSelectorAll : IDSelector { |
128 | 0 | bool is_member(idx_t id) const final { |
129 | 0 | return true; |
130 | 0 | } |
131 | 0 | virtual ~IDSelectorAll() {} |
132 | | }; |
133 | | |
134 | | /// does an AND operation on the the two given IDSelector's is_membership |
135 | | /// results. |
136 | | struct IDSelectorAnd : IDSelector { |
137 | | const IDSelector* lhs; |
138 | | const IDSelector* rhs; |
139 | | IDSelectorAnd(const IDSelector* lhs, const IDSelector* rhs) |
140 | 0 | : lhs(lhs), rhs(rhs) {} |
141 | 0 | bool is_member(idx_t id) const final { |
142 | 0 | return lhs->is_member(id) && rhs->is_member(id); |
143 | 0 | } |
144 | 0 | virtual ~IDSelectorAnd() {} |
145 | | }; |
146 | | |
147 | | /// does an OR operation on the the two given IDSelector's is_membership |
148 | | /// results. |
149 | | struct IDSelectorOr : IDSelector { |
150 | | const IDSelector* lhs; |
151 | | const IDSelector* rhs; |
152 | | IDSelectorOr(const IDSelector* lhs, const IDSelector* rhs) |
153 | 0 | : lhs(lhs), rhs(rhs) {} |
154 | 0 | bool is_member(idx_t id) const final { |
155 | 0 | return lhs->is_member(id) || rhs->is_member(id); |
156 | 0 | } |
157 | 0 | virtual ~IDSelectorOr() {} |
158 | | }; |
159 | | |
160 | | /// does an XOR operation on the the two given IDSelector's is_membership |
161 | | /// results. |
162 | | struct IDSelectorXOr : IDSelector { |
163 | | const IDSelector* lhs; |
164 | | const IDSelector* rhs; |
165 | | IDSelectorXOr(const IDSelector* lhs, const IDSelector* rhs) |
166 | 0 | : lhs(lhs), rhs(rhs) {} |
167 | 0 | bool is_member(idx_t id) const final { |
168 | 0 | return lhs->is_member(id) ^ rhs->is_member(id); |
169 | 0 | } |
170 | 0 | virtual ~IDSelectorXOr() {} |
171 | | }; |
172 | | |
173 | | } // namespace faiss |