Coverage Report

Created: 2026-03-17 18:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
contrib/faiss/faiss/invlists/InvertedLists.h
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#ifndef FAISS_INVERTEDLISTS_IVF_H
11
#define FAISS_INVERTEDLISTS_IVF_H
12
13
/**
14
 * Definition of inverted lists + a few common classes that implement
15
 * the interface.
16
 */
17
18
#include <vector>
19
20
#include <faiss/MetricType.h>
21
#include <faiss/impl/maybe_owned_vector.h>
22
23
namespace faiss {
24
25
struct InvertedListsIterator {
26
    virtual ~InvertedListsIterator();
27
    virtual bool is_available() const = 0;
28
    virtual void next() = 0;
29
    virtual std::pair<idx_t, const uint8_t*> get_id_and_codes() = 0;
30
};
31
32
/** Table of inverted lists
33
 * multithreading rules:
34
 * - concurrent read accesses are allowed
35
 * - concurrent update accesses are allowed
36
 * - for resize and add_entries, only concurrent access to different lists
37
 *   are allowed
38
 */
39
struct InvertedLists {
40
    size_t nlist;     ///< number of possible key values
41
    size_t code_size; ///< code size per vector in bytes
42
43
    /// request to use iterator rather than get_codes / get_ids
44
    bool use_iterator = false;
45
46
    InvertedLists(size_t nlist, size_t code_size);
47
48
    virtual ~InvertedLists();
49
50
    /// used for BlockInvertedLists, where the codes are packed into groups
51
    /// and the individual code size is meaningless
52
    static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
53
54
    /*************************
55
     *  Read only functions */
56
57
    /// get the size of a list
58
    virtual size_t list_size(size_t list_no) const = 0;
59
60
    /** get the codes for an inverted list
61
     * must be released by release_codes
62
     *
63
     * @return codes    size list_size * code_size
64
     */
65
    virtual const uint8_t* get_codes(size_t list_no) const = 0;
66
67
    /** get the ids for an inverted list
68
     * must be released by release_ids
69
     *
70
     * @return ids      size list_size
71
     */
72
    virtual const idx_t* get_ids(size_t list_no) const = 0;
73
74
    /// release codes returned by get_codes (default implementation is nop)
75
    virtual void release_codes(size_t list_no, const uint8_t* codes) const;
76
77
    /// release ids returned by get_ids
78
    virtual void release_ids(size_t list_no, const idx_t* ids) const;
79
80
    /// @return a single id in an inverted list
81
    virtual idx_t get_single_id(size_t list_no, size_t offset) const;
82
83
    /// @return a single code in an inverted list
84
    /// (should be deallocated with release_codes)
85
    virtual const uint8_t* get_single_code(size_t list_no, size_t offset) const;
86
87
    /// prepare the following lists (default does nothing)
88
    /// a list can be -1 hence the signed long
89
    virtual void prefetch_lists(const idx_t* list_nos, int nlist) const;
90
91
    /*****************************************
92
     * Iterator interface (with context)     */
93
94
    /// check if the list is empty
95
    virtual bool is_empty(size_t list_no, void* inverted_list_context = nullptr)
96
            const;
97
98
    /// get iterable for lists that use_iterator
99
    virtual InvertedListsIterator* get_iterator(
100
            size_t list_no,
101
            void* inverted_list_context = nullptr) const;
102
103
    /*************************
104
     * writing functions     */
105
106
    /// add one entry to an inverted list
107
    virtual size_t add_entry(
108
            size_t list_no,
109
            idx_t theid,
110
            const uint8_t* code,
111
            void* inverted_list_context = nullptr);
112
113
    virtual size_t add_entries(
114
            size_t list_no,
115
            size_t n_entry,
116
            const idx_t* ids,
117
            const uint8_t* code) = 0;
118
119
    virtual void update_entry(
120
            size_t list_no,
121
            size_t offset,
122
            idx_t id,
123
            const uint8_t* code);
124
125
    virtual void update_entries(
126
            size_t list_no,
127
            size_t offset,
128
            size_t n_entry,
129
            const idx_t* ids,
130
            const uint8_t* code) = 0;
131
132
    virtual void resize(size_t list_no, size_t new_size) = 0;
133
134
    virtual void reset();
135
136
    /*************************
137
     * high level functions  */
138
139
    /// move all entries from oivf (empty on output)
140
    void merge_from(InvertedLists* oivf, size_t add_id);
141
142
    // how to copy a subset of elements from the inverted lists
143
    // This depends on two integers, a1 and a2.
144
    enum subset_type_t : int {
145
        // depends on IDs
146
        SUBSET_TYPE_ID_RANGE = 0, // copies ids in [a1, a2)
147
        SUBSET_TYPE_ID_MOD = 1,   // copies ids if id % a1 == a2
148
        // depends on order within invlists
149
        SUBSET_TYPE_ELEMENT_RANGE =
150
                2, // copies fractions of invlists so that a1 elements are left
151
                   // before and a2 after
152
        SUBSET_TYPE_INVLIST_FRACTION =
153
                3, // take fraction a2 out of a1 from each invlist, 0 <= a2 < a1
154
        // copy only inverted lists a1:a2
155
        SUBSET_TYPE_INVLIST = 4
156
    };
157
158
    /** copy a subset of the entries index to the other index
159
     * @return number of entries copied
160
     */
161
    size_t copy_subset_to(
162
            InvertedLists& other,
163
            subset_type_t subset_type,
164
            idx_t a1,
165
            idx_t a2) const;
166
167
    /*************************
168
     * statistics            */
169
170
    /// 1= perfectly balanced, >1: imbalanced
171
    double imbalance_factor() const;
172
173
    /// display some stats about the inverted lists
174
    void print_stats() const;
175
176
    /// sum up list sizes
177
    size_t compute_ntotal() const;
178
179
    /**************************************
180
     * Scoped inverted lists (for automatic deallocation)
181
     *
182
     * instead of writing:
183
     *
184
     *     uint8_t * codes = invlists->get_codes (10);
185
     *     ... use codes
186
     *     invlists->release_codes(10, codes)
187
     *
188
     * write:
189
     *
190
     *    ScopedCodes codes (invlists, 10);
191
     *    ... use codes.get()
192
     *    // release called automatically when codes goes out of scope
193
     *
194
     * the following function call also works:
195
     *
196
     *    foo (123, ScopedCodes (invlists, 10).get(), 456);
197
     *
198
     */
199
200
    struct ScopedIds {
201
        const InvertedLists* il;
202
        const idx_t* ids;
203
        size_t list_no;
204
205
        ScopedIds(const InvertedLists* il, size_t list_no)
206
122
                : il(il), ids(il->get_ids(list_no)), list_no(list_no) {}
207
208
122
        const idx_t* get() {
209
122
            return ids;
210
122
        }
211
212
0
        idx_t operator[](size_t i) const {
213
0
            return ids[i];
214
0
        }
215
216
122
        ~ScopedIds() {
217
122
            il->release_ids(list_no, ids);
218
122
        }
219
    };
220
221
    struct ScopedCodes {
222
        const InvertedLists* il;
223
        const uint8_t* codes;
224
        size_t list_no;
225
226
        ScopedCodes(const InvertedLists* il, size_t list_no)
227
122
                : il(il), codes(il->get_codes(list_no)), list_no(list_no) {}
228
229
        ScopedCodes(const InvertedLists* il, size_t list_no, size_t offset)
230
0
                : il(il),
231
0
                  codes(il->get_single_code(list_no, offset)),
232
0
                  list_no(list_no) {}
233
234
122
        const uint8_t* get() {
235
122
            return codes;
236
122
        }
237
238
122
        ~ScopedCodes() {
239
122
            il->release_codes(list_no, codes);
240
122
        }
241
    };
242
};
243
244
/// simple (default) implementation as an array of inverted lists
245
struct ArrayInvertedLists : InvertedLists {
246
    std::vector<MaybeOwnedVector<uint8_t>> codes; // binary codes, size nlist
247
    std::vector<MaybeOwnedVector<idx_t>> ids; ///< Inverted lists for indexes
248
249
    ArrayInvertedLists(size_t nlist, size_t code_size);
250
251
    size_t list_size(size_t list_no) const override;
252
    const uint8_t* get_codes(size_t list_no) const override;
253
    const idx_t* get_ids(size_t list_no) const override;
254
255
    size_t add_entries(
256
            size_t list_no,
257
            size_t n_entry,
258
            const idx_t* ids,
259
            const uint8_t* code) override;
260
261
    void update_entries(
262
            size_t list_no,
263
            size_t offset,
264
            size_t n_entry,
265
            const idx_t* ids,
266
            const uint8_t* code) override;
267
268
    void resize(size_t list_no, size_t new_size) override;
269
270
    /// permute the inverted lists, map maps new_id to old_id
271
    void permute_invlists(const idx_t* map);
272
273
    bool is_empty(size_t list_no, void* inverted_list_context = nullptr)
274
            const override;
275
276
    ~ArrayInvertedLists() override;
277
};
278
279
/*****************************************************************
280
 * Meta-inverted lists
281
 *
282
 * About terminology: the inverted lists are seen as a sparse matrix,
283
 * that can be stacked horizontally, vertically and sliced.
284
 *****************************************************************/
285
286
/// invlists that fail for all write functions
287
struct ReadOnlyInvertedLists : InvertedLists {
288
    ReadOnlyInvertedLists(size_t nlist, size_t code_size)
289
0
            : InvertedLists(nlist, code_size) {}
290
291
    size_t add_entries(
292
            size_t list_no,
293
            size_t n_entry,
294
            const idx_t* ids,
295
            const uint8_t* code) override;
296
297
    void update_entries(
298
            size_t list_no,
299
            size_t offset,
300
            size_t n_entry,
301
            const idx_t* ids,
302
            const uint8_t* code) override;
303
304
    void resize(size_t list_no, size_t new_size) override;
305
};
306
307
/// Horizontal stack of inverted lists
308
struct HStackInvertedLists : ReadOnlyInvertedLists {
309
    std::vector<const InvertedLists*> ils;
310
311
    /// build InvertedLists by concatenating nil of them
312
    HStackInvertedLists(int nil, const InvertedLists** ils);
313
314
    size_t list_size(size_t list_no) const override;
315
    const uint8_t* get_codes(size_t list_no) const override;
316
    const idx_t* get_ids(size_t list_no) const override;
317
318
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
319
320
    void release_codes(size_t list_no, const uint8_t* codes) const override;
321
    void release_ids(size_t list_no, const idx_t* ids) const override;
322
323
    idx_t get_single_id(size_t list_no, size_t offset) const override;
324
325
    const uint8_t* get_single_code(size_t list_no, size_t offset)
326
            const override;
327
};
328
329
using ConcatenatedInvertedLists = HStackInvertedLists;
330
331
/// vertical slice of indexes in another InvertedLists
332
struct SliceInvertedLists : ReadOnlyInvertedLists {
333
    const InvertedLists* il;
334
    idx_t i0, i1;
335
336
    SliceInvertedLists(const InvertedLists* il, idx_t i0, idx_t i1);
337
338
    size_t list_size(size_t list_no) const override;
339
    const uint8_t* get_codes(size_t list_no) const override;
340
    const idx_t* get_ids(size_t list_no) const override;
341
342
    void release_codes(size_t list_no, const uint8_t* codes) const override;
343
    void release_ids(size_t list_no, const idx_t* ids) const override;
344
345
    idx_t get_single_id(size_t list_no, size_t offset) const override;
346
347
    const uint8_t* get_single_code(size_t list_no, size_t offset)
348
            const override;
349
350
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
351
};
352
353
struct VStackInvertedLists : ReadOnlyInvertedLists {
354
    std::vector<const InvertedLists*> ils;
355
    std::vector<idx_t> cumsz;
356
357
    /// build InvertedLists by concatenating nil of them
358
    VStackInvertedLists(int nil, const InvertedLists** ils);
359
360
    size_t list_size(size_t list_no) const override;
361
    const uint8_t* get_codes(size_t list_no) const override;
362
    const idx_t* get_ids(size_t list_no) const override;
363
364
    void release_codes(size_t list_no, const uint8_t* codes) const override;
365
    void release_ids(size_t list_no, const idx_t* ids) const override;
366
367
    idx_t get_single_id(size_t list_no, size_t offset) const override;
368
369
    const uint8_t* get_single_code(size_t list_no, size_t offset)
370
            const override;
371
372
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
373
};
374
375
/** use the first inverted lists if they are non-empty otherwise use the second
376
 *
377
 * This is useful if il1 has a few inverted lists that are too long,
378
 * and that il0 has replacement lists for those, with empty lists for
379
 * the others. */
380
struct MaskedInvertedLists : ReadOnlyInvertedLists {
381
    const InvertedLists* il0;
382
    const InvertedLists* il1;
383
384
    MaskedInvertedLists(const InvertedLists* il0, const InvertedLists* il1);
385
386
    size_t list_size(size_t list_no) const override;
387
    const uint8_t* get_codes(size_t list_no) const override;
388
    const idx_t* get_ids(size_t list_no) const override;
389
390
    void release_codes(size_t list_no, const uint8_t* codes) const override;
391
    void release_ids(size_t list_no, const idx_t* ids) const override;
392
393
    idx_t get_single_id(size_t list_no, size_t offset) const override;
394
395
    const uint8_t* get_single_code(size_t list_no, size_t offset)
396
            const override;
397
398
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
399
};
400
401
/** if the inverted list in il is smaller than maxsize then return it,
402
 *  otherwise return an empty invlist */
403
struct StopWordsInvertedLists : ReadOnlyInvertedLists {
404
    const InvertedLists* il0;
405
    size_t maxsize;
406
407
    StopWordsInvertedLists(const InvertedLists* il, size_t maxsize);
408
409
    size_t list_size(size_t list_no) const override;
410
    const uint8_t* get_codes(size_t list_no) const override;
411
    const idx_t* get_ids(size_t list_no) const override;
412
413
    void release_codes(size_t list_no, const uint8_t* codes) const override;
414
    void release_ids(size_t list_no, const idx_t* ids) const override;
415
416
    idx_t get_single_id(size_t list_no, size_t offset) const override;
417
418
    const uint8_t* get_single_code(size_t list_no, size_t offset)
419
            const override;
420
421
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
422
};
423
424
} // namespace faiss
425
426
#endif