Coverage Report

Created: 2026-04-02 18:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
contrib/faiss/faiss/invlists/OnDiskInvertedLists.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#include <faiss/invlists/OnDiskInvertedLists.h>
11
12
#include <pthread.h>
13
14
#include <unordered_set>
15
16
#include <sys/mman.h>
17
#include <sys/stat.h>
18
#include <unistd.h>
19
20
#include <faiss/impl/FaissAssert.h>
21
#include <faiss/utils/utils.h>
22
23
#include <faiss/impl/io.h>
24
#include <faiss/impl/io_macros.h>
25
26
namespace faiss {
27
28
/**********************************************
29
 * LockLevels
30
 **********************************************/
31
32
struct LockLevels {
33
    /* There are n locks lock1(n), one lock2 and one lock3
34
     * Invariants:
35
     *    a single thread can hold one lock1(n) for some n
36
     *    a single thread can hold lock2, if it holds lock1(n) for some n
37
     *    a single thread can hold lock3, if it holds lock1(n) for some n
38
     *       AND lock2 AND no other thread holds lock1(m) for m != n
39
     */
40
    pthread_mutex_t mutex1;
41
    pthread_cond_t level1_cv;
42
    pthread_cond_t level2_cv;
43
    pthread_cond_t level3_cv;
44
45
    std::unordered_set<int> level1_holders; // which level1 locks are held
46
    int n_level2;                           // nb threads that wait on level2
47
    bool level3_in_use;                     // a thread waits on level3
48
    bool level2_in_use;
49
50
0
    LockLevels() {
51
0
        pthread_mutex_init(&mutex1, nullptr);
52
0
        pthread_cond_init(&level1_cv, nullptr);
53
0
        pthread_cond_init(&level2_cv, nullptr);
54
0
        pthread_cond_init(&level3_cv, nullptr);
55
0
        n_level2 = 0;
56
0
        level2_in_use = false;
57
0
        level3_in_use = false;
58
0
    }
59
60
0
    ~LockLevels() {
61
0
        pthread_cond_destroy(&level1_cv);
62
0
        pthread_cond_destroy(&level2_cv);
63
0
        pthread_cond_destroy(&level3_cv);
64
0
        pthread_mutex_destroy(&mutex1);
65
0
    }
66
67
0
    void lock_1(int no) {
68
0
        pthread_mutex_lock(&mutex1);
69
0
        while (level3_in_use || level1_holders.count(no) > 0) {
70
0
            pthread_cond_wait(&level1_cv, &mutex1);
71
0
        }
72
0
        level1_holders.insert(no);
73
0
        pthread_mutex_unlock(&mutex1);
74
0
    }
75
76
0
    void unlock_1(int no) {
77
0
        pthread_mutex_lock(&mutex1);
78
0
        assert(level1_holders.count(no) == 1);
79
0
        level1_holders.erase(no);
80
0
        if (level3_in_use) { // a writer is waiting
81
0
            pthread_cond_signal(&level3_cv);
82
0
        } else {
83
0
            pthread_cond_broadcast(&level1_cv);
84
0
        }
85
0
        pthread_mutex_unlock(&mutex1);
86
0
    }
87
88
0
    void lock_2() {
89
0
        pthread_mutex_lock(&mutex1);
90
0
        n_level2++;
91
0
        if (level3_in_use) { // tell waiting level3 that we are blocked
92
0
            pthread_cond_signal(&level3_cv);
93
0
        }
94
0
        while (level2_in_use) {
95
0
            pthread_cond_wait(&level2_cv, &mutex1);
96
0
        }
97
0
        level2_in_use = true;
98
0
        pthread_mutex_unlock(&mutex1);
99
0
    }
100
101
0
    void unlock_2() {
102
0
        pthread_mutex_lock(&mutex1);
103
0
        level2_in_use = false;
104
0
        n_level2--;
105
0
        pthread_cond_signal(&level2_cv);
106
0
        pthread_mutex_unlock(&mutex1);
107
0
    }
108
109
0
    void lock_3() {
110
0
        pthread_mutex_lock(&mutex1);
111
0
        level3_in_use = true;
112
        // wait until there are no level1 holders anymore except the
113
        // ones that are waiting on level2 (we are holding lock2)
114
0
        while (level1_holders.size() > n_level2) {
115
0
            pthread_cond_wait(&level3_cv, &mutex1);
116
0
        }
117
        // don't release the lock!
118
0
    }
119
120
0
    void unlock_3() {
121
0
        level3_in_use = false;
122
        // wake up all level1_holders
123
0
        pthread_cond_broadcast(&level1_cv);
124
0
        pthread_mutex_unlock(&mutex1);
125
0
    }
126
127
0
    void print() {
128
0
        pthread_mutex_lock(&mutex1);
129
0
        printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
130
0
               int(level3_in_use),
131
0
               n_level2);
132
0
        for (int k : level1_holders) {
133
0
            printf("%d ", k);
134
0
        }
135
0
        printf("]\n");
136
0
        pthread_mutex_unlock(&mutex1);
137
0
    }
138
};
139
140
/**********************************************
141
 * OngoingPrefetch
142
 **********************************************/
143
144
struct OnDiskInvertedLists::OngoingPrefetch {
145
    struct Thread {
146
        pthread_t pth;
147
        OngoingPrefetch* pf;
148
149
0
        bool one_list() {
150
0
            idx_t list_no = pf->get_next_list();
151
0
            if (list_no == -1)
152
0
                return false;
153
0
            const OnDiskInvertedLists* od = pf->od;
154
0
            od->locks->lock_1(list_no);
155
0
            size_t n = od->list_size(list_no);
156
0
            const idx_t* idx = od->get_ids(list_no);
157
0
            const uint8_t* codes = od->get_codes(list_no);
158
0
            int cs = 0;
159
0
            for (size_t i = 0; i < n; i++) {
160
0
                cs += idx[i];
161
0
            }
162
0
            const idx_t* codes8 = (const idx_t*)codes;
163
0
            idx_t n8 = n * od->code_size / 8;
164
165
0
            for (size_t i = 0; i < n8; i++) {
166
0
                cs += codes8[i];
167
0
            }
168
0
            od->locks->unlock_1(list_no);
169
170
0
            global_cs += cs & 1;
171
0
            return true;
172
0
        }
173
    };
174
175
    std::vector<Thread> threads;
176
177
    pthread_mutex_t list_ids_mutex;
178
    std::vector<idx_t> list_ids;
179
    int cur_list;
180
181
    // mutex for the list of tasks
182
    pthread_mutex_t mutex;
183
184
    // pretext to prevent the code below from being optimized out
185
    static int global_cs;
186
187
    const OnDiskInvertedLists* od;
188
189
0
    explicit OngoingPrefetch(const OnDiskInvertedLists* od) : od(od) {
190
0
        pthread_mutex_init(&mutex, nullptr);
191
0
        pthread_mutex_init(&list_ids_mutex, nullptr);
192
0
        cur_list = 0;
193
0
    }
194
195
0
    static void* prefetch_list(void* arg) {
196
0
        Thread* th = static_cast<Thread*>(arg);
197
198
0
        while (th->one_list())
199
0
            ;
200
201
0
        return nullptr;
202
0
    }
203
204
0
    idx_t get_next_list() {
205
0
        idx_t list_no = -1;
206
0
        pthread_mutex_lock(&list_ids_mutex);
207
0
        if (cur_list >= 0 && cur_list < list_ids.size()) {
208
0
            list_no = list_ids[cur_list++];
209
0
        }
210
0
        pthread_mutex_unlock(&list_ids_mutex);
211
0
        return list_no;
212
0
    }
213
214
0
    void prefetch_lists(const idx_t* list_nos, int n) {
215
0
        pthread_mutex_lock(&mutex);
216
0
        pthread_mutex_lock(&list_ids_mutex);
217
0
        list_ids.clear();
218
0
        pthread_mutex_unlock(&list_ids_mutex);
219
0
        for (auto& th : threads) {
220
0
            pthread_join(th.pth, nullptr);
221
0
        }
222
223
0
        threads.resize(0);
224
0
        cur_list = 0;
225
0
        int nt = std::min(n, od->prefetch_nthread);
226
227
0
        if (nt > 0) {
228
            // prepare tasks
229
0
            for (int i = 0; i < n; i++) {
230
0
                idx_t list_no = list_nos[i];
231
0
                if (list_no >= 0 && od->list_size(list_no) > 0) {
232
0
                    list_ids.push_back(list_no);
233
0
                }
234
0
            }
235
            // prepare threads
236
0
            threads.resize(nt);
237
0
            for (Thread& th : threads) {
238
0
                th.pf = this;
239
0
                pthread_create(&th.pth, nullptr, prefetch_list, &th);
240
0
            }
241
0
        }
242
0
        pthread_mutex_unlock(&mutex);
243
0
    }
244
245
0
    ~OngoingPrefetch() {
246
0
        pthread_mutex_lock(&mutex);
247
0
        for (auto& th : threads) {
248
0
            pthread_join(th.pth, nullptr);
249
0
        }
250
0
        pthread_mutex_unlock(&mutex);
251
0
        pthread_mutex_destroy(&mutex);
252
0
        pthread_mutex_destroy(&list_ids_mutex);
253
0
    }
254
};
255
256
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
257
258
0
void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
259
0
    pf->prefetch_lists(list_nos, n);
260
0
}
261
262
/**********************************************
263
 * OnDiskInvertedLists: mmapping
264
 **********************************************/
265
266
0
void OnDiskInvertedLists::do_mmap() {
267
0
    const char* rw_flags = read_only ? "r" : "r+";
268
0
    int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
269
0
    FILE* f = fopen(filename.c_str(), rw_flags);
270
0
    FAISS_THROW_IF_NOT_FMT(
271
0
            f,
272
0
            "could not open %s in mode %s: %s",
273
0
            filename.c_str(),
274
0
            rw_flags,
275
0
            strerror(errno));
276
277
0
    uint8_t* ptro =
278
0
            (uint8_t*)mmap(nullptr, totsize, prot, MAP_SHARED, fileno(f), 0);
279
280
0
    fclose(f);
281
282
0
    FAISS_THROW_IF_NOT_FMT(
283
0
            ptro != MAP_FAILED,
284
0
            "could not mmap %s: %s",
285
0
            filename.c_str(),
286
0
            strerror(errno));
287
0
    ptr = ptro;
288
0
}
289
290
0
void OnDiskInvertedLists::update_totsize(size_t new_size) {
291
    // unmap file
292
0
    if (ptr != nullptr) {
293
0
        int err = munmap(ptr, totsize);
294
0
        FAISS_THROW_IF_NOT_FMT(err == 0, "munmap error: %s", strerror(errno));
295
0
    }
296
0
    if (totsize == 0) {
297
        // must create file before truncating it
298
0
        FILE* f = fopen(filename.c_str(), "w");
299
0
        FAISS_THROW_IF_NOT_FMT(
300
0
                f,
301
0
                "could not open %s in mode W: %s",
302
0
                filename.c_str(),
303
0
                strerror(errno));
304
0
        fclose(f);
305
0
    }
306
307
0
    if (new_size > totsize) {
308
0
        if (!slots.empty() &&
309
0
            slots.back().offset + slots.back().capacity == totsize) {
310
0
            slots.back().capacity += new_size - totsize;
311
0
        } else {
312
0
            slots.push_back(Slot(totsize, new_size - totsize));
313
0
        }
314
0
    } else {
315
0
        assert(!"not implemented");
316
0
    }
317
318
0
    totsize = new_size;
319
320
    // create file
321
0
    printf("resizing %s to %zd bytes\n", filename.c_str(), totsize);
322
323
0
    int err = truncate(filename.c_str(), totsize);
324
325
0
    FAISS_THROW_IF_NOT_FMT(
326
0
            err == 0,
327
0
            "truncate %s to %ld: %s",
328
0
            filename.c_str(),
329
0
            totsize,
330
0
            strerror(errno));
331
0
    do_mmap();
332
0
}
333
334
/**********************************************
335
 * OnDiskInvertedLists
336
 **********************************************/
337
338
0
#define INVALID_OFFSET (size_t)(-1)
339
340
0
OnDiskOneList::OnDiskOneList() : size(0), capacity(0), offset(INVALID_OFFSET) {}
341
342
OnDiskInvertedLists::Slot::Slot(size_t offset, size_t capacity)
343
0
        : offset(offset), capacity(capacity) {}
344
345
0
OnDiskInvertedLists::Slot::Slot() : offset(0), capacity(0) {}
346
347
OnDiskInvertedLists::OnDiskInvertedLists(
348
        size_t nlist,
349
        size_t code_size,
350
        const char* filename)
351
0
        : InvertedLists(nlist, code_size),
352
0
          filename(filename),
353
0
          totsize(0),
354
0
          ptr(nullptr),
355
0
          read_only(false),
356
0
          locks(new LockLevels()),
357
0
          pf(new OngoingPrefetch(this)),
358
0
          prefetch_nthread(32) {
359
0
    lists.resize(nlist);
360
361
    // slots starts empty
362
0
}
363
364
0
OnDiskInvertedLists::OnDiskInvertedLists() : OnDiskInvertedLists(0, 0, "") {}
365
366
0
OnDiskInvertedLists::~OnDiskInvertedLists() {
367
0
    delete pf;
368
369
    // unmap all lists
370
0
    if (ptr != nullptr) {
371
0
        int err = munmap(ptr, totsize);
372
0
        if (err != 0) {
373
0
            fprintf(stderr, "mumap error: %s", strerror(errno));
374
0
        }
375
0
    }
376
0
    delete locks;
377
0
}
378
379
0
size_t OnDiskInvertedLists::list_size(size_t list_no) const {
380
0
    return lists[list_no].size;
381
0
}
382
383
0
const uint8_t* OnDiskInvertedLists::get_codes(size_t list_no) const {
384
0
    if (lists[list_no].offset == INVALID_OFFSET) {
385
0
        return nullptr;
386
0
    }
387
388
0
    return ptr + lists[list_no].offset;
389
0
}
390
391
0
const idx_t* OnDiskInvertedLists::get_ids(size_t list_no) const {
392
0
    if (lists[list_no].offset == INVALID_OFFSET) {
393
0
        return nullptr;
394
0
    }
395
396
0
    return (const idx_t*)(ptr + lists[list_no].offset +
397
0
                          code_size * lists[list_no].capacity);
398
0
}
399
400
void OnDiskInvertedLists::update_entries(
401
        size_t list_no,
402
        size_t offset,
403
        size_t n_entry,
404
        const idx_t* ids_in,
405
0
        const uint8_t* codes_in) {
406
0
    FAISS_THROW_IF_NOT(!read_only);
407
0
    if (n_entry == 0)
408
0
        return;
409
0
    [[maybe_unused]] const List& l = lists[list_no];
410
0
    assert(n_entry + offset <= l.size);
411
0
    idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
412
0
    memcpy(ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
413
0
    uint8_t* codes = const_cast<uint8_t*>(get_codes(list_no));
414
0
    memcpy(codes + offset * code_size, codes_in, code_size * n_entry);
415
0
}
416
417
size_t OnDiskInvertedLists::add_entries(
418
        size_t list_no,
419
        size_t n_entry,
420
        const idx_t* ids,
421
0
        const uint8_t* code) {
422
0
    FAISS_THROW_IF_NOT(!read_only);
423
0
    locks->lock_1(list_no);
424
0
    size_t o = list_size(list_no);
425
0
    resize_locked(list_no, n_entry + o);
426
0
    update_entries(list_no, o, n_entry, ids, code);
427
0
    locks->unlock_1(list_no);
428
0
    return o;
429
0
}
430
431
0
void OnDiskInvertedLists::resize(size_t list_no, size_t new_size) {
432
0
    FAISS_THROW_IF_NOT(!read_only);
433
0
    locks->lock_1(list_no);
434
0
    resize_locked(list_no, new_size);
435
0
    locks->unlock_1(list_no);
436
0
}
437
438
0
void OnDiskInvertedLists::resize_locked(size_t list_no, size_t new_size) {
439
0
    List& l = lists[list_no];
440
441
0
    if (new_size <= l.capacity && new_size > l.capacity / 2) {
442
0
        l.size = new_size;
443
0
        return;
444
0
    }
445
446
    // otherwise we release the current slot, and find a new one
447
448
0
    locks->lock_2();
449
0
    free_slot(l.offset, l.capacity);
450
451
0
    List new_l;
452
453
0
    if (new_size == 0) {
454
0
        new_l = List();
455
0
    } else {
456
0
        new_l.size = new_size;
457
0
        new_l.capacity = 1;
458
0
        while (new_l.capacity < new_size) {
459
0
            new_l.capacity *= 2;
460
0
        }
461
0
        new_l.offset =
462
0
                allocate_slot(new_l.capacity * (sizeof(idx_t) + code_size));
463
0
    }
464
465
    // copy common data
466
0
    if (l.offset != new_l.offset) {
467
0
        size_t n = std::min(new_size, l.size);
468
0
        if (n > 0) {
469
0
            memcpy(ptr + new_l.offset, get_codes(list_no), n * code_size);
470
0
            memcpy(ptr + new_l.offset + new_l.capacity * code_size,
471
0
                   get_ids(list_no),
472
0
                   n * sizeof(idx_t));
473
0
        }
474
0
    }
475
476
0
    lists[list_no] = new_l;
477
0
    locks->unlock_2();
478
0
}
479
480
0
size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
481
    // should hold lock2
482
483
0
    auto it = slots.begin();
484
0
    while (it != slots.end() && it->capacity < capacity) {
485
0
        it++;
486
0
    }
487
488
0
    if (it == slots.end()) {
489
        // not enough capacity
490
0
        size_t new_size = totsize == 0 ? 32 : totsize * 2;
491
0
        while (new_size - totsize < capacity) {
492
0
            new_size *= 2;
493
0
        }
494
0
        locks->lock_3();
495
0
        update_totsize(new_size);
496
0
        locks->unlock_3();
497
0
        it = slots.begin();
498
0
        while (it != slots.end() && it->capacity < capacity) {
499
0
            it++;
500
0
        }
501
0
        assert(it != slots.end());
502
0
    }
503
504
0
    size_t o = it->offset;
505
0
    if (it->capacity == capacity) {
506
0
        slots.erase(it);
507
0
    } else {
508
        // take from beginning of slot
509
0
        it->capacity -= capacity;
510
0
        it->offset += capacity;
511
0
    }
512
513
0
    return o;
514
0
}
515
516
0
void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
517
    // should hold lock2
518
0
    if (capacity == 0)
519
0
        return;
520
521
0
    auto it = slots.begin();
522
0
    while (it != slots.end() && it->offset <= offset) {
523
0
        it++;
524
0
    }
525
526
0
    size_t inf = ((size_t)1) << 60;
527
528
0
    size_t end_prev = inf;
529
0
    if (it != slots.begin()) {
530
0
        auto prev = it;
531
0
        prev--;
532
0
        end_prev = prev->offset + prev->capacity;
533
0
    }
534
535
0
    size_t begin_next = ((size_t)1) << 60;
536
0
    if (it != slots.end()) {
537
0
        begin_next = it->offset;
538
0
    }
539
540
0
    assert(end_prev == inf || offset >= end_prev);
541
0
    assert(offset + capacity <= begin_next);
542
543
0
    if (offset == end_prev) {
544
0
        auto prev = it;
545
0
        prev--;
546
0
        if (offset + capacity == begin_next) {
547
0
            prev->capacity += capacity + it->capacity;
548
0
            slots.erase(it);
549
0
        } else {
550
0
            prev->capacity += capacity;
551
0
        }
552
0
    } else {
553
0
        if (offset + capacity == begin_next) {
554
0
            it->offset -= capacity;
555
0
            it->capacity += capacity;
556
0
        } else {
557
0
            slots.insert(it, Slot(offset, capacity));
558
0
        }
559
0
    }
560
561
    // TODO shrink global storage if needed
562
0
}
563
564
/*****************************************
565
 * Compact form
566
 *****************************************/
567
size_t OnDiskInvertedLists::merge_from_multiple(
568
        const InvertedLists** ils,
569
        int n_il,
570
        bool shift_ids,
571
0
        bool verbose) {
572
0
    FAISS_THROW_IF_NOT_MSG(
573
0
            totsize == 0, "works only on an empty InvertedLists");
574
575
0
    std::vector<size_t> sizes(nlist);
576
0
    std::vector<size_t> shift_id_offsets(n_il);
577
0
    for (int i = 0; i < n_il; i++) {
578
0
        const InvertedLists* il = ils[i];
579
0
        FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);
580
581
0
        for (size_t j = 0; j < nlist; j++) {
582
0
            sizes[j] += il->list_size(j);
583
0
        }
584
585
0
        size_t il_totsize = il->compute_ntotal();
586
0
        shift_id_offsets[i] =
587
0
                (shift_ids && i > 0) ? shift_id_offsets[i - 1] + il_totsize : 0;
588
0
    }
589
590
0
    size_t cums = 0;
591
0
    size_t ntotal = 0;
592
0
    for (size_t j = 0; j < nlist; j++) {
593
0
        ntotal += sizes[j];
594
0
        lists[j].size = 0;
595
0
        lists[j].capacity = sizes[j];
596
0
        lists[j].offset = cums;
597
0
        cums += lists[j].capacity * (sizeof(idx_t) + code_size);
598
0
    }
599
600
0
    update_totsize(cums);
601
602
0
    size_t nmerged = 0;
603
0
    double t0 = getmillisecs(), last_t = t0;
604
605
0
#pragma omp parallel for
606
0
    for (size_t j = 0; j < nlist; j++) {
607
0
        List& l = lists[j];
608
0
        for (int i = 0; i < n_il; i++) {
609
0
            const InvertedLists* il = ils[i];
610
0
            size_t n_entry = il->list_size(j);
611
0
            l.size += n_entry;
612
0
            ScopedIds scope_ids(il, j);
613
0
            const idx_t* scope_ids_data = scope_ids.get();
614
0
            std::vector<idx_t> new_ids;
615
0
            if (shift_ids) {
616
0
                new_ids.resize(n_entry);
617
0
                for (size_t k = 0; k < n_entry; k++) {
618
0
                    new_ids[k] = scope_ids[k] + shift_id_offsets[i];
619
0
                }
620
0
                scope_ids_data = new_ids.data();
621
0
            }
622
0
            update_entries(
623
0
                    j,
624
0
                    l.size - n_entry,
625
0
                    n_entry,
626
0
                    scope_ids_data,
627
0
                    ScopedCodes(il, j).get());
628
0
        }
629
0
        assert(l.size == l.capacity);
630
0
        if (verbose) {
631
0
#pragma omp critical
632
0
            {
633
0
                nmerged++;
634
0
                double t1 = getmillisecs();
635
0
                if (t1 - last_t > 500) {
636
0
                    printf("merged %zd lists in %.3f s\r",
637
0
                           nmerged,
638
0
                           (t1 - t0) / 1000.0);
639
0
                    fflush(stdout);
640
0
                    last_t = t1;
641
0
                }
642
0
            }
643
0
        }
644
0
    }
645
0
    if (verbose) {
646
0
        printf("\n");
647
0
    }
648
649
0
    return ntotal;
650
0
}
651
652
size_t OnDiskInvertedLists::merge_from_1(
653
        const InvertedLists* ils,
654
0
        bool verbose) {
655
0
    return merge_from_multiple(&ils, 1, verbose);
656
0
}
657
658
0
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1) {
659
0
    FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
660
661
0
    std::vector<List> new_lists(l1 - l0);
662
0
    memcpy(new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
663
664
0
    lists.swap(new_lists);
665
666
0
    nlist = l1 - l0;
667
0
}
668
669
0
void OnDiskInvertedLists::set_all_lists_sizes(const size_t* sizes) {
670
0
    size_t ofs = 0;
671
0
    for (size_t i = 0; i < nlist; i++) {
672
0
        lists[i].offset = ofs;
673
0
        lists[i].capacity = lists[i].size = sizes[i];
674
0
        ofs += sizes[i] * (sizeof(idx_t) + code_size);
675
0
    }
676
0
}
677
678
/*******************************************************
679
 * I/O support via callbacks
680
 *******************************************************/
681
682
OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook()
683
1
        : InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name()) {}
684
685
void OnDiskInvertedListsIOHook::write(const InvertedLists* ils, IOWriter* f)
686
0
        const {
687
0
    uint32_t h = fourcc("ilod");
688
0
    WRITE1(h);
689
0
    WRITE1(ils->nlist);
690
0
    WRITE1(ils->code_size);
691
0
    const OnDiskInvertedLists* od =
692
0
            dynamic_cast<const OnDiskInvertedLists*>(ils);
693
    // this is a POD object
694
0
    WRITEVECTOR(od->lists);
695
696
0
    {
697
0
        std::vector<OnDiskInvertedLists::Slot> v(
698
0
                od->slots.begin(), od->slots.end());
699
0
        WRITEVECTOR(v);
700
0
    }
701
0
    {
702
0
        std::vector<char> x(od->filename.begin(), od->filename.end());
703
0
        WRITEVECTOR(x);
704
0
    }
705
0
    WRITE1(od->totsize);
706
0
}
707
708
InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
709
0
        const {
710
0
    OnDiskInvertedLists* od = new OnDiskInvertedLists();
711
0
    od->read_only = io_flags & IO_FLAG_READ_ONLY;
712
0
    READ1(od->nlist);
713
0
    READ1(od->code_size);
714
    // this is a POD object
715
0
    READVECTOR(od->lists);
716
0
    {
717
0
        std::vector<OnDiskInvertedLists::Slot> v;
718
0
        READVECTOR(v);
719
0
        od->slots.assign(v.begin(), v.end());
720
0
    }
721
0
    {
722
0
        std::vector<char> x;
723
0
        READVECTOR(x);
724
0
        od->filename.assign(x.begin(), x.end());
725
726
0
        if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
727
0
            FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
728
0
            FAISS_THROW_IF_NOT_MSG(
729
0
                    reader,
730
0
                    "IO_FLAG_ONDISK_SAME_DIR only supported "
731
0
                    "when reading from file");
732
0
            std::string indexname = reader->name;
733
0
            std::string dirname = "./";
734
0
            size_t slash = indexname.find_last_of('/');
735
0
            if (slash != std::string::npos) {
736
0
                dirname = indexname.substr(0, slash + 1);
737
0
            }
738
0
            std::string filename = od->filename;
739
0
            slash = filename.find_last_of('/');
740
0
            if (slash != std::string::npos) {
741
0
                filename = filename.substr(slash + 1);
742
0
            }
743
0
            filename = dirname + filename;
744
0
            printf("IO_FLAG_ONDISK_SAME_DIR: "
745
0
                   "updating ondisk filename from %s to %s\n",
746
0
                   od->filename.c_str(),
747
0
                   filename.c_str());
748
0
            od->filename = filename;
749
0
        }
750
0
    }
751
0
    READ1(od->totsize);
752
0
    if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
753
0
        od->do_mmap();
754
0
    }
755
0
    return od;
756
0
}
757
758
/** read from a ArrayInvertedLists into this invertedlist type */
759
InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
760
        IOReader* f,
761
        int /* io_flags */,
762
        size_t nlist,
763
        size_t code_size,
764
0
        const std::vector<size_t>& sizes) const {
765
0
    auto ails = new OnDiskInvertedLists();
766
0
    ails->nlist = nlist;
767
0
    ails->code_size = code_size;
768
0
    ails->read_only = true;
769
0
    ails->lists.resize(nlist);
770
771
0
    FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
772
0
    FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
773
0
    FILE* fdesc = reader->f;
774
0
    size_t o0 = ftell(fdesc);
775
0
    size_t o = o0;
776
0
    { // do the mmap
777
0
        struct stat buf;
778
0
        int ret = fstat(fileno(fdesc), &buf);
779
0
        FAISS_THROW_IF_NOT_FMT(ret == 0, "fstat failed: %s", strerror(errno));
780
0
        ails->totsize = buf.st_size;
781
0
        ails->ptr = (uint8_t*)mmap(
782
0
                nullptr,
783
0
                ails->totsize,
784
0
                PROT_READ,
785
0
                MAP_SHARED,
786
0
                fileno(fdesc),
787
0
                0);
788
0
        FAISS_THROW_IF_NOT_FMT(
789
0
                ails->ptr != MAP_FAILED, "could not mmap: %s", strerror(errno));
790
0
    }
791
792
0
    FAISS_THROW_IF_NOT(o <= ails->totsize);
793
794
0
    for (size_t i = 0; i < ails->nlist; i++) {
795
0
        OnDiskInvertedLists::List& l = ails->lists[i];
796
0
        l.size = l.capacity = sizes[i];
797
0
        l.offset = o;
798
0
        o += l.size * (sizeof(idx_t) + ails->code_size);
799
0
    }
800
    // resume normal reading of file
801
0
    fseek(fdesc, o, SEEK_SET);
802
803
0
    return ails;
804
0
}
805
806
} // namespace faiss