Coverage Report

Created: 2025-12-21 13:42

/root/doris/contrib/faiss/faiss/utils/utils.cpp
Line| Count|Source
   1|      |/*
   2|      | * Copyright (c) Meta Platforms, Inc. and affiliates.
   3|      | *
   4|      | * This source code is licensed under the MIT license found in the
   5|      | * LICENSE file in the root directory of this source tree.
   6|      | */
   7|      |
   8|      |// -*- c++ -*-
   9|      |
  10|      |#include <faiss/Index.h>
  11|      |#include <faiss/utils/utils.h>
  12|      |
  13|      |#include <cassert>
  14|      |#include <cmath>
  15|      |#include <cstdio>
  16|      |#include <cstring>
  17|      |
  18|      |#include <sys/types.h>
  19|      |
  20|      |#ifdef _MSC_VER
  21|      |#define NOMINMAX
  22|      |#include <windows.h>
  23|      |#undef NOMINMAX
  24|      |#else
  25|      |#include <sys/time.h>
  26|      |#include <unistd.h>
  27|      |#endif // !_MSC_VER
  28|      |
  29|      |#include <omp.h>
  30|      |
  31|      |#include <algorithm>
  32|      |#include <set>
  33|      |#include <type_traits>
  34|      |#include <vector>
  35|      |
  36|      |#include <faiss/impl/AuxIndexStructures.h>
  37|      |#include <faiss/impl/FaissAssert.h>
  38|      |#include <faiss/impl/platform_macros.h>
  39|      |#include <faiss/utils/random.h>
  40|      |
  41|      |#ifndef FINTEGER
  42|      |#define FINTEGER long
  43|      |#endif
  44|      |
  45|      |extern "C" {
  46|      |
  47|      |/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
  48|      |
  49|      |int sgemm_(
  50|      |        const char* transa,
  51|      |        const char* transb,
  52|      |        FINTEGER* m,
  53|      |        FINTEGER* n,
  54|      |        FINTEGER* k,
  55|      |        const float* alpha,
  56|      |        const float* a,
  57|      |        FINTEGER* lda,
  58|      |        const float* b,
  59|      |        FINTEGER* ldb,
  60|      |        float* beta,
  61|      |        float* c,
  62|      |        FINTEGER* ldc);
  63|      |
  64|      |/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
  65|      |
  66|      |int sgeqrf_(
  67|      |        FINTEGER* m,
  68|      |        FINTEGER* n,
  69|      |        float* a,
  70|      |        FINTEGER* lda,
  71|      |        float* tau,
  72|      |        float* work,
  73|      |        FINTEGER* lwork,
  74|      |        FINTEGER* info);
  75|      |
  76|      |int sorgqr_(
  77|      |        FINTEGER* m,
  78|      |        FINTEGER* n,
  79|      |        FINTEGER* k,
  80|      |        float* a,
  81|      |        FINTEGER* lda,
  82|      |        float* tau,
  83|      |        float* work,
  84|      |        FINTEGER* lwork,
  85|      |        FINTEGER* info);
  86|      |
  87|      |int sgemv_(
  88|      |        const char* trans,
  89|      |        FINTEGER* m,
  90|      |        FINTEGER* n,
  91|      |        float* alpha,
  92|      |        const float* a,
  93|      |        FINTEGER* lda,
  94|      |        const float* x,
  95|      |        FINTEGER* incx,
  96|      |        float* beta,
  97|      |        float* y,
  98|      |        FINTEGER* incy);
  99|      |}
 100|      |
 101|      |/**************************************************
 102|      | * Get some stats about the system
 103|      | **************************************************/
 104|      |
 105|      |namespace faiss {
 106|      |
 107|      |// this will be set at load time from GPU Faiss
 108|      |std::string gpu_compile_options;
 109|      |
 110|     0|std::string get_compile_options() {
 111|     0|    std::string options;
 112|      |
 113|      |    // this flag is set by GCC and Clang
 114|      |#ifdef __OPTIMIZE__
 115|      |    options += "OPTIMIZE ";
 116|      |#endif
 117|      |
 118|      |#ifdef __AVX512F__
 119|      |    options += "AVX512 ";
 120|      |#elif defined(__AVX2__)
 121|      |    options += "AVX2 ";
 122|      |#elif defined(__ARM_FEATURE_SVE)
 123|      |    options += "SVE NEON ";
 124|      |#elif defined(__aarch64__)
 125|      |    options += "NEON ";
 126|      |#else
 127|      |    options += "GENERIC ";
 128|      |#endif
 129|      |
 130|     0|    options += gpu_compile_options;
 131|      |
 132|     0|    return options;
 133|     0|}
 134|      |
 135|     0|std::string get_version() {
 136|     0|    return VERSION_STRING;
 137|     0|}
 138|      |
 139|      |#ifdef _MSC_VER
 140|      |double getmillisecs() {
 141|      |    LARGE_INTEGER ts;
 142|      |    LARGE_INTEGER freq;
 143|      |    QueryPerformanceFrequency(&freq);
 144|      |    QueryPerformanceCounter(&ts);
 145|      |
 146|      |    return (ts.QuadPart * 1e3) / freq.QuadPart;
 147|      |}
 148|      |#else  // _MSC_VER
 149| 16.9k|double getmillisecs() {
 150| 16.9k|    struct timeval tv;
 151| 16.9k|    gettimeofday(&tv, nullptr);
 152| 16.9k|    return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;
 153| 16.9k|}
 154|      |#endif // _MSC_VER
 155|      |
 156|     0|uint64_t get_cycles() {
 157|     0|#ifdef __x86_64__
 158|     0|    uint32_t high, low;
 159|     0|    asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
 160|     0|    return ((uint64_t)high << 32) | (low);
 161|      |#else
 162|      |    return 0;
 163|      |#endif
 164|     0|}
 165|      |
 166|      |#ifdef __linux__
 167|      |
 168|     0|size_t get_mem_usage_kb() {
 169|     0|    int pid = getpid();
 170|     0|    char fname[256];
 171|     0|    snprintf(fname, 256, "/proc/%d/status", pid);
 172|     0|    FILE* f = fopen(fname, "r");
 173|     0|    FAISS_THROW_IF_NOT_MSG(f, "cannot open proc status file");
 174|     0|    size_t sz = 0;
 175|     0|    for (;;) {
 176|     0|        char buf[256];
 177|     0|        if (!fgets(buf, 256, f))
 178|     0|            break;
 179|     0|        if (sscanf(buf, "VmRSS: %ld kB", &sz) == 1)
 180|     0|            break;
 181|     0|    }
 182|     0|    fclose(f);
 183|     0|    return sz;
 184|     0|}
 185|      |
 186|      |#else
 187|      |
 188|      |size_t get_mem_usage_kb() {
 189|      |    fprintf(stderr,
 190|      |            "WARN: get_mem_usage_kb not implemented on current architecture\n");
 191|      |    return 0;
 192|      |}
 193|      |
 194|      |#endif
 195|      |
 196|      |void reflection(
 197|      |        const float* __restrict u,
 198|      |        float* __restrict x,
 199|      |        size_t n,
 200|      |        size_t d,
 201|     0|        size_t nu) {
 202|     0|    size_t i, j, l;
 203|     0|    for (i = 0; i < n; i++) {
 204|     0|        const float* up = u;
 205|     0|        for (l = 0; l < nu; l++) {
 206|     0|            float ip1 = 0, ip2 = 0;
 207|      |
 208|     0|            for (j = 0; j < d; j += 2) {
 209|     0|                ip1 += up[j] * x[j];
 210|     0|                ip2 += up[j + 1] * x[j + 1];
 211|     0|            }
 212|     0|            float ip = 2 * (ip1 + ip2);
 213|      |
 214|     0|            for (j = 0; j < d; j++)
 215|     0|                x[j] -= ip * up[j];
 216|     0|            up += d;
 217|     0|        }
 218|     0|        x += d;
 219|     0|    }
 220|     0|}
 221|      |
 222|      |/* Reference implementation (slower) */
 223|     0|void reflection_ref(const float* u, float* x, size_t n, size_t d, size_t nu) {
 224|     0|    size_t i, j, l;
 225|     0|    for (i = 0; i < n; i++) {
 226|     0|        const float* up = u;
 227|     0|        for (l = 0; l < nu; l++) {
 228|     0|            double ip = 0;
 229|      |
 230|     0|            for (j = 0; j < d; j++)
 231|     0|                ip += up[j] * x[j];
 232|     0|            ip *= 2;
 233|      |
 234|     0|            for (j = 0; j < d; j++)
 235|     0|                x[j] -= ip * up[j];
 236|      |
 237|     0|            up += d;
 238|     0|        }
 239|     0|        x += d;
 240|     0|    }
 241|     0|}
 242|      |
 243|      |/***************************************************************************
 244|      | * Some matrix manipulation functions
 245|      | ***************************************************************************/
 246|      |
 247|     0|void matrix_qr(int m, int n, float* a) {
 248|     0|    FAISS_THROW_IF_NOT(m >= n);
 249|     0|    FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
 250|     0|    std::vector<float> tau(ki);
 251|     0|    FINTEGER lwork = -1, info;
 252|     0|    float work_size;
 253|      |
 254|     0|    sgeqrf_(&mi, &ni, a, &mi, tau.data(), &work_size, &lwork, &info);
 255|     0|    lwork = size_t(work_size);
 256|     0|    std::vector<float> work(lwork);
 257|      |
 258|     0|    sgeqrf_(&mi, &ni, a, &mi, tau.data(), work.data(), &lwork, &info);
 259|      |
 260|     0|    sorgqr_(&mi, &ni, &ki, a, &mi, tau.data(), work.data(), &lwork, &info);
 261|     0|}
 262|      |
 263|      |/***************************************************************************
 264|      | * Result list routines
 265|      | ***************************************************************************/
 266|      |
 267|     0|void ranklist_handle_ties(int k, int64_t* idx, const float* dis) {
 268|     0|    float prev_dis = -1e38;
 269|     0|    int prev_i = -1;
 270|     0|    for (int i = 0; i < k; i++) {
 271|     0|        if (dis[i] != prev_dis) {
 272|     0|            if (i > prev_i + 1) {
 273|      |                // sort between prev_i and i - 1
 274|     0|                std::sort(idx + prev_i, idx + i);
 275|     0|            }
 276|     0|            prev_i = i;
 277|     0|            prev_dis = dis[i];
 278|     0|        }
 279|     0|    }
 280|     0|}
 281|      |
 282|      |size_t merge_result_table_with(
 283|      |        size_t n,
 284|      |        size_t k,
 285|      |        int64_t* I0,
 286|      |        float* D0,
 287|      |        const int64_t* I1,
 288|      |        const float* D1,
 289|      |        bool keep_min,
 290|     0|        int64_t translation) {
 291|     0|    size_t n1 = 0;
 292|      |
 293|     0|#pragma omp parallel reduction(+ : n1)
 294|     0|    {
 295|     0|        std::vector<int64_t> tmpI(k);
 296|     0|        std::vector<float> tmpD(k);
 297|      |
 298|     0|#pragma omp for
 299|     0|        for (int64_t i = 0; i < n; i++) {
 300|     0|            int64_t* lI0 = I0 + i * k;
 301|     0|            float* lD0 = D0 + i * k;
 302|     0|            const int64_t* lI1 = I1 + i * k;
 303|     0|            const float* lD1 = D1 + i * k;
 304|     0|            size_t r0 = 0;
 305|     0|            size_t r1 = 0;
 306|      |
 307|     0|            if (keep_min) {
 308|     0|                for (size_t j = 0; j < k; j++) {
 309|     0|                    if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {
 310|     0|                        tmpD[j] = lD0[r0];
 311|     0|                        tmpI[j] = lI0[r0];
 312|     0|                        r0++;
 313|     0|                    } else if (lD1[r1] >= 0) {
 314|     0|                        tmpD[j] = lD1[r1];
 315|     0|                        tmpI[j] = lI1[r1] + translation;
 316|     0|                        r1++;
 317|     0|                    } else { // both are NaNs
 318|     0|                        tmpD[j] = NAN;
 319|     0|                        tmpI[j] = -1;
 320|     0|                    }
 321|     0|                }
 322|     0|            } else {
 323|     0|                for (size_t j = 0; j < k; j++) {
 324|     0|                    if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) {
 325|     0|                        tmpD[j] = lD0[r0];
 326|     0|                        tmpI[j] = lI0[r0];
 327|     0|                        r0++;
 328|     0|                    } else if (lD1[r1] >= 0) {
 329|     0|                        tmpD[j] = lD1[r1];
 330|     0|                        tmpI[j] = lI1[r1] + translation;
 331|     0|                        r1++;
 332|     0|                    } else { // both are NaNs
 333|     0|                        tmpD[j] = NAN;
 334|     0|                        tmpI[j] = -1;
 335|     0|                    }
 336|     0|                }
 337|     0|            }
 338|     0|            n1 += r1;
 339|     0|            memcpy(lD0, tmpD.data(), sizeof(lD0[0]) * k);
 340|     0|            memcpy(lI0, tmpI.data(), sizeof(lI0[0]) * k);
 341|     0|        }
 342|     0|    }
 343|      |
 344|     0|    return n1;
 345|     0|}
 346|      |
 347|      |size_t ranklist_intersection_size(
 348|      |        size_t k1,
 349|      |        const int64_t* v1,
 350|      |        size_t k2,
 351|     0|        const int64_t* v2_in) {
 352|     0|    if (k2 > k1)
 353|     0|        return ranklist_intersection_size(k2, v2_in, k1, v1);
 354|     0|    int64_t* v2 = new int64_t[k2];
 355|     0|    memcpy(v2, v2_in, sizeof(int64_t) * k2);
 356|     0|    std::sort(v2, v2 + k2);
 357|     0|    { // de-dup v2
 358|     0|        int64_t prev = -1;
 359|     0|        size_t wp = 0;
 360|     0|        for (size_t i = 0; i < k2; i++) {
 361|     0|            if (v2[i] != prev) {
 362|     0|                v2[wp++] = prev = v2[i];
 363|     0|            }
 364|     0|        }
 365|     0|        k2 = wp;
 366|     0|    }
 367|     0|    const int64_t seen_flag = int64_t{1} << 60;
 368|     0|    size_t count = 0;
 369|     0|    for (size_t i = 0; i < k1; i++) {
 370|     0|        int64_t q = v1[i];
 371|     0|        size_t i0 = 0, i1 = k2;
 372|     0|        while (i0 + 1 < i1) {
 373|     0|            size_t imed = (i1 + i0) / 2;
 374|     0|            int64_t piv = v2[imed] & ~seen_flag;
 375|     0|            if (piv <= q)
 376|     0|                i0 = imed;
 377|     0|            else
 378|     0|                i1 = imed;
 379|     0|        }
 380|     0|        if (v2[i0] == q) {
 381|     0|            count++;
 382|     0|            v2[i0] |= seen_flag;
 383|     0|        }
 384|     0|    }
 385|     0|    delete[] v2;
 386|      |
 387|     0|    return count;
 388|     0|}
 389|      |
 390|   270|double imbalance_factor(int k, const int64_t* hist) {
 391|   270|    double tot = 0, uf = 0;
 392|      |
 393| 1.35k|    for (int i = 0; i < k; i++) {
 394| 1.08k|        tot += hist[i];
 395| 1.08k|        uf += hist[i] * (double)hist[i];
 396| 1.08k|    }
 397|   270|    uf = uf * k / (tot * tot);
 398|      |
 399|   270|    return uf;
 400|   270|}
 401|      |
 402|   270|double imbalance_factor(int64_t n, int k, const int64_t* assign) {
 403|   270|    std::vector<int64_t> hist(k, 0);
 404| 39.3k|    for (int64_t i = 0; i < n; i++) {
 405| 39.0k|        hist[assign[i]]++;
 406| 39.0k|    }
 407|      |
 408|   270|    return imbalance_factor(k, hist.data());
 409|   270|}
 410|      |
 411|     0|int ivec_hist(size_t n, const int* v, int vmax, int* hist) {
 412|     0|    memset(hist, 0, sizeof(hist[0]) * vmax);
 413|     0|    int nout = 0;
 414|     0|    while (n--) {
 415|     0|        if (v[n] < 0 || v[n] >= vmax)
 416|     0|            nout++;
 417|     0|        else
 418|     0|            hist[v[n]]++;
 419|     0|    }
 420|     0|    return nout;
 421|     0|}
 422|      |
 423|     0|void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
 424|     0|    FAISS_THROW_IF_NOT(nbits % 8 == 0);
 425|     0|    size_t d = nbits / 8;
 426|     0|    std::vector<int> accu(d * 256);
 427|     0|    const uint8_t* c = codes;
 428|     0|    for (size_t i = 0; i < n; i++)
 429|     0|        for (int j = 0; j < d; j++)
 430|     0|            accu[j * 256 + *c++]++;
 431|     0|    memset(hist, 0, sizeof(*hist) * nbits);
 432|     0|    for (int i = 0; i < d; i++) {
 433|     0|        const int* ai = accu.data() + i * 256;
 434|     0|        int* hi = hist + i * 8;
 435|     0|        for (int j = 0; j < 256; j++)
 436|     0|            for (int k = 0; k < 8; k++)
 437|     0|                if ((j >> k) & 1)
 438|     0|                    hi[k] += ai[j];
 439|     0|    }
 440|     0|}
 441|      |
 442|     0|uint64_t ivec_checksum(size_t n, const int32_t* assigned) {
 443|     0|    const uint32_t* a = reinterpret_cast<const uint32_t*>(assigned);
 444|     0|    uint64_t cs = 112909;
 445|     0|    while (n--) {
 446|     0|        cs = cs * 65713 + a[n] * 1686049;
 447|     0|    }
 448|     0|    return cs;
 449|     0|}
 450|      |
 451|     0|uint64_t bvec_checksum(size_t n, const uint8_t* a) {
 452|     0|    uint64_t cs = ivec_checksum(n / 4, (const int32_t*)a);
 453|     0|    for (size_t i = n / 4 * 4; i < n; i++) {
 454|     0|        cs = cs * 65713 + a[i] * 1686049; // index the tail byte i; a[n] here would skip the tail and read past the buffer
 455|     0|    }
 456|     0|    return cs;
 457|     0|}
 458|      |
 459|     0|void bvecs_checksum(size_t n, size_t d, const uint8_t* a, uint64_t* cs) {
 460|      |    // MSVC can't accept unsigned index for #pragma omp parallel for
 461|      |    // so below codes only accept n <= std::numeric_limits<ssize_t>::max()
 462|     0|    using ssize_t = std::make_signed<std::size_t>::type;
 463|     0|    const ssize_t size = n;
 464|     0|#pragma omp parallel for if (size > 1000)
 465|     0|    for (ssize_t i_ = 0; i_ < size; i_++) {
 466|     0|        const auto i = static_cast<std::size_t>(i_);
 467|     0|        cs[i] = bvec_checksum(d, a + i * d);
 468|     0|    }
 469|     0|}
 470|      |
 471|      |const float* fvecs_maybe_subsample(
 472|      |        size_t d,
 473|      |        size_t* n,
 474|      |        size_t nmax,
 475|      |        const float* x,
 476|      |        bool verbose,
 477|    27|        int64_t seed) {
 478|    27|    if (*n <= nmax)
 479|    27|        return x; // nothing to do
 480|      |
 481|     0|    size_t n2 = nmax;
 482|     0|    if (verbose) {
 483|     0|        printf("  Input training set too big (max size is %zd), sampling "
 484|     0|               "%zd / %zd vectors\n",
 485|     0|               nmax,
 486|     0|               n2,
 487|     0|               *n);
 488|     0|    }
 489|     0|    std::vector<int> subset(*n);
 490|     0|    rand_perm(subset.data(), *n, seed);
 491|     0|    float* x_subset = new float[n2 * d];
 492|     0|    for (int64_t i = 0; i < n2; i++)
 493|     0|        memcpy(&x_subset[i * d], &x[subset[i] * size_t(d)], sizeof(x[0]) * d);
 494|     0|    *n = n2;
 495|     0|    return x_subset;
 496|    27|}
 497|      |
 498|     0|void binary_to_real(size_t d, const uint8_t* x_in, float* x_out) {
 499|     0|    for (size_t i = 0; i < d; ++i) {
 500|     0|        x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1;
 501|     0|    }
 502|     0|}
 503|      |
 504|     0|void real_to_binary(size_t d, const float* x_in, uint8_t* x_out) {
 505|     0|    for (size_t i = 0; i < d / 8; ++i) {
 506|     0|        uint8_t b = 0;
 507|     0|        for (int j = 0; j < 8; ++j) {
 508|     0|            if (x_in[8 * i + j] > 0) {
 509|     0|                b |= (1 << j);
 510|     0|            }
 511|     0|        }
 512|     0|        x_out[i] = b;
 513|     0|    }
 514|     0|}
 515|      |
 516|      |// from Python's stringobject.c
 517|     0|uint64_t hash_bytes(const uint8_t* bytes, int64_t n) {
 518|     0|    const uint8_t* p = bytes;
 519|     0|    uint64_t x = (uint64_t)(*p) << 7;
 520|     0|    int64_t len = n;
 521|     0|    while (--len >= 0) {
 522|     0|        x = (1000003 * x) ^ *p++;
 523|     0|    }
 524|     0|    x ^= n;
 525|     0|    return x;
 526|     0|}
 527|      |
 528|     0|bool check_openmp() {
 529|     0|    omp_set_num_threads(10);
 530|      |
 531|     0|    if (omp_get_max_threads() != 10) {
 532|     0|        return false;
 533|     0|    }
 534|      |
 535|     0|    std::vector<int> nt_per_thread(10);
 536|     0|    size_t sum = 0;
 537|     0|    bool in_parallel = true;
 538|     0|#pragma omp parallel reduction(+ : sum)
 539|     0|    {
 540|     0|        if (!omp_in_parallel()) {
 541|     0|            in_parallel = false;
 542|     0|        }
 543|      |
 544|     0|        int nt = omp_get_num_threads();
 545|     0|        int rank = omp_get_thread_num();
 546|      |
 547|     0|        nt_per_thread[rank] = nt;
 548|     0|#pragma omp for
 549|     0|        for (int i = 0; i < 1000 * 1000 * 10; i++) {
 550|     0|            sum += i;
 551|     0|        }
 552|     0|    }
 553|      |
 554|     0|    if (!in_parallel) {
 555|     0|        return false;
 556|     0|    }
 557|     0|    if (nt_per_thread[0] != 10) {
 558|     0|        return false;
 559|     0|    }
 560|     0|    if (sum == 0) {
 561|     0|        return false;
 562|     0|    }
 563|      |
 564|     0|    return true;
 565|     0|}
 566|      |
 567|      |namespace {
 568|      |
 569|      |template <typename T>
 570|     0|int64_t count_lt(int64_t n, const T* row, T threshold) {
 571|     0|    for (int64_t i = 0; i < n; i++) {
 572|     0|        if (!(row[i] < threshold)) {
 573|     0|            return i;
 574|     0|        }
 575|     0|    }
 576|     0|    return n;
 577|     0|}
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_ltIfEEllPKT_S2_
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_ltIsEEllPKT_S2_
 578|      |
 579|      |template <typename T>
 580|     0|int64_t count_gt(int64_t n, const T* row, T threshold) {
 581|     0|    for (int64_t i = 0; i < n; i++) {
 582|     0|        if (!(row[i] > threshold)) {
 583|     0|            return i;
 584|     0|        }
 585|     0|    }
 586|     0|    return n;
 587|     0|}
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_gtIfEEllPKT_S2_
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_gtIsEEllPKT_S2_
 588|      |
 589|      |} // namespace
 590|      |
 591|      |template <typename T>
 592|     0|void CombinerRangeKNN<T>::compute_sizes(int64_t* L_res_init) {
 593|     0|    this->L_res = L_res_init;
 594|     0|    L_res_init[0] = 0;
 595|     0|    int64_t j = 0;
 596|     0|    for (int64_t i = 0; i < nq; i++) {
 597|     0|        int64_t n_in;
 598|     0|        if (!mask || !mask[i]) {
 599|     0|            const T* row = D + i * k;
 600|     0|            n_in = keep_max ? count_gt(k, row, r2) : count_lt(k, row, r2);
 601|     0|        } else {
 602|     0|            n_in = lim_remain[j + 1] - lim_remain[j];
 603|     0|            j++;
 604|     0|        }
 605|     0|        L_res_init[i + 1] = n_in; // L_res_init[i] + n_in;
 606|     0|    }
 607|      |    // cumsum
 608|     0|    for (int64_t i = 0; i < nq; i++) {
 609|     0|        L_res_init[i + 1] += L_res_init[i];
 610|     0|    }
 611|     0|}
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIfE13compute_sizesEPl
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIsE13compute_sizesEPl
 612|      |
 613|      |template <typename T>
 614|     0|void CombinerRangeKNN<T>::write_result(T* D_res, int64_t* I_res) {
 615|     0|    FAISS_THROW_IF_NOT(L_res);
 616|     0|    int64_t j = 0;
 617|     0|    for (int64_t i = 0; i < nq; i++) {
 618|     0|        int64_t n_in = L_res[i + 1] - L_res[i];
 619|     0|        T* D_row = D_res + L_res[i];
 620|     0|        int64_t* I_row = I_res + L_res[i];
 621|     0|        if (!mask || !mask[i]) {
 622|     0|            memcpy(D_row, D + i * k, n_in * sizeof(*D_row));
 623|     0|            memcpy(I_row, I + i * k, n_in * sizeof(*I_row));
 624|     0|        } else {
 625|     0|            memcpy(D_row, D_remain + lim_remain[j], n_in * sizeof(*D_row));
 626|     0|            memcpy(I_row, I_remain + lim_remain[j], n_in * sizeof(*I_row));
 627|     0|            j++;
 628|     0|        }
 629|     0|    }
 630|     0|}
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIfE12write_resultEPfPl
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIsE12write_resultEPsPl
 631|      |
 632|      |// explicit template instantiations
 633|      |template struct CombinerRangeKNN<float>;
 634|      |template struct CombinerRangeKNN<int16_t>;
 635|      |
 636|     0|void CodeSet::insert(size_t n, const uint8_t* codes, bool* inserted) {
 637|     0|    for (size_t i = 0; i < n; i++) {
 638|     0|        auto res = s.insert(
 639|     0|                std::vector<uint8_t>(codes + i * d, codes + i * d + d));
 640|     0|        inserted[i] = res.second;
 641|     0|    }
 642|     0|}
 643|      |
 644|      |} // namespace faiss
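
Most utilities in this file show a Count of 0, i.e. the Doris test workload never reaches them. Below is a minimal, hypothetical driver sketch (not part of utils.cpp or of the generated report) that would exercise several of the zero-count functions; it assumes the declarations in <faiss/utils/utils.h> match upstream Faiss and that the program is linked against the Faiss library built under contrib/faiss.

// coverage_driver_sketch.cpp (illustration only)
#include <faiss/utils/utils.h>

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Lines 110-133: compile-time option string.
    std::printf("compile options: %s\n", faiss::get_compile_options().c_str());

    // Lines 442-449 and 517-526: checksum / hash helpers.
    std::vector<int32_t> ids = {3, 1, 4, 1, 5, 9, 2, 6};
    std::printf("ivec_checksum: %llu\n",
                (unsigned long long)faiss::ivec_checksum(ids.size(), ids.data()));
    const uint8_t bytes[] = {0xde, 0xad, 0xbe, 0xef};
    std::printf("hash_bytes: %llu\n",
                (unsigned long long)faiss::hash_bytes(bytes, sizeof(bytes)));

    // Lines 498-514: binary <-> real round trip on 8 dimensions.
    uint8_t code = 0xb2;
    float x[8];
    faiss::binary_to_real(8, &code, x);
    uint8_t code2 = 0;
    faiss::real_to_binary(8, x, &code2);
    std::printf("round trip ok: %d\n", int(code == code2));

    // Lines 347-388: overlap between two top-4 id lists.
    int64_t v1[] = {10, 11, 12, 13};
    int64_t v2[] = {12, 13, 14, 15};
    std::printf("intersection: %zu\n",
                faiss::ranklist_intersection_size(4, v1, 4, v2));
    return 0;
}

Built with the same instrumentation flags as the report above, running such a driver would lift lines 110-133, 347-388, 442-449, 498-514 and 517-526 above zero.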