Coverage Report

Created: 2025-12-21 13:42

/root/doris/contrib/faiss/faiss/utils/utils.cpp
Line| Count|Source
   1|      |/*
   2|      | * Copyright (c) Meta Platforms, Inc. and affiliates.
   3|      | *
   4|      | * This source code is licensed under the MIT license found in the
   5|      | * LICENSE file in the root directory of this source tree.
   6|      | */
   7|      |
   8|      |// -*- c++ -*-
   9|      |
  10|      |#include <faiss/Index.h>
  11|      |#include <faiss/utils/utils.h>
  12|      |
  13|      |#include <cassert>
  14|      |#include <cmath>
  15|      |#include <cstdio>
  16|      |#include <cstring>
  17|      |
  18|      |#include <sys/types.h>
  19|      |
  20|      |#ifdef _MSC_VER
  21|      |#define NOMINMAX
  22|      |#include <windows.h>
  23|      |#undef NOMINMAX
  24|      |#else
  25|      |#include <sys/time.h>
  26|      |#include <unistd.h>
  27|      |#endif // !_MSC_VER
  28|      |
  29|      |#include <omp.h>
  30|      |
  31|      |#include <algorithm>
  32|      |#include <set>
  33|      |#include <type_traits>
  34|      |#include <vector>
  35|      |
  36|      |#include <faiss/impl/AuxIndexStructures.h>
  37|      |#include <faiss/impl/FaissAssert.h>
  38|      |#include <faiss/impl/platform_macros.h>
  39|      |#include <faiss/utils/random.h>
  40|      |
  41|      |#ifndef FINTEGER
  42|      |#define FINTEGER long
  43|      |#endif
  44|      |
  45|      |extern "C" {
  46|      |
  47|      |/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
  48|      |
  49|      |int sgemm_(
  50|      |        const char* transa,
  51|      |        const char* transb,
  52|      |        FINTEGER* m,
  53|      |        FINTEGER* n,
  54|      |        FINTEGER* k,
  55|      |        const float* alpha,
  56|      |        const float* a,
  57|      |        FINTEGER* lda,
  58|      |        const float* b,
  59|      |        FINTEGER* ldb,
  60|      |        float* beta,
  61|      |        float* c,
  62|      |        FINTEGER* ldc);
  63|      |
  64|      |/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
  65|      |
  66|      |int sgeqrf_(
  67|      |        FINTEGER* m,
  68|      |        FINTEGER* n,
  69|      |        float* a,
  70|      |        FINTEGER* lda,
  71|      |        float* tau,
  72|      |        float* work,
  73|      |        FINTEGER* lwork,
  74|      |        FINTEGER* info);
  75|      |
  76|      |int sorgqr_(
  77|      |        FINTEGER* m,
  78|      |        FINTEGER* n,
  79|      |        FINTEGER* k,
  80|      |        float* a,
  81|      |        FINTEGER* lda,
  82|      |        float* tau,
  83|      |        float* work,
  84|      |        FINTEGER* lwork,
  85|      |        FINTEGER* info);
  86|      |
  87|      |int sgemv_(
  88|      |        const char* trans,
  89|      |        FINTEGER* m,
  90|      |        FINTEGER* n,
  91|      |        float* alpha,
  92|      |        const float* a,
  93|      |        FINTEGER* lda,
  94|      |        const float* x,
  95|      |        FINTEGER* incx,
  96|      |        float* beta,
  97|      |        float* y,
  98|      |        FINTEGER* incy);
  99|      |}
 100|      |
 101|      |/**************************************************
 102|      | * Get some stats about the system
 103|      | **************************************************/
 104|      |
 105|      |namespace faiss {
 106|      |
 107|      |// this will be set at load time from GPU Faiss
 108|      |std::string gpu_compile_options;
 109|      |
 110|     0|std::string get_compile_options() {
 111|     0|    std::string options;
 112|      |
 113|      |    // this flag is set by GCC and Clang
 114|      |#ifdef __OPTIMIZE__
 115|      |    options += "OPTIMIZE ";
 116|      |#endif
 117|      |
 118|      |#ifdef __AVX512F__
 119|      |    options += "AVX512 ";
 120|      |#elif defined(__AVX2__)
 121|      |    options += "AVX2 ";
 122|      |#elif defined(__ARM_FEATURE_SVE)
 123|      |    options += "SVE NEON ";
 124|      |#elif defined(__aarch64__)
 125|      |    options += "NEON ";
 126|      |#else
 127|      |    options += "GENERIC ";
 128|      |#endif
 129|      |
 130|     0|    options += gpu_compile_options;
 131|      |
 132|     0|    return options;
 133|     0|}
 134|      |
 135|     0|std::string get_version() {
 136|     0|    return VERSION_STRING;
 137|     0|}
 138|      |
 139|      |#ifdef _MSC_VER
 140|      |double getmillisecs() {
 141|      |    LARGE_INTEGER ts;
 142|      |    LARGE_INTEGER freq;
 143|      |    QueryPerformanceFrequency(&freq);
 144|      |    QueryPerformanceCounter(&ts);
 145|      |
 146|      |    return (ts.QuadPart * 1e3) / freq.QuadPart;
 147|      |}
 148|      |#else  // _MSC_VER
 149| 16.9k|double getmillisecs() {
 150| 16.9k|    struct timeval tv;
 151| 16.9k|    gettimeofday(&tv, nullptr);
 152| 16.9k|    return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;
 153| 16.9k|}
 154|      |#endif // _MSC_VER
 155|      |
 156|     0|uint64_t get_cycles() {
 157|     0|#ifdef __x86_64__
 158|     0|    uint32_t high, low;
 159|     0|    asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
 160|     0|    return ((uint64_t)high << 32) | (low);
 161|      |#else
 162|      |    return 0;
 163|      |#endif
 164|     0|}
 165|      |
 166|      |#ifdef __linux__
 167|      |
 168|     0|size_t get_mem_usage_kb() {
 169|     0|    int pid = getpid();
 170|     0|    char fname[256];
 171|     0|    snprintf(fname, 256, "/proc/%d/status", pid);
 172|     0|    FILE* f = fopen(fname, "r");
 173|     0|    FAISS_THROW_IF_NOT_MSG(f, "cannot open proc status file");
 174|     0|    size_t sz = 0;
 175|     0|    for (;;) {
 176|     0|        char buf[256];
 177|     0|        if (!fgets(buf, 256, f))
 178|     0|            break;
 179|     0|        if (sscanf(buf, "VmRSS: %ld kB", &sz) == 1)
 180|     0|            break;
 181|     0|    }
 182|     0|    fclose(f);
 183|     0|    return sz;
 184|     0|}
 185|      |
 186|      |#else
 187|      |
 188|      |size_t get_mem_usage_kb() {
 189|      |    fprintf(stderr,
 190|      |            "WARN: get_mem_usage_kb not implemented on current architecture\n");
 191|      |    return 0;
 192|      |}
 193|      |
 194|      |#endif
 195|      |
 196|      |void reflection(
 197|      |        const float* __restrict u,
 198|      |        float* __restrict x,
 199|      |        size_t n,
 200|      |        size_t d,
 201|     0|        size_t nu) {
 202|     0|    size_t i, j, l;
 203|     0|    for (i = 0; i < n; i++) {
 204|     0|        const float* up = u;
 205|     0|        for (l = 0; l < nu; l++) {
 206|     0|            float ip1 = 0, ip2 = 0;
 207|      |
 208|     0|            for (j = 0; j < d; j += 2) {
 209|     0|                ip1 += up[j] * x[j];
 210|     0|                ip2 += up[j + 1] * x[j + 1];
 211|     0|            }
 212|     0|            float ip = 2 * (ip1 + ip2);
 213|      |
 214|     0|            for (j = 0; j < d; j++)
 215|     0|                x[j] -= ip * up[j];
 216|     0|            up += d;
 217|     0|        }
 218|     0|        x += d;
 219|     0|    }
 220|     0|}
 221|      |
 222|      |/* Reference implementation (slower) */
 223|     0|void reflection_ref(const float* u, float* x, size_t n, size_t d, size_t nu) {
 224|     0|    size_t i, j, l;
 225|     0|    for (i = 0; i < n; i++) {
 226|     0|        const float* up = u;
 227|     0|        for (l = 0; l < nu; l++) {
 228|     0|            double ip = 0;
 229|      |
 230|     0|            for (j = 0; j < d; j++)
 231|     0|                ip += up[j] * x[j];
 232|     0|            ip *= 2;
 233|      |
 234|     0|            for (j = 0; j < d; j++)
 235|     0|                x[j] -= ip * up[j];
 236|      |
 237|     0|            up += d;
 238|     0|        }
 239|     0|        x += d;
 240|     0|    }
 241|     0|}
 242|      |
 243|      |/***************************************************************************
 244|      | * Some matrix manipulation functions
 245|      | ***************************************************************************/
 246|      |
 247|     0|void matrix_qr(int m, int n, float* a) {
 248|     0|    FAISS_THROW_IF_NOT(m >= n);
 249|     0|    FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
 250|     0|    std::vector<float> tau(ki);
 251|     0|    FINTEGER lwork = -1, info;
 252|     0|    float work_size;
 253|      |
 254|     0|    sgeqrf_(&mi, &ni, a, &mi, tau.data(), &work_size, &lwork, &info);
 255|     0|    lwork = size_t(work_size);
 256|     0|    std::vector<float> work(lwork);
 257|      |
 258|     0|    sgeqrf_(&mi, &ni, a, &mi, tau.data(), work.data(), &lwork, &info);
 259|      |
 260|     0|    sorgqr_(&mi, &ni, &ki, a, &mi, tau.data(), work.data(), &lwork, &info);
 261|     0|}
 262|      |
 263|      |/***************************************************************************
 264|      | * Result list routines
 265|      | ***************************************************************************/
 266|      |
 267|     0|void ranklist_handle_ties(int k, int64_t* idx, const float* dis) {
 268|     0|    float prev_dis = -1e38;
 269|     0|    int prev_i = -1;
 270|     0|    for (int i = 0; i < k; i++) {
 271|     0|        if (dis[i] != prev_dis) {
 272|     0|            if (i > prev_i + 1) {
 273|      |                // sort between prev_i and i - 1
 274|     0|                std::sort(idx + prev_i, idx + i);
 275|     0|            }
 276|     0|            prev_i = i;
 277|     0|            prev_dis = dis[i];
 278|     0|        }
 279|     0|    }
 280|     0|}
 281|      |
 282|      |size_t merge_result_table_with(
 283|      |        size_t n,
 284|      |        size_t k,
 285|      |        int64_t* I0,
 286|      |        float* D0,
 287|      |        const int64_t* I1,
 288|      |        const float* D1,
 289|      |        bool keep_min,
 290|     0|        int64_t translation) {
 291|     0|    size_t n1 = 0;
 292|      |
 293|     0|#pragma omp parallel reduction(+ : n1)
 294|     0|    {
 295|     0|        std::vector<int64_t> tmpI(k);
 296|     0|        std::vector<float> tmpD(k);
 297|      |
 298|     0|#pragma omp for
 299|     0|        for (int64_t i = 0; i < n; i++) {
 300|     0|            int64_t* lI0 = I0 + i * k;
 301|     0|            float* lD0 = D0 + i * k;
 302|     0|            const int64_t* lI1 = I1 + i * k;
 303|     0|            const float* lD1 = D1 + i * k;
 304|     0|            size_t r0 = 0;
 305|     0|            size_t r1 = 0;
 306|      |
 307|     0|            if (keep_min) {
 308|     0|                for (size_t j = 0; j < k; j++) {
 309|     0|                    if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {
 310|     0|                        tmpD[j] = lD0[r0];
 311|     0|                        tmpI[j] = lI0[r0];
 312|     0|                        r0++;
 313|     0|                    } else if (lD1[r1] >= 0) {
 314|     0|                        tmpD[j] = lD1[r1];
 315|     0|                        tmpI[j] = lI1[r1] + translation;
 316|     0|                        r1++;
 317|     0|                    } else { // both are NaNs
 318|     0|                        tmpD[j] = NAN;
 319|     0|                        tmpI[j] = -1;
 320|     0|                    }
 321|     0|                }
 322|     0|            } else {
 323|     0|                for (size_t j = 0; j < k; j++) {
 324|     0|                    if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) {
 325|     0|                        tmpD[j] = lD0[r0];
 326|     0|                        tmpI[j] = lI0[r0];
 327|     0|                        r0++;
 328|     0|                    } else if (lD1[r1] >= 0) {
 329|     0|                        tmpD[j] = lD1[r1];
 330|     0|                        tmpI[j] = lI1[r1] + translation;
 331|     0|                        r1++;
 332|     0|                    } else { // both are NaNs
 333|     0|                        tmpD[j] = NAN;
 334|     0|                        tmpI[j] = -1;
 335|     0|                    }
 336|     0|                }
 337|     0|            }
 338|     0|            n1 += r1;
 339|     0|            memcpy(lD0, tmpD.data(), sizeof(lD0[0]) * k);
 340|     0|            memcpy(lI0, tmpI.data(), sizeof(lI0[0]) * k);
 341|     0|        }
 342|     0|    }
 343|      |
 344|     0|    return n1;
 345|     0|}
 346|      |
 347|      |size_t ranklist_intersection_size(
 348|      |        size_t k1,
 349|      |        const int64_t* v1,
 350|      |        size_t k2,
 351|     0|        const int64_t* v2_in) {
 352|     0|    if (k2 > k1)
 353|     0|        return ranklist_intersection_size(k2, v2_in, k1, v1);
 354|     0|    int64_t* v2 = new int64_t[k2];
 355|     0|    memcpy(v2, v2_in, sizeof(int64_t) * k2);
 356|     0|    std::sort(v2, v2 + k2);
 357|     0|    { // de-dup v2
 358|     0|        int64_t prev = -1;
 359|     0|        size_t wp = 0;
 360|     0|        for (size_t i = 0; i < k2; i++) {
 361|     0|            if (v2[i] != prev) {
 362|     0|                v2[wp++] = prev = v2[i];
 363|     0|            }
 364|     0|        }
 365|     0|        k2 = wp;
 366|     0|    }
 367|     0|    const int64_t seen_flag = int64_t{1} << 60;
 368|     0|    size_t count = 0;
 369|     0|    for (size_t i = 0; i < k1; i++) {
 370|     0|        int64_t q = v1[i];
 371|     0|        size_t i0 = 0, i1 = k2;
 372|     0|        while (i0 + 1 < i1) {
 373|     0|            size_t imed = (i1 + i0) / 2;
 374|     0|            int64_t piv = v2[imed] & ~seen_flag;
 375|     0|            if (piv <= q)
 376|     0|                i0 = imed;
 377|     0|            else
 378|     0|                i1 = imed;
 379|     0|        }
 380|     0|        if (v2[i0] == q) {
 381|     0|            count++;
 382|     0|            v2[i0] |= seen_flag;
 383|     0|        }
 384|     0|    }
 385|     0|    delete[] v2;
 386|      |
 387|     0|    return count;
 388|     0|}
 389|      |
 390|   270|double imbalance_factor(int k, const int64_t* hist) {
 391|   270|    double tot = 0, uf = 0;
 392|      |
 393| 1.35k|    for (int i = 0; i < k; i++) {
 394| 1.08k|        tot += hist[i];
 395| 1.08k|        uf += hist[i] * (double)hist[i];
 396| 1.08k|    }
 397|   270|    uf = uf * k / (tot * tot);
 398|      |
 399|   270|    return uf;
 400|   270|}
 401|      |
 402|   270|double imbalance_factor(int64_t n, int k, const int64_t* assign) {
 403|   270|    std::vector<int64_t> hist(k, 0);
 404| 39.3k|    for (int64_t i = 0; i < n; i++) {
 405| 39.0k|        hist[assign[i]]++;
 406| 39.0k|    }
 407|      |
 408|   270|    return imbalance_factor(k, hist.data());
 409|   270|}
 410|      |
 411|     0|int ivec_hist(size_t n, const int* v, int vmax, int* hist) {
 412|     0|    memset(hist, 0, sizeof(hist[0]) * vmax);
 413|     0|    int nout = 0;
 414|     0|    while (n--) {
 415|     0|        if (v[n] < 0 || v[n] >= vmax)
 416|     0|            nout++;
 417|     0|        else
 418|     0|            hist[v[n]]++;
 419|     0|    }
 420|     0|    return nout;
 421|     0|}
 422|      |
 423|     0|void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
 424|     0|    FAISS_THROW_IF_NOT(nbits % 8 == 0);
 425|     0|    size_t d = nbits / 8;
 426|     0|    std::vector<int> accu(d * 256);
 427|     0|    const uint8_t* c = codes;
 428|     0|    for (size_t i = 0; i < n; i++)
 429|     0|        for (int j = 0; j < d; j++)
 430|     0|            accu[j * 256 + *c++]++;
 431|     0|    memset(hist, 0, sizeof(*hist) * nbits);
 432|     0|    for (int i = 0; i < d; i++) {
 433|     0|        const int* ai = accu.data() + i * 256;
 434|     0|        int* hi = hist + i * 8;
 435|     0|        for (int j = 0; j < 256; j++)
 436|     0|            for (int k = 0; k < 8; k++)
 437|     0|                if ((j >> k) & 1)
 438|     0|                    hi[k] += ai[j];
 439|     0|    }
 440|     0|}
 441|      |
 442|     0|uint64_t ivec_checksum(size_t n, const int32_t* assigned) {
 443|     0|    const uint32_t* a = reinterpret_cast<const uint32_t*>(assigned);
 444|     0|    uint64_t cs = 112909;
 445|     0|    while (n--) {
 446|     0|        cs = cs * 65713 + a[n] * 1686049;
 447|     0|    }
 448|     0|    return cs;
 449|     0|}
 450|      |
 451|     0|uint64_t bvec_checksum(size_t n, const uint8_t* a) {
 452|     0|    uint64_t cs = ivec_checksum(n / 4, (const int32_t*)a);
 453|     0|    for (size_t i = n / 4 * 4; i < n; i++) {
 454|     0|        cs = cs * 65713 + a[i] * 1686049; // index the tail byte i; a[n] here would skip the tail and read past the buffer
 455|     0|    }
 456|     0|    return cs;
 457|     0|}
 458|      |
 459|     0|void bvecs_checksum(size_t n, size_t d, const uint8_t* a, uint64_t* cs) {
 460|      |    // MSVC can't accept unsigned index for #pragma omp parallel for
 461|      |    // so below codes only accept n <= std::numeric_limits<ssize_t>::max()
 462|     0|    using ssize_t = std::make_signed<std::size_t>::type;
 463|     0|    const ssize_t size = n;
 464|     0|#pragma omp parallel for if (size > 1000)
 465|     0|    for (ssize_t i_ = 0; i_ < size; i_++) {
 466|     0|        const auto i = static_cast<std::size_t>(i_);
 467|     0|        cs[i] = bvec_checksum(d, a + i * d);
 468|     0|    }
 469|     0|}
 470|      |
 471|      |const float* fvecs_maybe_subsample(
 472|      |        size_t d,
 473|      |        size_t* n,
 474|      |        size_t nmax,
 475|      |        const float* x,
 476|      |        bool verbose,
 477|    27|        int64_t seed) {
 478|    27|    if (*n <= nmax)
 479|    27|        return x; // nothing to do
 480|      |
 481|     0|    size_t n2 = nmax;
 482|     0|    if (verbose) {
 483|     0|        printf("  Input training set too big (max size is %zd), sampling "
 484|     0|               "%zd / %zd vectors\n",
 485|     0|               nmax,
 486|     0|               n2,
 487|     0|               *n);
 488|     0|    }
 489|     0|    std::vector<int> subset(*n);
 490|     0|    rand_perm(subset.data(), *n, seed);
 491|     0|    float* x_subset = new float[n2 * d];
 492|     0|    for (int64_t i = 0; i < n2; i++)
 493|     0|        memcpy(&x_subset[i * d], &x[subset[i] * size_t(d)], sizeof(x[0]) * d);
 494|     0|    *n = n2;
 495|     0|    return x_subset;
 496|    27|}
 497|      |
 498|     0|void binary_to_real(size_t d, const uint8_t* x_in, float* x_out) {
 499|     0|    for (size_t i = 0; i < d; ++i) {
 500|     0|        x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1;
 501|     0|    }
 502|     0|}
 503|      |
 504|     0|void real_to_binary(size_t d, const float* x_in, uint8_t* x_out) {
 505|     0|    for (size_t i = 0; i < d / 8; ++i) {
 506|     0|        uint8_t b = 0;
 507|     0|        for (int j = 0; j < 8; ++j) {
 508|     0|            if (x_in[8 * i + j] > 0) {
 509|     0|                b |= (1 << j);
 510|     0|            }
 511|     0|        }
 512|     0|        x_out[i] = b;
 513|     0|    }
 514|     0|}
 515|      |
 516|      |// from Python's stringobject.c
 517|     0|uint64_t hash_bytes(const uint8_t* bytes, int64_t n) {
 518|     0|    const uint8_t* p = bytes;
 519|     0|    uint64_t x = (uint64_t)(*p) << 7;
 520|     0|    int64_t len = n;
 521|     0|    while (--len >= 0) {
 522|     0|        x = (1000003 * x) ^ *p++;
 523|     0|    }
 524|     0|    x ^= n;
 525|     0|    return x;
 526|     0|}
 527|      |
 528|     0|bool check_openmp() {
 529|     0|    omp_set_num_threads(10);
 530|      |
 531|     0|    if (omp_get_max_threads() != 10) {
 532|     0|        return false;
 533|     0|    }
 534|      |
 535|     0|    std::vector<int> nt_per_thread(10);
 536|     0|    size_t sum = 0;
 537|     0|    bool in_parallel = true;
 538|     0|#pragma omp parallel reduction(+ : sum)
 539|     0|    {
 540|     0|        if (!omp_in_parallel()) {
 541|     0|            in_parallel = false;
 542|     0|        }
 543|      |
 544|     0|        int nt = omp_get_num_threads();
 545|     0|        int rank = omp_get_thread_num();
 546|      |
 547|     0|        nt_per_thread[rank] = nt;
 548|     0|#pragma omp for
 549|     0|        for (int i = 0; i < 1000 * 1000 * 10; i++) {
 550|     0|            sum += i;
 551|     0|        }
 552|     0|    }
 553|      |
 554|     0|    if (!in_parallel) {
 555|     0|        return false;
 556|     0|    }
 557|     0|    if (nt_per_thread[0] != 10) {
 558|     0|        return false;
 559|     0|    }
 560|     0|    if (sum == 0) {
 561|     0|        return false;
 562|     0|    }
 563|      |
 564|     0|    return true;
 565|     0|}
 566|      |
 567|      |namespace {
 568|      |
 569|      |template <typename T>
 570|     0|int64_t count_lt(int64_t n, const T* row, T threshold) {
 571|     0|    for (int64_t i = 0; i < n; i++) {
 572|     0|        if (!(row[i] < threshold)) {
 573|     0|            return i;
 574|     0|        }
 575|     0|    }
 576|     0|    return n;
 577|     0|}
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_ltIfEEllPKT_S2_
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_ltIsEEllPKT_S2_
 578|      |
 579|      |template <typename T>
 580|     0|int64_t count_gt(int64_t n, const T* row, T threshold) {
 581|     0|    for (int64_t i = 0; i < n; i++) {
 582|     0|        if (!(row[i] > threshold)) {
 583|     0|            return i;
 584|     0|        }
 585|     0|    }
 586|     0|    return n;
 587|     0|}
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_gtIfEEllPKT_S2_
    |      |Unexecuted instantiation: utils.cpp:_ZN5faiss12_GLOBAL__N_18count_gtIsEEllPKT_S2_
 588|      |
 589|      |} // namespace
 590|      |
 591|      |template <typename T>
 592|     0|void CombinerRangeKNN<T>::compute_sizes(int64_t* L_res_init) {
 593|     0|    this->L_res = L_res_init;
 594|     0|    L_res_init[0] = 0;
 595|     0|    int64_t j = 0;
 596|     0|    for (int64_t i = 0; i < nq; i++) {
 597|     0|        int64_t n_in;
 598|     0|        if (!mask || !mask[i]) {
 599|     0|            const T* row = D + i * k;
 600|     0|            n_in = keep_max ? count_gt(k, row, r2) : count_lt(k, row, r2);
 601|     0|        } else {
 602|     0|            n_in = lim_remain[j + 1] - lim_remain[j];
 603|     0|            j++;
 604|     0|        }
 605|     0|        L_res_init[i + 1] = n_in; // L_res_init[i] + n_in;
 606|     0|    }
 607|      |    // cumsum
 608|     0|    for (int64_t i = 0; i < nq; i++) {
 609|     0|        L_res_init[i + 1] += L_res_init[i];
 610|     0|    }
 611|     0|}
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIfE13compute_sizesEPl
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIsE13compute_sizesEPl
 612|      |
 613|      |template <typename T>
 614|     0|void CombinerRangeKNN<T>::write_result(T* D_res, int64_t* I_res) {
 615|     0|    FAISS_THROW_IF_NOT(L_res);
 616|     0|    int64_t j = 0;
 617|     0|    for (int64_t i = 0; i < nq; i++) {
 618|     0|        int64_t n_in = L_res[i + 1] - L_res[i];
 619|     0|        T* D_row = D_res + L_res[i];
 620|     0|        int64_t* I_row = I_res + L_res[i];
 621|     0|        if (!mask || !mask[i]) {
 622|     0|            memcpy(D_row, D + i * k, n_in * sizeof(*D_row));
 623|     0|            memcpy(I_row, I + i * k, n_in * sizeof(*I_row));
 624|     0|        } else {
 625|     0|            memcpy(D_row, D_remain + lim_remain[j], n_in * sizeof(*D_row));
 626|     0|            memcpy(I_row, I_remain + lim_remain[j], n_in * sizeof(*I_row));
 627|     0|            j++;
 628|     0|        }
 629|     0|    }
 630|     0|}
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIfE12write_resultEPfPl
    |      |Unexecuted instantiation: _ZN5faiss16CombinerRangeKNNIsE12write_resultEPsPl
 631|      |
 632|      |// explicit template instantiations
 633|      |template struct CombinerRangeKNN<float>;
 634|      |template struct CombinerRangeKNN<int16_t>;
 635|      |
 636|     0|void CodeSet::insert(size_t n, const uint8_t* codes, bool* inserted) {
 637|     0|    for (size_t i = 0; i < n; i++) {
 638|     0|        auto res = s.insert(
 639|     0|                std::vector<uint8_t>(codes + i * d, codes + i * d + d));
 640|     0|        inserted[i] = res.second;
 641|     0|    }
 642|     0|}
 643|      |
 644|      |} // namespace faiss
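
Most utilities in this file show a Count of 0, i.e. the Doris test workload never reaches them. Below is a minimal, hypothetical driver sketch (not part of utils.cpp or of the generated report) that would exercise several of the zero-count functions; it assumes the declarations in <faiss/utils/utils.h> match upstream Faiss and that the program is linked against the Faiss library built under contrib/faiss.

// coverage_driver_sketch.cpp (illustration only)
#include <faiss/utils/utils.h>

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Lines 110-133: compile-time option string.
    std::printf("compile options: %s\n", faiss::get_compile_options().c_str());

    // Lines 442-449 and 517-526: checksum / hash helpers.
    std::vector<int32_t> ids = {3, 1, 4, 1, 5, 9, 2, 6};
    std::printf("ivec_checksum: %llu\n",
                (unsigned long long)faiss::ivec_checksum(ids.size(), ids.data()));
    const uint8_t bytes[] = {0xde, 0xad, 0xbe, 0xef};
    std::printf("hash_bytes: %llu\n",
                (unsigned long long)faiss::hash_bytes(bytes, sizeof(bytes)));

    // Lines 498-514: binary <-> real round trip on 8 dimensions.
    uint8_t code = 0xb2;
    float x[8];
    faiss::binary_to_real(8, &code, x);
    uint8_t code2 = 0;
    faiss::real_to_binary(8, x, &code2);
    std::printf("round trip ok: %d\n", int(code == code2));

    // Lines 347-388: overlap between two top-4 id lists.
    int64_t v1[] = {10, 11, 12, 13};
    int64_t v2[] = {12, 13, 14, 15};
    std::printf("intersection: %zu\n",
                faiss::ranklist_intersection_size(4, v1, 4, v2));
    return 0;
}

Built with the same instrumentation flags as the report above, running such a driver would lift lines 110-133, 347-388, 442-449, 498-514 and 517-526 above zero.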