Coverage Report

Created: 2025-11-20 18:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/faiss/faiss/utils/random.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
// -*- c++ -*-
9
10
#include <faiss/utils/random.h>
11
12
extern "C" {
13
int sgemm_(
14
        const char* transa,
15
        const char* transb,
16
        FINTEGER* m,
17
        FINTEGER* n,
18
        FINTEGER* k,
19
        const float* alpha,
20
        const float* a,
21
        FINTEGER* lda,
22
        const float* b,
23
        FINTEGER* ldb,
24
        float* beta,
25
        float* c,
26
        FINTEGER* ldc);
27
}
28
29
namespace faiss {
30
31
/**************************************************
32
 * Random data generation functions
33
 **************************************************/
34
35
18.0k
RandomGenerator::RandomGenerator(int64_t seed) : mt((unsigned int)seed) {}
36
37
0
int RandomGenerator::rand_int() {
38
0
    return mt() & 0x7fffffff;
39
0
}
40
41
0
int64_t RandomGenerator::rand_int64() {
42
0
    return int64_t(rand_int()) | int64_t(rand_int()) << 31;
43
0
}
44
45
26.3k
int RandomGenerator::rand_int(int max) {
46
26.3k
    return mt() % max;
47
26.3k
}
48
49
26.3k
float RandomGenerator::rand_float() {
50
26.3k
    return mt() / float(mt.max());
51
26.3k
}
52
53
0
double RandomGenerator::rand_double() {
54
0
    return mt() / double(mt.max());
55
0
}
56
57
SplitMix64RandomGenerator::SplitMix64RandomGenerator(int64_t seed)
58
0
        : state{static_cast<uint64_t>(seed)} {}
59
60
0
int SplitMix64RandomGenerator::rand_int() {
61
0
    return next() & 0x7fffffff;
62
0
}
63
64
0
int64_t SplitMix64RandomGenerator::rand_int64() {
65
0
    uint64_t value = next();
66
0
    return static_cast<int64_t>(value & 0x7fffffffffffffffULL);
67
0
}
68
69
0
int SplitMix64RandomGenerator::rand_int(int max) {
70
0
    return next() % max;
71
0
}
72
73
0
float SplitMix64RandomGenerator::rand_float() {
74
0
    return next() / float(std::numeric_limits<uint64_t>::max());
75
0
}
76
77
0
double SplitMix64RandomGenerator::rand_double() {
78
0
    return next() / double(std::numeric_limits<uint64_t>::max());
79
0
}
80
81
0
uint64_t SplitMix64RandomGenerator::next() {
82
0
    uint64_t z = (state += 0x9e3779b97f4a7c15ULL);
83
0
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
84
0
    z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
85
0
    return z ^ (z >> 31);
86
0
}
87
88
/***********************************************************************
89
 * Random functions in this C file only exist because Torch
90
 *  counterparts are slow and not multi-threaded.  Typical use is for
91
 *  more than 1-100 billion values. */
92
93
/* Generate a set of random floating point values such that x[i] in [0,1]
94
   multi-threading. For this reason, we rely on re-entreant functions.  */
95
0
void float_rand(float* x, size_t n, int64_t seed) {
96
    // only try to parallelize on large enough arrays
97
0
    const size_t nblock = n < 1024 ? 1 : 1024;
98
99
0
    RandomGenerator rng0(seed);
100
0
    int a0 = rng0.rand_int(), b0 = rng0.rand_int();
101
102
0
#pragma omp parallel for
103
0
    for (int64_t j = 0; j < nblock; j++) {
104
0
        RandomGenerator rng(a0 + j * b0);
105
106
0
        const size_t istart = j * n / nblock;
107
0
        const size_t iend = (j + 1) * n / nblock;
108
109
0
        for (size_t i = istart; i < iend; i++)
110
0
            x[i] = rng.rand_float();
111
0
    }
112
0
}
113
114
0
void float_randn(float* x, size_t n, int64_t seed) {
115
    // only try to parallelize on large enough arrays
116
0
    const size_t nblock = n < 1024 ? 1 : 1024;
117
118
0
    RandomGenerator rng0(seed);
119
0
    int a0 = rng0.rand_int(), b0 = rng0.rand_int();
120
121
0
#pragma omp parallel for
122
0
    for (int64_t j = 0; j < nblock; j++) {
123
0
        RandomGenerator rng(a0 + j * b0);
124
125
0
        double a = 0, b = 0, s = 0;
126
0
        int state = 0; /* generate two number per "do-while" loop */
127
128
0
        const size_t istart = j * n / nblock;
129
0
        const size_t iend = (j + 1) * n / nblock;
130
131
0
        for (size_t i = istart; i < iend; i++) {
132
            /* Marsaglia's method (see Knuth) */
133
0
            if (state == 0) {
134
0
                do {
135
0
                    a = 2.0 * rng.rand_double() - 1;
136
0
                    b = 2.0 * rng.rand_double() - 1;
137
0
                    s = a * a + b * b;
138
0
                } while (s >= 1.0);
139
0
                x[i] = a * sqrt(-2.0 * log(s) / s);
140
0
            } else
141
0
                x[i] = b * sqrt(-2.0 * log(s) / s);
142
0
            state = 1 - state;
143
0
        }
144
0
    }
145
0
}
146
147
/* Integer versions */
148
0
void int64_rand(int64_t* x, size_t n, int64_t seed) {
149
    // only try to parallelize on large enough arrays
150
0
    const size_t nblock = n < 1024 ? 1 : 1024;
151
152
0
    RandomGenerator rng0(seed);
153
0
    int a0 = rng0.rand_int(), b0 = rng0.rand_int();
154
155
0
#pragma omp parallel for
156
0
    for (int64_t j = 0; j < nblock; j++) {
157
0
        RandomGenerator rng(a0 + j * b0);
158
159
0
        const size_t istart = j * n / nblock;
160
0
        const size_t iend = (j + 1) * n / nblock;
161
0
        for (size_t i = istart; i < iend; i++)
162
0
            x[i] = rng.rand_int64();
163
0
    }
164
0
}
165
166
0
void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed) {
167
    // only try to parallelize on large enough arrays
168
0
    const size_t nblock = n < 1024 ? 1 : 1024;
169
170
0
    RandomGenerator rng0(seed);
171
0
    int a0 = rng0.rand_int(), b0 = rng0.rand_int();
172
173
0
#pragma omp parallel for
174
0
    for (int64_t j = 0; j < nblock; j++) {
175
0
        RandomGenerator rng(a0 + j * b0);
176
177
0
        const size_t istart = j * n / nblock;
178
0
        const size_t iend = (j + 1) * n / nblock;
179
0
        for (size_t i = istart; i < iend; i++)
180
0
            x[i] = rng.rand_int64() % max;
181
0
    }
182
0
}
183
184
0
void rand_perm(int* perm, size_t n, int64_t seed) {
185
0
    for (size_t i = 0; i < n; i++)
186
0
        perm[i] = i;
187
188
0
    RandomGenerator rng(seed);
189
190
0
    for (size_t i = 0; i + 1 < n; i++) {
191
0
        int i2 = i + rng.rand_int(n - i);
192
0
        std::swap(perm[i], perm[i2]);
193
0
    }
194
0
}
195
196
0
void rand_perm_splitmix64(int* perm, size_t n, int64_t seed) {
197
0
    for (size_t i = 0; i < n; i++)
198
0
        perm[i] = i;
199
200
0
    SplitMix64RandomGenerator rng(seed);
201
202
0
    for (size_t i = 0; i + 1 < n; i++) {
203
0
        int i2 = i + rng.rand_int(n - i);
204
0
        std::swap(perm[i], perm[i2]);
205
0
    }
206
0
}
207
208
0
void byte_rand(uint8_t* x, size_t n, int64_t seed) {
209
    // only try to parallelize on large enough arrays
210
0
    const size_t nblock = n < 1024 ? 1 : 1024;
211
212
0
    RandomGenerator rng0(seed);
213
0
    int a0 = rng0.rand_int(), b0 = rng0.rand_int();
214
215
0
#pragma omp parallel for
216
0
    for (int64_t j = 0; j < nblock; j++) {
217
0
        RandomGenerator rng(a0 + j * b0);
218
219
0
        const size_t istart = j * n / nblock;
220
0
        const size_t iend = (j + 1) * n / nblock;
221
222
0
        size_t i;
223
0
        for (i = istart; i < iend; i++)
224
0
            x[i] = rng.rand_int64();
225
0
    }
226
0
}
227
228
0
void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
229
0
    size_t d1 = 10;
230
0
    std::vector<float> x1(n * d1);
231
0
    float_randn(x1.data(), x1.size(), seed);
232
0
    std::vector<float> rot(d1 * d);
233
0
    float_rand(rot.data(), rot.size(), seed + 1);
234
235
0
    { //
236
0
        FINTEGER di = d, d1i = d1, ni = n;
237
0
        float one = 1.0, zero = 0.0;
238
0
        sgemm_("Not transposed",
239
0
               "Not transposed", // natural order
240
0
               &di,
241
0
               &ni,
242
0
               &d1i,
243
0
               &one,
244
0
               rot.data(),
245
0
               &di, // rotation matrix
246
0
               x1.data(),
247
0
               &d1i, // second term
248
0
               &zero,
249
0
               x,
250
0
               &di);
251
0
    }
252
253
0
    std::vector<float> scales(d);
254
0
    float_rand(scales.data(), d, seed + 2);
255
256
0
#pragma omp parallel for if (n * d > 10000)
257
0
    for (int64_t i = 0; i < n; i++) {
258
0
        for (size_t j = 0; j < d; j++) {
259
0
            x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
260
0
        }
261
0
    }
262
0
}
263
264
} // namespace faiss