Coverage Report

Created: 2025-11-18 17:05

/root/doris/contrib/faiss/faiss/IndexRowwiseMinMax.cpp
Every instrumented line in this file has an execution count of 0; the file is entirely uncovered. Source:
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <faiss/IndexRowwiseMinMax.h>

#include <cstdint>
#include <cstring>
#include <limits>

#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/fp16.h>

namespace faiss {

namespace {

using idx_t = faiss::idx_t;

struct StorageMinMaxFP16 {
    uint16_t scaler;
    uint16_t minv;

    inline void from_floats(const float float_scaler, const float float_minv) {
        scaler = encode_fp16(float_scaler);
        minv = encode_fp16(float_minv);
    }

    inline void to_floats(float& float_scaler, float& float_minv) const {
        float_scaler = decode_fp16(scaler);
        float_minv = decode_fp16(minv);
    }
};

struct StorageMinMaxFP32 {
    float scaler;
    float minv;

    inline void from_floats(const float float_scaler, const float float_minv) {
        scaler = float_scaler;
        minv = float_minv;
    }

    inline void to_floats(float& float_scaler, float& float_minv) const {
        float_scaler = scaler;
        float_minv = minv;
    }
};
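The FP16 storage rounds both coefficients to half precision in from_floats(), which is why the encoding and training routines below write the coefficients and immediately read them back ("actual_scaler" / "actual_minv") before normalizing. A minimal sketch of that rounding, assuming the snippet is built and linked against faiss (encode_fp16/decode_fp16 come from <faiss/utils/fp16.h>, included above):

#include <faiss/utils/fp16.h>

#include <cstdio>

int main() {
    // 0.1f is not exactly representable in fp16; the stored ("actual") value
    // is slightly different, and it is the value normalization must use.
    const float requested = 0.1f;
    const float actual = faiss::decode_fp16(faiss::encode_fp16(requested));
    std::printf("requested %.9f, stored as fp16 %.9f\n", requested, actual);
    return 0;
}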
template <typename StorageMinMaxT>
void sa_encode_impl(
        const IndexRowwiseMinMaxBase* const index,
        const idx_t n_input,
        const float* x_input,
        uint8_t* bytes_output) {
    // process chunks
    const size_t chunk_size = rowwise_minmax_sa_encode_bs;

    // useful variables
    const Index* const sub_index = index->index;
    const int d = index->d;

    // the code size of the subindex
    const size_t old_code_size = sub_index->sa_code_size();
    // the code size of the index
    const size_t new_code_size = index->sa_code_size();

    // allocate tmp buffers
    std::vector<float> tmp(chunk_size * d);
    std::vector<StorageMinMaxT> minmax(chunk_size);

    // all the elements to process
    size_t n_left = n_input;

    const float* __restrict x = x_input;
    uint8_t* __restrict bytes = bytes_output;

    while (n_left > 0) {
        // current portion to be processed
        const idx_t n = std::min(n_left, chunk_size);

        // allocate a temporary buffer and do the rescale
        for (idx_t i = 0; i < n; i++) {
            // compute min & max values
            float minv = std::numeric_limits<float>::max();
            float maxv = std::numeric_limits<float>::lowest();

            const float* const vec_in = x + i * d;
            for (idx_t j = 0; j < d; j++) {
                minv = std::min(minv, vec_in[j]);
                maxv = std::max(maxv, vec_in[j]);
            }

            // save the coefficients
            const float scaler = maxv - minv;
            minmax[i].from_floats(scaler, minv);

            // and load them back, because the coefficients might
            // be modified.
            float actual_scaler = 0;
            float actual_minv = 0;
            minmax[i].to_floats(actual_scaler, actual_minv);

            float* const vec_out = tmp.data() + i * d;
            if (actual_scaler == 0) {
                for (idx_t j = 0; j < d; j++) {
                    vec_out[j] = 0;
                }
            } else {
                float inv_actual_scaler = 1.0f / actual_scaler;
                for (idx_t j = 0; j < d; j++) {
                    vec_out[j] = (vec_in[j] - actual_minv) * inv_actual_scaler;
                }
            }
        }

        // do the coding
        sub_index->sa_encode(n, tmp.data(), bytes);

        // rearrange
        for (idx_t i = n; (i--) > 0;) {
            // move a single index
            std::memmove(
                    bytes + i * new_code_size + (new_code_size - old_code_size),
                    bytes + i * old_code_size,
                    old_code_size);

            // save min & max values
            StorageMinMaxT* fpv = reinterpret_cast<StorageMinMaxT*>(
                    bytes + i * new_code_size);
            *fpv = minmax[i];
        }

        // next chunk
        x += n * d;
        bytes += n * new_code_size;

        n_left -= n;
    }
}

Unexecuted instantiations: sa_encode_impl<StorageMinMaxFP16>, sa_encode_impl<StorageMinMaxFP32>
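sa_encode_impl reduces every row to minv and scaler = maxv - minv, maps the row into [0, 1], and only then hands it to the sub-index; sa_decode_impl below applies the inverse map. A self-contained sketch of just that transform, with the sub-index codec left out and the input values purely illustrative:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> vec = {2.0f, 5.0f, 3.5f};

    // encode side: per-row coefficients
    const auto mm = std::minmax_element(vec.begin(), vec.end());
    const float minv = *mm.first;            // 2.0f
    const float scaler = *mm.second - minv;  // 3.0f

    std::vector<float> norm(vec.size());
    for (std::size_t j = 0; j < vec.size(); j++) {
        // guard against constant rows, exactly as sa_encode_impl does
        norm[j] = (scaler == 0) ? 0.0f : (vec[j] - minv) / scaler;  // 0.0, 1.0, 0.5
    }

    // decode side: norm[j] * scaler + minv restores the original values
    for (std::size_t j = 0; j < vec.size(); j++) {
        std::printf("%g -> %g -> %g\n", vec[j], norm[j], norm[j] * scaler + minv);
    }
    return 0;
}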
template <typename StorageMinMaxT>
void sa_decode_impl(
        const IndexRowwiseMinMaxBase* const index,
        const idx_t n_input,
        const uint8_t* bytes_input,
        float* x_output) {
    // process chunks
    const size_t chunk_size = rowwise_minmax_sa_decode_bs;

    // useful variables
    const Index* const sub_index = index->index;
    const int d = index->d;

    // the code size of the subindex
    const size_t old_code_size = sub_index->sa_code_size();
    // the code size of the index
    const size_t new_code_size = index->sa_code_size();

    // allocate tmp buffers
    std::vector<uint8_t> tmp(
            (chunk_size < n_input ? chunk_size : n_input) * old_code_size);
    std::vector<StorageMinMaxFP16> minmax(
            (chunk_size < n_input ? chunk_size : n_input));

    // all the elements to process
    size_t n_left = n_input;

    const uint8_t* __restrict bytes = bytes_input;
    float* __restrict x = x_output;

    while (n_left > 0) {
        // current portion to be processed
        const idx_t n = std::min(n_left, chunk_size);

        // rearrange
        for (idx_t i = 0; i < n; i++) {
            std::memcpy(
                    tmp.data() + i * old_code_size,
                    bytes + i * new_code_size + (new_code_size - old_code_size),
                    old_code_size);
        }

        // decode
        sub_index->sa_decode(n, tmp.data(), x);

        // scale back
        for (idx_t i = 0; i < n; i++) {
            const uint8_t* const vec_in = bytes + i * new_code_size;
            StorageMinMaxT fpv =
                    *(reinterpret_cast<const StorageMinMaxT*>(vec_in));

            float scaler = 0;
            float minv = 0;
            fpv.to_floats(scaler, minv);

            float* const __restrict vec = x + d * i;

            for (idx_t j = 0; j < d; j++) {
                vec[j] = vec[j] * scaler + minv;
            }
        }

        // next chunk
        bytes += n * new_code_size;
        x += n * d;

        n_left -= n;
    }
}

Unexecuted instantiations: sa_decode_impl<StorageMinMaxFP16>, sa_decode_impl<StorageMinMaxFP32>
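Both routines rely on the same per-vector layout: the StorageMinMaxT coefficients sit at the front of each code, followed by the old_code_size bytes produced by the sub-index, so new_code_size = sizeof(StorageMinMaxT) + old_code_size. A small illustration of the resulting code sizes, where the 32-byte sub-index code size is an arbitrary assumption:

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    const std::size_t old_code_size = 32;  // assumed sub-index sa_code_size()

    // mirrors IndexRowwiseMinMaxFP16::sa_code_size() further below
    const std::size_t fp16_code_size = old_code_size + 2 * sizeof(uint16_t);  // 36
    // mirrors IndexRowwiseMinMax::sa_code_size() further below
    const std::size_t fp32_code_size = old_code_size + 2 * sizeof(float);     // 40

    std::printf("FP16 wrapper: %zu bytes, FP32 wrapper: %zu bytes\n",
                fp16_code_size,
                fp32_code_size);
    return 0;
}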
//
template <typename StorageMinMaxT>
void train_inplace_impl(
        IndexRowwiseMinMaxBase* const index,
        idx_t n,
        float* x) {
    // useful variables
    Index* const sub_index = index->index;
    const int d = index->d;

    // save normalizing coefficients
    std::vector<StorageMinMaxT> minmax(n);

    // normalize
#pragma omp for
    for (idx_t i = 0; i < n; i++) {
        // compute min & max values
        float minv = std::numeric_limits<float>::max();
        float maxv = std::numeric_limits<float>::lowest();

        float* const vec = x + i * d;
        for (idx_t j = 0; j < d; j++) {
            minv = std::min(minv, vec[j]);
            maxv = std::max(maxv, vec[j]);
        }

        // save the coefficients
        const float scaler = maxv - minv;
        minmax[i].from_floats(scaler, minv);

        // and load them back, because the coefficients might
        // be modified.
        float actual_scaler = 0;
        float actual_minv = 0;
        minmax[i].to_floats(actual_scaler, actual_minv);

        if (actual_scaler == 0) {
            for (idx_t j = 0; j < d; j++) {
                vec[j] = 0;
            }
        } else {
            float inv_actual_scaler = 1.0f / actual_scaler;
            for (idx_t j = 0; j < d; j++) {
                vec[j] = (vec[j] - actual_minv) * inv_actual_scaler;
            }
        }
    }

    // train the subindex
    sub_index->train(n, x);

    // rescale data back
    for (idx_t i = 0; i < n; i++) {
        float scaler = 0;
        float minv = 0;
        minmax[i].to_floats(scaler, minv);

        float* const vec = x + i * d;

        for (idx_t j = 0; j < d; j++) {
            vec[j] = vec[j] * scaler + minv;
        }
    }
}

Unexecuted instantiations: train_inplace_impl<StorageMinMaxFP16>, train_inplace_impl<StorageMinMaxFP32>
//
template <typename StorageMinMaxT>
void train_impl(IndexRowwiseMinMaxBase* const index, idx_t n, const float* x) {
    // the default training that creates a copy of the input data

    // useful variables
    Index* const sub_index = index->index;
    const int d = index->d;

    // temp buffer
    std::vector<float> tmp(n * d);

#pragma omp for
    for (idx_t i = 0; i < n; i++) {
        // compute min & max values
        float minv = std::numeric_limits<float>::max();
        float maxv = std::numeric_limits<float>::lowest();

        const float* const __restrict vec_in = x + i * d;
        for (idx_t j = 0; j < d; j++) {
            minv = std::min(minv, vec_in[j]);
            maxv = std::max(maxv, vec_in[j]);
        }

        const float scaler = maxv - minv;

        // save the coefficients
        StorageMinMaxT storage;
        storage.from_floats(scaler, minv);

        // and load them back, because the coefficients might
        // be modified.
        float actual_scaler = 0;
        float actual_minv = 0;
        storage.to_floats(actual_scaler, actual_minv);

        float* const __restrict vec_out = tmp.data() + i * d;
        if (actual_scaler == 0) {
            for (idx_t j = 0; j < d; j++) {
                vec_out[j] = 0;
            }
        } else {
            float inv_actual_scaler = 1.0f / actual_scaler;
            for (idx_t j = 0; j < d; j++) {
                vec_out[j] = (vec_in[j] - actual_minv) * inv_actual_scaler;
            }
        }
    }

    sub_index->train(n, tmp.data());
}

Unexecuted instantiations: train_impl<StorageMinMaxFP16>, train_impl<StorageMinMaxFP32>
} // namespace

// block size for performing sa_encode and sa_decode
int rowwise_minmax_sa_encode_bs = 16384;
int rowwise_minmax_sa_decode_bs = 16384;
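These defaults set the chunk sizes used by sa_encode_impl and sa_decode_impl above. They are ordinary globals (declared extern in IndexRowwiseMinMax.h), so a caller can retune them before encoding or decoding; a sketch, with 4096 as a purely illustrative value:

#include <faiss/IndexRowwiseMinMax.h>

void use_smaller_chunks() {
    faiss::rowwise_minmax_sa_encode_bs = 4096;
    faiss::rowwise_minmax_sa_decode_bs = 4096;
}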
/*********************************************************
 * IndexRowwiseMinMaxBase implementation
 ********************************************************/

IndexRowwiseMinMaxBase::IndexRowwiseMinMaxBase(Index* index)
        : Index(index->d, index->metric_type),
          index{index},
          own_fields{false} {}

IndexRowwiseMinMaxBase::IndexRowwiseMinMaxBase()
        : index{nullptr}, own_fields{false} {}

IndexRowwiseMinMaxBase::~IndexRowwiseMinMaxBase() {
    if (own_fields) {
        delete index;
        index = nullptr;
    }
}

void IndexRowwiseMinMaxBase::add(idx_t, const float*) {
    FAISS_THROW_MSG("add not implemented for this type of index");
}

void IndexRowwiseMinMaxBase::search(
        idx_t,
        const float*,
        idx_t,
        float*,
        idx_t*,
        const SearchParameters*) const {
    FAISS_THROW_MSG("search not implemented for this type of index");
}

void IndexRowwiseMinMaxBase::reset() {
    FAISS_THROW_MSG("reset not implemented for this type of index");
}

/*********************************************************
 * IndexRowwiseMinMaxFP16 implementation
 ********************************************************/
IndexRowwiseMinMaxFP16::IndexRowwiseMinMaxFP16(Index* index)
        : IndexRowwiseMinMaxBase(index) {}

IndexRowwiseMinMaxFP16::IndexRowwiseMinMaxFP16() : IndexRowwiseMinMaxBase() {}

size_t IndexRowwiseMinMaxFP16::sa_code_size() const {
    return index->sa_code_size() + 2 * sizeof(uint16_t);
}

void IndexRowwiseMinMaxFP16::sa_encode(
        idx_t n_input,
        const float* x_input,
        uint8_t* bytes_output) const {
    sa_encode_impl<StorageMinMaxFP16>(this, n_input, x_input, bytes_output);
}

void IndexRowwiseMinMaxFP16::sa_decode(
        idx_t n_input,
        const uint8_t* bytes_input,
        float* x_output) const {
    sa_decode_impl<StorageMinMaxFP16>(this, n_input, bytes_input, x_output);
}

void IndexRowwiseMinMaxFP16::train(idx_t n, const float* x) {
    train_impl<StorageMinMaxFP16>(this, n, x);
}

void IndexRowwiseMinMaxFP16::train_inplace(idx_t n, float* x) {
    train_inplace_impl<StorageMinMaxFP16>(this, n, x);
}

/*********************************************************
 * IndexRowwiseMinMax implementation
 ********************************************************/
IndexRowwiseMinMax::IndexRowwiseMinMax(Index* index)
        : IndexRowwiseMinMaxBase(index) {}

IndexRowwiseMinMax::IndexRowwiseMinMax() : IndexRowwiseMinMaxBase() {}

size_t IndexRowwiseMinMax::sa_code_size() const {
    return index->sa_code_size() + 2 * sizeof(float);
}

void IndexRowwiseMinMax::sa_encode(
        idx_t n_input,
        const float* x_input,
        uint8_t* bytes_output) const {
    sa_encode_impl<StorageMinMaxFP32>(this, n_input, x_input, bytes_output);
}

void IndexRowwiseMinMax::sa_decode(
        idx_t n_input,
        const uint8_t* bytes_input,
        float* x_output) const {
    sa_decode_impl<StorageMinMaxFP32>(this, n_input, bytes_input, x_output);
}

void IndexRowwiseMinMax::train(idx_t n, const float* x) {
    train_impl<StorageMinMaxFP32>(this, n, x);
}

void IndexRowwiseMinMax::train_inplace(idx_t n, float* x) {
    train_inplace_impl<StorageMinMaxFP32>(this, n, x);
}

} // namespace faiss
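For context, a minimal end-to-end sketch of how this wrapper might be used: wrap a sub-index, train, then round-trip vectors through sa_encode/sa_decode. The choice of IndexScalarQuantizer as the sub-index, and all dimensions and data values, are assumptions made only for illustration:

#include <faiss/IndexRowwiseMinMax.h>
#include <faiss/IndexScalarQuantizer.h>

#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
    const int d = 8;
    const faiss::idx_t n = 1000;

    // arbitrary training/encoding data
    std::vector<float> xs(static_cast<std::size_t>(n) * d);
    for (std::size_t i = 0; i < xs.size(); i++) {
        xs[i] = static_cast<float>((i * 37) % 101) / 101.0f;
    }

    // sub-index owned by the wrapper (own_fields)
    auto* sq = new faiss::IndexScalarQuantizer(d, faiss::ScalarQuantizer::QT_8bit);
    faiss::IndexRowwiseMinMaxFP16 index(sq);
    index.own_fields = true;

    // normalizes a copy of the data, then trains the sub-index (train_impl)
    index.train(n, xs.data());

    std::vector<uint8_t> codes(static_cast<std::size_t>(n) * index.sa_code_size());
    index.sa_encode(n, xs.data(), codes.data());

    std::vector<float> decoded(static_cast<std::size_t>(n) * d);
    index.sa_decode(n, codes.data(), decoded.data());
    return 0;
}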