contrib/faiss/faiss/impl/index_read.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #include <faiss/impl/index_read_utils.h> |
9 | | #include <faiss/index_io.h> |
10 | | |
11 | | #include <faiss/impl/io_macros.h> |
12 | | |
13 | | #include <cstdio> |
14 | | #include <cstdlib> |
15 | | #include <optional> |
16 | | |
17 | | #include <faiss/impl/FaissAssert.h> |
18 | | #include <faiss/impl/io.h> |
19 | | #include <faiss/impl/io_macros.h> |
20 | | #include <faiss/utils/hamming.h> |
21 | | |
22 | | #include <faiss/invlists/InvertedListsIOHook.h> |
23 | | |
24 | | #include <faiss/Index2Layer.h> |
25 | | #include <faiss/IndexAdditiveQuantizer.h> |
26 | | #include <faiss/IndexAdditiveQuantizerFastScan.h> |
27 | | #include <faiss/IndexFlat.h> |
28 | | #include <faiss/IndexHNSW.h> |
29 | | #include <faiss/IndexIVF.h> |
30 | | #include <faiss/IndexIVFAdditiveQuantizer.h> |
31 | | #include <faiss/IndexIVFAdditiveQuantizerFastScan.h> |
32 | | #include <faiss/IndexIVFFlat.h> |
33 | | #include <faiss/IndexIVFIndependentQuantizer.h> |
34 | | #include <faiss/IndexIVFPQ.h> |
35 | | #include <faiss/IndexIVFPQFastScan.h> |
36 | | #include <faiss/IndexIVFPQR.h> |
37 | | #include <faiss/IndexIVFRaBitQ.h> |
38 | | #include <faiss/IndexIVFSpectralHash.h> |
39 | | #include <faiss/IndexLSH.h> |
40 | | #include <faiss/IndexLattice.h> |
41 | | #include <faiss/IndexNNDescent.h> |
42 | | #include <faiss/IndexNSG.h> |
43 | | #include <faiss/IndexPQ.h> |
44 | | #include <faiss/IndexPQFastScan.h> |
45 | | #include <faiss/IndexPreTransform.h> |
46 | | #include <faiss/IndexRaBitQ.h> |
47 | | #include <faiss/IndexRefine.h> |
48 | | #include <faiss/IndexRowwiseMinMax.h> |
49 | | #include <faiss/IndexScalarQuantizer.h> |
50 | | #include <faiss/MetaIndexes.h> |
51 | | #include <faiss/VectorTransform.h> |
52 | | |
53 | | #include <faiss/IndexBinaryFlat.h> |
54 | | #include <faiss/IndexBinaryFromFloat.h> |
55 | | #include <faiss/IndexBinaryHNSW.h> |
56 | | #include <faiss/IndexBinaryHash.h> |
57 | | #include <faiss/IndexBinaryIVF.h> |
58 | | |
59 | | // mmap-ing and viewing facilities |
60 | | #include <faiss/impl/maybe_owned_vector.h> |
61 | | |
62 | | #include <faiss/impl/mapped_io.h> |
63 | | #include <faiss/impl/zerocopy_io.h> |
64 | | |
65 | | namespace faiss { |
66 | | |
67 | | /************************************************************* |
68 | | * Mmap-ing and viewing facilities |
69 | | **************************************************************/ |
70 | | |
71 | | // This is a baseline functionality for reading mmapped and zerocopied vector. |
72 | | // * if `beforeknown_size` is defined, then a size of the vector won't be read. |
73 | | // * if `size_multiplier` is defined, then a size will be multiplied by it. |
74 | | // * returns true is the case was handled; ownerwise, false |
75 | | template <typename VectorT> |
76 | | bool read_vector_base( |
77 | | VectorT& target, |
78 | | IOReader* f, |
79 | | const std::optional<size_t> beforeknown_size, |
80 | 69 | const std::optional<size_t> size_multiplier) { |
81 | | // check if the use case is right |
82 | 69 | if constexpr (is_maybe_owned_vector_v<VectorT>) { |
83 | | // is it a mmap-enabled reader? |
84 | 69 | MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f); |
85 | 69 | if (mf != nullptr) { |
86 | | // read the size or use a known one |
87 | 0 | size_t size = 0; |
88 | 0 | if (beforeknown_size.has_value()) { |
89 | 0 | size = beforeknown_size.value(); |
90 | 0 | } else { |
91 | 0 | READANDCHECK(&size, 1); |
92 | 0 | } |
93 | | |
94 | | // perform the size multiplication |
95 | 0 | size *= size_multiplier.value_or(1); |
96 | | |
97 | | // ok, mmap and check |
98 | 0 | char* address = nullptr; |
99 | 0 | const size_t nread = mf->mmap( |
100 | 0 | (void**)&address, |
101 | 0 | sizeof(typename VectorT::value_type), |
102 | 0 | size); |
103 | |
|
104 | 0 | FAISS_THROW_IF_NOT_FMT( |
105 | 0 | nread == (size), |
106 | 0 | "read error in %s: %zd != %zd (%s)", |
107 | 0 | f->name.c_str(), |
108 | 0 | nread, |
109 | 0 | size, |
110 | 0 | strerror(errno)); |
111 | | |
112 | 0 | VectorT mmapped_view = |
113 | 0 | VectorT::create_view(address, nread, mf->mmap_owner); |
114 | 0 | target = std::move(mmapped_view); |
115 | |
|
116 | 0 | return true; |
117 | 0 | } |
118 | | |
119 | | // is it a zero-copy reader? |
120 | 69 | ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f); |
121 | 69 | if (zr != nullptr) { |
122 | | // read the size or use a known one |
123 | 0 | size_t size = 0; |
124 | 0 | if (beforeknown_size.has_value()) { |
125 | 0 | size = beforeknown_size.value(); |
126 | 0 | } else { |
127 | 0 | READANDCHECK(&size, 1); |
128 | 0 | } |
129 | | |
130 | | // perform the size multiplication |
131 | 0 | size *= size_multiplier.value_or(1); |
132 | | |
133 | | // create a view |
134 | 0 | char* address = nullptr; |
135 | 0 | size_t nread = zr->get_data_view( |
136 | 0 | (void**)&address, |
137 | 0 | sizeof(typename VectorT::value_type), |
138 | 0 | size); |
139 | |
|
140 | 0 | FAISS_THROW_IF_NOT_FMT( |
141 | 0 | nread == (size), |
142 | 0 | "read error in %s: %zd != %zd (%s)", |
143 | 0 | f->name.c_str(), |
144 | 0 | nread, |
145 | 0 | size_t(size), |
146 | 0 | strerror(errno)); |
147 | | |
148 | 0 | VectorT view = VectorT::create_view(address, nread, nullptr); |
149 | 0 | target = std::move(view); |
150 | |
|
151 | 0 | return true; |
152 | 0 | } |
153 | 69 | } |
154 | | |
155 | 69 | return false; |
156 | 69 | } _ZN5faiss16read_vector_baseINS_16MaybeOwnedVectorIhEEEEbRT_PNS_8IOReaderESt8optionalImES8_ Line | Count | Source | 80 | 37 | const std::optional<size_t> size_multiplier) { | 81 | | // check if the use case is right | 82 | 37 | if constexpr (is_maybe_owned_vector_v<VectorT>) { | 83 | | // is it a mmap-enabled reader? | 84 | 37 | MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f); | 85 | 37 | if (mf != nullptr) { | 86 | | // read the size or use a known one | 87 | 0 | size_t size = 0; | 88 | 0 | if (beforeknown_size.has_value()) { | 89 | 0 | size = beforeknown_size.value(); | 90 | 0 | } else { | 91 | 0 | READANDCHECK(&size, 1); | 92 | 0 | } | 93 | | | 94 | | // perform the size multiplication | 95 | 0 | size *= size_multiplier.value_or(1); | 96 | | | 97 | | // ok, mmap and check | 98 | 0 | char* address = nullptr; | 99 | 0 | const size_t nread = mf->mmap( | 100 | 0 | (void**)&address, | 101 | 0 | sizeof(typename VectorT::value_type), | 102 | 0 | size); | 103 | |
| 104 | 0 | FAISS_THROW_IF_NOT_FMT( | 105 | 0 | nread == (size), | 106 | 0 | "read error in %s: %zd != %zd (%s)", | 107 | 0 | f->name.c_str(), | 108 | 0 | nread, | 109 | 0 | size, | 110 | 0 | strerror(errno)); | 111 | | | 112 | 0 | VectorT mmapped_view = | 113 | 0 | VectorT::create_view(address, nread, mf->mmap_owner); | 114 | 0 | target = std::move(mmapped_view); | 115 | |
| 116 | 0 | return true; | 117 | 0 | } | 118 | | | 119 | | // is it a zero-copy reader? | 120 | 37 | ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f); | 121 | 37 | if (zr != nullptr) { | 122 | | // read the size or use a known one | 123 | 0 | size_t size = 0; | 124 | 0 | if (beforeknown_size.has_value()) { | 125 | 0 | size = beforeknown_size.value(); | 126 | 0 | } else { | 127 | 0 | READANDCHECK(&size, 1); | 128 | 0 | } | 129 | | | 130 | | // perform the size multiplication | 131 | 0 | size *= size_multiplier.value_or(1); | 132 | | | 133 | | // create a view | 134 | 0 | char* address = nullptr; | 135 | 0 | size_t nread = zr->get_data_view( | 136 | 0 | (void**)&address, | 137 | 0 | sizeof(typename VectorT::value_type), | 138 | 0 | size); | 139 | |
| 140 | 0 | FAISS_THROW_IF_NOT_FMT( | 141 | 0 | nread == (size), | 142 | 0 | "read error in %s: %zd != %zd (%s)", | 143 | 0 | f->name.c_str(), | 144 | 0 | nread, | 145 | 0 | size_t(size), | 146 | 0 | strerror(errno)); | 147 | | | 148 | 0 | VectorT view = VectorT::create_view(address, nread, nullptr); | 149 | 0 | target = std::move(view); | 150 | |
| 151 | 0 | return true; | 152 | 0 | } | 153 | 37 | } | 154 | | | 155 | 37 | return false; | 156 | 37 | } |
_ZN5faiss16read_vector_baseINS_16MaybeOwnedVectorIlEEEEbRT_PNS_8IOReaderESt8optionalImES8_ Line | Count | Source | 80 | 12 | const std::optional<size_t> size_multiplier) { | 81 | | // check if the use case is right | 82 | 12 | if constexpr (is_maybe_owned_vector_v<VectorT>) { | 83 | | // is it a mmap-enabled reader? | 84 | 12 | MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f); | 85 | 12 | if (mf != nullptr) { | 86 | | // read the size or use a known one | 87 | 0 | size_t size = 0; | 88 | 0 | if (beforeknown_size.has_value()) { | 89 | 0 | size = beforeknown_size.value(); | 90 | 0 | } else { | 91 | 0 | READANDCHECK(&size, 1); | 92 | 0 | } | 93 | | | 94 | | // perform the size multiplication | 95 | 0 | size *= size_multiplier.value_or(1); | 96 | | | 97 | | // ok, mmap and check | 98 | 0 | char* address = nullptr; | 99 | 0 | const size_t nread = mf->mmap( | 100 | 0 | (void**)&address, | 101 | 0 | sizeof(typename VectorT::value_type), | 102 | 0 | size); | 103 | |
| 104 | 0 | FAISS_THROW_IF_NOT_FMT( | 105 | 0 | nread == (size), | 106 | 0 | "read error in %s: %zd != %zd (%s)", | 107 | 0 | f->name.c_str(), | 108 | 0 | nread, | 109 | 0 | size, | 110 | 0 | strerror(errno)); | 111 | | | 112 | 0 | VectorT mmapped_view = | 113 | 0 | VectorT::create_view(address, nread, mf->mmap_owner); | 114 | 0 | target = std::move(mmapped_view); | 115 | |
| 116 | 0 | return true; | 117 | 0 | } | 118 | | | 119 | | // is it a zero-copy reader? | 120 | 12 | ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f); | 121 | 12 | if (zr != nullptr) { | 122 | | // read the size or use a known one | 123 | 0 | size_t size = 0; | 124 | 0 | if (beforeknown_size.has_value()) { | 125 | 0 | size = beforeknown_size.value(); | 126 | 0 | } else { | 127 | 0 | READANDCHECK(&size, 1); | 128 | 0 | } | 129 | | | 130 | | // perform the size multiplication | 131 | 0 | size *= size_multiplier.value_or(1); | 132 | | | 133 | | // create a view | 134 | 0 | char* address = nullptr; | 135 | 0 | size_t nread = zr->get_data_view( | 136 | 0 | (void**)&address, | 137 | 0 | sizeof(typename VectorT::value_type), | 138 | 0 | size); | 139 | |
| 140 | 0 | FAISS_THROW_IF_NOT_FMT( | 141 | 0 | nread == (size), | 142 | 0 | "read error in %s: %zd != %zd (%s)", | 143 | 0 | f->name.c_str(), | 144 | 0 | nread, | 145 | 0 | size_t(size), | 146 | 0 | strerror(errno)); | 147 | | | 148 | 0 | VectorT view = VectorT::create_view(address, nread, nullptr); | 149 | 0 | target = std::move(view); | 150 | |
| 151 | 0 | return true; | 152 | 0 | } | 153 | 12 | } | 154 | | | 155 | 12 | return false; | 156 | 12 | } |
_ZN5faiss16read_vector_baseINS_16MaybeOwnedVectorIiEEEEbRT_PNS_8IOReaderESt8optionalImES8_ Line | Count | Source | 80 | 20 | const std::optional<size_t> size_multiplier) { | 81 | | // check if the use case is right | 82 | 20 | if constexpr (is_maybe_owned_vector_v<VectorT>) { | 83 | | // is it a mmap-enabled reader? | 84 | 20 | MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f); | 85 | 20 | if (mf != nullptr) { | 86 | | // read the size or use a known one | 87 | 0 | size_t size = 0; | 88 | 0 | if (beforeknown_size.has_value()) { | 89 | 0 | size = beforeknown_size.value(); | 90 | 0 | } else { | 91 | 0 | READANDCHECK(&size, 1); | 92 | 0 | } | 93 | | | 94 | | // perform the size multiplication | 95 | 0 | size *= size_multiplier.value_or(1); | 96 | | | 97 | | // ok, mmap and check | 98 | 0 | char* address = nullptr; | 99 | 0 | const size_t nread = mf->mmap( | 100 | 0 | (void**)&address, | 101 | 0 | sizeof(typename VectorT::value_type), | 102 | 0 | size); | 103 | |
| 104 | 0 | FAISS_THROW_IF_NOT_FMT( | 105 | 0 | nread == (size), | 106 | 0 | "read error in %s: %zd != %zd (%s)", | 107 | 0 | f->name.c_str(), | 108 | 0 | nread, | 109 | 0 | size, | 110 | 0 | strerror(errno)); | 111 | | | 112 | 0 | VectorT mmapped_view = | 113 | 0 | VectorT::create_view(address, nread, mf->mmap_owner); | 114 | 0 | target = std::move(mmapped_view); | 115 | |
| 116 | 0 | return true; | 117 | 0 | } | 118 | | | 119 | | // is it a zero-copy reader? | 120 | 20 | ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f); | 121 | 20 | if (zr != nullptr) { | 122 | | // read the size or use a known one | 123 | 0 | size_t size = 0; | 124 | 0 | if (beforeknown_size.has_value()) { | 125 | 0 | size = beforeknown_size.value(); | 126 | 0 | } else { | 127 | 0 | READANDCHECK(&size, 1); | 128 | 0 | } | 129 | | | 130 | | // perform the size multiplication | 131 | 0 | size *= size_multiplier.value_or(1); | 132 | | | 133 | | // create a view | 134 | 0 | char* address = nullptr; | 135 | 0 | size_t nread = zr->get_data_view( | 136 | 0 | (void**)&address, | 137 | 0 | sizeof(typename VectorT::value_type), | 138 | 0 | size); | 139 | |
| 140 | 0 | FAISS_THROW_IF_NOT_FMT( | 141 | 0 | nread == (size), | 142 | 0 | "read error in %s: %zd != %zd (%s)", | 143 | 0 | f->name.c_str(), | 144 | 0 | nread, | 145 | 0 | size_t(size), | 146 | 0 | strerror(errno)); | 147 | | | 148 | 0 | VectorT view = VectorT::create_view(address, nread, nullptr); | 149 | 0 | target = std::move(view); | 150 | |
| 151 | 0 | return true; | 152 | 0 | } | 153 | 20 | } | 154 | | | 155 | 20 | return false; | 156 | 20 | } |
|
157 | | |
158 | | // a replacement for READANDCHECK for reading data into std::vector |
159 | | template <typename VectorT> |
160 | 24 | void read_vector_with_known_size(VectorT& target, IOReader* f, size_t size) { |
161 | | // size is known beforehand, no size multiplication |
162 | 24 | if (read_vector_base<VectorT>(target, f, size, std::nullopt)) { |
163 | 0 | return; |
164 | 0 | } |
165 | | |
166 | | // the default case |
167 | 24 | READANDCHECK(target.data(), size); |
168 | 24 | } _ZN5faiss27read_vector_with_known_sizeINS_16MaybeOwnedVectorIhEEEEvRT_PNS_8IOReaderEm Line | Count | Source | 160 | 12 | void read_vector_with_known_size(VectorT& target, IOReader* f, size_t size) { | 161 | | // size is known beforehand, no size multiplication | 162 | 12 | if (read_vector_base<VectorT>(target, f, size, std::nullopt)) { | 163 | 0 | return; | 164 | 0 | } | 165 | | | 166 | | // the default case | 167 | 12 | READANDCHECK(target.data(), size); | 168 | 12 | } |
_ZN5faiss27read_vector_with_known_sizeINS_16MaybeOwnedVectorIlEEEEvRT_PNS_8IOReaderEm Line | Count | Source | 160 | 12 | void read_vector_with_known_size(VectorT& target, IOReader* f, size_t size) { | 161 | | // size is known beforehand, no size multiplication | 162 | 12 | if (read_vector_base<VectorT>(target, f, size, std::nullopt)) { | 163 | 0 | return; | 164 | 0 | } | 165 | | | 166 | | // the default case | 167 | 12 | READANDCHECK(target.data(), size); | 168 | 12 | } |
|
169 | | |
170 | | // a replacement for READVECTOR |
171 | | template <typename VectorT> |
172 | 23 | void read_vector(VectorT& target, IOReader* f) { |
173 | | // size is not known beforehand, no size multiplication |
174 | 23 | if (read_vector_base<VectorT>(target, f, std::nullopt, std::nullopt)) { |
175 | 0 | return; |
176 | 0 | } |
177 | | |
178 | | // the default case |
179 | 92 | READVECTOR(target); |
180 | 92 | } _ZN5faiss11read_vectorINS_16MaybeOwnedVectorIhEEEEvRT_PNS_8IOReaderE Line | Count | Source | 172 | 3 | void read_vector(VectorT& target, IOReader* f) { | 173 | | // size is not known beforehand, no size multiplication | 174 | 3 | if (read_vector_base<VectorT>(target, f, std::nullopt, std::nullopt)) { | 175 | 0 | return; | 176 | 0 | } | 177 | | | 178 | | // the default case | 179 | 12 | READVECTOR(target); | 180 | 12 | } |
_ZN5faiss11read_vectorINS_16MaybeOwnedVectorIiEEEEvRT_PNS_8IOReaderE Line | Count | Source | 172 | 20 | void read_vector(VectorT& target, IOReader* f) { | 173 | | // size is not known beforehand, no size multiplication | 174 | 20 | if (read_vector_base<VectorT>(target, f, std::nullopt, std::nullopt)) { | 175 | 0 | return; | 176 | 0 | } | 177 | | | 178 | | // the default case | 179 | 80 | READVECTOR(target); | 180 | 80 | } |
|
181 | | |
182 | | // a replacement for READXBVECTOR |
183 | | template <typename VectorT> |
184 | 22 | void read_xb_vector(VectorT& target, IOReader* f) { |
185 | | // size is not known beforehand, nultiply the size 4x |
186 | 22 | if (read_vector_base<VectorT>(target, f, std::nullopt, 4)) { |
187 | 0 | return; |
188 | 0 | } |
189 | | |
190 | | // the default case |
191 | 88 | READXBVECTOR(target); |
192 | 88 | } |
193 | | |
194 | | /************************************************************* |
195 | | * Read |
196 | | **************************************************************/ |
197 | | |
198 | 50 | void read_index_header(Index* idx, IOReader* f) { |
199 | 50 | READ1(idx->d); |
200 | 50 | READ1(idx->ntotal); |
201 | 50 | idx_t dummy; |
202 | 50 | READ1(dummy); |
203 | 50 | READ1(dummy); |
204 | 50 | READ1(idx->is_trained); |
205 | 50 | READ1(idx->metric_type); |
206 | 50 | if (idx->metric_type > 1) { |
207 | 0 | READ1(idx->metric_arg); |
208 | 0 | } |
209 | 50 | idx->verbose = false; |
210 | 50 | } |
211 | | |
212 | 0 | VectorTransform* read_VectorTransform(IOReader* f) { |
213 | 0 | uint32_t h; |
214 | 0 | READ1(h); |
215 | 0 | VectorTransform* vt = nullptr; |
216 | |
|
217 | 0 | if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") || |
218 | 0 | h == fourcc("PcAm") || h == fourcc("Viqm") || h == fourcc("Pcam")) { |
219 | 0 | LinearTransform* lt = nullptr; |
220 | 0 | if (h == fourcc("rrot")) { |
221 | 0 | lt = new RandomRotationMatrix(); |
222 | 0 | } else if ( |
223 | 0 | h == fourcc("PCAm") || h == fourcc("PcAm") || |
224 | 0 | h == fourcc("Pcam")) { |
225 | 0 | PCAMatrix* pca = new PCAMatrix(); |
226 | 0 | READ1(pca->eigen_power); |
227 | 0 | if (h == fourcc("Pcam")) { |
228 | 0 | READ1(pca->epsilon); |
229 | 0 | } |
230 | 0 | READ1(pca->random_rotation); |
231 | 0 | if (h != fourcc("PCAm")) { |
232 | 0 | READ1(pca->balanced_bins); |
233 | 0 | } |
234 | 0 | READVECTOR(pca->mean); |
235 | 0 | READVECTOR(pca->eigenvalues); |
236 | 0 | READVECTOR(pca->PCAMat); |
237 | 0 | lt = pca; |
238 | 0 | } else if (h == fourcc("Viqm")) { |
239 | 0 | ITQMatrix* itqm = new ITQMatrix(); |
240 | 0 | READ1(itqm->max_iter); |
241 | 0 | READ1(itqm->seed); |
242 | 0 | lt = itqm; |
243 | 0 | } else if (h == fourcc("LTra")) { |
244 | 0 | lt = new LinearTransform(); |
245 | 0 | } |
246 | 0 | READ1(lt->have_bias); |
247 | 0 | READVECTOR(lt->A); |
248 | 0 | READVECTOR(lt->b); |
249 | 0 | FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out); |
250 | 0 | FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out); |
251 | 0 | lt->set_is_orthonormal(); |
252 | 0 | vt = lt; |
253 | 0 | } else if (h == fourcc("RmDT")) { |
254 | 0 | RemapDimensionsTransform* rdt = new RemapDimensionsTransform(); |
255 | 0 | READVECTOR(rdt->map); |
256 | 0 | vt = rdt; |
257 | 0 | } else if (h == fourcc("VNrm")) { |
258 | 0 | NormalizationTransform* nt = new NormalizationTransform(); |
259 | 0 | READ1(nt->norm); |
260 | 0 | vt = nt; |
261 | 0 | } else if (h == fourcc("VCnt")) { |
262 | 0 | CenteringTransform* ct = new CenteringTransform(); |
263 | 0 | READVECTOR(ct->mean); |
264 | 0 | vt = ct; |
265 | 0 | } else if (h == fourcc("Viqt")) { |
266 | 0 | ITQTransform* itqt = new ITQTransform(); |
267 | |
|
268 | 0 | READVECTOR(itqt->mean); |
269 | 0 | READ1(itqt->do_pca); |
270 | 0 | { |
271 | 0 | ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f)); |
272 | 0 | FAISS_THROW_IF_NOT(itqm); |
273 | 0 | itqt->itq = *itqm; |
274 | 0 | delete itqm; |
275 | 0 | } |
276 | 0 | { |
277 | 0 | LinearTransform* pi = |
278 | 0 | dynamic_cast<LinearTransform*>(read_VectorTransform(f)); |
279 | 0 | FAISS_THROW_IF_NOT(pi); |
280 | 0 | itqt->pca_then_itq = *pi; |
281 | 0 | delete pi; |
282 | 0 | } |
283 | 0 | vt = itqt; |
284 | 0 | } else { |
285 | 0 | FAISS_THROW_FMT( |
286 | 0 | "fourcc %ud (\"%s\") not recognized in %s", |
287 | 0 | h, |
288 | 0 | fourcc_inv_printable(h).c_str(), |
289 | 0 | f->name.c_str()); |
290 | 0 | } |
291 | 0 | READ1(vt->d_in); |
292 | 0 | READ1(vt->d_out); |
293 | 0 | READ1(vt->is_trained); |
294 | 0 | return vt; |
295 | 0 | } |
296 | | |
297 | | static void read_ArrayInvertedLists_sizes( |
298 | | IOReader* f, |
299 | 5 | std::vector<size_t>& sizes) { |
300 | 5 | uint32_t list_type; |
301 | 5 | READ1(list_type); |
302 | 5 | if (list_type == fourcc("full")) { |
303 | 3 | size_t os = sizes.size(); |
304 | 9 | READVECTOR(sizes); |
305 | 9 | FAISS_THROW_IF_NOT(os == sizes.size()); |
306 | 9 | } else if (list_type == fourcc("sprs")) { |
307 | 2 | std::vector<size_t> idsizes; |
308 | 6 | READVECTOR(idsizes); |
309 | 6 | for (size_t j = 0; j < idsizes.size(); j += 2) { |
310 | 0 | FAISS_THROW_IF_NOT(idsizes[j] < sizes.size()); |
311 | 0 | sizes[idsizes[j]] = idsizes[j + 1]; |
312 | 0 | } |
313 | 6 | } else { |
314 | 0 | FAISS_THROW_FMT( |
315 | 0 | "list_type %ud (\"%s\") not recognized", |
316 | 0 | list_type, |
317 | 0 | fourcc_inv_printable(list_type).c_str()); |
318 | 0 | } |
319 | 5 | } |
320 | | |
321 | 5 | InvertedLists* read_InvertedLists(IOReader* f, int io_flags) { |
322 | 5 | uint32_t h; |
323 | 5 | READ1(h); |
324 | 5 | if (h == fourcc("il00")) { |
325 | 0 | fprintf(stderr, |
326 | 0 | "read_InvertedLists:" |
327 | 0 | " WARN! inverted lists not stored with IVF object\n"); |
328 | 0 | return nullptr; |
329 | 5 | } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) { |
330 | 5 | auto ails = new ArrayInvertedLists(0, 0); |
331 | 5 | READ1(ails->nlist); |
332 | 5 | READ1(ails->code_size); |
333 | 5 | ails->ids.resize(ails->nlist); |
334 | 5 | ails->codes.resize(ails->nlist); |
335 | 5 | std::vector<size_t> sizes(ails->nlist); |
336 | 5 | read_ArrayInvertedLists_sizes(f, sizes); |
337 | 29 | for (size_t i = 0; i < ails->nlist; i++) { |
338 | 24 | ails->ids[i].resize(sizes[i]); |
339 | 24 | ails->codes[i].resize(sizes[i] * ails->code_size); |
340 | 24 | } |
341 | 29 | for (size_t i = 0; i < ails->nlist; i++) { |
342 | 24 | size_t n = ails->ids[i].size(); |
343 | 24 | if (n > 0) { |
344 | 12 | read_vector_with_known_size( |
345 | 12 | ails->codes[i], f, n * ails->code_size); |
346 | 12 | read_vector_with_known_size(ails->ids[i], f, n); |
347 | 12 | } |
348 | 24 | } |
349 | 5 | return ails; |
350 | | |
351 | 5 | } else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) { |
352 | | // code is always ilxx where xx is specific to the type of invlists we |
353 | | // want so we get the 16 high bits from the io_flag and the 16 low bits |
354 | | // as "il" |
355 | 0 | int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff); |
356 | 0 | size_t nlist, code_size; |
357 | 0 | READ1(nlist); |
358 | 0 | READ1(code_size); |
359 | 0 | std::vector<size_t> sizes(nlist); |
360 | 0 | read_ArrayInvertedLists_sizes(f, sizes); |
361 | 0 | return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists( |
362 | 0 | f, io_flags, nlist, code_size, sizes); |
363 | 0 | } else { |
364 | 0 | return InvertedListsIOHook::lookup(h)->read(f, io_flags); |
365 | 0 | } |
366 | 5 | } |
367 | | |
368 | 5 | void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) { |
369 | 5 | InvertedLists* ils = read_InvertedLists(f, io_flags); |
370 | 5 | if (ils) { |
371 | 5 | FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist); |
372 | 5 | FAISS_THROW_IF_NOT( |
373 | 5 | ils->code_size == InvertedLists::INVALID_CODE_SIZE || |
374 | 5 | ils->code_size == ivf->code_size); |
375 | 5 | } |
376 | 5 | ivf->invlists = ils; |
377 | 5 | ivf->own_invlists = true; |
378 | 5 | } |
379 | | |
380 | 2 | void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) { |
381 | 2 | READ1(pq->d); |
382 | 2 | READ1(pq->M); |
383 | 2 | READ1(pq->nbits); |
384 | 2 | pq->set_derived_values(); |
385 | 6 | READVECTOR(pq->centroids); |
386 | 6 | } |
387 | | |
388 | 0 | static void read_ResidualQuantizer_old(ResidualQuantizer* rq, IOReader* f) { |
389 | 0 | READ1(rq->d); |
390 | 0 | READ1(rq->M); |
391 | 0 | READVECTOR(rq->nbits); |
392 | 0 | READ1(rq->is_trained); |
393 | 0 | READ1(rq->train_type); |
394 | 0 | READ1(rq->max_beam_size); |
395 | 0 | READVECTOR(rq->codebooks); |
396 | 0 | READ1(rq->search_type); |
397 | 0 | READ1(rq->norm_min); |
398 | 0 | READ1(rq->norm_max); |
399 | 0 | rq->set_derived_values(); |
400 | 0 | } |
401 | | |
402 | 0 | static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) { |
403 | 0 | READ1(aq->d); |
404 | 0 | READ1(aq->M); |
405 | 0 | READVECTOR(aq->nbits); |
406 | 0 | READ1(aq->is_trained); |
407 | 0 | READVECTOR(aq->codebooks); |
408 | 0 | READ1(aq->search_type); |
409 | 0 | READ1(aq->norm_min); |
410 | 0 | READ1(aq->norm_max); |
411 | 0 | if (aq->search_type == AdditiveQuantizer::ST_norm_cqint8 || |
412 | 0 | aq->search_type == AdditiveQuantizer::ST_norm_cqint4 || |
413 | 0 | aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 || |
414 | 0 | aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) { |
415 | 0 | read_xb_vector(aq->qnorm.codes, f); |
416 | 0 | aq->qnorm.ntotal = aq->qnorm.codes.size() / 4; |
417 | 0 | aq->qnorm.update_permutation(); |
418 | 0 | } |
419 | |
|
420 | 0 | if (aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 || |
421 | 0 | aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) { |
422 | 0 | READVECTOR(aq->norm_tabs); |
423 | 0 | } |
424 | | |
425 | 0 | aq->set_derived_values(); |
426 | 0 | } |
427 | | |
428 | | static void read_ResidualQuantizer( |
429 | | ResidualQuantizer* rq, |
430 | | IOReader* f, |
431 | 0 | int io_flags) { |
432 | 0 | read_AdditiveQuantizer(rq, f); |
433 | 0 | READ1(rq->train_type); |
434 | 0 | READ1(rq->max_beam_size); |
435 | 0 | if ((rq->train_type & ResidualQuantizer::Skip_codebook_tables) || |
436 | 0 | (io_flags & IO_FLAG_SKIP_PRECOMPUTE_TABLE)) { |
437 | | // don't precompute the tables |
438 | 0 | } else { |
439 | 0 | rq->compute_codebook_tables(); |
440 | 0 | } |
441 | 0 | } |
442 | | |
443 | 0 | static void read_LocalSearchQuantizer(LocalSearchQuantizer* lsq, IOReader* f) { |
444 | 0 | read_AdditiveQuantizer(lsq, f); |
445 | 0 | READ1(lsq->K); |
446 | 0 | READ1(lsq->train_iters); |
447 | 0 | READ1(lsq->encode_ils_iters); |
448 | 0 | READ1(lsq->train_ils_iters); |
449 | 0 | READ1(lsq->icm_iters); |
450 | 0 | READ1(lsq->p); |
451 | 0 | READ1(lsq->lambd); |
452 | 0 | READ1(lsq->chunk_size); |
453 | 0 | READ1(lsq->random_seed); |
454 | 0 | READ1(lsq->nperts); |
455 | 0 | READ1(lsq->update_codebooks_with_double); |
456 | 0 | } |
457 | | |
458 | | static void read_ProductAdditiveQuantizer( |
459 | | ProductAdditiveQuantizer* paq, |
460 | 0 | IOReader* f) { |
461 | 0 | read_AdditiveQuantizer(paq, f); |
462 | 0 | READ1(paq->nsplits); |
463 | 0 | } |
464 | | |
465 | | static void read_ProductResidualQuantizer( |
466 | | ProductResidualQuantizer* prq, |
467 | | IOReader* f, |
468 | 0 | int io_flags) { |
469 | 0 | read_ProductAdditiveQuantizer(prq, f); |
470 | |
|
471 | 0 | for (size_t i = 0; i < prq->nsplits; i++) { |
472 | 0 | auto rq = new ResidualQuantizer(); |
473 | 0 | read_ResidualQuantizer(rq, f, io_flags); |
474 | 0 | prq->quantizers.push_back(rq); |
475 | 0 | } |
476 | 0 | } |
477 | | |
478 | | static void read_ProductLocalSearchQuantizer( |
479 | | ProductLocalSearchQuantizer* plsq, |
480 | 0 | IOReader* f) { |
481 | 0 | read_ProductAdditiveQuantizer(plsq, f); |
482 | |
|
483 | 0 | for (size_t i = 0; i < plsq->nsplits; i++) { |
484 | 0 | auto lsq = new LocalSearchQuantizer(); |
485 | 0 | read_LocalSearchQuantizer(lsq, f); |
486 | 0 | plsq->quantizers.push_back(lsq); |
487 | 0 | } |
488 | 0 | } |
489 | | |
490 | 4 | void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) { |
491 | 4 | READ1(ivsc->qtype); |
492 | 4 | READ1(ivsc->rangestat); |
493 | 4 | READ1(ivsc->rangestat_arg); |
494 | 4 | READ1(ivsc->d); |
495 | 4 | READ1(ivsc->code_size); |
496 | 12 | READVECTOR(ivsc->trained); |
497 | 12 | ivsc->set_derived_sizes(); |
498 | 12 | } |
499 | | |
500 | 20 | static void read_HNSW(HNSW* hnsw, IOReader* f) { |
501 | 60 | READVECTOR(hnsw->assign_probas); |
502 | 60 | READVECTOR(hnsw->cum_nneighbor_per_level); |
503 | 60 | READVECTOR(hnsw->levels); |
504 | 60 | READVECTOR(hnsw->offsets); |
505 | 60 | read_vector(hnsw->neighbors, f); |
506 | | |
507 | 60 | READ1(hnsw->entry_point); |
508 | 20 | READ1(hnsw->max_level); |
509 | 20 | READ1(hnsw->efConstruction); |
510 | 20 | READ1(hnsw->efSearch); |
511 | | |
512 | | // // deprecated field |
513 | | // READ1(hnsw->upper_beam); |
514 | 20 | READ1_DUMMY(int) |
515 | 20 | } |
516 | | |
517 | 0 | static void read_NSG(NSG* nsg, IOReader* f) { |
518 | 0 | READ1(nsg->ntotal); |
519 | 0 | READ1(nsg->R); |
520 | 0 | READ1(nsg->L); |
521 | 0 | READ1(nsg->C); |
522 | 0 | READ1(nsg->search_L); |
523 | 0 | READ1(nsg->enterpoint); |
524 | 0 | READ1(nsg->is_built); |
525 | |
|
526 | 0 | if (!nsg->is_built) { |
527 | 0 | return; |
528 | 0 | } |
529 | | |
530 | 0 | constexpr int EMPTY_ID = -1; |
531 | 0 | int N = nsg->ntotal; |
532 | 0 | int R = nsg->R; |
533 | 0 | auto& graph = nsg->final_graph; |
534 | 0 | graph = std::make_shared<nsg::Graph<int>>(N, R); |
535 | 0 | std::fill_n(graph->data, N * R, EMPTY_ID); |
536 | |
|
537 | 0 | for (int i = 0; i < N; i++) { |
538 | 0 | for (int j = 0; j < R + 1; j++) { |
539 | 0 | int id; |
540 | 0 | READ1(id); |
541 | 0 | if (id != EMPTY_ID) { |
542 | 0 | graph->at(i, j) = id; |
543 | 0 | } else { |
544 | 0 | break; |
545 | 0 | } |
546 | 0 | } |
547 | 0 | } |
548 | 0 | } |
549 | | |
550 | 0 | static void read_NNDescent(NNDescent* nnd, IOReader* f) { |
551 | 0 | READ1(nnd->ntotal); |
552 | 0 | READ1(nnd->d); |
553 | 0 | READ1(nnd->K); |
554 | 0 | READ1(nnd->S); |
555 | 0 | READ1(nnd->R); |
556 | 0 | READ1(nnd->L); |
557 | 0 | READ1(nnd->iter); |
558 | 0 | READ1(nnd->search_L); |
559 | 0 | READ1(nnd->random_seed); |
560 | 0 | READ1(nnd->has_built); |
561 | |
|
562 | 0 | READVECTOR(nnd->final_graph); |
563 | 0 | } |
564 | | |
565 | 0 | ProductQuantizer* read_ProductQuantizer(const char* fname) { |
566 | 0 | FileIOReader reader(fname); |
567 | 0 | return read_ProductQuantizer(&reader); |
568 | 0 | } |
569 | | |
570 | 0 | ProductQuantizer* read_ProductQuantizer(IOReader* reader) { |
571 | 0 | ProductQuantizer* pq = new ProductQuantizer(); |
572 | 0 | std::unique_ptr<ProductQuantizer> del(pq); |
573 | |
|
574 | 0 | read_ProductQuantizer(pq, reader); |
575 | 0 | del.release(); |
576 | 0 | return pq; |
577 | 0 | } |
578 | | |
579 | 0 | static void read_RaBitQuantizer(RaBitQuantizer* rabitq, IOReader* f) { |
580 | | // don't care about rabitq->centroid |
581 | 0 | READ1(rabitq->d); |
582 | 0 | READ1(rabitq->code_size); |
583 | 0 | READ1(rabitq->metric_type); |
584 | 0 | } |
585 | | |
586 | 5 | void read_direct_map(DirectMap* dm, IOReader* f) { |
587 | 5 | char maintain_direct_map; |
588 | 5 | READ1(maintain_direct_map); |
589 | 5 | dm->type = (DirectMap::Type)maintain_direct_map; |
590 | 15 | READVECTOR(dm->array); |
591 | 15 | if (dm->type == DirectMap::Hashtable) { |
592 | 0 | std::vector<std::pair<idx_t, idx_t>> v; |
593 | 0 | READVECTOR(v); |
594 | 0 | std::unordered_map<idx_t, idx_t>& map = dm->hashtable; |
595 | 0 | map.reserve(v.size()); |
596 | 0 | for (auto it : v) { |
597 | 0 | map[it.first] = it.second; |
598 | 0 | } |
599 | 0 | } |
600 | 15 | } |
601 | | |
602 | | void read_ivf_header( |
603 | | IndexIVF* ivf, |
604 | | IOReader* f, |
605 | 5 | std::vector<std::vector<idx_t>>* ids) { |
606 | 5 | read_index_header(ivf, f); |
607 | 5 | READ1(ivf->nlist); |
608 | 5 | READ1(ivf->nprobe); |
609 | 5 | ivf->quantizer = read_index(f); |
610 | 5 | ivf->own_fields = true; |
611 | 5 | if (ids) { // used in legacy "Iv" formats |
612 | 0 | ids->resize(ivf->nlist); |
613 | 0 | for (size_t i = 0; i < ivf->nlist; i++) |
614 | 0 | READVECTOR((*ids)[i]); |
615 | 0 | } |
616 | 5 | read_direct_map(&ivf->direct_map, f); |
617 | 5 | } |
618 | | |
619 | | // used for legacy formats |
620 | | ArrayInvertedLists* set_array_invlist( |
621 | | IndexIVF* ivf, |
622 | 0 | std::vector<std::vector<idx_t>>& ids) { |
623 | 0 | ArrayInvertedLists* ail = |
624 | 0 | new ArrayInvertedLists(ivf->nlist, ivf->code_size); |
625 | |
|
626 | 0 | ail->ids.resize(ids.size()); |
627 | 0 | for (size_t i = 0; i < ids.size(); i++) { |
628 | 0 | ail->ids[i] = MaybeOwnedVector<idx_t>(std::move(ids[i])); |
629 | 0 | } |
630 | |
|
631 | 0 | ivf->invlists = ail; |
632 | 0 | ivf->own_invlists = true; |
633 | 0 | return ail; |
634 | 0 | } |
635 | | |
636 | 1 | static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) { |
637 | 1 | bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ"); |
638 | | |
639 | 1 | IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR") |
640 | 1 | ? new IndexIVFPQR() |
641 | 1 | : nullptr; |
642 | 1 | IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ(); |
643 | | |
644 | 1 | std::vector<std::vector<idx_t>> ids; |
645 | 1 | read_ivf_header(ivpq, f, legacy ? &ids : nullptr); |
646 | 1 | READ1(ivpq->by_residual); |
647 | 1 | READ1(ivpq->code_size); |
648 | 1 | read_ProductQuantizer(&ivpq->pq, f); |
649 | | |
650 | 1 | if (legacy) { |
651 | 0 | ArrayInvertedLists* ail = set_array_invlist(ivpq, ids); |
652 | 0 | for (size_t i = 0; i < ail->nlist; i++) |
653 | 0 | READVECTOR(ail->codes[i]); |
654 | 1 | } else { |
655 | 1 | read_InvertedLists(ivpq, f, io_flags); |
656 | 1 | } |
657 | | |
658 | 1 | if (ivpq->is_trained) { |
659 | | // precomputed table not stored. It is cheaper to recompute it. |
660 | | // precompute_table() may be disabled with a flag. |
661 | 0 | ivpq->use_precomputed_table = 0; |
662 | 0 | if (ivpq->by_residual) { |
663 | 0 | if ((io_flags & IO_FLAG_SKIP_PRECOMPUTE_TABLE) == 0) { |
664 | 0 | ivpq->precompute_table(); |
665 | 0 | } |
666 | 0 | } |
667 | 0 | if (ivfpqr) { |
668 | 0 | read_ProductQuantizer(&ivfpqr->refine_pq, f); |
669 | 0 | READVECTOR(ivfpqr->refine_codes); |
670 | 0 | READ1(ivfpqr->k_factor); |
671 | 0 | } |
672 | 0 | } |
673 | 1 | return ivpq; |
674 | 1 | } |
675 | | |
// Compatibility switch consulted by read_index(): when it equals 2, an
// "IxPT" (IndexPreTransform) record is read as holding exactly one transform
// and no stored transform count.
int read_old_fmt_hack = 0;
677 | | |
678 | 50 | Index* read_index(IOReader* f, int io_flags) { |
679 | 50 | Index* idx = nullptr; |
680 | 50 | uint32_t h; |
681 | 50 | READ1(h); |
682 | 50 | if (h == fourcc("null")) { |
683 | | // denotes a missing index, useful for some cases |
684 | 0 | return nullptr; |
685 | 50 | } else if ( |
686 | 50 | h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) { |
687 | 22 | IndexFlat* idxf; |
688 | 22 | if (h == fourcc("IxFI")) { |
689 | 0 | idxf = new IndexFlatIP(); |
690 | 22 | } else if (h == fourcc("IxF2")) { |
691 | 22 | idxf = new IndexFlatL2(); |
692 | 22 | } else { |
693 | 0 | idxf = new IndexFlat(); |
694 | 0 | } |
695 | 22 | read_index_header(idxf, f); |
696 | 22 | idxf->code_size = idxf->d * sizeof(float); |
697 | 22 | read_xb_vector(idxf->codes, f); |
698 | 22 | FAISS_THROW_IF_NOT( |
699 | 22 | idxf->codes.size() == idxf->ntotal * idxf->code_size); |
700 | | // leak! |
701 | 22 | idx = idxf; |
702 | 28 | } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) { |
703 | 0 | IndexLSH* idxl = new IndexLSH(); |
704 | 0 | read_index_header(idxl, f); |
705 | 0 | READ1(idxl->nbits); |
706 | 0 | READ1(idxl->rotate_data); |
707 | 0 | READ1(idxl->train_thresholds); |
708 | 0 | READVECTOR(idxl->thresholds); |
709 | 0 | int code_size_i; |
710 | 0 | READ1(code_size_i); |
711 | 0 | idxl->code_size = code_size_i; |
712 | 0 | if (h == fourcc("IxHE")) { |
713 | 0 | FAISS_THROW_IF_NOT_FMT( |
714 | 0 | idxl->nbits % 64 == 0, |
715 | 0 | "can only read old format IndexLSH with " |
716 | 0 | "nbits multiple of 64 (got %d)", |
717 | 0 | (int)idxl->nbits); |
718 | | // leak |
719 | 0 | idxl->code_size *= 8; |
720 | 0 | } |
721 | 0 | { |
722 | 0 | RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>( |
723 | 0 | read_VectorTransform(f)); |
724 | 0 | FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation"); |
725 | 0 | idxl->rrot = *rrot; |
726 | 0 | delete rrot; |
727 | 0 | } |
728 | 0 | read_vector(idxl->codes, f); |
729 | 0 | FAISS_THROW_IF_NOT( |
730 | 0 | idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits); |
731 | 0 | FAISS_THROW_IF_NOT( |
732 | 0 | idxl->codes.size() == idxl->ntotal * idxl->code_size); |
733 | 0 | idx = idxl; |
734 | 28 | } else if ( |
735 | 28 | h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) { |
736 | | // IxPQ and IxPo were merged into the same IndexPQ object |
737 | 1 | IndexPQ* idxp = new IndexPQ(); |
738 | 1 | read_index_header(idxp, f); |
739 | 1 | read_ProductQuantizer(&idxp->pq, f); |
740 | 1 | idxp->code_size = idxp->pq.code_size; |
741 | 1 | read_vector(idxp->codes, f); |
742 | 1 | if (h == fourcc("IxPo") || h == fourcc("IxPq")) { |
743 | 1 | READ1(idxp->search_type); |
744 | 1 | READ1(idxp->encode_signs); |
745 | 1 | READ1(idxp->polysemous_ht); |
746 | 1 | } |
747 | | // Old versions of PQ all had metric_type set to INNER_PRODUCT |
748 | | // when they were in fact using L2. Therefore, we force metric type |
749 | | // to L2 when the old format is detected |
750 | 1 | if (h == fourcc("IxPQ") || h == fourcc("IxPo")) { |
751 | 0 | idxp->metric_type = METRIC_L2; |
752 | 0 | } |
753 | 1 | idx = idxp; |
754 | 27 | } else if (h == fourcc("IxRQ") || h == fourcc("IxRq")) { |
755 | 0 | IndexResidualQuantizer* idxr = new IndexResidualQuantizer(); |
756 | 0 | read_index_header(idxr, f); |
757 | 0 | if (h == fourcc("IxRQ")) { |
758 | 0 | read_ResidualQuantizer_old(&idxr->rq, f); |
759 | 0 | } else { |
760 | 0 | read_ResidualQuantizer(&idxr->rq, f, io_flags); |
761 | 0 | } |
762 | 0 | READ1(idxr->code_size); |
763 | 0 | read_vector(idxr->codes, f); |
764 | 0 | idx = idxr; |
765 | 27 | } else if (h == fourcc("IxLS")) { |
766 | 0 | auto idxr = new IndexLocalSearchQuantizer(); |
767 | 0 | read_index_header(idxr, f); |
768 | 0 | read_LocalSearchQuantizer(&idxr->lsq, f); |
769 | 0 | READ1(idxr->code_size); |
770 | 0 | read_vector(idxr->codes, f); |
771 | 0 | idx = idxr; |
772 | 27 | } else if (h == fourcc("IxPR")) { |
773 | 0 | auto idxpr = new IndexProductResidualQuantizer(); |
774 | 0 | read_index_header(idxpr, f); |
775 | 0 | read_ProductResidualQuantizer(&idxpr->prq, f, io_flags); |
776 | 0 | READ1(idxpr->code_size); |
777 | 0 | read_vector(idxpr->codes, f); |
778 | 0 | idx = idxpr; |
779 | 27 | } else if (h == fourcc("IxPL")) { |
780 | 0 | auto idxpl = new IndexProductLocalSearchQuantizer(); |
781 | 0 | read_index_header(idxpl, f); |
782 | 0 | read_ProductLocalSearchQuantizer(&idxpl->plsq, f); |
783 | 0 | READ1(idxpl->code_size); |
784 | 0 | read_vector(idxpl->codes, f); |
785 | 0 | idx = idxpl; |
786 | 27 | } else if (h == fourcc("ImRQ")) { |
787 | 0 | ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer(); |
788 | 0 | read_index_header(idxr, f); |
789 | 0 | read_ResidualQuantizer(&idxr->rq, f, io_flags); |
790 | 0 | READ1(idxr->beam_factor); |
791 | 0 | if (io_flags & IO_FLAG_SKIP_PRECOMPUTE_TABLE) { |
792 | | // then we force the beam factor to -1 |
793 | | // which skips the table precomputation. |
794 | 0 | idxr->beam_factor = -1; |
795 | 0 | } |
796 | 0 | idxr->set_beam_factor(idxr->beam_factor); |
797 | 0 | idx = idxr; |
798 | 27 | } else if ( |
799 | 27 | h == fourcc("ILfs") || h == fourcc("IRfs") || h == fourcc("IPRf") || |
800 | 27 | h == fourcc("IPLf")) { |
801 | 0 | bool is_LSQ = h == fourcc("ILfs"); |
802 | 0 | bool is_RQ = h == fourcc("IRfs"); |
803 | 0 | bool is_PLSQ = h == fourcc("IPLf"); |
804 | |
|
805 | 0 | IndexAdditiveQuantizerFastScan* idxaqfs; |
806 | 0 | if (is_LSQ) { |
807 | 0 | idxaqfs = new IndexLocalSearchQuantizerFastScan(); |
808 | 0 | } else if (is_RQ) { |
809 | 0 | idxaqfs = new IndexResidualQuantizerFastScan(); |
810 | 0 | } else if (is_PLSQ) { |
811 | 0 | idxaqfs = new IndexProductLocalSearchQuantizerFastScan(); |
812 | 0 | } else { |
813 | 0 | idxaqfs = new IndexProductResidualQuantizerFastScan(); |
814 | 0 | } |
815 | 0 | read_index_header(idxaqfs, f); |
816 | |
|
817 | 0 | if (is_LSQ) { |
818 | 0 | read_LocalSearchQuantizer((LocalSearchQuantizer*)idxaqfs->aq, f); |
819 | 0 | } else if (is_RQ) { |
820 | 0 | read_ResidualQuantizer( |
821 | 0 | (ResidualQuantizer*)idxaqfs->aq, f, io_flags); |
822 | 0 | } else if (is_PLSQ) { |
823 | 0 | read_ProductLocalSearchQuantizer( |
824 | 0 | (ProductLocalSearchQuantizer*)idxaqfs->aq, f); |
825 | 0 | } else { |
826 | 0 | read_ProductResidualQuantizer( |
827 | 0 | (ProductResidualQuantizer*)idxaqfs->aq, f, io_flags); |
828 | 0 | } |
829 | |
|
830 | 0 | READ1(idxaqfs->implem); |
831 | 0 | READ1(idxaqfs->bbs); |
832 | 0 | READ1(idxaqfs->qbs); |
833 | |
|
834 | 0 | READ1(idxaqfs->M); |
835 | 0 | READ1(idxaqfs->nbits); |
836 | 0 | READ1(idxaqfs->ksub); |
837 | 0 | READ1(idxaqfs->code_size); |
838 | 0 | READ1(idxaqfs->ntotal2); |
839 | 0 | READ1(idxaqfs->M2); |
840 | |
|
841 | 0 | READ1(idxaqfs->rescale_norm); |
842 | 0 | READ1(idxaqfs->norm_scale); |
843 | 0 | READ1(idxaqfs->max_train_points); |
844 | |
|
845 | 0 | READVECTOR(idxaqfs->codes); |
846 | 0 | idx = idxaqfs; |
847 | 27 | } else if ( |
848 | 27 | h == fourcc("IVLf") || h == fourcc("IVRf") || h == fourcc("NPLf") || |
849 | 27 | h == fourcc("NPRf")) { |
850 | 0 | bool is_LSQ = h == fourcc("IVLf"); |
851 | 0 | bool is_RQ = h == fourcc("IVRf"); |
852 | 0 | bool is_PLSQ = h == fourcc("NPLf"); |
853 | |
|
854 | 0 | IndexIVFAdditiveQuantizerFastScan* ivaqfs; |
855 | 0 | if (is_LSQ) { |
856 | 0 | ivaqfs = new IndexIVFLocalSearchQuantizerFastScan(); |
857 | 0 | } else if (is_RQ) { |
858 | 0 | ivaqfs = new IndexIVFResidualQuantizerFastScan(); |
859 | 0 | } else if (is_PLSQ) { |
860 | 0 | ivaqfs = new IndexIVFProductLocalSearchQuantizerFastScan(); |
861 | 0 | } else { |
862 | 0 | ivaqfs = new IndexIVFProductResidualQuantizerFastScan(); |
863 | 0 | } |
864 | 0 | read_ivf_header(ivaqfs, f); |
865 | |
|
866 | 0 | if (is_LSQ) { |
867 | 0 | read_LocalSearchQuantizer((LocalSearchQuantizer*)ivaqfs->aq, f); |
868 | 0 | } else if (is_RQ) { |
869 | 0 | read_ResidualQuantizer((ResidualQuantizer*)ivaqfs->aq, f, io_flags); |
870 | 0 | } else if (is_PLSQ) { |
871 | 0 | read_ProductLocalSearchQuantizer( |
872 | 0 | (ProductLocalSearchQuantizer*)ivaqfs->aq, f); |
873 | 0 | } else { |
874 | 0 | read_ProductResidualQuantizer( |
875 | 0 | (ProductResidualQuantizer*)ivaqfs->aq, f, io_flags); |
876 | 0 | } |
877 | |
|
878 | 0 | READ1(ivaqfs->by_residual); |
879 | 0 | READ1(ivaqfs->implem); |
880 | 0 | READ1(ivaqfs->bbs); |
881 | 0 | READ1(ivaqfs->qbs); |
882 | |
|
883 | 0 | READ1(ivaqfs->M); |
884 | 0 | READ1(ivaqfs->nbits); |
885 | 0 | READ1(ivaqfs->ksub); |
886 | 0 | READ1(ivaqfs->code_size); |
887 | 0 | READ1(ivaqfs->qbs2); |
888 | 0 | READ1(ivaqfs->M2); |
889 | |
|
890 | 0 | READ1(ivaqfs->rescale_norm); |
891 | 0 | READ1(ivaqfs->norm_scale); |
892 | 0 | READ1(ivaqfs->max_train_points); |
893 | |
|
894 | 0 | read_InvertedLists(ivaqfs, f, io_flags); |
895 | 0 | ivaqfs->init_code_packer(); |
896 | 0 | idx = ivaqfs; |
897 | 27 | } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy |
898 | 0 | IndexIVFFlat* ivfl = new IndexIVFFlat(); |
899 | 0 | std::vector<std::vector<idx_t>> ids; |
900 | 0 | read_ivf_header(ivfl, f, &ids); |
901 | 0 | ivfl->code_size = ivfl->d * sizeof(float); |
902 | 0 | ArrayInvertedLists* ail = set_array_invlist(ivfl, ids); |
903 | |
|
904 | 0 | if (h == fourcc("IvFL")) { |
905 | 0 | for (size_t i = 0; i < ivfl->nlist; i++) { |
906 | 0 | READVECTOR(ail->codes[i]); |
907 | 0 | } |
908 | 0 | } else { // old format |
909 | 0 | for (size_t i = 0; i < ivfl->nlist; i++) { |
910 | 0 | std::vector<float> vec; |
911 | 0 | READVECTOR(vec); |
912 | 0 | ail->codes[i].resize(vec.size() * sizeof(float)); |
913 | 0 | memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size()); |
914 | 0 | } |
915 | 0 | } |
916 | 0 | idx = ivfl; |
917 | 27 | } else if (h == fourcc("IwFd")) { |
918 | 0 | IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup(); |
919 | 0 | read_ivf_header(ivfl, f); |
920 | 0 | ivfl->code_size = ivfl->d * sizeof(float); |
921 | 0 | { |
922 | 0 | std::vector<idx_t> tab; |
923 | 0 | READVECTOR(tab); |
924 | 0 | for (long i = 0; i < tab.size(); i += 2) { |
925 | 0 | std::pair<idx_t, idx_t> pair(tab[i], tab[i + 1]); |
926 | 0 | ivfl->instances.insert(pair); |
927 | 0 | } |
928 | 0 | } |
929 | 0 | read_InvertedLists(ivfl, f, io_flags); |
930 | 0 | idx = ivfl; |
931 | 27 | } else if (h == fourcc("IwFl")) { |
932 | 2 | IndexIVFFlat* ivfl = new IndexIVFFlat(); |
933 | 2 | read_ivf_header(ivfl, f); |
934 | 2 | ivfl->code_size = ivfl->d * sizeof(float); |
935 | 2 | read_InvertedLists(ivfl, f, io_flags); |
936 | 2 | idx = ivfl; |
937 | 25 | } else if (h == fourcc("IxSQ")) { |
938 | 2 | IndexScalarQuantizer* idxs = new IndexScalarQuantizer(); |
939 | 2 | read_index_header(idxs, f); |
940 | 2 | read_ScalarQuantizer(&idxs->sq, f); |
941 | 2 | read_vector(idxs->codes, f); |
942 | 2 | idxs->code_size = idxs->sq.code_size; |
943 | 2 | idx = idxs; |
944 | 23 | } else if (h == fourcc("IxLa")) { |
945 | 0 | int d, nsq, scale_nbit, r2; |
946 | 0 | READ1(d); |
947 | 0 | READ1(nsq); |
948 | 0 | READ1(scale_nbit); |
949 | 0 | READ1(r2); |
950 | 0 | IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2); |
951 | 0 | read_index_header(idxl, f); |
952 | 0 | READVECTOR(idxl->trained); |
953 | 0 | idx = idxl; |
954 | 23 | } else if (h == fourcc("IvSQ")) { // legacy |
955 | 0 | IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer(); |
956 | 0 | std::vector<std::vector<idx_t>> ids; |
957 | 0 | read_ivf_header(ivsc, f, &ids); |
958 | 0 | read_ScalarQuantizer(&ivsc->sq, f); |
959 | 0 | READ1(ivsc->code_size); |
960 | 0 | ArrayInvertedLists* ail = set_array_invlist(ivsc, ids); |
961 | 0 | for (int i = 0; i < ivsc->nlist; i++) |
962 | 0 | READVECTOR(ail->codes[i]); |
963 | 0 | idx = ivsc; |
964 | 23 | } else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) { |
965 | 2 | IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer(); |
966 | 2 | read_ivf_header(ivsc, f); |
967 | 2 | read_ScalarQuantizer(&ivsc->sq, f); |
968 | 2 | READ1(ivsc->code_size); |
969 | 2 | if (h == fourcc("IwSQ")) { |
970 | 0 | ivsc->by_residual = true; |
971 | 2 | } else { |
972 | 2 | READ1(ivsc->by_residual); |
973 | 2 | } |
974 | 2 | read_InvertedLists(ivsc, f, io_flags); |
975 | 2 | idx = ivsc; |
976 | 21 | } else if ( |
977 | 21 | h == fourcc("IwLS") || h == fourcc("IwRQ") || h == fourcc("IwPL") || |
978 | 21 | h == fourcc("IwPR")) { |
979 | 0 | bool is_LSQ = h == fourcc("IwLS"); |
980 | 0 | bool is_RQ = h == fourcc("IwRQ"); |
981 | 0 | bool is_PLSQ = h == fourcc("IwPL"); |
982 | 0 | IndexIVFAdditiveQuantizer* iva; |
983 | 0 | if (is_LSQ) { |
984 | 0 | iva = new IndexIVFLocalSearchQuantizer(); |
985 | 0 | } else if (is_RQ) { |
986 | 0 | iva = new IndexIVFResidualQuantizer(); |
987 | 0 | } else if (is_PLSQ) { |
988 | 0 | iva = new IndexIVFProductLocalSearchQuantizer(); |
989 | 0 | } else { |
990 | 0 | iva = new IndexIVFProductResidualQuantizer(); |
991 | 0 | } |
992 | 0 | read_ivf_header(iva, f); |
993 | 0 | READ1(iva->code_size); |
994 | 0 | if (is_LSQ) { |
995 | 0 | read_LocalSearchQuantizer((LocalSearchQuantizer*)iva->aq, f); |
996 | 0 | } else if (is_RQ) { |
997 | 0 | read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f, io_flags); |
998 | 0 | } else if (is_PLSQ) { |
999 | 0 | read_ProductLocalSearchQuantizer( |
1000 | 0 | (ProductLocalSearchQuantizer*)iva->aq, f); |
1001 | 0 | } else { |
1002 | 0 | read_ProductResidualQuantizer( |
1003 | 0 | (ProductResidualQuantizer*)iva->aq, f, io_flags); |
1004 | 0 | } |
1005 | 0 | READ1(iva->by_residual); |
1006 | 0 | READ1(iva->use_precomputed_table); |
1007 | 0 | read_InvertedLists(iva, f, io_flags); |
1008 | 0 | idx = iva; |
1009 | 21 | } else if (h == fourcc("IwSh")) { |
1010 | 0 | IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash(); |
1011 | 0 | read_ivf_header(ivsp, f); |
1012 | 0 | ivsp->vt = read_VectorTransform(f); |
1013 | 0 | ivsp->own_fields = true; |
1014 | 0 | READ1(ivsp->nbit); |
1015 | | // not stored by write_ivf_header |
1016 | 0 | ivsp->code_size = (ivsp->nbit + 7) / 8; |
1017 | 0 | READ1(ivsp->period); |
1018 | 0 | READ1(ivsp->threshold_type); |
1019 | 0 | READVECTOR(ivsp->trained); |
1020 | 0 | read_InvertedLists(ivsp, f, io_flags); |
1021 | 0 | idx = ivsp; |
1022 | 21 | } else if ( |
1023 | 21 | h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") || |
1024 | 21 | h == fourcc("IwQR")) { |
1025 | 1 | idx = read_ivfpq(f, h, io_flags); |
1026 | 20 | } else if (h == fourcc("IwIQ")) { |
1027 | 0 | auto* indep = new IndexIVFIndependentQuantizer(); |
1028 | 0 | indep->own_fields = true; |
1029 | 0 | read_index_header(indep, f); |
1030 | 0 | indep->quantizer = read_index(f, io_flags); |
1031 | 0 | bool has_vt; |
1032 | 0 | READ1(has_vt); |
1033 | 0 | if (has_vt) { |
1034 | 0 | indep->vt = read_VectorTransform(f); |
1035 | 0 | } |
1036 | 0 | indep->index_ivf = dynamic_cast<IndexIVF*>(read_index(f, io_flags)); |
1037 | 0 | FAISS_THROW_IF_NOT(indep->index_ivf); |
1038 | 0 | if (auto index_ivfpq = dynamic_cast<IndexIVFPQ*>(indep->index_ivf)) { |
1039 | 0 | READ1(index_ivfpq->use_precomputed_table); |
1040 | 0 | } |
1041 | 0 | idx = indep; |
1042 | 20 | } else if (h == fourcc("IxPT")) { |
1043 | 0 | IndexPreTransform* ixpt = new IndexPreTransform(); |
1044 | 0 | ixpt->own_fields = true; |
1045 | 0 | read_index_header(ixpt, f); |
1046 | 0 | int nt; |
1047 | 0 | if (read_old_fmt_hack == 2) { |
1048 | 0 | nt = 1; |
1049 | 0 | } else { |
1050 | 0 | READ1(nt); |
1051 | 0 | } |
1052 | 0 | for (int i = 0; i < nt; i++) { |
1053 | 0 | ixpt->chain.push_back(read_VectorTransform(f)); |
1054 | 0 | } |
1055 | 0 | ixpt->index = read_index(f, io_flags); |
1056 | 0 | idx = ixpt; |
1057 | 20 | } else if (h == fourcc("Imiq")) { |
1058 | 0 | MultiIndexQuantizer* imiq = new MultiIndexQuantizer(); |
1059 | 0 | read_index_header(imiq, f); |
1060 | 0 | read_ProductQuantizer(&imiq->pq, f); |
1061 | 0 | idx = imiq; |
1062 | 20 | } else if (h == fourcc("IxRF")) { |
1063 | 0 | IndexRefine* idxrf = new IndexRefine(); |
1064 | 0 | read_index_header(idxrf, f); |
1065 | 0 | idxrf->base_index = read_index(f, io_flags); |
1066 | 0 | idxrf->refine_index = read_index(f, io_flags); |
1067 | 0 | READ1(idxrf->k_factor); |
1068 | 0 | if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) { |
1069 | | // then make a RefineFlat with it |
1070 | 0 | IndexRefine* idxrf_old = idxrf; |
1071 | 0 | idxrf = new IndexRefineFlat(); |
1072 | 0 | *idxrf = *idxrf_old; |
1073 | 0 | delete idxrf_old; |
1074 | 0 | } |
1075 | 0 | idxrf->own_fields = true; |
1076 | 0 | idxrf->own_refine_index = true; |
1077 | 0 | idx = idxrf; |
1078 | 20 | } else if (h == fourcc("IxMp") || h == fourcc("IxM2")) { |
1079 | 0 | bool is_map2 = h == fourcc("IxM2"); |
1080 | 0 | IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap(); |
1081 | 0 | read_index_header(idxmap, f); |
1082 | 0 | idxmap->index = read_index(f, io_flags); |
1083 | 0 | idxmap->own_fields = true; |
1084 | 0 | READVECTOR(idxmap->id_map); |
1085 | 0 | if (is_map2) { |
1086 | 0 | static_cast<IndexIDMap2*>(idxmap)->construct_rev_map(); |
1087 | 0 | } |
1088 | 0 | idx = idxmap; |
1089 | 20 | } else if (h == fourcc("Ix2L")) { |
1090 | 0 | Index2Layer* idxp = new Index2Layer(); |
1091 | 0 | read_index_header(idxp, f); |
1092 | 0 | idxp->q1.quantizer = read_index(f, io_flags); |
1093 | 0 | READ1(idxp->q1.nlist); |
1094 | 0 | READ1(idxp->q1.quantizer_trains_alone); |
1095 | 0 | read_ProductQuantizer(&idxp->pq, f); |
1096 | 0 | READ1(idxp->code_size_1); |
1097 | 0 | READ1(idxp->code_size_2); |
1098 | 0 | READ1(idxp->code_size); |
1099 | 0 | read_vector(idxp->codes, f); |
1100 | 0 | idx = idxp; |
1101 | 20 | } else if ( |
1102 | 20 | h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") || |
1103 | 20 | h == fourcc("IHN2") || h == fourcc("IHNc")) { |
1104 | 20 | IndexHNSW* idxhnsw = nullptr; |
1105 | 20 | if (h == fourcc("IHNf")) |
1106 | 17 | idxhnsw = new IndexHNSWFlat(); |
1107 | 20 | if (h == fourcc("IHNp")) |
1108 | 1 | idxhnsw = new IndexHNSWPQ(); |
1109 | 20 | if (h == fourcc("IHNs")) |
1110 | 2 | idxhnsw = new IndexHNSWSQ(); |
1111 | 20 | if (h == fourcc("IHN2")) |
1112 | 0 | idxhnsw = new IndexHNSW2Level(); |
1113 | 20 | if (h == fourcc("IHNc")) |
1114 | 0 | idxhnsw = new IndexHNSWCagra(); |
1115 | 20 | read_index_header(idxhnsw, f); |
1116 | 20 | if (h == fourcc("IHNc")) { |
1117 | 0 | READ1(idxhnsw->keep_max_size_level0); |
1118 | 0 | auto idx_hnsw_cagra = dynamic_cast<IndexHNSWCagra*>(idxhnsw); |
1119 | 0 | READ1(idx_hnsw_cagra->base_level_only); |
1120 | 0 | READ1(idx_hnsw_cagra->num_base_level_search_entrypoints); |
1121 | 0 | } |
1122 | 20 | read_HNSW(&idxhnsw->hnsw, f); |
1123 | 20 | idxhnsw->storage = read_index(f, io_flags); |
1124 | 20 | idxhnsw->own_fields = idxhnsw->storage != nullptr; |
1125 | 20 | if (h == fourcc("IHNp") && !(io_flags & IO_FLAG_PQ_SKIP_SDC_TABLE)) { |
1126 | 1 | dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table(); |
1127 | 1 | } |
1128 | 20 | idx = idxhnsw; |
1129 | 20 | } else if ( |
1130 | 0 | h == fourcc("INSf") || h == fourcc("INSp") || h == fourcc("INSs")) { |
1131 | 0 | IndexNSG* idxnsg; |
1132 | 0 | if (h == fourcc("INSf")) |
1133 | 0 | idxnsg = new IndexNSGFlat(); |
1134 | 0 | if (h == fourcc("INSp")) |
1135 | 0 | idxnsg = new IndexNSGPQ(); |
1136 | 0 | if (h == fourcc("INSs")) |
1137 | 0 | idxnsg = new IndexNSGSQ(); |
1138 | 0 | read_index_header(idxnsg, f); |
1139 | 0 | READ1(idxnsg->GK); |
1140 | 0 | READ1(idxnsg->build_type); |
1141 | 0 | READ1(idxnsg->nndescent_S); |
1142 | 0 | READ1(idxnsg->nndescent_R); |
1143 | 0 | READ1(idxnsg->nndescent_L); |
1144 | 0 | READ1(idxnsg->nndescent_iter); |
1145 | 0 | read_NSG(&idxnsg->nsg, f); |
1146 | 0 | idxnsg->storage = read_index(f, io_flags); |
1147 | 0 | idxnsg->own_fields = true; |
1148 | 0 | idx = idxnsg; |
1149 | 0 | } else if (h == fourcc("INNf")) { |
1150 | 0 | IndexNNDescent* idxnnd = new IndexNNDescentFlat(); |
1151 | 0 | read_index_header(idxnnd, f); |
1152 | 0 | read_NNDescent(&idxnnd->nndescent, f); |
1153 | 0 | idxnnd->storage = read_index(f, io_flags); |
1154 | 0 | idxnnd->own_fields = true; |
1155 | 0 | idx = idxnnd; |
1156 | 0 | } else if (h == fourcc("IPfs")) { |
1157 | 0 | IndexPQFastScan* idxpqfs = new IndexPQFastScan(); |
1158 | 0 | read_index_header(idxpqfs, f); |
1159 | 0 | read_ProductQuantizer(&idxpqfs->pq, f); |
1160 | 0 | READ1(idxpqfs->implem); |
1161 | 0 | READ1(idxpqfs->bbs); |
1162 | 0 | READ1(idxpqfs->qbs); |
1163 | 0 | READ1(idxpqfs->ntotal2); |
1164 | 0 | READ1(idxpqfs->M2); |
1165 | 0 | READVECTOR(idxpqfs->codes); |
1166 | |
|
1167 | 0 | const auto& pq = idxpqfs->pq; |
1168 | 0 | idxpqfs->M = pq.M; |
1169 | 0 | idxpqfs->nbits = pq.nbits; |
1170 | 0 | idxpqfs->ksub = (1 << pq.nbits); |
1171 | 0 | idxpqfs->code_size = pq.code_size; |
1172 | |
|
1173 | 0 | idx = idxpqfs; |
1174 | |
|
1175 | 0 | } else if (h == fourcc("IwPf")) { |
1176 | 0 | IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan(); |
1177 | 0 | read_ivf_header(ivpq, f); |
1178 | 0 | READ1(ivpq->by_residual); |
1179 | 0 | READ1(ivpq->code_size); |
1180 | 0 | READ1(ivpq->bbs); |
1181 | 0 | READ1(ivpq->M2); |
1182 | 0 | READ1(ivpq->implem); |
1183 | 0 | READ1(ivpq->qbs2); |
1184 | 0 | read_ProductQuantizer(&ivpq->pq, f); |
1185 | 0 | read_InvertedLists(ivpq, f, io_flags); |
1186 | 0 | ivpq->precompute_table(); |
1187 | |
|
1188 | 0 | const auto& pq = ivpq->pq; |
1189 | 0 | ivpq->M = pq.M; |
1190 | 0 | ivpq->nbits = pq.nbits; |
1191 | 0 | ivpq->ksub = (1 << pq.nbits); |
1192 | 0 | ivpq->code_size = pq.code_size; |
1193 | 0 | ivpq->init_code_packer(); |
1194 | |
|
1195 | 0 | idx = ivpq; |
1196 | 0 | } else if (h == fourcc("IRMf")) { |
1197 | 0 | IndexRowwiseMinMax* imm = new IndexRowwiseMinMax(); |
1198 | 0 | read_index_header(imm, f); |
1199 | |
|
1200 | 0 | imm->index = read_index(f, io_flags); |
1201 | 0 | imm->own_fields = true; |
1202 | |
|
1203 | 0 | idx = imm; |
1204 | 0 | } else if (h == fourcc("IRMh")) { |
1205 | 0 | IndexRowwiseMinMaxFP16* imm = new IndexRowwiseMinMaxFP16(); |
1206 | 0 | read_index_header(imm, f); |
1207 | |
|
1208 | 0 | imm->index = read_index(f, io_flags); |
1209 | 0 | imm->own_fields = true; |
1210 | |
|
1211 | 0 | idx = imm; |
1212 | 0 | } else if (h == fourcc("Ixrq")) { |
1213 | 0 | IndexRaBitQ* idxq = new IndexRaBitQ(); |
1214 | 0 | read_index_header(idxq, f); |
1215 | 0 | read_RaBitQuantizer(&idxq->rabitq, f); |
1216 | 0 | READVECTOR(idxq->codes); |
1217 | 0 | READVECTOR(idxq->center); |
1218 | 0 | READ1(idxq->qb); |
1219 | 0 | idxq->code_size = idxq->rabitq.code_size; |
1220 | 0 | idx = idxq; |
1221 | 0 | } else if (h == fourcc("Iwrq")) { |
1222 | 0 | IndexIVFRaBitQ* ivrq = new IndexIVFRaBitQ(); |
1223 | 0 | read_ivf_header(ivrq, f); |
1224 | 0 | read_RaBitQuantizer(&ivrq->rabitq, f); |
1225 | 0 | READ1(ivrq->code_size); |
1226 | 0 | READ1(ivrq->by_residual); |
1227 | 0 | READ1(ivrq->qb); |
1228 | 0 | read_InvertedLists(ivrq, f, io_flags); |
1229 | 0 | idx = ivrq; |
1230 | 0 | } else { |
1231 | 0 | FAISS_THROW_FMT( |
1232 | 0 | "Index type 0x%08x (\"%s\") not recognized", |
1233 | 0 | h, |
1234 | 0 | fourcc_inv_printable(h).c_str()); |
1235 | 0 | idx = nullptr; |
1236 | 0 | } |
1237 | 50 | return idx; |
1238 | 50 | } |
1239 | | |
1240 | 0 | Index* read_index(FILE* f, int io_flags) { |
1241 | 0 | if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { |
1242 | | // enable mmap-supporting IOReader |
1243 | 0 | auto owner = std::make_shared<MmappedFileMappingOwner>(f); |
1244 | 0 | MappedFileIOReader reader(owner); |
1245 | 0 | return read_index(&reader, io_flags); |
1246 | 0 | } else { |
1247 | 0 | FileIOReader reader(f); |
1248 | 0 | return read_index(&reader, io_flags); |
1249 | 0 | } |
1250 | 0 | } |
1251 | | |
1252 | 0 | Index* read_index(const char* fname, int io_flags) { |
1253 | 0 | if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { |
1254 | | // enable mmap-supporting IOReader |
1255 | 0 | auto owner = std::make_shared<MmappedFileMappingOwner>(fname); |
1256 | 0 | MappedFileIOReader reader(owner); |
1257 | 0 | return read_index(&reader, io_flags); |
1258 | 0 | } else { |
1259 | 0 | FileIOReader reader(fname); |
1260 | 0 | Index* idx = read_index(&reader, io_flags); |
1261 | 0 | return idx; |
1262 | 0 | } |
1263 | 0 | } |
1264 | | |
1265 | 0 | VectorTransform* read_VectorTransform(const char* fname) { |
1266 | 0 | FileIOReader reader(fname); |
1267 | 0 | VectorTransform* vt = read_VectorTransform(&reader); |
1268 | 0 | return vt; |
1269 | 0 | } |
1270 | | |
1271 | | /************************************************************* |
1272 | | * Read binary indexes |
1273 | | **************************************************************/ |
1274 | | |
1275 | 0 | static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) { |
1276 | 0 | InvertedLists* ils = read_InvertedLists(f, io_flags); |
1277 | 0 | FAISS_THROW_IF_NOT( |
1278 | 0 | !ils || |
1279 | 0 | (ils->nlist == ivf->nlist && ils->code_size == ivf->code_size)); |
1280 | 0 | ivf->invlists = ils; |
1281 | 0 | ivf->own_invlists = true; |
1282 | 0 | } |
1283 | | |
1284 | 0 | static void read_index_binary_header(IndexBinary* idx, IOReader* f) { |
1285 | 0 | READ1(idx->d); |
1286 | 0 | READ1(idx->code_size); |
1287 | 0 | READ1(idx->ntotal); |
1288 | 0 | READ1(idx->is_trained); |
1289 | 0 | READ1(idx->metric_type); |
1290 | 0 | idx->verbose = false; |
1291 | 0 | } |
1292 | | |
1293 | | static void read_binary_ivf_header( |
1294 | | IndexBinaryIVF* ivf, |
1295 | | IOReader* f, |
1296 | 0 | std::vector<std::vector<idx_t>>* ids = nullptr) { |
1297 | 0 | read_index_binary_header(ivf, f); |
1298 | 0 | READ1(ivf->nlist); |
1299 | 0 | READ1(ivf->nprobe); |
1300 | 0 | ivf->quantizer = read_index_binary(f); |
1301 | 0 | ivf->own_fields = true; |
1302 | 0 | if (ids) { // used in legacy "Iv" formats |
1303 | 0 | ids->resize(ivf->nlist); |
1304 | 0 | for (size_t i = 0; i < ivf->nlist; i++) |
1305 | 0 | READVECTOR((*ids)[i]); |
1306 | 0 | } |
1307 | 0 | read_direct_map(&ivf->direct_map, f); |
1308 | 0 | } |
1309 | | |
1310 | | static void read_binary_hash_invlists( |
1311 | | IndexBinaryHash::InvertedListMap& invlists, |
1312 | | int b, |
1313 | 0 | IOReader* f) { |
1314 | 0 | size_t sz; |
1315 | 0 | READ1(sz); |
1316 | 0 | int il_nbit = 0; |
1317 | 0 | READ1(il_nbit); |
1318 | | // buffer for bitstrings |
1319 | 0 | std::vector<uint8_t> buf((b + il_nbit) * sz); |
1320 | 0 | READVECTOR(buf); |
1321 | 0 | BitstringReader rd(buf.data(), buf.size()); |
1322 | 0 | invlists.reserve(sz); |
1323 | 0 | for (size_t i = 0; i < sz; i++) { |
1324 | 0 | uint64_t hash = rd.read(b); |
1325 | 0 | uint64_t ilsz = rd.read(il_nbit); |
1326 | 0 | auto& il = invlists[hash]; |
1327 | 0 | READVECTOR(il.ids); |
1328 | 0 | FAISS_THROW_IF_NOT(il.ids.size() == ilsz); |
1329 | 0 | READVECTOR(il.vecs); |
1330 | 0 | } |
1331 | 0 | } |
1332 | | |
1333 | | static void read_binary_multi_hash_map( |
1334 | | IndexBinaryMultiHash::Map& map, |
1335 | | int b, |
1336 | | size_t ntotal, |
1337 | 0 | IOReader* f) { |
1338 | 0 | int id_bits; |
1339 | 0 | size_t sz; |
1340 | 0 | READ1(id_bits); |
1341 | 0 | READ1(sz); |
1342 | 0 | std::vector<uint8_t> buf; |
1343 | 0 | READVECTOR(buf); |
1344 | 0 | size_t nbit = (b + id_bits) * sz + ntotal * id_bits; |
1345 | 0 | FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8); |
1346 | 0 | BitstringReader rd(buf.data(), buf.size()); |
1347 | 0 | map.reserve(sz); |
1348 | 0 | for (size_t i = 0; i < sz; i++) { |
1349 | 0 | uint64_t hash = rd.read(b); |
1350 | 0 | uint64_t ilsz = rd.read(id_bits); |
1351 | 0 | auto& il = map[hash]; |
1352 | 0 | for (size_t j = 0; j < ilsz; j++) { |
1353 | 0 | il.push_back(rd.read(id_bits)); |
1354 | 0 | } |
1355 | 0 | } |
1356 | 0 | } |
1357 | | |
1358 | 0 | IndexBinary* read_index_binary(IOReader* f, int io_flags) { |
1359 | 0 | IndexBinary* idx = nullptr; |
1360 | 0 | uint32_t h; |
1361 | 0 | READ1(h); |
1362 | 0 | if (h == fourcc("IBxF")) { |
1363 | 0 | IndexBinaryFlat* idxf = new IndexBinaryFlat(); |
1364 | 0 | read_index_binary_header(idxf, f); |
1365 | 0 | read_vector(idxf->xb, f); |
1366 | 0 | FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size); |
1367 | | // leak! |
1368 | 0 | idx = idxf; |
1369 | 0 | } else if (h == fourcc("IBwF")) { |
1370 | 0 | IndexBinaryIVF* ivf = new IndexBinaryIVF(); |
1371 | 0 | read_binary_ivf_header(ivf, f); |
1372 | 0 | read_InvertedLists(ivf, f, io_flags); |
1373 | 0 | idx = ivf; |
1374 | 0 | } else if (h == fourcc("IBFf")) { |
1375 | 0 | IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat(); |
1376 | 0 | read_index_binary_header(idxff, f); |
1377 | 0 | idxff->own_fields = true; |
1378 | 0 | idxff->index = read_index(f, io_flags); |
1379 | 0 | idx = idxff; |
1380 | 0 | } else if (h == fourcc("IBHf")) { |
1381 | 0 | IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW(); |
1382 | 0 | read_index_binary_header(idxhnsw, f); |
1383 | 0 | read_HNSW(&idxhnsw->hnsw, f); |
1384 | 0 | idxhnsw->storage = read_index_binary(f, io_flags); |
1385 | 0 | idxhnsw->own_fields = true; |
1386 | 0 | idx = idxhnsw; |
1387 | 0 | } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) { |
1388 | 0 | bool is_map2 = h == fourcc("IBM2"); |
1389 | 0 | IndexBinaryIDMap* idxmap = |
1390 | 0 | is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap(); |
1391 | 0 | read_index_binary_header(idxmap, f); |
1392 | 0 | idxmap->index = read_index_binary(f, io_flags); |
1393 | 0 | idxmap->own_fields = true; |
1394 | 0 | READVECTOR(idxmap->id_map); |
1395 | 0 | if (is_map2) { |
1396 | 0 | static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map(); |
1397 | 0 | } |
1398 | 0 | idx = idxmap; |
1399 | 0 | } else if (h == fourcc("IBHh")) { |
1400 | 0 | IndexBinaryHash* idxh = new IndexBinaryHash(); |
1401 | 0 | read_index_binary_header(idxh, f); |
1402 | 0 | READ1(idxh->b); |
1403 | 0 | READ1(idxh->nflip); |
1404 | 0 | read_binary_hash_invlists(idxh->invlists, idxh->b, f); |
1405 | 0 | idx = idxh; |
1406 | 0 | } else if (h == fourcc("IBHm")) { |
1407 | 0 | IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash(); |
1408 | 0 | read_index_binary_header(idxmh, f); |
1409 | 0 | idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f)); |
1410 | 0 | FAISS_THROW_IF_NOT( |
1411 | 0 | idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal); |
1412 | 0 | idxmh->own_fields = true; |
1413 | 0 | READ1(idxmh->b); |
1414 | 0 | READ1(idxmh->nhash); |
1415 | 0 | READ1(idxmh->nflip); |
1416 | 0 | idxmh->maps.resize(idxmh->nhash); |
1417 | 0 | for (int i = 0; i < idxmh->nhash; i++) { |
1418 | 0 | read_binary_multi_hash_map( |
1419 | 0 | idxmh->maps[i], idxmh->b, idxmh->ntotal, f); |
1420 | 0 | } |
1421 | 0 | idx = idxmh; |
1422 | 0 | } else { |
1423 | 0 | FAISS_THROW_FMT( |
1424 | 0 | "Index type %08x (\"%s\") not recognized", |
1425 | 0 | h, |
1426 | 0 | fourcc_inv_printable(h).c_str()); |
1427 | 0 | idx = nullptr; |
1428 | 0 | } |
1429 | 0 | return idx; |
1430 | 0 | } |
1431 | | |
1432 | 0 | IndexBinary* read_index_binary(FILE* f, int io_flags) { |
1433 | 0 | if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { |
1434 | | // enable mmap-supporting IOReader |
1435 | 0 | auto owner = std::make_shared<MmappedFileMappingOwner>(f); |
1436 | 0 | MappedFileIOReader reader(owner); |
1437 | 0 | return read_index_binary(&reader, io_flags); |
1438 | 0 | } else { |
1439 | 0 | FileIOReader reader(f); |
1440 | 0 | return read_index_binary(&reader, io_flags); |
1441 | 0 | } |
1442 | 0 | } |
1443 | | |
1444 | 0 | IndexBinary* read_index_binary(const char* fname, int io_flags) { |
1445 | 0 | if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { |
1446 | | // enable mmap-supporting IOReader |
1447 | 0 | auto owner = std::make_shared<MmappedFileMappingOwner>(fname); |
1448 | 0 | MappedFileIOReader reader(owner); |
1449 | 0 | return read_index_binary(&reader, io_flags); |
1450 | 0 | } else { |
1451 | 0 | FileIOReader reader(fname); |
1452 | 0 | IndexBinary* idx = read_index_binary(&reader, io_flags); |
1453 | 0 | return idx; |
1454 | 0 | } |
1455 | 0 | } |
1456 | | |
1457 | | } // namespace faiss |