/root/doris/contrib/faiss/faiss/impl/io.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | // -*- c++ -*- |
9 | | |
10 | | #include <algorithm> |
11 | | #include <cassert> |
12 | | #include <cstring> |
13 | | |
14 | | #include <faiss/impl/FaissAssert.h> |
15 | | #include <faiss/impl/io.h> |
16 | | |
17 | | namespace faiss { |
18 | | |
19 | | /*********************************************************************** |
20 | | * IO functions |
21 | | ***********************************************************************/ |
22 | | |
23 | 0 | int IOReader::filedescriptor() { |
24 | 0 | FAISS_THROW_MSG("IOReader does not support memory mapping"); |
25 | 0 | } |
26 | | |
27 | 0 | int IOWriter::filedescriptor() { |
28 | 0 | FAISS_THROW_MSG("IOWriter does not support memory mapping"); |
29 | 0 | } |
30 | | |
31 | | /*********************************************************************** |
32 | | * IO Vector |
33 | | ***********************************************************************/ |
34 | | |
35 | 0 | size_t VectorIOWriter::operator()(const void* ptr, size_t size, size_t nitems) { |
36 | 0 | size_t bytes = size * nitems; |
37 | 0 | if (bytes > 0) { |
38 | 0 | size_t o = data.size(); |
39 | 0 | data.resize(o + bytes); |
40 | 0 | memcpy(&data[o], ptr, size * nitems); |
41 | 0 | } |
42 | 0 | return nitems; |
43 | 0 | } |
44 | | |
45 | 0 | size_t VectorIOReader::operator()(void* ptr, size_t size, size_t nitems) { |
46 | 0 | if (rp >= data.size()) |
47 | 0 | return 0; |
48 | 0 | size_t nremain = (data.size() - rp) / size; |
49 | 0 | if (nremain < nitems) |
50 | 0 | nitems = nremain; |
51 | 0 | if (size * nitems > 0) { |
52 | 0 | memcpy(ptr, &data[rp], size * nitems); |
53 | 0 | rp += size * nitems; |
54 | 0 | } |
55 | 0 | return nitems; |
56 | 0 | } |
57 | | |
58 | | /*********************************************************************** |
59 | | * IO File |
60 | | ***********************************************************************/ |
61 | | |
62 | 0 | FileIOReader::FileIOReader(FILE* rf) : f(rf) {} |
63 | | |
64 | 0 | FileIOReader::FileIOReader(const char* fname) { |
65 | 0 | name = fname; |
66 | 0 | f = fopen(fname, "rb"); |
67 | 0 | FAISS_THROW_IF_NOT_FMT( |
68 | 0 | f, "could not open %s for reading: %s", fname, strerror(errno)); |
69 | 0 | need_close = true; |
70 | 0 | } |
71 | | |
72 | 0 | FileIOReader::~FileIOReader() { |
73 | 0 | if (need_close) { |
74 | 0 | int ret = fclose(f); |
75 | 0 | if (ret != 0) { // we cannot raise and exception in the destructor |
76 | 0 | fprintf(stderr, |
77 | 0 | "file %s close error: %s", |
78 | 0 | name.c_str(), |
79 | 0 | strerror(errno)); |
80 | 0 | } |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | 0 | size_t FileIOReader::operator()(void* ptr, size_t size, size_t nitems) { |
85 | 0 | return fread(ptr, size, nitems, f); |
86 | 0 | } |
87 | | |
88 | 0 | int FileIOReader::filedescriptor() { |
89 | | #ifdef _AIX |
90 | | return fileno(f); |
91 | | #else |
92 | 0 | return ::fileno(f); |
93 | 0 | #endif |
94 | 0 | } |
95 | | |
96 | 0 | FileIOWriter::FileIOWriter(FILE* wf) : f(wf) {} |
97 | | |
98 | 0 | FileIOWriter::FileIOWriter(const char* fname) { |
99 | 0 | name = fname; |
100 | 0 | f = fopen(fname, "wb"); |
101 | 0 | FAISS_THROW_IF_NOT_FMT( |
102 | 0 | f, "could not open %s for writing: %s", fname, strerror(errno)); |
103 | 0 | need_close = true; |
104 | 0 | } |
105 | | |
106 | 0 | FileIOWriter::~FileIOWriter() { |
107 | 0 | if (need_close) { |
108 | 0 | int ret = fclose(f); |
109 | 0 | if (ret != 0) { |
110 | | // we cannot raise and exception in the destructor |
111 | 0 | fprintf(stderr, |
112 | 0 | "file %s close error: %s", |
113 | 0 | name.c_str(), |
114 | 0 | strerror(errno)); |
115 | 0 | } |
116 | 0 | } |
117 | 0 | } |
118 | | |
119 | 0 | size_t FileIOWriter::operator()(const void* ptr, size_t size, size_t nitems) { |
120 | 0 | return fwrite(ptr, size, nitems, f); |
121 | 0 | } |
122 | | |
123 | 0 | int FileIOWriter::filedescriptor() { |
124 | | #ifdef _AIX |
125 | | return fileno(f); |
126 | | #else |
127 | 0 | return ::fileno(f); |
128 | 0 | #endif |
129 | 0 | } |
130 | | |
131 | | /*********************************************************************** |
132 | | * IO buffer |
133 | | ***********************************************************************/ |
134 | | |
135 | | BufferedIOReader::BufferedIOReader(IOReader* reader, size_t bsz) |
136 | 0 | : reader(reader), |
137 | 0 | bsz(bsz), |
138 | 0 | ofs(0), |
139 | 0 | ofs2(0), |
140 | 0 | b0(0), |
141 | 0 | b1(0), |
142 | 0 | buffer(bsz) {} |
143 | | |
144 | 0 | size_t BufferedIOReader::operator()(void* ptr, size_t unitsize, size_t nitems) { |
145 | 0 | size_t size = unitsize * nitems; |
146 | 0 | if (size == 0) |
147 | 0 | return 0; |
148 | 0 | char* dst = (char*)ptr; |
149 | 0 | size_t nb; |
150 | |
|
151 | 0 | { // first copy available bytes |
152 | 0 | nb = std::min(b1 - b0, size); |
153 | 0 | memcpy(dst, buffer.data() + b0, nb); |
154 | 0 | b0 += nb; |
155 | 0 | dst += nb; |
156 | 0 | size -= nb; |
157 | 0 | } |
158 | | |
159 | | // while we would like to have more data |
160 | 0 | while (size > 0) { |
161 | 0 | assert(b0 == b1); // buffer empty on input |
162 | | // try to read from main reader |
163 | 0 | b0 = 0; |
164 | 0 | b1 = (*reader)(buffer.data(), 1, bsz); |
165 | |
|
166 | 0 | if (b1 == 0) { |
167 | | // no more bytes available |
168 | 0 | break; |
169 | 0 | } |
170 | 0 | ofs += b1; |
171 | | |
172 | | // copy remaining bytes |
173 | 0 | size_t nb2 = std::min(b1, size); |
174 | 0 | memcpy(dst, buffer.data(), nb2); |
175 | 0 | b0 = nb2; |
176 | 0 | nb += nb2; |
177 | 0 | dst += nb2; |
178 | 0 | size -= nb2; |
179 | 0 | } |
180 | 0 | ofs2 += nb; |
181 | 0 | return nb / unitsize; |
182 | 0 | } |
183 | | |
184 | | BufferedIOWriter::BufferedIOWriter(IOWriter* writer, size_t bsz) |
185 | 0 | : writer(writer), bsz(bsz), ofs2(0), b0(0), buffer(bsz) {} |
186 | | |
187 | | size_t BufferedIOWriter::operator()( |
188 | | const void* ptr, |
189 | | size_t unitsize, |
190 | 0 | size_t nitems) { |
191 | 0 | size_t size = unitsize * nitems; |
192 | 0 | if (size == 0) |
193 | 0 | return 0; |
194 | 0 | const char* src = (const char*)ptr; |
195 | 0 | size_t nb; |
196 | |
|
197 | 0 | { // copy as many bytes as possible to buffer |
198 | 0 | nb = std::min(bsz - b0, size); |
199 | 0 | memcpy(buffer.data() + b0, src, nb); |
200 | 0 | b0 += nb; |
201 | 0 | src += nb; |
202 | 0 | size -= nb; |
203 | 0 | } |
204 | 0 | while (size > 0) { |
205 | 0 | assert(b0 == bsz); |
206 | | // now we need to flush to add more bytes |
207 | 0 | size_t ofs_2 = 0; |
208 | 0 | do { |
209 | 0 | assert(ofs_2 < 10000000); |
210 | 0 | size_t written = (*writer)(buffer.data() + ofs_2, 1, bsz - ofs_2); |
211 | 0 | FAISS_THROW_IF_NOT(written > 0); |
212 | 0 | ofs_2 += written; |
213 | 0 | } while (ofs_2 != bsz); |
214 | | |
215 | | // copy src to buffer |
216 | 0 | size_t nb1 = std::min(bsz, size); |
217 | 0 | memcpy(buffer.data(), src, nb1); |
218 | 0 | b0 = nb1; |
219 | 0 | nb += nb1; |
220 | 0 | src += nb1; |
221 | 0 | size -= nb1; |
222 | 0 | } |
223 | 0 | ofs2 += nb; |
224 | 0 | return nb / unitsize; |
225 | 0 | } |
226 | | |
227 | 0 | BufferedIOWriter::~BufferedIOWriter() { |
228 | 0 | size_t ofs_2 = 0; |
229 | 0 | while (ofs_2 != b0) { |
230 | | // printf("Destructor write %zd \n", b0 - ofs_2); |
231 | 0 | size_t written = (*writer)(buffer.data() + ofs_2, 1, b0 - ofs_2); |
232 | 0 | FAISS_THROW_IF_NOT(written > 0); |
233 | 0 | ofs_2 += written; |
234 | 0 | } |
235 | 0 | } |
236 | | |
237 | 1.58k | uint32_t fourcc(const char sx[4]) { |
238 | 1.58k | FAISS_THROW_IF_NOT(4 == strlen(sx)); |
239 | 1.58k | const unsigned char* x = (unsigned char*)sx; |
240 | 1.58k | return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24; |
241 | 1.58k | } |
242 | | |
243 | 0 | uint32_t fourcc(const std::string& sx) { |
244 | 0 | FAISS_THROW_IF_NOT(sx.length() == 4); |
245 | 0 | const unsigned char* x = (unsigned char*)sx.c_str(); |
246 | 0 | return x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24; |
247 | 0 | } |
248 | | |
// Unpacks a 32-bit four-character code into a NUL-terminated C string.
//
// Bytes are emitted lowest-order first: str[0] receives bits 0..7 of x and
// str[3] receives bits 24..31. The bytes are stored one by one rather than
// through a uint32_t* cast, so `str` needs no particular alignment and the
// result does not depend on the host byte order.
//
// @param x    packed code
// @param str  output buffer of at least 5 chars; str[4] is set to 0
void fourcc_inv(uint32_t x, char str[5]) {
    for (int i = 0; i < 4; i++) {
        str[i] = static_cast<char>((x >> (8 * i)) & 0xffu);
    }
    str[4] = 0;
}
253 | | |
254 | 0 | std::string fourcc_inv(uint32_t x) { |
255 | 0 | char str[5]; |
256 | 0 | fourcc_inv(x, str); |
257 | 0 | return std::string(str); |
258 | 0 | } |
259 | | |
260 | 0 | std::string fourcc_inv_printable(uint32_t x) { |
261 | 0 | char cstr[5]; |
262 | 0 | fourcc_inv(x, cstr); |
263 | 0 | std::string str = ""; |
264 | 0 | for (int i = 0; i < 4; i++) { |
265 | 0 | uint8_t c = cstr[i]; |
266 | 0 | if (32 <= c && c < 127) { |
267 | 0 | str += c; |
268 | 0 | } else { |
269 | 0 | char buf[10]; |
270 | 0 | snprintf(buf, sizeof(buf), "\\x%02x", c); |
271 | 0 | str += buf; |
272 | 0 | } |
273 | 0 | } |
274 | 0 | return str; |
275 | 0 | } |
276 | | |
277 | | } // namespace faiss |