/var/local/thirdparty/installed/include/roaring/roaring.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * An implementation of Roaring Bitmaps in C. |
3 | | */ |
4 | | |
5 | | #ifndef ROARING_H |
6 | | #define ROARING_H |
7 | | |
8 | | #include <stdbool.h> |
9 | | #include <stdint.h> |
10 | | #include <stddef.h> // for `size_t` |
11 | | |
12 | | #include <roaring/memory.h> |
13 | | #include <roaring/roaring_types.h> |
14 | | #include <roaring/roaring_version.h> |
15 | | #include <roaring/bitset/bitset.h> |
16 | | |
17 | | #ifdef __cplusplus |
18 | | extern "C" { namespace roaring { namespace api { |
19 | | #endif |
20 | | |
21 | | typedef struct roaring_bitmap_s { |
22 | | roaring_array_t high_low_container; |
23 | | } roaring_bitmap_t; |
24 | | |
25 | | /** |
26 | | * Dynamically allocates a new bitmap (initially empty). |
27 | | * Returns NULL if the allocation fails. |
28 | | * Capacity is a performance hint for how many "containers" the data will need. |
29 | | * Client is responsible for calling `roaring_bitmap_free()`. |
30 | | */ |
31 | | roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); |
32 | | |
33 | | /** |
34 | | * Dynamically allocates a new bitmap (initially empty). |
35 | | * Returns NULL if the allocation fails. |
36 | | * Client is responsible for calling `roaring_bitmap_free()`. |
37 | | */ |
38 | | inline roaring_bitmap_t *roaring_bitmap_create(void) |
39 | 0 | { return roaring_bitmap_create_with_capacity(0); } |
40 | | |
41 | | /** |
42 | | * Initialize a roaring bitmap structure in memory controlled by client. |
43 | | * Capacity is a performance hint for how many "containers" the data will need. |
44 | | * Can return false if auxiliary allocations fail when capacity greater than 0. |
45 | | */ |
46 | | bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); |
47 | | |
48 | | /** |
49 | | * Initialize a roaring bitmap structure in memory controlled by client. |
50 | | * The bitmap will be in a "clear" state, with no auxiliary allocations. |
51 | | * Since this performs no allocations, the function will not fail. |
52 | | */ |
53 | | inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) |
54 | 0 | { roaring_bitmap_init_with_capacity(r, 0); } |
55 | | |
56 | | /** |
57 | | * Add all the values between min (included) and max (excluded) that are at a |
58 | | * distance k*step from min. |
59 | | */ |
60 | | roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, |
61 | | uint32_t step); |
62 | | |
63 | | /** |
64 | | * Creates a new bitmap from a pointer of uint32_t integers |
65 | | */ |
66 | | roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); |
67 | | |
68 | | /* |
69 | | * Whether you want to use copy-on-write. |
70 | | * Saves memory and avoids copies, but needs more care in a threaded context. |
71 | | * Most users should ignore this flag. |
72 | | * |
73 | | * Note: If you do turn this flag to 'true', enabling COW, then ensure that you |
74 | | * do so for all of your bitmaps, since interactions between bitmaps with and |
75 | | * without COW is unsafe. |
76 | | */ |
77 | | inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) { |
78 | | return r->high_low_container.flags & ROARING_FLAG_COW; |
79 | | } |
80 | | inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) { |
81 | | if (cow) { |
82 | | r->high_low_container.flags |= ROARING_FLAG_COW; |
83 | | } else { |
84 | | r->high_low_container.flags &= ~ROARING_FLAG_COW; |
85 | | } |
86 | | } |
87 | | |
88 | | roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, |
89 | | int64_t offset); |
90 | | /** |
91 | | * Describe the inner structure of the bitmap. |
92 | | */ |
93 | | void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); |
94 | | |
95 | | /** |
96 | | * Creates a new bitmap from a list of uint32_t integers |
97 | | */ |
98 | | roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); |
99 | | |
100 | | /** |
101 | | * Copies a bitmap (this does memory allocation). |
102 | | * The caller is responsible for memory management. |
103 | | */ |
104 | | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); |
105 | | |
106 | | /** |
107 | | * Copies a bitmap from src to dest. It is assumed that the pointer dest |
108 | | * is to an already allocated bitmap. The content of the dest bitmap is |
109 | | * freed/deleted. |
110 | | * |
111 | | * It might be preferable and simpler to call roaring_bitmap_copy except |
112 | | * that roaring_bitmap_overwrite can save on memory allocations. |
113 | | * |
114 | | * Returns true if successful, or false if there was an error. On failure, |
115 | | * the dest bitmap is left in a valid, empty state (even if it was not empty before). |
116 | | */ |
117 | | bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, |
118 | | const roaring_bitmap_t *src); |
119 | | |
120 | | /** |
121 | | * Print the content of the bitmap. |
122 | | */ |
123 | | void roaring_bitmap_printf(const roaring_bitmap_t *r); |
124 | | |
125 | | /** |
126 | | * Computes the intersection between two bitmaps and returns new bitmap. The |
127 | | * caller is responsible for memory management. |
128 | | * |
129 | | * Performance hint: if you are computing the intersection between several |
130 | | * bitmaps, two-by-two, it is best to start with the smallest bitmap. |
131 | | * You may also rely on roaring_bitmap_and_inplace to avoid creating |
132 | | * many temporary bitmaps. |
133 | | */ |
134 | | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, |
135 | | const roaring_bitmap_t *r2); |
136 | | |
137 | | /** |
138 | | * Computes the size of the intersection between two bitmaps. |
139 | | */ |
140 | | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, |
141 | | const roaring_bitmap_t *r2); |
142 | | |
143 | | /** |
144 | | * Check whether two bitmaps intersect. |
145 | | */ |
146 | | bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, |
147 | | const roaring_bitmap_t *r2); |
148 | | |
149 | | /** |
150 | | * Check whether a bitmap and a closed range intersect. |
151 | | */ |
152 | | bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, |
153 | | uint64_t x, uint64_t y); |
154 | | |
155 | | /** |
156 | | * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto |
157 | | * distance, or the Jaccard similarity coefficient) |
158 | | * |
159 | | * The Jaccard index is undefined if both bitmaps are empty. |
160 | | */ |
161 | | double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, |
162 | | const roaring_bitmap_t *r2); |
163 | | |
164 | | /** |
165 | | * Computes the size of the union between two bitmaps. |
166 | | */ |
167 | | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, |
168 | | const roaring_bitmap_t *r2); |
169 | | |
170 | | /** |
171 | | * Computes the size of the difference (andnot) between two bitmaps. |
172 | | */ |
173 | | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, |
174 | | const roaring_bitmap_t *r2); |
175 | | |
176 | | /** |
177 | | * Computes the size of the symmetric difference (xor) between two bitmaps. |
178 | | */ |
179 | | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, |
180 | | const roaring_bitmap_t *r2); |
181 | | |
182 | | /** |
183 | | * Inplace version of `roaring_bitmap_and()`, modifies r1 |
184 | | * r1 == r2 is allowed. |
185 | | * |
186 | | * Performance hint: if you are computing the intersection between several |
187 | | * bitmaps, two-by-two, it is best to start with the smallest bitmap. |
188 | | */ |
189 | | void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, |
190 | | const roaring_bitmap_t *r2); |
191 | | |
192 | | /** |
193 | | * Computes the union between two bitmaps and returns new bitmap. The caller is |
194 | | * responsible for memory management. |
195 | | */ |
196 | | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, |
197 | | const roaring_bitmap_t *r2); |
198 | | |
199 | | /** |
200 | | * Inplace version of `roaring_bitmap_or(), modifies r1. |
201 | | * TODO: decide whether r1 == r2 ok |
202 | | */ |
203 | | void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, |
204 | | const roaring_bitmap_t *r2); |
205 | | |
206 | | /** |
207 | | * Compute the union of 'number' bitmaps. |
208 | | * Caller is responsible for freeing the result. |
209 | | * See also `roaring_bitmap_or_many_heap()` |
210 | | */ |
211 | | roaring_bitmap_t *roaring_bitmap_or_many(size_t number, |
212 | | const roaring_bitmap_t **rs); |
213 | | |
214 | | /** |
215 | | * Compute the union of 'number' bitmaps using a heap. This can sometimes be |
216 | | * faster than `roaring_bitmap_or_many() which uses a naive algorithm. |
217 | | * Caller is responsible for freeing the result. |
218 | | */ |
219 | | roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, |
220 | | const roaring_bitmap_t **rs); |
221 | | |
222 | | /** |
223 | | * Computes the symmetric difference (xor) between two bitmaps |
224 | | * and returns new bitmap. The caller is responsible for memory management. |
225 | | */ |
226 | | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, |
227 | | const roaring_bitmap_t *r2); |
228 | | |
229 | | /** |
230 | | * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2. |
231 | | */ |
232 | | void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, |
233 | | const roaring_bitmap_t *r2); |
234 | | |
235 | | /** |
236 | | * Compute the xor of 'number' bitmaps. |
237 | | * Caller is responsible for freeing the result. |
238 | | */ |
239 | | roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, |
240 | | const roaring_bitmap_t **rs); |
241 | | |
242 | | /** |
243 | | * Computes the difference (andnot) between two bitmaps and returns new bitmap. |
244 | | * Caller is responsible for freeing the result. |
245 | | */ |
246 | | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, |
247 | | const roaring_bitmap_t *r2); |
248 | | |
249 | | /** |
250 | | * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2. |
251 | | */ |
252 | | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, |
253 | | const roaring_bitmap_t *r2); |
254 | | |
255 | | /** |
256 | | * TODO: consider implementing: |
257 | | * |
258 | | * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be |
259 | | * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is |
260 | | * responsible for freeing the result."" |
261 | | * |
262 | | * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, |
263 | | * const roaring_bitmap_t **rs); |
264 | | */ |
265 | | |
266 | | /** |
267 | | * Frees the memory. |
268 | | */ |
269 | | void roaring_bitmap_free(const roaring_bitmap_t *r); |
270 | | |
271 | | /** |
272 | | * A bit of context usable with `roaring_bitmap_*_bulk()` functions |
273 | | * |
274 | | * Should be initialized with `{0}` (or `memset()` to all zeros). |
275 | | * Callers should treat it as an opaque type. |
276 | | * |
277 | | * A context may only be used with a single bitmap |
278 | | * (unless re-initialized to zero), and any modification to a bitmap |
279 | | * (other than modifications performed with `_bulk()` functions with the context |
280 | | * passed) will invalidate any contexts associated with that bitmap. |
281 | | */ |
282 | | typedef struct roaring_bulk_context_s { |
283 | | ROARING_CONTAINER_T *container; |
284 | | int idx; |
285 | | uint16_t key; |
286 | | uint8_t typecode; |
287 | | } roaring_bulk_context_t; |
288 | | |
289 | | /** |
290 | | * Add an item, using context from a previous insert for speed optimization. |
291 | | * |
292 | | * `context` will be used to store information between calls to make bulk |
293 | | * operations faster. `*context` should be zero-initialized before the first |
294 | | * call to this function. |
295 | | * |
296 | | * Modifying the bitmap in any way (other than `-bulk` suffixed functions) |
297 | | * will invalidate the stored context, calling this function with a non-zero |
298 | | * context after doing any modification invokes undefined behavior. |
299 | | * |
300 | | * In order to exploit this optimization, the caller should call this function |
301 | | * with values with the same "key" (high 16 bits of the value) consecutively. |
302 | | */ |
303 | | void roaring_bitmap_add_bulk(roaring_bitmap_t *r, |
304 | | roaring_bulk_context_t *context, uint32_t val); |
305 | | |
306 | | /** |
307 | | * Add value n_args from pointer vals, faster than repeatedly calling |
308 | | * `roaring_bitmap_add()` |
309 | | * |
310 | | * In order to exploit this optimization, the caller should attempt to keep |
311 | | * values with the same "key" (high 16 bits of the value) as consecutive |
312 | | * elements in `vals` |
313 | | */ |
314 | | void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, |
315 | | const uint32_t *vals); |
316 | | |
317 | | /** |
318 | | * Add value x |
319 | | */ |
320 | | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); |
321 | | |
322 | | /** |
323 | | * Add value x |
324 | | * Returns true if a new value was added, false if the value already existed. |
325 | | */ |
326 | | bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); |
327 | | |
328 | | /** |
329 | | * Add all values in range [min, max] |
330 | | */ |
331 | | void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, |
332 | | uint32_t min, uint32_t max); |
333 | | |
334 | | /** |
335 | | * Add all values in range [min, max) |
336 | | */ |
337 | | inline void roaring_bitmap_add_range(roaring_bitmap_t *r, |
338 | | uint64_t min, uint64_t max) { |
339 | | if(max <= min) return; |
340 | | roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); |
341 | | } |
342 | | |
343 | | /** |
344 | | * Remove value x |
345 | | */ |
346 | | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); |
347 | | |
348 | | /** |
349 | | * Remove all values in range [min, max] |
350 | | */ |
351 | | void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, |
352 | | uint32_t min, uint32_t max); |
353 | | |
354 | | /** |
355 | | * Remove all values in range [min, max) |
356 | | */ |
357 | | inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, |
358 | 0 | uint64_t min, uint64_t max) { |
359 | 0 | if(max <= min) return; |
360 | 0 | roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); |
361 | 0 | } |
362 | | |
363 | | /** |
364 | | * Remove multiple values |
365 | | */ |
366 | | void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, |
367 | | const uint32_t *vals); |
368 | | |
369 | | /** |
370 | | * Remove value x |
371 | | * Returns true if a new value was removed, false if the value was not existing. |
372 | | */ |
373 | | bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); |
374 | | |
375 | | /** |
376 | | * Check if value is present |
377 | | */ |
378 | | bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); |
379 | | |
380 | | /** |
381 | | * Check whether a range of values from range_start (included) |
382 | | * to range_end (excluded) is present |
383 | | */ |
384 | | bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, |
385 | | uint64_t range_start, |
386 | | uint64_t range_end); |
387 | | |
388 | | /** |
389 | | * Check if an items is present, using context from a previous insert or search |
390 | | * for speed optimization. |
391 | | * |
392 | | * `context` will be used to store information between calls to make bulk |
393 | | * operations faster. `*context` should be zero-initialized before the first |
394 | | * call to this function. |
395 | | * |
396 | | * Modifying the bitmap in any way (other than `-bulk` suffixed functions) |
397 | | * will invalidate the stored context, calling this function with a non-zero |
398 | | * context after doing any modification invokes undefined behavior. |
399 | | * |
400 | | * In order to exploit this optimization, the caller should call this function |
401 | | * with values with the same "key" (high 16 bits of the value) consecutively. |
402 | | */ |
403 | | bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, |
404 | | roaring_bulk_context_t *context, |
405 | | uint32_t val); |
406 | | |
407 | | /** |
408 | | * Get the cardinality of the bitmap (number of elements). |
409 | | */ |
410 | | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); |
411 | | |
412 | | /** |
413 | | * Returns the number of elements in the range [range_start, range_end). |
414 | | */ |
415 | | uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, |
416 | | uint64_t range_start, |
417 | | uint64_t range_end); |
418 | | |
419 | | /** |
420 | | * Returns true if the bitmap is empty (cardinality is zero). |
421 | | */ |
422 | | bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); |
423 | | |
424 | | |
425 | | /** |
426 | | * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap |
427 | | * was initialized in client memory via roaring_bitmap_init(), then a call to |
428 | | * roaring_bitmap_clear() would be enough to "free" it) |
429 | | */ |
430 | | void roaring_bitmap_clear(roaring_bitmap_t *r); |
431 | | |
432 | | /** |
433 | | * Convert the bitmap to a sorted array, output in `ans`. |
434 | | * |
435 | | * Caller is responsible to ensure that there is enough memory allocated, e.g. |
436 | | * |
437 | | * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t)); |
438 | | */ |
439 | | void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); |
440 | | |
441 | | /** |
442 | | * Store the bitmap to a bitset. This can be useful for people |
443 | | * who need the performance and simplicity of a standard bitset. |
444 | | * We assume that the input bitset is originally empty (does not |
445 | | * have any set bit). |
446 | | * |
447 | | * bitset_t * out = bitset_create(); |
448 | | * // if the bitset has content in it, call "bitset_clear(out)" |
449 | | * bool success = roaring_bitmap_to_bitset(mybitmap, out); |
450 | | * // on failure, success will be false. |
451 | | * // You can then query the bitset: |
452 | | * bool is_present = bitset_get(out, 10011 ); |
453 | | * // you must free the memory: |
454 | | * bitset_free(out); |
455 | | * |
456 | | */ |
457 | | bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset); |
458 | | |
459 | | /** |
460 | | * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`. |
461 | | * |
462 | | * Caller is responsible to ensure that there is enough memory allocated, e.g. |
463 | | * |
464 | | * ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t)); |
465 | | * |
466 | | * Return false in case of failure (e.g., insufficient memory) |
467 | | */ |
468 | | bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, |
469 | | size_t offset, size_t limit, |
470 | | uint32_t *ans); |
471 | | |
472 | | /** |
473 | | * Remove run-length encoding even when it is more space efficient. |
474 | | * Return whether a change was applied. |
475 | | */ |
476 | | bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); |
477 | | |
478 | | /** |
479 | | * Convert array and bitmap containers to run containers when it is more |
480 | | * efficient; also convert from run containers when more space efficient. |
481 | | * |
482 | | * Returns true if the result has at least one run container. |
483 | | * Additional savings might be possible by calling `shrinkToFit()`. |
484 | | */ |
485 | | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); |
486 | | |
487 | | /** |
488 | | * If needed, reallocate memory to shrink the memory usage. |
489 | | * Returns the number of bytes saved. |
490 | | */ |
491 | | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); |
492 | | |
493 | | /** |
494 | | * Write the bitmap to an output pointer, this output buffer should refer to |
495 | | * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes. |
496 | | * |
497 | | * See `roaring_bitmap_portable_serialize()` if you want a format that's |
498 | | * compatible with Java and Go implementations. This format can sometimes be |
499 | | * more space efficient than the portable form, e.g. when the data is sparse. |
500 | | * |
501 | | * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. |
502 | | * |
503 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
504 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
505 | | */ |
506 | | size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); |
507 | | |
508 | | /** |
509 | | * Use with `roaring_bitmap_serialize()`. |
510 | | * |
511 | | * (See `roaring_bitmap_portable_deserialize()` if you want a format that's |
512 | | * compatible with Java and Go implementations). |
513 | | * |
514 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
515 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
516 | | */ |
517 | | roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); |
518 | | |
519 | | /** |
520 | | * Use with `roaring_bitmap_serialize()`. |
521 | | * |
522 | | * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's |
523 | | * compatible with Java and Go implementations). |
524 | | * |
525 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
526 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
527 | | * |
528 | | * The difference with `roaring_bitmap_deserialize()` is that this function checks that the input buffer |
529 | | * is a valid bitmap. If the buffer is too small, NULL is returned. |
530 | | */ |
531 | | roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes); |
532 | | |
533 | | /** |
534 | | * How many bytes are required to serialize this bitmap (NOT compatible |
535 | | * with Java and Go versions) |
536 | | */ |
537 | | size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); |
538 | | |
539 | | /** |
540 | | * Read bitmap from a serialized buffer. |
541 | | * In case of failure, NULL is returned. |
542 | | * |
543 | | * This function is unsafe in the sense that if there is no valid serialized |
544 | | * bitmap at the pointer, then many bytes could be read, possibly causing a |
545 | | * buffer overflow. See also roaring_bitmap_portable_deserialize_safe(). |
546 | | * |
547 | | * This is meant to be compatible with the Java and Go versions: |
548 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
549 | | * |
550 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
551 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
552 | | */ |
553 | | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); |
554 | | |
555 | | /** |
556 | | * Read bitmap from a serialized buffer safely (reading up to maxbytes). |
557 | | * In case of failure, NULL is returned. |
558 | | * |
559 | | * This is meant to be compatible with the Java and Go versions: |
560 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
561 | | * |
562 | | * The function itself is safe in the sense that it will not cause buffer overflows. |
563 | | * However, for correct operations, it is assumed that the bitmap read was once |
564 | | * serialized from a valid bitmap (i.e., it follows the format specification). |
565 | | * If you provided an incorrect input (garbage), then the bitmap read may not be in |
566 | | * a valid state and following operations may not lead to sensible results. |
567 | | * In particular, the serialized array containers need to be in sorted order, and the |
568 | | * run containers should be in sorted non-overlapping order. This is is guaranteed to |
569 | | * happen when serializing an existing bitmap, but not for random inputs. |
570 | | * |
571 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
572 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
573 | | */ |
574 | | roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, |
575 | | size_t maxbytes); |
576 | | |
577 | | /** |
578 | | * Read bitmap from a serialized buffer. |
579 | | * In case of failure, NULL is returned. |
580 | | * |
581 | | * Bitmap returned by this function can be used in all readonly contexts. |
582 | | * Bitmap must be freed as usual, by calling roaring_bitmap_free(). |
583 | | * Underlying buffer must not be freed or modified while it backs any bitmaps. |
584 | | * |
585 | | * The function is unsafe in the following ways: |
586 | | * 1) It may execute unaligned memory accesses. |
587 | | * 2) A buffer overflow may occur if buf does not point to a valid serialized |
588 | | * bitmap. |
589 | | * |
590 | | * This is meant to be compatible with the Java and Go versions: |
591 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
592 | | * |
593 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
594 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
595 | | */ |
596 | | roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); |
597 | | |
598 | | /** |
599 | | * Check how many bytes would be read (up to maxbytes) at this pointer if there |
600 | | * is a bitmap, returns zero if there is no valid bitmap. |
601 | | * |
602 | | * This is meant to be compatible with the Java and Go versions: |
603 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
604 | | */ |
605 | | size_t roaring_bitmap_portable_deserialize_size(const char *buf, |
606 | | size_t maxbytes); |
607 | | |
608 | | /** |
609 | | * How many bytes are required to serialize this bitmap. |
610 | | * |
611 | | * This is meant to be compatible with the Java and Go versions: |
612 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
613 | | */ |
614 | | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); |
615 | | |
616 | | /** |
617 | | * Write a bitmap to a char buffer. The output buffer should refer to at least |
618 | | * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. |
619 | | * |
620 | | * Returns how many bytes were written which should match |
621 | | * `roaring_bitmap_portable_size_in_bytes(r)`. |
622 | | * |
623 | | * This is meant to be compatible with the Java and Go versions: |
624 | | * https://github.com/RoaringBitmap/RoaringFormatSpec |
625 | | * |
626 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
627 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
628 | | */ |
629 | | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); |
630 | | |
631 | | /* |
632 | | * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. |
633 | | * Deserialized bitmap is a constant view of the underlying buffer. |
634 | | * This significantly reduces amount of allocations and copying required during |
635 | | * deserialization. |
636 | | * It can be used with memory mapped files. |
637 | | * Example can be found in benchmarks/frozen_benchmark.c |
638 | | * |
639 | | * [#####] const roaring_bitmap_t * |
640 | | * | | | |
641 | | * +----+ | +-+ |
642 | | * | | | |
643 | | * [#####################################] underlying buffer |
644 | | * |
645 | | * Note that because frozen serialization format imitates C memory layout |
646 | | * of roaring_bitmap_t, it is not fixed. It is different on big/little endian |
647 | | * platforms and can be changed in future. |
648 | | */ |
649 | | |
650 | | /** |
651 | | * Returns number of bytes required to serialize bitmap using frozen format. |
652 | | */ |
653 | | size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); |
654 | | |
655 | | /** |
656 | | * Serializes bitmap using frozen format. |
657 | | * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). |
658 | | * |
659 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
660 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
661 | | */ |
662 | | void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); |
663 | | |
664 | | /** |
665 | | * Creates constant bitmap that is a view of a given buffer. |
666 | | * Buffer data should have been written by `roaring_bitmap_frozen_serialize()` |
667 | | * Its beginning must also be aligned by 32 bytes. |
668 | | * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`. |
669 | | * In case of failure, NULL is returned. |
670 | | * |
671 | | * Bitmap returned by this function can be used in all readonly contexts. |
672 | | * Bitmap must be freed as usual, by calling roaring_bitmap_free(). |
673 | | * Underlying buffer must not be freed or modified while it backs any bitmaps. |
674 | | * |
675 | | * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), |
676 | | * the data format is going to be big-endian and not compatible with little-endian systems. |
677 | | */ |
678 | | const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, |
679 | | size_t length); |
680 | | |
681 | | /** |
682 | | * Iterate over the bitmap elements. The function iterator is called once for |
683 | | * all the values with ptr (can be NULL) as the second parameter of each call. |
684 | | * |
685 | | * `roaring_iterator` is simply a pointer to a function that returns bool |
686 | | * (true means that the iteration should continue while false means that it |
687 | | * should stop), and takes (uint32_t,void*) as inputs. |
688 | | * |
689 | | * Returns true if the roaring_iterator returned true throughout (so that all |
690 | | * data points were necessarily visited). |
691 | | * |
692 | | * Iteration is ordered: from the smallest to the largest elements. |
693 | | */ |
694 | | bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, |
695 | | void *ptr); |
696 | | |
697 | | bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, |
698 | | uint64_t high_bits, void *ptr); |
699 | | |
700 | | /** |
701 | | * Return true if the two bitmaps contain the same elements. |
702 | | */ |
703 | | bool roaring_bitmap_equals(const roaring_bitmap_t *r1, |
704 | | const roaring_bitmap_t *r2); |
705 | | |
706 | | /** |
707 | | * Return true if all the elements of r1 are also in r2. |
708 | | */ |
709 | | bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, |
710 | | const roaring_bitmap_t *r2); |
711 | | |
712 | | /** |
713 | | * Return true if all the elements of r1 are also in r2, and r2 is strictly |
714 | | * greater than r1. |
715 | | */ |
716 | | bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, |
717 | | const roaring_bitmap_t *r2); |
718 | | |
719 | | /** |
720 | | * (For expert users who seek high performance.) |
721 | | * |
722 | | * Computes the union between two bitmaps and returns new bitmap. The caller is |
723 | | * responsible for memory management. |
724 | | * |
725 | | * The lazy version defers some computations such as the maintenance of the |
726 | | * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` |
727 | | * after executing "lazy" computations. |
728 | | * |
729 | | * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. |
730 | | * |
731 | | * `bitsetconversion` is a flag which determines whether container-container |
732 | | * operations force a bitset conversion. |
733 | | */ |
734 | | roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, |
735 | | const roaring_bitmap_t *r2, |
736 | | const bool bitsetconversion); |
737 | | |
738 | | /** |
739 | | * (For expert users who seek high performance.) |
740 | | * |
741 | | * Inplace version of roaring_bitmap_lazy_or, modifies r1. |
742 | | * |
743 | | * `bitsetconversion` is a flag which determines whether container-container |
744 | | * operations force a bitset conversion. |
745 | | */ |
746 | | void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, |
747 | | const roaring_bitmap_t *r2, |
748 | | const bool bitsetconversion); |
749 | | |
750 | | /** |
751 | | * (For expert users who seek high performance.) |
752 | | * |
753 | | * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()` |
754 | | * or modified with `roaring_bitmap_lazy_or_inplace()`. |
755 | | */ |
756 | | void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); |
757 | | |
758 | | /** |
759 | | * Computes the symmetric difference between two bitmaps and returns new bitmap. |
760 | | * The caller is responsible for memory management. |
761 | | * |
762 | | * The lazy version defers some computations such as the maintenance of the |
763 | | * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` |
764 | | * after executing "lazy" computations. |
765 | | * |
766 | | * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on |
767 | | * the result. |
768 | | */ |
769 | | roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, |
770 | | const roaring_bitmap_t *r2); |
771 | | |
772 | | /** |
773 | | * (For expert users who seek high performance.) |
774 | | * |
775 | | * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2 |
776 | | */ |
777 | | void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, |
778 | | const roaring_bitmap_t *r2); |
779 | | |
780 | | /** |
781 | | * Compute the negation of the bitmap in the interval [range_start, range_end). |
782 | | * The number of negated values is range_end - range_start. |
783 | | * Areas outside the range are passed through unchanged. |
784 | | */ |
785 | | roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, |
786 | | uint64_t range_start, uint64_t range_end); |
787 | | |
788 | | /** |
789 | | * compute (in place) the negation of the roaring bitmap within a specified |
790 | | * interval: [range_start, range_end). The number of negated values is |
791 | | * range_end - range_start. |
792 | | * Areas outside the range are passed through unchanged. |
793 | | */ |
794 | | void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, |
795 | | uint64_t range_end); |
796 | | |
797 | | /** |
798 | | * Selects the element at index 'rank' where the smallest element is at index 0. |
799 | | * If the size of the roaring bitmap is strictly greater than rank, then this |
800 | | * function returns true and sets element to the element of given rank. |
801 | | * Otherwise, it returns false. |
802 | | */ |
803 | | bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, |
804 | | uint32_t *element); |
805 | | |
806 | | /** |
807 | | * roaring_bitmap_rank returns the number of integers that are smaller or equal |
808 | | * to x. Thus if x is the first element, this function will return 1. If |
809 | | * x is smaller than the smallest element, this function will return 0. |
810 | | * |
811 | | * The indexing convention differs between roaring_bitmap_select and |
812 | | * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value |
813 | | * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking |
814 | | * the smallest value. |
815 | | */ |
816 | | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); |
817 | | |
818 | | /** |
819 | | * roaring_bitmap_rank_many is an `Bulk` version of `roaring_bitmap_rank` |
820 | | * it puts rank value of each element in `[begin .. end)` to `ans[]` |
821 | | * |
822 | | * the values in `[begin .. end)` must be sorted in Ascending order; |
823 | | * Caller is responsible to ensure that there is enough memory allocated, e.g. |
824 | | * |
825 | | * ans = malloc((end-begin) * sizeof(uint64_t)); |
826 | | */ |
827 | | void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t* begin, const uint32_t* end, uint64_t* ans); |
828 | | |
829 | | /** |
830 | | * Returns the index of x in the given roaring bitmap. |
831 | | * If the roaring bitmap doesn't contain x , this function will return -1. |
832 | | * The difference with rank function is that this function will return -1 when x |
833 | | * is not the element of roaring bitmap, but the rank function will return a |
834 | | * non-negative number. |
835 | | */ |
836 | | int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x); |
837 | | |
838 | | /** |
839 | | * Returns the smallest value in the set, or UINT32_MAX if the set is empty. |
840 | | */ |
841 | | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); |
842 | | |
843 | | /** |
844 | | * Returns the greatest value in the set, or 0 if the set is empty. |
845 | | */ |
846 | | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); |
847 | | |
848 | | /** |
849 | | * (For advanced users.) |
850 | | * |
851 | | * Collect statistics about the bitmap, see roaring_types.h for |
852 | | * a description of roaring_statistics_t |
853 | | */ |
854 | | void roaring_bitmap_statistics(const roaring_bitmap_t *r, |
855 | | roaring_statistics_t *stat); |
856 | | |
857 | | /** |
858 | | * Perform internal consistency checks. Returns true if the bitmap is consistent. |
859 | | * |
860 | | * Note that some operations intentionally leave bitmaps in an inconsistent state temporarily, |
861 | | * for example, `roaring_bitmap_lazy_*` functions, until `roaring_bitmap_repair_after_lazy` is called. |
862 | | * |
863 | | * If reason is non-null, it will be set to a string describing the first inconsistency found if any. |
864 | | */ |
865 | | bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason); |
866 | | |
867 | | /********************* |
868 | | * What follows is code use to iterate through values in a roaring bitmap |
869 | | |
870 | | roaring_bitmap_t *r =... |
871 | | roaring_uint32_iterator_t i; |
872 | | roaring_create_iterator(r, &i); |
873 | | while(i.has_value) { |
874 | | printf("value = %d\n", i.current_value); |
875 | | roaring_advance_uint32_iterator(&i); |
876 | | } |
877 | | |
878 | | Obviously, if you modify the underlying bitmap, the iterator |
879 | | becomes invalid. So don't. |
880 | | */ |
881 | | |
882 | | typedef struct roaring_uint32_iterator_s { |
883 | | const roaring_bitmap_t *parent; // owner |
884 | | int32_t container_index; // point to the current container index |
885 | | int32_t in_container_index; // for bitset and array container, this is out |
886 | | // index |
887 | | int32_t run_index; // for run container, this points at the run |
888 | | |
889 | | uint32_t current_value; |
890 | | bool has_value; |
891 | | |
892 | | const ROARING_CONTAINER_T |
893 | | *container; // should be: |
894 | | // parent->high_low_container.containers[container_index]; |
895 | | uint8_t typecode; // should be: |
896 | | // parent->high_low_container.typecodes[container_index]; |
897 | | uint32_t highbits; // should be: |
898 | | // parent->high_low_container.keys[container_index]) << |
899 | | // 16; |
900 | | |
901 | | } roaring_uint32_iterator_t; |
902 | | |
903 | | /** |
904 | | * Initialize an iterator object that can be used to iterate through the |
905 | | * values. If there is a value, then this iterator points to the first value |
906 | | * and `it->has_value` is true. The value is in `it->current_value`. |
907 | | */ |
908 | | void roaring_init_iterator(const roaring_bitmap_t *r, |
909 | | roaring_uint32_iterator_t *newit); |
910 | | |
911 | | /** |
912 | | * Initialize an iterator object that can be used to iterate through the |
913 | | * values. If there is a value, then this iterator points to the last value |
914 | | * and `it->has_value` is true. The value is in `it->current_value`. |
915 | | */ |
916 | | void roaring_init_iterator_last(const roaring_bitmap_t *r, |
917 | | roaring_uint32_iterator_t *newit); |
918 | | |
919 | | /** |
920 | | * Create an iterator object that can be used to iterate through the values. |
921 | | * Caller is responsible for calling `roaring_free_iterator()`. |
922 | | * |
923 | | * The iterator is initialized (this function calls `roaring_init_iterator()`) |
924 | | * If there is a value, then this iterator points to the first value and |
925 | | * `it->has_value` is true. The value is in `it->current_value`. |
926 | | */ |
927 | | roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); |
928 | | |
929 | | /** |
930 | | * Advance the iterator. If there is a new value, then `it->has_value` is true. |
931 | | * The new value is in `it->current_value`. Values are traversed in increasing |
932 | | * orders. For convenience, returns `it->has_value`. |
933 | | */ |
934 | | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); |
935 | | |
936 | | /** |
937 | | * Decrement the iterator. If there's a new value, then `it->has_value` is true. |
938 | | * The new value is in `it->current_value`. Values are traversed in decreasing |
939 | | * order. For convenience, returns `it->has_value`. |
940 | | */ |
941 | | bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); |
942 | | |
943 | | /** |
944 | | * Move the iterator to the first value >= `val`. If there is a such a value, |
945 | | * then `it->has_value` is true. The new value is in `it->current_value`. |
946 | | * For convenience, returns `it->has_value`. |
947 | | */ |
948 | | bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, |
949 | | uint32_t val); |
950 | | |
951 | | /** |
952 | | * Creates a copy of an iterator. |
953 | | * Caller must free it. |
954 | | */ |
955 | | roaring_uint32_iterator_t *roaring_copy_uint32_iterator( |
956 | | const roaring_uint32_iterator_t *it); |
957 | | |
958 | | /** |
959 | | * Free memory following `roaring_create_iterator()` |
960 | | */ |
961 | | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); |
962 | | |
963 | | /* |
964 | | * Reads next ${count} values from iterator into user-supplied ${buf}. |
965 | | * Returns the number of read elements. |
966 | | * This number can be smaller than ${count}, which means that iterator is drained. |
967 | | * |
968 | | * This function satisfies semantics of iteration and can be used together with |
969 | | * other iterator functions. |
970 | | * - first value is copied from ${it}->current_value |
971 | | * - after function returns, iterator is positioned at the next element |
972 | | */ |
973 | | uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, |
974 | | uint32_t* buf, uint32_t count); |
975 | | |
976 | | #ifdef __cplusplus |
977 | | } } } // extern "C" { namespace roaring { namespace api { |
978 | | #endif |
979 | | |
980 | | #endif /* ROARING_H */ |
981 | | |
982 | | #ifdef __cplusplus |
983 | | /** |
984 | | * Best practices for C++ headers is to avoid polluting global scope. |
985 | | * But for C compatibility when just `roaring.h` is included building as |
986 | | * C++, default to global access for the C public API. |
987 | | * |
988 | | * BUT when `roaring.hh` is included instead, it sets this flag. That way |
989 | | * explicit namespacing must be used to get the C functions. |
990 | | * |
991 | | * This is outside the include guard so that if you include BOTH headers, |
992 | | * the order won't matter; you still get the global definitions. |
993 | | */ |
994 | | #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) |
995 | | using namespace ::roaring::api; |
996 | | #endif |
997 | | #endif |