Skip to content

Commit

Permalink
Improves the documentation of roaring_bitmap_internal_validate (#510)
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire authored Sep 26, 2023
1 parent a103d38 commit 8dc3881
Show file tree
Hide file tree
Showing 8 changed files with 620 additions and 553 deletions.
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,15 @@ int main() {
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = malloc(expectedsize);
roaring_bitmap_portable_serialize(r1, serializedbytes);
// For additional safety, you may replace roaring_bitmap_portable_deserialize by
// roaring_bitmap_portable_deserialize_safe.
// Note: it is expected that the input follows the specification
// https://github.com/RoaringBitmap/RoaringFormatSpec
// otherwise the result may be unusable.
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
if (!roaring_bitmap_internal_validate(t, &reason)) {
return EXIT_FAILURE;
}
assert(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);
// we can also check whether there is a bitmap at a memory location without
Expand All @@ -341,6 +344,18 @@ int main() {
expectedsize); // sizeofbitmap would be zero if no bitmap were found
// we can also read the bitmap "safely" by specifying a byte size limit:
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
printf("safely deserialized invalid bitmap: %s\n", reason_failure);
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// It is still necessary for the content of seriallizedbytes to follow
// the standard: https://github.com/RoaringBitmap/RoaringFormatSpec
// This is guaranted when calling 'roaring_bitmap_portable_deserialize'.
Expand Down
26 changes: 23 additions & 3 deletions fuzz/croaring_fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,35 @@
////////////////////////////////////////////////////////////////////////////////

#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>

#include "roaring/roaring.h"

int LLVMFuzzerTestOneInput(const char *data, size_t size) {
// We test that deserialization never fails.
roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);
if(bitmap) {
roaring_bitmap_t *bitmap =
roaring_bitmap_portable_deserialize_safe(data, size);
if (bitmap) {
// The bitmap may not be usable if it does not follow the specification.
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (roaring_bitmap_internal_validate(t, &reason_failure)) {
// the bitmap is ok!
uint32_t cardinality = roaring_bitmap_get_cardinality(t);

for (uint32_t i = 100; i < 1000; i++) {
if (!roaring_bitmap_contains(t, i)) {
cardinality++;
roaring_bitmap_add(r1, i);
}
}
uint32_t new_cardinality = roaring_bitmap_get_cardinality(t);
if (cardinality != new_cardinality) {
printf("bug\n");
exit(1);
}
}
roaring_bitmap_free(bitmap);
}
return 0;
Expand Down
6 changes: 1 addition & 5 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,7 @@ void array_container_grow(array_container_t *container, int32_t min,
container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
}

// handle the case where realloc fails
if (container->array == NULL) {
fprintf(stderr, "could not allocate memory\n");
}
assert(container->array != NULL);
// if realloc fails, we have container->array == NULL.
}

/* Copy one container into another. We assume that they are distinct. */
Expand Down
6 changes: 1 addition & 5 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,7 @@ void run_container_grow(run_container_t *run, int32_t min, bool copy) {
}
run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
}
// handle the case where realloc fails
if (run->runs == NULL) {
fprintf(stderr, "could not allocate memory\n");
}
assert(run->runs != NULL);
// We may have run->runs == NULL.
}

/* copy one container into another */
Expand Down
5 changes: 4 additions & 1 deletion src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -1505,7 +1505,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size
}
size_t bytesread;
bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
if(is_ok) assert(bytesread <= maxbytes);
if (!is_ok) {
roaring_free(ans);
return NULL;
}
roaring_bitmap_set_copy_on_write(ans, false);
if (!is_ok) {
roaring_free(ans);
Expand Down
45 changes: 21 additions & 24 deletions src/roaring_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
buf += sizeof(uint32_t);
}
if (size > (1<<16)) {
return 0; // logically impossible
return 0;
}
char *bitmapOfRunContainers = NULL;
bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
Expand Down Expand Up @@ -694,24 +694,23 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
return bytestotal;
}

// this function populates answer from the content of buf (reading up to maxbytes bytes).
// This function populates answer from the content of buf (reading up to maxbytes bytes).
// The function returns false if a properly serialized bitmap cannot be found.
// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
// If it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
//
// This function is endian-sensitive.
bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
*readbytes = sizeof(int32_t);// for cookie
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
// Ran out of bytes while reading first 4 bytes.
return false;
}
uint32_t cookie;
memcpy(&cookie, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
cookie);
// "I failed to find one of the right cookies.
return false;
}
int32_t size;
Expand All @@ -721,29 +720,27 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
else {
*readbytes += sizeof(int32_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
// Ran out of bytes while reading second part of the cookie.
return false;
}
memcpy(&size, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
}
if (size < 0) {
fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
size);
return false; // logically impossible
// You cannot have a negative number of containers, the data must be corrupted.
return false;
}
if (size > (1<<16)) {
fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
size);
return false; // logically impossible
// You cannot have so many containers, the data must be corrupted.
return false;
}
const char *bitmapOfRunContainers = NULL;
bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
if (hasrun) {
int32_t s = (size + 7) / 8;
*readbytes += s;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
// Ran out of bytes while reading run bitmap.
return false;
}
bitmapOfRunContainers = buf;
Expand All @@ -753,14 +750,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz

*readbytes += size * 2 * sizeof(uint16_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
// Ran out of bytes while reading key-cardinality array.
return false;
}
buf += size * 2 * sizeof(uint16_t);

bool is_ok = ra_init_with_capacity(answer, size);
if (!is_ok) {
fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
// Failed to allocate memory for roaring array. Bailing out.
return false;
}

Expand All @@ -772,7 +769,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
*readbytes += size * 4;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Ran out of bytes while reading offsets.\n");
// Ran out of bytes while reading offsets.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -798,14 +795,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {
fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
// Running out of bytes while reading a bitset container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read
bitset_container_t *c = bitset_container_create();
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
// Failed to allocate memory for a bitset container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -817,7 +814,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
// we check that the read is allowed
*readbytes += sizeof(uint16_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
// Running out of bytes while reading a run container (header).
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -826,15 +823,15 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = n_runs * sizeof(rle16_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Running out of bytes while reading a run container.\n");
// Running out of bytes while reading a run container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read

run_container_t *c = run_container_create();
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for a run container.\n");
// Failed to allocate memory for a run container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -847,15 +844,15 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = thiscard * sizeof(uint16_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Running out of bytes while reading an array container.\n");
// Running out of bytes while reading an array container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read
array_container_t *c =
array_container_create_given_capacity(thiscard);
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for an array container.\n");
// Failed to allocate memory for an array container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand Down
19 changes: 18 additions & 1 deletion tests/c_example1.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ int main() {
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = (char*)malloc(expectedsize);
roaring_bitmap_portable_serialize(r1, serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
if (!roaring_bitmap_internal_validate(t, &reason)) {
return EXIT_FAILURE;
}
assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);
// we can also check whether there is a bitmap at a memory location without
Expand All @@ -98,6 +103,18 @@ int main() {
expectedsize); // sizeofbitmap would be zero if no bitmap were found
// we can also read the bitmap "safely" by specifying a byte size limit:
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
printf("safely deserialized invalid bitmap: %s\n", reason_failure);
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);

Expand Down
Loading

0 comments on commit 8dc3881

Please sign in to comment.