Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improves the documentation of roaring_bitmap_internal_validate #510

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,15 @@ int main() {
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = malloc(expectedsize);
roaring_bitmap_portable_serialize(r1, serializedbytes);
// For additional safety, you may replace roaring_bitmap_portable_deserialize by
// roaring_bitmap_portable_deserialize_safe.
// Note: it is expected that the input follows the specification
// https://github.com/RoaringBitmap/RoaringFormatSpec
// otherwise the result may be unusable.
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
if (!roaring_bitmap_internal_validate(t, &reason)) {
return EXIT_FAILURE;
}
assert(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);
// we can also check whether there is a bitmap at a memory location without
Expand All @@ -341,6 +344,18 @@ int main() {
expectedsize); // sizeofbitmap would be zero if no bitmap were found
// we can also read the bitmap "safely" by specifying a byte size limit:
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
printf("safely deserialized invalid bitmap: %s\n", reason_failure);
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// It is still necessary for the content of seriallizedbytes to follow
// the standard: https://github.com/RoaringBitmap/RoaringFormatSpec
// This is guaranted when calling 'roaring_bitmap_portable_deserialize'.
Expand Down
26 changes: 23 additions & 3 deletions fuzz/croaring_fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,35 @@
////////////////////////////////////////////////////////////////////////////////

#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>

#include "roaring/roaring.h"

int LLVMFuzzerTestOneInput(const char *data, size_t size) {
// We test that deserialization never fails.
roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);
if(bitmap) {
roaring_bitmap_t *bitmap =
roaring_bitmap_portable_deserialize_safe(data, size);
if (bitmap) {
// The bitmap may not be usable if it does not follow the specification.
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (roaring_bitmap_internal_validate(t, &reason_failure)) {
// the bitmap is ok!
uint32_t cardinality = roaring_bitmap_get_cardinality(t);

for (uint32_t i = 100; i < 1000; i++) {
if (!roaring_bitmap_contains(t, i)) {
cardinality++;
roaring_bitmap_add(r1, i);
}
}
uint32_t new_cardinality = roaring_bitmap_get_cardinality(t);
if (cardinality != new_cardinality) {
printf("bug\n");
exit(1);
}
}
roaring_bitmap_free(bitmap);
}
return 0;
Expand Down
6 changes: 1 addition & 5 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,7 @@ void array_container_grow(array_container_t *container, int32_t min,
container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
}

// handle the case where realloc fails
if (container->array == NULL) {
fprintf(stderr, "could not allocate memory\n");
}
assert(container->array != NULL);
// if realloc fails, we have container->array == NULL.
}

/* Copy one container into another. We assume that they are distinct. */
Expand Down
6 changes: 1 addition & 5 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,7 @@ void run_container_grow(run_container_t *run, int32_t min, bool copy) {
}
run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
}
// handle the case where realloc fails
if (run->runs == NULL) {
fprintf(stderr, "could not allocate memory\n");
}
assert(run->runs != NULL);
// We may have run->runs == NULL.
}

/* copy one container into another */
Expand Down
5 changes: 4 additions & 1 deletion src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -1505,7 +1505,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size
}
size_t bytesread;
bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
if(is_ok) assert(bytesread <= maxbytes);
if (!is_ok) {
roaring_free(ans);
return NULL;
}
roaring_bitmap_set_copy_on_write(ans, false);
if (!is_ok) {
roaring_free(ans);
Expand Down
39 changes: 18 additions & 21 deletions src/roaring_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,24 +694,23 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
return bytestotal;
}

// this function populates answer from the content of buf (reading up to maxbytes bytes).
// This function populates answer from the content of buf (reading up to maxbytes bytes).
// The function returns false if a properly serialized bitmap cannot be found.
// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
// If it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
//
// This function is endian-sensitive.
bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
*readbytes = sizeof(int32_t);// for cookie
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
// Ran out of bytes while reading first 4 bytes.
Dr-Emann marked this conversation as resolved.
Show resolved Hide resolved
return false;
}
uint32_t cookie;
memcpy(&cookie, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
cookie);
// "I failed to find one of the right cookies.
return false;
}
int32_t size;
Expand All @@ -721,20 +720,18 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
else {
*readbytes += sizeof(int32_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
// Ran out of bytes while reading second part of the cookie.
return false;
}
memcpy(&size, buf, sizeof(int32_t));
buf += sizeof(uint32_t);
}
if (size < 0) {
fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
size);
// You cannot have a negative number of containers, the data must be corrupted.
return false; // logically impossible
Dr-Emann marked this conversation as resolved.
Show resolved Hide resolved
}
if (size > (1<<16)) {
fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
size);
// You cannot have so many containers, the data must be corrupted.
return false; // logically impossible
}
const char *bitmapOfRunContainers = NULL;
Expand All @@ -743,7 +740,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
int32_t s = (size + 7) / 8;
*readbytes += s;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
// Ran out of bytes while reading run bitmap.
return false;
}
bitmapOfRunContainers = buf;
Expand All @@ -753,14 +750,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz

*readbytes += size * 2 * sizeof(uint16_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
// Ran out of bytes while reading key-cardinality array.
return false;
}
buf += size * 2 * sizeof(uint16_t);

bool is_ok = ra_init_with_capacity(answer, size);
if (!is_ok) {
fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
// Failed to allocate memory for roaring array. Bailing out.
return false;
}

Expand All @@ -772,7 +769,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
*readbytes += size * 4;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Ran out of bytes while reading offsets.\n");
// Ran out of bytes while reading offsets.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -798,14 +795,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {
fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
// Running out of bytes while reading a bitset container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read
bitset_container_t *c = bitset_container_create();
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
// Failed to allocate memory for a bitset container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -817,7 +814,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
// we check that the read is allowed
*readbytes += sizeof(uint16_t);
if(*readbytes > maxbytes) {
fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
// Running out of bytes while reading a run container (header).
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -826,15 +823,15 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = n_runs * sizeof(rle16_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Running out of bytes while reading a run container.\n");
// Running out of bytes while reading a run container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read

run_container_t *c = run_container_create();
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for a run container.\n");
// Failed to allocate memory for a run container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand All @@ -847,15 +844,15 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
size_t containersize = thiscard * sizeof(uint16_t);
*readbytes += containersize;
if(*readbytes > maxbytes) {// data is corrupted?
fprintf(stderr, "Running out of bytes while reading an array container.\n");
// Running out of bytes while reading an array container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
// it is now safe to read
array_container_t *c =
array_container_create_given_capacity(thiscard);
if(c == NULL) {// memory allocation failure
fprintf(stderr, "Failed to allocate memory for an array container.\n");
// Failed to allocate memory for an array container.
ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
return false;
}
Expand Down
19 changes: 18 additions & 1 deletion tests/c_example1.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,12 @@ int main() {
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = (char*)malloc(expectedsize);
roaring_bitmap_portable_serialize(r1, serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
if (!roaring_bitmap_internal_validate(t, &reason)) {
return EXIT_FAILURE;
}
assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);
// we can also check whether there is a bitmap at a memory location without
Expand All @@ -98,6 +103,18 @@ int main() {
expectedsize); // sizeofbitmap would be zero if no bitmap were found
// we can also read the bitmap "safely" by specifying a byte size limit:
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
// We can validate the bitmap we recovered to make sure it is proper.
const char *reason_failure = NULL;
if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
printf("safely deserialized invalid bitmap: %s\n", reason_failure);
// We could clear any memory and close any file here.
return EXIT_FAILURE;
}
assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal
roaring_bitmap_free(t);

Expand Down
Loading