Add validation functions and fixup sanitizer build (#73)

* Make sure that ASAN is linked in When UBSan is enabled the equivalent flag is already added to the link options and the UBSan runtime gets linked in, but when enabling ASAN the build fails because it can't find a lot of symbols. The fix is to link it if you use it. * Remove unnecessary compile definition ASAN_OPTIONS is a runtime environment variable and doesn't affect compilation (unless exported as the symbol __asan_default_options, which it isn't here). Also note that enabling detect_leaks breaks macOS since AppleClang doesn't support this (though the LLVM from brew does). * Fix leaks spotted by the sanitizers * Fix Makefile for unit unit makes use of streamvbyte_isadetection.h which is in an internal header not in the public API. * Typo copypasta fix * Add stream validation functions These are useful when loading untrusted data since it's typically trivial to change the uncompressed size that's stored alongside the encoded data. Doing so can cause the decode functions to read past the end of the input data, which at best generates bad output and at worst causes crashes if it happens to read into an unmapped page. * Add missing newlines from print statements These presumably always passed so were never printed. Also include the iteration for easier debugging. * Silence MSVC warnings about possible loss of data The encode functions return size_ts so we should match that. * Change data initialisation in unit tests All the data would be about the same value [gap-1, gap+6] and wouldn't border on a byte width meaning that all of the keys would be the same and hence wouldn't catch bugs when reading the wrong keys. Instead use the gap to space the elements apart. * Improve performance of streamvbyte_validate_stream AppleClang 16 doesn't appear to be clever enough to transform the loop into a branchless one, so help it out by adding a loop that it knows how to unroll. Also do the same for streamvbyte_validate_stream_0124 though I haven't tested the performance of that. * Add NEON implementation of validate Nothing fancy, just processing 4 keys per vector. In both micro- and macro-benchmarks this performs at basically the same speed as the loop added in the previous commit (tested with clang), both of which are significantly faster than the original version. * Document the validate functions in the README
fast-pack · Dec 9, 2024 · 0934b51 · 0934b51
1 parent dd95854
commit 0934b51
Show file tree

Hide file tree

Showing 10 changed files with 186 additions and 14 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -86,7 +86,11 @@ if(STREAMVBYTE_SANITIZE)
         -fno-omit-frame-pointer
         -fno-sanitize-recover=all
     )
-    add_compile_definitions(ASAN_OPTIONS=detect_leaks=1)
+    add_link_options(
+        -fsanitize=address
+        -fno-omit-frame-pointer
+        -fno-sanitize-recover=all
+    )
 endif()
 
 if(MSVC)

diff --git a/Makefile b/Makefile
@@ -83,10 +83,10 @@ writeseq: ./tests/writeseq.c    $(HEADERS) $(OBJECTS)
 	$(CC) $(CFLAGS) -o writeseq ./tests/writeseq.c -Iinclude  $(OBJECTS)
 
 unit: ./tests/unit.c    $(HEADERS) $(OBJECTS)
-	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude  $(OBJECTS)
+	$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude -Isrc  $(OBJECTS)
 
 dynunit: ./tests/unit.c    $(HEADERS) $(LIBNAME) $(LNLIBNAME)
-	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude  -L. -lstreamvbyte
+	$(CC) $(CFLAGS) -o dynunit ./tests/unit.c -Iinclude -Isrc  -L. -lstreamvbyte
 
 clean:
 	rm -f unit *.o $(LIBNAME) $(LNLIBNAME)  example shuffle_tables perf writeseq dynunit
diff --git a/README.md b/README.md
@@ -64,6 +64,19 @@ information along with the compressed stream.
 During decoding, the library may read up to `STREAMVBYTE_PADDING` extra bytes
 from the input buffer (these bytes are read but never used).
 
+To verify that the expected size of a stream is correct you may validate it before
+decoding:
+```C
+// compressedbuffer, compsize, recovdata, N are as above
+if (streamvbyte_validate_stream(compressedbuffer, compsize, N)) {
+    // the stream is safe to decode
+    streamvbyte_decode(compressedbuffer, recovdata, N);
+} else {
+    // there's a mismatch between the expected size of the data (N) and the contents of
+    // the stream, so performing a decode is unsafe since the behaviour is undefined
+}
+```
+
 
 
 

diff --git a/include/streamvbyte.h b/include/streamvbyte.h
@@ -1,6 +1,7 @@
 #ifndef INCLUDE_STREAMVBYTE_H_
 #define INCLUDE_STREAMVBYTE_H_
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -10,7 +11,7 @@ extern "C" {
 
 #define STREAMVBYTE_PADDING 16
 
-// Encode an array of a given length read from in to bout in varint format.
+// Encode an array of a given length read from in to out in varint format.
 // Returns the number of bytes written.
 // The number of values being stored (length) is not encoded in the compressed stream,
 // the caller is responsible for keeping a record of this length.
@@ -66,6 +67,17 @@ size_t streamvbyte_decode(const uint8_t* in, uint32_t* out, uint32_t length);
 // streamvbyte_encode_0124.
 size_t streamvbyte_decode_0124(const uint8_t* in, uint32_t* out, uint32_t length);
 
+// Validate an encoded stream.
+// This can be used to validate that data received from an untrusted source (disk, network,
+// etc...) has a valid length stored alongside it.
+// "inLength" is the size of the encoded data "in", and "outLength" is the expected number
+// of integers that were compressed.
+bool streamvbyte_validate_stream(const uint8_t* in, size_t inLength, uint32_t outLength);
+
+// Same as streamvbyte_validate_stream but is meant to be used for streams encoded with
+// streamvbyte_encode_0124.
+bool streamvbyte_validate_stream_0124(const uint8_t* in, size_t inLength, uint32_t outLength);
+
 #ifdef __cplusplus
 }
 #endif

diff --git a/include/streamvbytedelta.h b/include/streamvbytedelta.h
@@ -8,7 +8,7 @@
 extern "C" {
 #endif
 
-// Encode an array of a given length read from in to bout in StreamVByte format.
+// Encode an array of a given length read from in to out in StreamVByte format.
 // Returns the number of bytes written.
 // The number of values being stored (length) is not encoded in the compressed stream,
 // the caller is responsible for keeping a record of this length. The pointer "in" should

diff --git a/src/streamvbyte_0124_decode.c b/src/streamvbyte_0124_decode.c
@@ -181,5 +181,49 @@ size_t streamvbyte_decode_0124(const uint8_t *in, uint32_t *out, uint32_t count)
 #endif
 
   return (size_t)(svb_decode_scalar(out, keyPtr, dataPtr, count) - in);
+}
+
+bool streamvbyte_validate_stream_0124(const uint8_t *in, size_t inCount,
+                                      uint32_t outCount) {
+  if (inCount == 0 || outCount == 0)
+    return inCount == outCount;
+
+  // 2-bits per key (rounded up)
+  // Note that we don't add to outCount in case it overflows
+  uint32_t keyLen = outCount / 4;
+  if (outCount & 3)
+    keyLen++;
+
+  // Check that there's enough space for the keys
+  if (keyLen > inCount)
+    return false;
+
+  // Accumulate the key sizes in a wider type to avoid overflow
+  const uint8_t *keyPtr = in;
+  uint64_t encodedSize = 0;
+
+  // Give the compiler a hint that it can avoid branches in the inner loop
+  for (uint32_t c = 0; c < outCount / 4; c++) {
+    uint32_t key = *keyPtr++;
+    for (uint8_t shift = 0; shift < 8; shift += 2) {
+      const uint8_t code = (key >> shift) & 0x3;
+      encodedSize += (1 << code) >> 1;
+    }
+  }
+  outCount &= 3;
+
+  // Process the remainder one at a time
+  uint8_t shift = 0;
+  uint32_t key = *keyPtr++;
+  for (uint32_t c = 0; c < outCount; c++) {
+    if (shift == 8) {
+      shift = 0;
+      key = *keyPtr++;
+    }
+    const uint8_t code = (key >> shift) & 0x3;
+    encodedSize += (1 << code) >> 1;
+    shift += 2;
+  }
 
+  return encodedSize == inCount - keyLen;
 }
diff --git a/src/streamvbyte_arm_decode.c b/src/streamvbyte_arm_decode.c
@@ -51,4 +51,32 @@ static const uint8_t *svb_decode_vector(uint32_t *out, const uint8_t *keyPtr, co
 
   return dataPtr;
 }
+
+static uint64_t svb_validate_vector(const uint8_t **keyPtrPtr,
+                                    uint32_t *countPtr) {
+  // Reduce the count by how many we'll process
+  const uint32_t count = *countPtr & ~7U;
+  const uint8_t *keyPtr = *keyPtrPtr;
+  *countPtr &= 7;
+  *keyPtrPtr += count / 4;
+
+  // Deal with each of the 4 keys in a separate lane
+  const int32x4_t shifts = {0, -2, -4, -6};
+  const uint32x4_t mask = vdupq_n_u32(3);
+  uint32x4_t acc0 = vdupq_n_u32(0);
+  uint32x4_t acc1 = vdupq_n_u32(0);
+
+  // Unrolling more than twice doesn't seem to improve performance
+  for (uint32_t c = 0; c < count; c += 8) {
+    uint32x4_t shifted0 = vshlq_u32(vdupq_n_u32(*keyPtr++), shifts);
+    acc0 = vaddq_u32(acc0, vandq_u32(shifted0, mask));
+    uint32x4_t shifted1 = vshlq_u32(vdupq_n_u32(*keyPtr++), shifts);
+    acc1 = vaddq_u32(acc1, vandq_u32(shifted1, mask));
+  }
+
+  // Accumulate the sums and add the +1 for each element (count)
+  uint64x2_t sum0 = vpaddlq_u32(acc0);
+  uint64x2_t sum1 = vpaddlq_u32(acc1);
+  return sum0[0] + sum0[1] + sum1[0] + sum1[1] + count;
+}
 #endif
diff --git a/src/streamvbyte_decode.c b/src/streamvbyte_decode.c
@@ -84,5 +84,53 @@ size_t streamvbyte_decode(const uint8_t *in, uint32_t *out, uint32_t count) {
 #endif
 
   return (size_t)(svb_decode_scalar(out, keyPtr, dataPtr, count) - in);
+}
+
+bool streamvbyte_validate_stream(const uint8_t *in, size_t inCount,
+                                 uint32_t outCount) {
+  if (inCount == 0 || outCount == 0)
+    return inCount == outCount;
+
+  // 2-bits per key (rounded up)
+  // Note that we don't add to outCount in case it overflows
+  uint32_t keyLen = outCount / 4;
+  if (outCount & 3)
+    keyLen++;
+
+  // Check that there's enough space for the keys
+  if (keyLen > inCount)
+    return false;
+
+  // Accumulate the key sizes in a wider type to avoid overflow
+  const uint8_t *keyPtr = in;
+  uint64_t encodedSize = 0;
+
+#if defined(__ARM_NEON__)
+  encodedSize = svb_validate_vector(&keyPtr, &outCount);
+#endif
+
+  // Give the compiler a hint that it can avoid branches in the inner loop
+  for (uint32_t c = 0; c < outCount / 4; c++) {
+    uint32_t key = *keyPtr++;
+    for (uint8_t shift = 0; shift < 8; shift += 2) {
+      const uint8_t code = (key >> shift) & 0x3;
+      encodedSize += code + 1;
+    }
+  }
+  outCount &= 3;
+
+  // Process the remainder one at a time
+  uint8_t shift = 0;
+  uint32_t key = *keyPtr++;
+  for (uint32_t c = 0; c < outCount; c++) {
+    if (shift == 8) {
+      shift = 0;
+      key = *keyPtr++;
+    }
+    const uint8_t code = (key >> shift) & 0x3;
+    encodedSize += code + 1;
+    shift += 2;
+  }
 
+  return encodedSize == inCount - keyLen;
 }
diff --git a/src/streamvbyte_encode.c b/src/streamvbyte_encode.c
@@ -108,7 +108,7 @@ size_t streamvbyte_compressedbytes_0124(const uint32_t* in, uint32_t length) {
 }
 
 
-// Encode an array of a given length read from in to bout in streamvbyte format.
+// Encode an array of a given length read from in to out in streamvbyte format.
 // Returns the number of bytes written.
 size_t streamvbyte_encode(const uint32_t *in, uint32_t count, uint8_t *out) {
 #ifdef STREAMVBYTE_X64

diff --git a/tests/unit.c b/tests/unit.c
@@ -47,6 +47,8 @@ static int zigzagtests(void) {
       }
     }
 
+    free(deltadataout);
+    free(deltadataback);
     free(databack);
     free(dataout);
     free(datain);
@@ -96,11 +98,18 @@ static int basictests(void) {
 
   for (uint32_t length = 0; length <= N;) {
     for (uint32_t gap = 1; gap <= 387420489; gap *= 3) {
-      for (uint32_t k = 0; k < length; ++k)
-        datain[k] = gap - 1 + ((uint32_t)rand() % 8); // sometimes start with zero
+      datain[0] = (uint32_t)rand() % 8; // sometimes start with zero
+      for (uint32_t k = 1; k < length; ++k)
+        datain[k] = datain[k - 1] + gap - 1 + (uint32_t)rand() % 8;
 
       // Default encoding: 1,2,3,4 bytes per value
       size_t compsize = streamvbyte_encode(datain, length, compressedbuffer);
+      if (!streamvbyte_validate_stream(compressedbuffer, compsize, length)) {
+        printf("[streamvbyte_validate_stream] code is buggy length=%d gap=%d: compsize=%d\n",
+               (int)length, (int)gap, (int)compsize);
+        return -1;
+      }
+
       size_t usedbytes = streamvbyte_decode(compressedbuffer, recovdata, length);
       if (compsize != usedbytes) {
         printf("[streamvbyte_decode] code is buggy length=%d gap=%d: compsize=%d != "
@@ -118,6 +127,12 @@ static int basictests(void) {
 
       // Alternative encoding: 0,1,2,4 bytes per value
       compsize = streamvbyte_encode_0124(datain, length, compressedbuffer);
+      if (!streamvbyte_validate_stream_0124(compressedbuffer, compsize, length)) {
+        printf("[streamvbyte_validate_stream_0124] code is buggy length=%d gap=%d: compsize=%d\n",
+               (int)length, (int)gap, (int)compsize);
+        return -1;
+      }
+
       usedbytes = streamvbyte_decode_0124(compressedbuffer, recovdata, length);
       if (compsize != usedbytes) {
         printf("[streamvbyte_decode_0124] code is buggy length=%d gap=%d: compsize=%d != "
@@ -197,29 +212,37 @@ static int aqrittests(void) {
     const int length = 4;
 
     size_t compsize = streamvbyte_encode((uint32_t *)in, length, compressedbuffer);
-    size_t usedbytes = streamvbyte_decode(compressedbuffer, (uint32_t *)recovdata, length);
+    if (!streamvbyte_validate_stream(compressedbuffer, compsize, length)) {
+      printf("[streamvbyte_validate_stream] code is buggy i=%i\n", i);
+      return -1;
+    }
 
+    size_t usedbytes = streamvbyte_decode(compressedbuffer, (uint32_t *)recovdata, length);
     if (compsize != usedbytes) {
-      printf("[streamvbyte_decode] code is buggy");
+      printf("[streamvbyte_decode] code is buggy i=%i\n", i);
       return -1;
     }
     for (size_t k = 0; k < length * sizeof(uint32_t); ++k) {
       if (recovdata[k] != in[k]) {
-        printf("[streamvbyte_decode] code is buggy");
+        printf("[streamvbyte_decode] code is buggy i=%i\n", i);
         return -1;
       }
     }
 
     compsize = streamvbyte_encode_0124((uint32_t *)in, length, compressedbuffer);
-    usedbytes = streamvbyte_decode_0124(compressedbuffer, (uint32_t *)recovdata, length);
+    if (!streamvbyte_validate_stream_0124(compressedbuffer, compsize, length)) {
+      printf("[streamvbyte_validate_stream_0124] code is buggy i=%i\n", i);
+      return -1;
+    }
 
+    usedbytes = streamvbyte_decode_0124(compressedbuffer, (uint32_t *)recovdata, length);
     if (compsize != usedbytes) {
-      printf("[streamvbyte_decode_0124] code is buggy");
+      printf("[streamvbyte_decode_0124] code is buggy i=%i\n", i);
       return -1;
     }
     for (size_t k = 0; k < length * sizeof(uint32_t); ++k) {
       if (recovdata[k] != in[k]) {
-        printf("[streamvbyte_decode_0124] code is buggy");
+        printf("[streamvbyte_decode_0124] code is buggy i=%i\n", i);
         return -1;
       }
     }