From 35e6a049cf11539f1367838ebd47631d1a3bce6a Mon Sep 17 00:00:00 2001
From: l-k- <alenka0000@hotmail.com>
Date: Tue, 28 Nov 2023 23:30:18 -0500
Subject: [PATCH] handle failed checksum

---
 R/readAxivity.R | 143 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 116 insertions(+), 27 deletions(-)

diff --git a/R/readAxivity.R b/R/readAxivity.R
index ec52ec7..1f109ec 100755
--- a/R/readAxivity.R
+++ b/R/readAxivity.R
@@ -4,6 +4,8 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
   if (length(configtz) == 0) configtz = desiredtz
   blockBytes = 512
   headerBytes = 1024
+  maxAllowedCorruptBlocks = 20 # max number consecutive blocks with a failed checksum that we'll tolerate
+
   # Credits: The original version of this code developed outside GitHub was 
   # contributed by Dr. Evgeny Mirkes (Leicester University, UK)
   #========================================================================
@@ -75,6 +77,28 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
       return(NULL)
     }
 
+    # sampling rate in one of file format U8 at offset 24
+    samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)
+
+    if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
+      checksum = sum(readBin(block, n = 256,
+                             integer(),
+                             size = 2,
+                             signed = FALSE,
+                             endian = "little"))
+      checksum = checksum %% 65536 # 65536 = 2^16; the checksum is calculated as a 16-bit integer
+      if (checksum != 0) {
+        # Checksum doesn't match. This means some bits in this block got corrupted.
+        # We don't know which, so we can't trust this block. We skip it, and impute it later.
+        rawdata_list = list(
+          struc = struc,
+          parameters = parameters,
+          checksum_pass = FALSE
+        )
+        return(invisible(rawdata_list))
+      }
+    }
+
     idstr = readChar(block, 2, useBytes = TRUE)
     if (idstr != "AX") {
       stop("Packet header is incorrect. First two characters must be AX.")
@@ -114,22 +138,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
     } else {
       battery = 0
     }
-    # sampling rate in one of file format U8 at offset 24
-    samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)
-
-    checksum_pass = TRUE
-    if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
-      # Perform checksum
-      checksum = sum(readBin(block, n = 256,
-                             integer(),
-                             size = 2,
-                             signed = FALSE,
-                             endian = "little"))
-      checksum = checksum %% 65536 # equals 2^16 the checksum is calculated on a 16bit integer
-      if (checksum != 0) {
-        checksum_pass = FALSE
-      }
-    }
 
     # offset 25, per documentation: 
     # "top nibble: number of axes, 3=Axyz, 6=Gxyz/Axyz, 9=Gxyz/Axyz/Mxyz; 
@@ -244,7 +252,7 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
       length = blockLength,
       struc = struc,
       parameters = parameters,
-      checksum_pass = checksum_pass,
+      checksum_pass = TRUE,
       blockID = blockID
     )
     if (complete) {
@@ -302,11 +310,27 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
     accrange = bitwShiftR(16, (bitwShiftR(samplerate_dynrange, 6)))
     version = readBin(block[42], integer(), size = 1, signed = FALSE) #offset 41
 
-    # Read the first data block without data
-    datas = readDataBlock(fid, complete = FALSE)
-    if (is.null(datas)) {
-      stop("Error reading the first data block.")
+    # Read the first data block without data.
+    # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
+    is_corrupt = TRUE
+    for (ii in 1:maxAllowedCorruptBlocks+1) {
+      datas = readDataBlock(fid, complete = FALSE)
+
+      if (is.null(datas)) {
+        stop("Error reading the first data block.")
+      }
+
+      if (datas$checksum_pass) {
+        is_corrupt = FALSE
+        break
+      }
+
+      warning("Skipping corrupt block #", ii)
     }
+    if (is_corrupt) {
+      stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
+    }
+    
     if (frequency_header != datas$frequency) {
       warning("Inconsistent value of measurement frequency: there is ",
               frequency_header, " in header and ", datas$frequency, " in the first data block ")
@@ -368,14 +392,47 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
   struc = list(0,0L,0)
   if (end < numDBlocks) { # the end block isn't part of the data we'll read, but its start will be our ending timestamp
     seek(fid, headerBytes + blockBytes * end, origin = 'start')
-    endBlock = readDataBlock(fid, struc = struc) 
+
+    # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
+    is_corrupt = TRUE
+    for (ii in end : min(numDBlocks, end+maxAllowedCorruptBlocks+1)) {
+      endBlock = readDataBlock(fid, struc = struc) 
+
+      if (endBlock$checksum_pass) {
+        is_corrupt = FALSE
+        break
+      }
+
+      warning("Skipping corrupt end block #", ii)
+    }
+    if (is_corrupt && ii == end+maxAllowedCorruptBlocks+1) {
+      stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
+    }
     endTimestamp = as.numeric(endBlock$start)
-  } else {
+  }
+
+  if (end == numDBlocks) {
     # end == numDBlocks, meaning we'll be reading all the remaining blocks.
     # There is no block #numDBlocks, so we can't get the ending timestamp from the start of that block.
-    # Instead read the very last block of the file, and project what the ending timestamp should be.
-    seek(fid, headerBytes + blockBytes * (end-1), origin = 'start')
-    lastBlock = readDataBlock(fid, struc = struc) 
+    # Instead read the very last block of the file (if the last block is corrupt, fing the last non-corrupt one),
+    # then project what the ending timestamp should be.
+
+    # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
+    is_corrupt = TRUE
+    for (ii in (end-1) : (end-maxAllowedCorruptBlocks-1)) {
+      seek(fid, headerBytes + blockBytes * ii, origin = 'start')
+      lastBlock = readDataBlock(fid, struc = struc)
+
+      if (lastBlock$checksum_pass) {
+        is_corrupt = FALSE
+        break
+      }
+
+      warning("Skipping corrupt end block #", ii)
+    }
+    if (is_corrupt) {
+      stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
+    }
     # the end timestamp should fall right after the actual very last timestamp of the file
     endTimestamp = as.numeric(lastBlock$start) + blockLength * step
     # now pad it generously in case there are gaps in the last block
@@ -386,7 +443,23 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
   # Reinitiate file and skip header as well as the initial start-1 blocks
   seek(fid, headerBytes + blockBytes * start, origin = 'start')
   pos = 1 # position of the first element to complete in data
-  prevRaw = readDataBlock(fid, struc = struc) 
+
+  # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
+  is_corrupt = TRUE
+  for (ii in 1:maxAllowedCorruptBlocks+1) {
+    prevRaw = readDataBlock(fid, struc = struc) 
+
+    if (prevRaw$checksum_pass) {
+      is_corrupt = FALSE
+      break
+    }
+
+    warning("Skipping corrupt block #", ii)
+  }
+  if (is_corrupt) {
+    stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
+  }
+
   if (is.null(prevRaw)) {
     return(invisible(list(header = header, data = NULL)))
   }
@@ -431,6 +504,22 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
     }
     else { # read a new block
       raw = readDataBlock(fid, struc = struc, parameters = prevRaw$parameters)
+      if (!raw$checksum_pass) {
+        # If the checksum doesn't match, we can't trust any of this block's data,
+        # so we have to completely skip the block.
+        # Depending on the nature of the faulty block, the data for the time period it represented
+        # will probably get imputed later, once we encounter a block with a valid checksum.
+        QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
+                                        blockID_current = prevRaw$blockID + 1, # we can end up with several blocks with this ID
+                                        blockID_next = prevRaw$blockID + 1,
+                                        start = 0,
+                                        end = 0,
+                                        blockLengthSeconds = 0,
+                                        frequency_blockheader = 0,
+                                        frequency_observed = 0,
+                                        imputed = FALSE))
+        next
+      }
 
       if (is.null(raw)) {
         # this shouldn't happen