From 35e6a049cf11539f1367838ebd47631d1a3bce6a Mon Sep 17 00:00:00 2001 From: l-k- Date: Tue, 28 Nov 2023 23:30:18 -0500 Subject: [PATCH] handle failed checksum --- R/readAxivity.R | 143 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 116 insertions(+), 27 deletions(-) diff --git a/R/readAxivity.R b/R/readAxivity.R index ec52ec7..1f109ec 100755 --- a/R/readAxivity.R +++ b/R/readAxivity.R @@ -4,6 +4,8 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire if (length(configtz) == 0) configtz = desiredtz blockBytes = 512 headerBytes = 1024 + maxAllowedCorruptBlocks = 20 # max number consecutive blocks with a failed checksum that we'll tolerate + # Credits: The original version of this code developed outside GitHub was # contributed by Dr. Evgeny Mirkes (Leicester University, UK) #======================================================================== @@ -75,6 +77,28 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire return(NULL) } + # sampling rate in one of file format U8 at offset 24 + samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE) + + if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum + checksum = sum(readBin(block, n = 256, + integer(), + size = 2, + signed = FALSE, + endian = "little")) + checksum = checksum %% 65536 # 65536 = 2^16; the checksum is calculated as a 16-bit integer + if (checksum != 0) { + # Checksum doesn't match. This means some bits in this block got corrupted. + # We don't know which, so we can't trust this block. We skip it, and impute it later. + rawdata_list = list( + struc = struc, + parameters = parameters, + checksum_pass = FALSE + ) + return(invisible(rawdata_list)) + } + } + idstr = readChar(block, 2, useBytes = TRUE) if (idstr != "AX") { stop("Packet header is incorrect. 
First two characters must be AX.") @@ -114,22 +138,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire } else { battery = 0 } - # sampling rate in one of file format U8 at offset 24 - samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE) - - checksum_pass = TRUE - if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum - # Perform checksum - checksum = sum(readBin(block, n = 256, - integer(), - size = 2, - signed = FALSE, - endian = "little")) - checksum = checksum %% 65536 # equals 2^16 the checksum is calculated on a 16bit integer - if (checksum != 0) { - checksum_pass = FALSE - } - } # offset 25, per documentation: # "top nibble: number of axes, 3=Axyz, 6=Gxyz/Axyz, 9=Gxyz/Axyz/Mxyz; @@ -244,7 +252,7 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire length = blockLength, struc = struc, parameters = parameters, - checksum_pass = checksum_pass, + checksum_pass = TRUE, blockID = blockID ) if (complete) { @@ -302,11 +310,27 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire accrange = bitwShiftR(16, (bitwShiftR(samplerate_dynrange, 6))) version = readBin(block[42], integer(), size = 1, signed = FALSE) #offset 41 - # Read the first data block without data - datas = readDataBlock(fid, complete = FALSE) - if (is.null(datas)) { - stop("Error reading the first data block.") + # Read the first data block without data. + # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number. + is_corrupt = TRUE + for (ii in 1:maxAllowedCorruptBlocks+1) { + datas = readDataBlock(fid, complete = FALSE) + + if (is.null(datas)) { + stop("Error reading the first data block.") + } + + if (datas$checksum_pass) { + is_corrupt = FALSE + break + } + + warning("Skipping corrupt block #", ii) } + if (is_corrupt) { + stop("Error reading file. 
The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt") + } + if (frequency_header != datas$frequency) { warning("Inconsistent value of measurement frequency: there is ", frequency_header, " in header and ", datas$frequency, " in the first data block ") @@ -368,14 +392,47 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire struc = list(0,0L,0) if (end < numDBlocks) { # the end block isn't part of the data we'll read, but its start will be our ending timestamp seek(fid, headerBytes + blockBytes * end, origin = 'start') - endBlock = readDataBlock(fid, struc = struc) + + # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number. + is_corrupt = TRUE + for (ii in end : min(numDBlocks, end+maxAllowedCorruptBlocks+1)) { + endBlock = readDataBlock(fid, struc = struc) + + if (endBlock$checksum_pass) { + is_corrupt = FALSE + break + } + + warning("Skipping corrupt end block #", ii) + } + if (is_corrupt && ii == end+maxAllowedCorruptBlocks+1) { + stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt") + } endTimestamp = as.numeric(endBlock$start) - } else { + } + + if (end == numDBlocks) { # end == numDBlocks, meaning we'll be reading all the remaining blocks. # There is no block #numDBlocks, so we can't get the ending timestamp from the start of that block. - # Instead read the very last block of the file, and project what the ending timestamp should be. - seek(fid, headerBytes + blockBytes * (end-1), origin = 'start') - lastBlock = readDataBlock(fid, struc = struc) + # Instead read the very last block of the file (if the last block is corrupt, find the last non-corrupt one), + # then project what the ending timestamp should be. + + # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number. 
+ is_corrupt = TRUE + for (ii in (end-1) : (end-maxAllowedCorruptBlocks-1)) { + seek(fid, headerBytes + blockBytes * ii, origin = 'start') + lastBlock = readDataBlock(fid, struc = struc) + + if (lastBlock$checksum_pass) { + is_corrupt = FALSE + break + } + + warning("Skipping corrupt end block #", ii) + } + if (is_corrupt) { + stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt") + } # the end timestamp should fall right after the actual very last timestamp of the file endTimestamp = as.numeric(lastBlock$start) + blockLength * step # now pad it generously in case there are gaps in the last block @@ -386,7 +443,23 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire # Reinitiate file and skip header as well as the initial start-1 blocks seek(fid, headerBytes + blockBytes * start, origin = 'start') pos = 1 # position of the first element to complete in data - prevRaw = readDataBlock(fid, struc = struc) + + # Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number. + is_corrupt = TRUE + for (ii in 1:maxAllowedCorruptBlocks+1) { + prevRaw = readDataBlock(fid, struc = struc) + + if (prevRaw$checksum_pass) { + is_corrupt = FALSE + break + } + + warning("Skipping corrupt block #", ii) + } + if (is_corrupt) { + stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt") + } + if (is.null(prevRaw)) { return(invisible(list(header = header, data = NULL))) } @@ -431,6 +504,22 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire } else { # read a new block raw = readDataBlock(fid, struc = struc, parameters = prevRaw$parameters) + if (!raw$checksum_pass) { + # If the checksum doesn't match, we can't trust any of this block's data, + # so we have to completely skip the block. 
+ # Depending on the nature of the faulty block, the data for the time period it represented + # will probably get imputed later, once we encounter a block with a valid checksum. + QClog = rbind(QClog, data.frame(checksum_pass = FALSE, + blockID_current = prevRaw$blockID + 1, # we can end up with several blocks with this ID + blockID_next = prevRaw$blockID + 1, + start = 0, + end = 0, + blockLengthSeconds = 0, + frequency_blockheader = 0, + frequency_observed = 0, + imputed = FALSE)) + next + } if (is.null(raw)) { # this shouldn't happen