Skip to content

Commit

Permalink
handle failed checksum
Browse files Browse the repository at this point in the history
  • Loading branch information
l-k- committed Dec 1, 2023
1 parent 42d89d2 commit 998c6bd
Showing 1 changed file with 117 additions and 27 deletions.
144 changes: 117 additions & 27 deletions R/readAxivity.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
if (length(configtz) == 0) configtz = desiredtz
blockBytes = 512
headerBytes = 1024
maxAllowedCorruptBlocks = 20 # max number consecutive blocks with a failed checksum that we'll tolerate

# Credits: The original version of this code developed outside GitHub was
# contributed by Dr. Evgeny Mirkes (Leicester University, UK)
#========================================================================
Expand Down Expand Up @@ -75,6 +77,28 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
return(NULL)
}

# sampling rate in one of file format U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # 65536 = 2^16; the checksum is calculated as a 16-bit integer
if (checksum != 0) {
# Checksum doesn't match. This means some bits in this block got corrupted.
# We don't know which, so we can't trust this block. We skip it, and impute it later.
rawdata_list = list(
struc = struc,
parameters = parameters,
checksum_pass = FALSE
)
return(invisible(rawdata_list))
}
}

idstr = readChar(block, 2, useBytes = TRUE)
if (idstr != "AX") {
stop("Packet header is incorrect. First two characters must be AX.")
Expand Down Expand Up @@ -114,22 +138,6 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
} else {
battery = 0
}
# sampling rate in one of file format U8 at offset 24
samplerate_dynrange = readBin(block[25], integer(), size = 1, signed = FALSE)

checksum_pass = TRUE
if (samplerate_dynrange != 0) { # Very old files that have zero at offset 24 don't have a checksum
# Perform checksum
checksum = sum(readBin(block, n = 256,
integer(),
size = 2,
signed = FALSE,
endian = "little"))
checksum = checksum %% 65536 # equals 2^16 the checksum is calculated on a 16bit integer
if (checksum != 0) {
checksum_pass = FALSE
}
}

# offset 25, per documentation:
# "top nibble: number of axes, 3=Axyz, 6=Gxyz/Axyz, 9=Gxyz/Axyz/Mxyz;
Expand Down Expand Up @@ -244,7 +252,7 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
length = blockLength,
struc = struc,
parameters = parameters,
checksum_pass = checksum_pass,
checksum_pass = TRUE,
blockID = blockID
)
if (complete) {
Expand Down Expand Up @@ -302,11 +310,27 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
accrange = bitwShiftR(16, (bitwShiftR(samplerate_dynrange, 6)))
version = readBin(block[42], integer(), size = 1, signed = FALSE) #offset 41

# Read the first data block without data
datas = readDataBlock(fid, complete = FALSE)
if (is.null(datas)) {
stop("Error reading the first data block.")
# Read the first data block without data.
# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 1:maxAllowedCorruptBlocks+1) {
datas = readDataBlock(fid, complete = FALSE)

if (is.null(datas)) {
stop("Error reading the first data block.")
}

if (datas$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt block #", ii)
}
if (is_corrupt) {
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
}

if (frequency_header != datas$frequency) {
warning("Inconsistent value of measurement frequency: there is ",
frequency_header, " in header and ", datas$frequency, " in the first data block ")
Expand Down Expand Up @@ -368,14 +392,47 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
struc = list(0,0L,0)
if (end < numDBlocks) { # the end block isn't part of the data we'll read, but its start will be our ending timestamp
seek(fid, headerBytes + blockBytes * end, origin = 'start')
endBlock = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in end : min(numDBlocks, end+maxAllowedCorruptBlocks+1)) {
endBlock = readDataBlock(fid, struc = struc)

if (endBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
}
if (is_corrupt && ii == end+maxAllowedCorruptBlocks+1) {
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
}
endTimestamp = as.numeric(endBlock$start)
} else {
}

if (end == numDBlocks) {
# end == numDBlocks, meaning we'll be reading all the remaining blocks.
# There is no block #numDBlocks, so we can't get the ending timestamp from the start of that block.
# Instead read the very last block of the file, and project what the ending timestamp should be.
seek(fid, headerBytes + blockBytes * (end-1), origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)
# Instead read the very last block of the file (if the last block is corrupt, fing the last non-corrupt one),
# then project what the ending timestamp should be.

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in (end-1) : (end-maxAllowedCorruptBlocks-1)) {
seek(fid, headerBytes + blockBytes * ii, origin = 'start')
lastBlock = readDataBlock(fid, struc = struc)

if (lastBlock$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt end block #", ii)
}
if (is_corrupt) {
stop("Error reading file. The last ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
}
# the end timestamp should fall right after the actual very last timestamp of the file
endTimestamp = as.numeric(lastBlock$start) + blockLength * step
# now pad it generously in case there are gaps in the last block
Expand All @@ -386,7 +443,23 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
# Reinitiate file and skip header as well as the initial start-1 blocks
seek(fid, headerBytes + blockBytes * start, origin = 'start')
pos = 1 # position of the first element to complete in data
prevRaw = readDataBlock(fid, struc = struc)

# Skip any corrupt blocks, up to maxAllowedCorruptBlocks in number.
is_corrupt = TRUE
for (ii in 1:maxAllowedCorruptBlocks+1) {
prevRaw = readDataBlock(fid, struc = struc)

if (prevRaw$checksum_pass) {
is_corrupt = FALSE
break
}

warning("Skipping corrupt block #", ii)
}
if (is_corrupt) {
stop("Error reading file. The first ", maxAllowedCorruptBlocks+1, " blocks are corrupt")
}

if (is.null(prevRaw)) {
return(invisible(list(header = header, data = NULL)))
}
Expand Down Expand Up @@ -430,7 +503,24 @@ readAxivity = function(filename, start = 0, end = 0, progressBar = FALSE, desire
rawTime[2:(rawLast+1)] = seq(prevStart, newTimes, length.out = rawLast) # rawTime[rawLast+1] will be ignored by resampling alg
}
else { # read a new block
browser()
raw = readDataBlock(fid, struc = struc, parameters = prevRaw$parameters)
if (!raw$checksum_pass) {
# If the checksum doesn't match, we can't trust any of this block's data,
# so we have to completely skip the block.
# Depending on the nature of the faulty block, the data for the time period it represented
# will probably get imputed later, once we encounter a block with a valid checksum.
QClog = rbind(QClog, data.frame(checksum_pass = FALSE,
blockID_current = prevRaw$blockID + 1, # we can end up with several blocks with this ID
blockID_next = prevRaw$blockID + 1,
start = 0,
end = 0,
blockLengthSeconds = 0,
frequency_blockheader = 0,
frequency_observed = 0,
imputed = FALSE))
next
}

if (is.null(raw)) {
# this shouldn't happen
Expand Down

0 comments on commit 998c6bd

Please sign in to comment.