Skip to content

Commit

Permalink
add arguments to control timezone #68
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentvanhees committed Sep 30, 2024
1 parent 32f4460 commit 2ceb92c
Show file tree
Hide file tree
Showing 18 changed files with 367 additions and 69 deletions.
45 changes: 18 additions & 27 deletions R/mergePHBfilePairs.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
mergePHBfilePairs = function(inputPath = ".", outputPath = ".",
timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat") {
# merges Philips Health Band xlsx files per participant
# as there can be multiple files per participant.

fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx")
fileOverview = data.frame(filename = fnames)

extractID = function(x) {
x = basename(x)
x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(x))
Expand All @@ -23,41 +24,31 @@ mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
next
}
# Data
deviceSN = NULL
if (length(file1) > 0) {
data1 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1],
col_types = "text", skip = 8),
row.names = FALSE)
header = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1],
col_types = "text", n_max = 8,
.name_repair = "unique_quiet"),
row.names = FALSE)[, 1]
SNlocation = grep(pattern = "deviceSN", x = header)
if (length(SNlocation) > 0) {
deviceSN = unlist(strsplit(header[grep(pattern = "deviceSN", x = header)], " "))
deviceSN = deviceSN[length(deviceSN)]
} else {
deviceSN = NULL
}
colnames(data1)[grep(pattern = "counts", x = colnames(data1), ignore.case = TRUE)] = "counts"
colnames(data1)[grep(pattern = "offWrist", x = colnames(data1), ignore.case = TRUE)] = "nonwear"
data1 = readPHBCount(filename = filesForThisPerson[file1], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
deviceSN = data1$deviceSN
}
# Sleep wake scores
if (length(file2) > 0) {
data2 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file2], col_types = "text", skip = 8), row.names = FALSE)
colnames(data2)[grep(pattern = "sleepWake", x = colnames(data2), ignore.case = TRUE)] = "sleep"
data2 = readPHBCount(filename = filesForThisPerson[file2], timeformat = timeformat,
desiredtz = desiredtz, configtz = configtz,
timeformatName = timeformatName)
}
if (length(file1) > 0 && length(file2) > 0) {
data2 = data2[, which(colnames(data2) != "sleepEventMarker")]
data = merge(data1, data2, by = "timeStamp")
data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]
data = merge(data1$data, data2$data, by = "timestamp")
} else {
if (length(file1) > 0) {
data = data1
data = data1$data
} else {
data = data2
data = data2$data
}
}
colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
newName = gsub(pattern = "Sleep_Wake", replacement = "def", x = basename(filesForThisPerson[file2]))
colnames(data)[grep(pattern = "timestamp", x = colnames(data))] = "timestamp"
newName = gsub(pattern = "Sleep_Wake", replacement = "def", x = basename(filesForThisPerson[file2]), ignore.case = TRUE)
newName = paste0(unlist(strsplit(newName, "[.]")) , collapse = paste0("_", deviceSN, "."))
newName = gsub(pattern = "xlsx", replacement = "csv", x = newName)
outputfile = paste0(outputPath, "/", newName)
Expand Down
17 changes: 14 additions & 3 deletions R/readActiGraphCount.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
readActiGraphCount = function(filename = NULL,
timeformat = "%m/%d/%Y %H:%M:%S", tz = "", timeformatName = "timeformat") {
timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "",
configtz = NULL,
timeformatName = "timeformat") {
if (length(configtz) == 0) configtz = desiredtz
# In GGIR set timeformatName to extEpochData_timeformat
deviceSerialNumber = NULL

Expand Down Expand Up @@ -138,7 +142,7 @@ readActiGraphCount = function(filename = NULL,
starttime = fileHeader$value[grep(pattern = "starttime", x = fileHeader$item)]
startdate = fileHeader$value[grep(pattern = "startdate", x = fileHeader$item)]
timestamp = paste0(startdate, " ", starttime)
timestamp_POSIX = as.POSIXlt(timestamp, tz = tz,
timestamp_POSIX = as.POSIXct(timestamp, tz = configtz,
format = timeformat)
} else if (headerAvailable == FALSE) {
# Extract date/timestamp from first values of column
Expand All @@ -152,7 +156,7 @@ readActiGraphCount = function(filename = NULL,
timecol = grep("time|epoch", colnames(tmp), ignore.case = TRUE)
timestamp = paste0(tmp[, datecol], " ", tmp[1, timecol])
format = timeformat
timestamp_POSIX = as.POSIXlt(timestamp, tz = tz, format = format)
timestamp_POSIX = as.POSIXct(timestamp, tz = configtz, format = format)
if (all(is.na(timestamp_POSIX))) {
stop(paste0("\nTimestamps are not available in the file, neither has",
" it a header to extract the timestamps from. Therefore, the file",
Expand All @@ -169,6 +173,13 @@ readActiGraphCount = function(filename = NULL,
timeformat = timeformat,
timeformatName = timeformatName)


# Establish starttime in the correct timezone
if (configtz != desiredtz) {
timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
origin = "1970-01-01")
}

invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX,
deviceSerialNumber = deviceSerialNumber))
Expand Down
15 changes: 11 additions & 4 deletions R/readActicalCount.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
readActicalCount = function(filename = NULL,
timeformat = "%m/%d/%Y %H:%M:%S", tz = "",
timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "",
configtz = NULL,
timeformatName = "timeformat") {
# In GGIR set timeformatName to extEpochData_timeformat

if (length(configtz) == 0) configtz = desiredtz
# ! Assumptions that timeseries start before line 1000
startindex = 300
quote = detectQuote(fn = filename, index = startindex)
Expand Down Expand Up @@ -41,7 +43,7 @@ readActicalCount = function(filename = NULL,
D = D[, grep(pattern = "time|date|counts|steps", x = colnames(D))]
timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
format = timeformat,
tz = tz)
tz = configtz)
checkTimeFormat(timestamp_POSIX[1],
rawValue = paste(D$date[1], D$time[1], sep = " "),
timeformat = timeformat,
Expand All @@ -51,7 +53,12 @@ readActicalCount = function(filename = NULL,
D = D[, -which(colnames(D) %in% c("date", "time"))]
D = as.matrix(D, drop = FALSE)
if (quote == "") D = apply(D, 2, as.numeric)


# Establish starttime in the correct timezone
if (configtz != desiredtz) {
timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
origin = "1970-01-01")
}
invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX))
}
15 changes: 12 additions & 3 deletions R/readActiwatchCount.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
readActiwatchCount = function(filename = NULL,
timeformat = "%m/%d/%Y %H:%M:%S", tz = "",
timeformat = "%m/%d/%Y %H:%M:%S",
desiredtz = "",
configtz = NULL,
timeformatName = "timeformat") {
# In GGIR set timeformatName to extEpochData_timeformat

if (length(configtz) == 0) configtz = desiredtz
fileExtension = tolower(getExtension(filename))

if (fileExtension == "csv") {
Expand Down Expand Up @@ -36,7 +39,7 @@ readActiwatchCount = function(filename = NULL,
D = D[, grep(pattern = "time|date|counts|sleep|nonwear", x = colnames(D))]
timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
format = timeformat,
tz = tz)
tz = configtz)
checkTimeFormat(timestamp_POSIX[1],
rawValue = paste(D$date[1], D$time[1], sep = " "),
timeformat = timeformat,
Expand Down Expand Up @@ -78,7 +81,7 @@ readActiwatchCount = function(filename = NULL,
# Get starttime
timestampFormat = paste0(unlist(strsplit(timeformat, " "))[1], " %H:%M")
timestamp_POSIX = as.POSIXct(x = paste(header[2], header[3], sep = " "),
format = timestampFormat, tz = tz)
format = timestampFormat, tz = configtz)
checkTimeFormat(timestamp_POSIX,
rawValue = header[2],
timeformat = timeformat,
Expand All @@ -87,6 +90,12 @@ readActiwatchCount = function(filename = NULL,
}
D = as.matrix(D, drop = FALSE)
if (quote == "") D = apply(D, 2, as.numeric)

# Establish starttime in the correct timezone
if (configtz != desiredtz) {
timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
origin = "1970-01-01")
}
invisible(list(data = D, epochSize = epSizeShort,
startTime = timestamp_POSIX))
}
16 changes: 11 additions & 5 deletions R/readFitbit.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
readFitbit = function(filename = NULL) {
readFitbit = function(filename = NULL, desiredtz = "",
configtz = NULL) {
# Assumptions made:
# - sleep is sampled at 30 second resolution
# - steps are sampled at 60 second resolution
Expand All @@ -16,7 +17,7 @@ readFitbit = function(filename = NULL) {

#-------------------------------------------------
# Main code

if (length(configtz) == 0) configtz = desiredtz
D = jsonlite::read_json(path = filename,
simplifyVector = FALSE,
flatten = FALSE)
Expand All @@ -30,15 +31,15 @@ readFitbit = function(filename = NULL) {
for (i in 1:length(D)) {
tmp = D[[i]][15]$levels
data = as.data.frame(data.table::rbindlist(tmp$data, fill = TRUE))
data$dateTime = as.POSIXct(data$dateTime, format = "%Y-%m-%dT%H:%M:%S")
data$dateTime = as.POSIXct(data$dateTime, format = "%Y-%m-%dT%H:%M:%S", tz = configtz)
if (i == 1) {
all_data = data
} else {
all_data = rbind(all_data, data)
}
if ("shortData" %in% names(tmp)) {
shortData = data.table::rbindlist(tmp$shortData, fill = TRUE)
shortData$dateTime = as.POSIXct(shortData$dateTime, format = "%Y-%m-%dT%H:%M:%S")
shortData$dateTime = as.POSIXct(shortData$dateTime, format = "%Y-%m-%dT%H:%M:%S", tz = configtz)
if (i == 1) {
all_shortData = shortData
} else {
Expand Down Expand Up @@ -74,12 +75,17 @@ readFitbit = function(filename = NULL) {
} else if (dataType == "steps" || dataType == "calories") {
epochSize = 60
data = as.data.frame(data.table::rbindlist(D, fill = TRUE))
data$dateTime = as.POSIXct(data$dateTime, format = "%m/%d/%y %H:%M:%S")
data$dateTime = as.POSIXct(data$dateTime, format = "%m/%d/%y %H:%M:%S", tz = configtz)
D = handleTimeGaps(data, epochSize = 60)
D$value = as.numeric(D$value)
colnames(D)[2] = dataType
} else {
stop("File type not recognised")
}
# Establish starttime in the correct timezone
if (configtz != desiredtz) {
D$dateTime = as.POSIXct(x = as.numeric(D$dateTime), tz = desiredtz,
origin = "1970-01-01")
}
return(D)
}
43 changes: 43 additions & 0 deletions R/readPHBCount.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
                        desiredtz = "", configtz = NULL,
                        timeformatName = "timeformat") {
  # Reads one Philips Health Band xlsx file and returns its epoch data with
  # parsed timestamps.
  #
  # Arguments:
  #   filename       Path to the xlsx file. A filename containing "datalist"
  #                  (case-insensitive) is read as a counts file, any other
  #                  filename is read as a sleep/wake file.
  #   timeformat     strptime-style format used to parse the timeStamp column.
  #   desiredtz      Timezone in which the returned timestamps are expressed.
  #   configtz       Timezone in which the timestamps in the file are
  #                  interpreted; when NULL (length 0) desiredtz is used.
  #   timeformatName Name of the timeformat argument, forwarded to
  #                  checkTimeFormat.
  #
  # Value:
  #   Invisible list with elements:
  #     data     data.frame with a POSIXct "timestamp" column and, depending
  #              on the file type, numeric "counts"/"nonwear" or "sleep".
  #     deviceSN Last space-separated token of the header line containing
  #              "deviceSN", or NULL when no such line is found (always NULL
  #              for sleep/wake files).
  if (length(configtz) == 0) configtz = desiredtz
  deviceSN = NULL
  if (length(grep(pattern = "datalist", x = filename, ignore.case = TRUE)) > 0) {
    # Counts file: the first 8 rows are skipped as header, the table follows
    data = as.data.frame(readxl::read_excel(path = filename,
                                            col_types = "text", skip = 8),
                         row.names = FALSE)
    header = as.data.frame(readxl::read_excel(path = filename,
                                              col_types = "text", n_max = 8,
                                              .name_repair = "unique_quiet"),
                           row.names = FALSE)[, 1]
    # Device serial number is the last token of the header line naming it
    SNlocation = grep(pattern = "deviceSN", x = header)
    if (length(SNlocation) > 0) {
      deviceSN = unlist(strsplit(header[SNlocation], " "))
      deviceSN = deviceSN[length(deviceSN)]
    }
    colnames(data)[grep(pattern = "counts", x = colnames(data), ignore.case = TRUE)] = "counts"
    colnames(data)[grep(pattern = "offWrist", x = colnames(data), ignore.case = TRUE)] = "nonwear"
    data$counts = as.numeric(data$counts)
    # Convert the off-wrist column itself; previously it was overwritten
    # with a copy of the counts column. Only convert when the column exists.
    if ("nonwear" %in% colnames(data)) {
      data$nonwear = as.numeric(data$nonwear)
    }
  } else {
    # Sleep/wake file: same layout, the first 8 rows are skipped as header
    data = as.data.frame(readxl::read_excel(path = filename, col_types = "text", skip = 8), row.names = FALSE)
    colnames(data)[grep(pattern = "sleepWake", x = colnames(data), ignore.case = TRUE)] = "sleep"
    data$sleep = as.numeric(data$sleep)
  }
  colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
  # Keep the first raw timestamp string so it can be passed to checkTimeFormat
  rawValue = data$timestamp[1]
  data$timestamp = as.POSIXct(data$timestamp, format = timeformat, tz = configtz,
                              origin = "1970-01-01")

  # checkTimeFormat is defined elsewhere in the package; presumably it raises
  # an informative error when the timestamp could not be parsed - confirm.
  checkTimeFormat(data$timestamp[1],
                  rawValue = rawValue,
                  timeformat = timeformat,
                  timeformatName = timeformatName)
  # Establish starttime in the correct timezone: same instants in time,
  # re-expressed in desiredtz
  if (configtz != desiredtz) {
    data$timestamp = as.POSIXct(x = as.numeric(data$timestamp), tz = desiredtz,
                                origin = "1970-01-01")
  }
  invisible(list(data = data, deviceSN = deviceSN))
}
23 changes: 22 additions & 1 deletion man/mergePHBfilePairs.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
xlsx file to csv.
}
\usage{
mergePHBfilePairs(inputPath = ".", outputPath = ".")
mergePHBfilePairs(inputPath = ".", outputPath = ".",
timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat")
}
\arguments{
\item{inputPath}{
Expand All @@ -20,6 +23,24 @@
\item{outputPath}{
Character, path to store the merge output
}
\item{timeformat}{
Character, timestamp format.
}
\item{desiredtz}{
Character, timezone name where the accelerometer was worn. Timezone names are
expected to be the timezone database names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{configtz}{
Character, timezone name where the accelerometer was configured. Leave NULL
if equal to desiredtz. Timezone names are expected to be the timezone database
names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{timeformatName}{
Character, name of timeformat variable to print in error message when
timeformat is incorrect, of use to GGIR where argument names can differ.
}
}
\value{
Function does not output values. Instead it stores the merged data as file in the location
Expand Down
17 changes: 13 additions & 4 deletions man/readActiGraphCount.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
Reads ActiGraph Count data file. Currently a variety of csv formats are supported.
}
\usage{
readActiGraphCount(filename = NULL,
timeformat = "\%m/\%d/\%Y \%H:\%M:\%S", tz = "",
readActiGraphCount(filename = NULL,
timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat")
}
\arguments{
Expand All @@ -18,8 +19,16 @@
\item{timeformat}{
Character, timestamp format.
}
\item{tz}{
Character, timezone name from the timezone database names.
\item{desiredtz}{
Character, timezone name where the accelerometer was worn. Timezone names are
expected to be the timezone database names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{configtz}{
Character, timezone name where the accelerometer was configured. Leave NULL
if equal to desiredtz. Timezone names are expected to be the timezone database
names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{timeformatName}{
Character, name of timeformat variable to print in error message when
Expand Down
15 changes: 12 additions & 3 deletions man/readActicalCount.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
}
\usage{
readActicalCount(filename = NULL,
timeformat = "\%m/\%d/\%Y \%H:\%M:\%S", tz = "",
timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
desiredtz = "", configtz = NULL,
timeformatName = "timeformat")
}
\arguments{
Expand All @@ -18,8 +19,16 @@
\item{timeformat}{
Character, timestamp format.
}
\item{tz}{
Character, timezone name from the timezone database names.
\item{desiredtz}{
Character, timezone name where the accelerometer was worn. Timezone names are
expected to be the timezone database names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{configtz}{
Character, timezone name where the accelerometer was configured. Leave NULL
if equal to desiredtz. Timezone names are expected to be the timezone database
names, e.g. Europe/London. See also:
https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
}
\item{timeformatName}{
Character, name of timeformat variable to print in error message when
Expand Down
Loading

0 comments on commit 2ceb92c

Please sign in to comment.