add arguments to control timezone #68

wadpac · Sep 30, 2024 · 2ceb92c · 2ceb92c
1 parent 32f4460
commit 2ceb92c
Show file tree

Hide file tree

Showing 18 changed files with 367 additions and 69 deletions.
diff --git a/R/mergePHBfilePairs.R b/R/mergePHBfilePairs.R
@@ -1,10 +1,11 @@
-mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
+mergePHBfilePairs = function(inputPath = ".", outputPath = ".",
+                             timeformat = "%m/%d/%Y %H:%M:%S",
+                             desiredtz = "", configtz = NULL,
+                             timeformatName = "timeformat") {
   # merges Philips Health Band xlsx files per participant
   # as there can be multiple files per participant.
-
   fnames = dir(inputPath, recursive = FALSE, full.names = TRUE, pattern = "[.]xlsx")
   fileOverview = data.frame(filename = fnames)
-
   extractID = function(x) {
     x = basename(x)
     x = gsub(pattern = "sleep_wake", replacement = "sleepwake", x = tolower(x))
@@ -23,41 +24,31 @@ mergePHBfilePairs = function(inputPath = ".", outputPath = ".") {
       next
     }
     # Data
+    deviceSN = NULL
     if (length(file1) > 0) {
-      data1 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1], 
-                                               col_types = "text", skip = 8),
-                            row.names = FALSE)
-      header = as.data.frame(readxl::read_excel(path = filesForThisPerson[file1], 
-                                                col_types = "text", n_max = 8,
-                                                .name_repair = "unique_quiet"),
-                             row.names = FALSE)[, 1]
-      SNlocation = grep(pattern = "deviceSN", x = header)
-      if (length(SNlocation) > 0) {
-        deviceSN = unlist(strsplit(header[grep(pattern = "deviceSN", x = header)], " "))
-        deviceSN = deviceSN[length(deviceSN)]
-      } else {
-        deviceSN = NULL
-      }
-      colnames(data1)[grep(pattern = "counts", x = colnames(data1), ignore.case = TRUE)] = "counts"
-      colnames(data1)[grep(pattern = "offWrist", x = colnames(data1), ignore.case = TRUE)] = "nonwear"
+      data1 = readPHBCount(filename = filesForThisPerson[file1], timeformat = timeformat,
+                   desiredtz = desiredtz, configtz = configtz,
+                   timeformatName = timeformatName)
+      deviceSN = data1$deviceSN
     }
     # Sleep wake scores
     if (length(file2) > 0) {
-      data2 = as.data.frame(readxl::read_excel(path = filesForThisPerson[file2], col_types = "text", skip = 8), row.names = FALSE)
-      colnames(data2)[grep(pattern = "sleepWake", x = colnames(data2), ignore.case = TRUE)] = "sleep"
+      data2 = readPHBCount(filename = filesForThisPerson[file2], timeformat = timeformat,
+                           desiredtz = desiredtz, configtz = configtz,
+                           timeformatName = timeformatName)
     }
     if (length(file1) > 0 && length(file2) > 0) {
-      data2 = data2[, which(colnames(data2) != "sleepEventMarker")]
-      data = merge(data1, data2, by = "timeStamp")
+      data2$data = data2$data[, which(colnames(data2$data) != "sleepEventMarker")]
+      data = merge(data1$data, data2$data, by = "timestamp")
     } else {
       if (length(file1) > 0) {
-        data = data1
+        data = data1$data
       } else {
-        data = data2
+        data = data2$data
       }
     }
-    colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
-    newName = gsub(pattern = "Sleep_Wake", replacement = "def", x =  basename(filesForThisPerson[file2]))
+    colnames(data)[grep(pattern = "timestamp", x = colnames(data))] = "timestamp"
+    newName = gsub(pattern = "Sleep_Wake", replacement = "def", x =  basename(filesForThisPerson[file2]), ignore.case = TRUE)
     newName = paste0(unlist(strsplit(newName, "[.]")) , collapse = paste0("_", deviceSN, "."))
     newName = gsub(pattern = "xlsx", replacement = "csv", x = newName)
     outputfile = paste0(outputPath, "/", newName)

diff --git a/R/readActiGraphCount.R b/R/readActiGraphCount.R
@@ -1,5 +1,9 @@
 readActiGraphCount = function(filename = NULL,
-                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "", timeformatName = "timeformat") {
+                            timeformat = "%m/%d/%Y %H:%M:%S",
+                            desiredtz = "",
+                            configtz = NULL,
+                            timeformatName = "timeformat") {
+  if (length(configtz) == 0) configtz = desiredtz
   # In GGIR set timeformatName to extEpochData_timeformat
   deviceSerialNumber = NULL
 
@@ -138,7 +142,7 @@ readActiGraphCount = function(filename = NULL,
     starttime = fileHeader$value[grep(pattern = "starttime", x = fileHeader$item)]
     startdate = fileHeader$value[grep(pattern = "startdate", x = fileHeader$item)]
     timestamp = paste0(startdate, " ", starttime)
-    timestamp_POSIX = as.POSIXlt(timestamp, tz = tz,
+    timestamp_POSIX = as.POSIXct(timestamp, tz = configtz,
                                  format = timeformat)
   } else if (headerAvailable == FALSE) {
     # Extract date/timestamp from first values of column
@@ -152,7 +156,7 @@ readActiGraphCount = function(filename = NULL,
       timecol = grep("time|epoch", colnames(tmp), ignore.case = TRUE)
       timestamp = paste0(tmp[, datecol], " ", tmp[1, timecol])
       format = timeformat
-      timestamp_POSIX = as.POSIXlt(timestamp, tz = tz, format = format)
+      timestamp_POSIX = as.POSIXct(timestamp, tz = configtz, format = format)
       if (all(is.na(timestamp_POSIX))) {
         stop(paste0("\nTimestamps are not available in the file, neither has",
                     " it a header to extract the timestamps from. Therefore, the file",
@@ -169,6 +173,13 @@ readActiGraphCount = function(filename = NULL,
                   timeformat = timeformat,
                   timeformatName = timeformatName)
 
+
+  # Establish starttime in the correct timezone
+  if (configtz != desiredtz) {
+    timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
+                                 origin = "1970-01-01")
+  }
+
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX,
                  deviceSerialNumber = deviceSerialNumber))

diff --git a/R/readActicalCount.R b/R/readActicalCount.R
@@ -1,8 +1,10 @@
 readActicalCount = function(filename = NULL,
-                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "",
+                            timeformat = "%m/%d/%Y %H:%M:%S",
+                            desiredtz = "",
+                            configtz = NULL,
                             timeformatName = "timeformat") {
   # In GGIR set timeformatName to extEpochData_timeformat
-
+  if (length(configtz) == 0) configtz = desiredtz
   # ! Assumptions that timeseries start before line 1000
   startindex = 300
   quote = detectQuote(fn = filename, index = startindex)
@@ -41,7 +43,7 @@ readActicalCount = function(filename = NULL,
   D = D[, grep(pattern = "time|date|counts|steps", x = colnames(D))]
   timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
                                format = timeformat,
-                               tz = tz)
+                               tz = configtz)
   checkTimeFormat(timestamp_POSIX[1],
                   rawValue = paste(D$date[1], D$time[1], sep = " "),
                   timeformat = timeformat,
@@ -51,7 +53,12 @@ readActicalCount = function(filename = NULL,
   D = D[, -which(colnames(D) %in% c("date", "time"))]
   D = as.matrix(D, drop = FALSE)
   if (quote == "") D = apply(D, 2, as.numeric)
-
+
+  # Establish starttime in the correct timezone
+  if (configtz != desiredtz) {
+    timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
+                                 origin = "1970-01-01")
+  }
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX))
 }
diff --git a/R/readActiwatchCount.R b/R/readActiwatchCount.R
@@ -1,8 +1,11 @@
 readActiwatchCount = function(filename = NULL,
-                            timeformat = "%m/%d/%Y %H:%M:%S", tz = "",
+                            timeformat = "%m/%d/%Y %H:%M:%S",
+                            desiredtz = "",
+                            configtz = NULL,
                             timeformatName = "timeformat") {
   # In GGIR set timeformatName to extEpochData_timeformat
 
+  if (length(configtz) == 0) configtz = desiredtz
   fileExtension = tolower(getExtension(filename))
 
   if (fileExtension == "csv") {
@@ -36,7 +39,7 @@ readActiwatchCount = function(filename = NULL,
     D = D[, grep(pattern = "time|date|counts|sleep|nonwear", x = colnames(D))]
     timestamp_POSIX = as.POSIXct(x = paste(D$date[1:4], D$time[1:4], sep = " "),
                                  format = timeformat,
-                                 tz = tz)
+                                 tz = configtz)
     checkTimeFormat(timestamp_POSIX[1], 
                     rawValue = paste(D$date[1], D$time[1], sep = " "),
                     timeformat = timeformat,
@@ -78,7 +81,7 @@ readActiwatchCount = function(filename = NULL,
     # Get starttime 
     timestampFormat = paste0(unlist(strsplit(timeformat, " "))[1], " %H:%M")
     timestamp_POSIX = as.POSIXct(x = paste(header[2], header[3], sep = " "),
-                                 format = timestampFormat, tz = tz)
+                                 format = timestampFormat, tz = configtz)
     checkTimeFormat(timestamp_POSIX, 
                     rawValue = header[2],
                     timeformat = timeformat,
@@ -87,6 +90,12 @@ readActiwatchCount = function(filename = NULL,
   }
   D = as.matrix(D, drop = FALSE)
   if (quote == "") D = apply(D, 2, as.numeric)
+
+  # Establish starttime in the correct timezone
+  if (configtz != desiredtz) {
+    timestamp_POSIX = as.POSIXct(x = as.numeric(timestamp_POSIX), tz = desiredtz,
+                                 origin = "1970-01-01")
+  }
   invisible(list(data = D, epochSize = epSizeShort,
                  startTime = timestamp_POSIX))
 }
diff --git a/R/readFitbit.R b/R/readFitbit.R
@@ -1,4 +1,5 @@
-readFitbit = function(filename = NULL) {
+readFitbit = function(filename = NULL, desiredtz = "",
+                      configtz = NULL) {
   # Assumptions made:
   # - sleep is sampled at 30 second resolution
   # - steps are sampled at 60 second resolution
@@ -16,7 +17,7 @@ readFitbit = function(filename = NULL) {
 
   #-------------------------------------------------
   # Main code
-
+  if (length(configtz) == 0) configtz = desiredtz
   D = jsonlite::read_json(path = filename,
                 simplifyVector = FALSE,
                 flatten = FALSE)
@@ -30,15 +31,15 @@ readFitbit = function(filename = NULL) {
     for (i in 1:length(D)) {
       tmp = D[[i]][15]$levels
       data = as.data.frame(data.table::rbindlist(tmp$data, fill = TRUE))
-      data$dateTime = as.POSIXct(data$dateTime, format = "%Y-%m-%dT%H:%M:%S")
+      data$dateTime = as.POSIXct(data$dateTime, format = "%Y-%m-%dT%H:%M:%S", tz = configtz)
       if (i == 1) {
         all_data = data
       } else {
         all_data = rbind(all_data, data)
       }
       if ("shortData" %in% names(tmp)) {
         shortData = data.table::rbindlist(tmp$shortData, fill = TRUE)
-        shortData$dateTime = as.POSIXct(shortData$dateTime, format = "%Y-%m-%dT%H:%M:%S")
+        shortData$dateTime = as.POSIXct(shortData$dateTime, format = "%Y-%m-%dT%H:%M:%S", tz = configtz)
         if (i == 1) {
           all_shortData = shortData
         } else {
@@ -74,12 +75,17 @@ readFitbit = function(filename = NULL) {
   } else if (dataType == "steps" || dataType == "calories") {
     epochSize = 60
     data = as.data.frame(data.table::rbindlist(D, fill = TRUE))
-    data$dateTime = as.POSIXct(data$dateTime, format = "%m/%d/%y %H:%M:%S")
+    data$dateTime = as.POSIXct(data$dateTime, format = "%m/%d/%y %H:%M:%S", tz = configtz)
     D = handleTimeGaps(data, epochSize = 60)
     D$value = as.numeric(D$value)
     colnames(D)[2] = dataType
   } else {
     stop("File type not recognised")
   }
+  # Establish starttime in the correct timezone
+  if (configtz != desiredtz) {
+    D$dateTime = as.POSIXct(x = as.numeric(D$dateTime), tz = desiredtz,
+                               origin = "1970-01-01")
+  }
   return(D)
 }
diff --git a/R/readPHBCount.R b/R/readPHBCount.R
@@ -0,0 +1,43 @@
+readPHBCount = function(filename = NULL, timeformat = "%m/%d/%Y %H:%M:%S",
+                        desiredtz = "", configtz = NULL,
+                        timeformatName = "timeformat") {
+  # Reads one Philips Health Band xlsx file: a "datalist" file (counts and
+  # off-wrist) when the filename contains "datalist", else a sleep/wake file.
+  # Timestamps are parsed with timeformat in configtz (defaults to desiredtz)
+  # and expressed in desiredtz. Returns invisibly list(data, deviceSN), where
+  # deviceSN is NULL unless a "deviceSN" entry is found in the file header.
+  if (length(configtz) == 0) configtz = desiredtz
+  deviceSN = NULL
+  # Both file types are read the same way: skip the 8 leading rows, all text
+  data = as.data.frame(readxl::read_excel(path = filename,
+                                          col_types = "text", skip = 8),
+                       row.names = FALSE)
+  if (length(grep(pattern = "datalist", x = filename, ignore.case = TRUE)) > 0) {
+    header = as.data.frame(readxl::read_excel(path = filename,
+                                              col_types = "text", n_max = 8,
+                                              .name_repair = "unique_quiet"),
+                           row.names = FALSE)[, 1]
+    SNlocation = grep(pattern = "deviceSN", x = header)
+    if (length(SNlocation) > 0) {
+      # Serial number is the last space-separated token of the matching entry
+      deviceSN = unlist(strsplit(header[SNlocation], " "))
+      deviceSN = deviceSN[length(deviceSN)]
+    }
+    colnames(data)[grep(pattern = "counts", x = colnames(data), ignore.case = TRUE)] = "counts"
+    colnames(data)[grep(pattern = "offWrist", x = colnames(data), ignore.case = TRUE)] = "nonwear"
+    data$counts = as.numeric(data$counts)
+    # Convert the off-wrist column itself (not counts); skip when the file
+    # has no such column to avoid a zero-length assignment error
+    if ("nonwear" %in% colnames(data)) data$nonwear = as.numeric(data$nonwear)
+  } else {
+    colnames(data)[grep(pattern = "sleepWake", x = colnames(data), ignore.case = TRUE)] = "sleep"
+    data$sleep = as.numeric(data$sleep)
+  }
+  colnames(data)[grep(pattern = "timeStamp", x = colnames(data))] = "timestamp"
+  # Keep the raw text of the first timestamp for the format check below
+  rawValue = data$timestamp[1]
+  data$timestamp = as.POSIXct(data$timestamp, format = timeformat, tz = configtz,
+                              origin = "1970-01-01")
+
+  checkTimeFormat(data$timestamp[1],
+                  rawValue = rawValue,
+                  timeformat = timeformat,
+                  timeformatName = timeformatName)
+  # Re-express the same instants in desiredtz when it differs from configtz
+  if (configtz != desiredtz) {
+    data$timestamp = as.POSIXct(x = as.numeric(data$timestamp), tz = desiredtz,
+                                origin = "1970-01-01")
+  }
+  invisible(list(data = data, deviceSN = deviceSN))
+}
diff --git a/man/mergePHBfilePairs.Rd b/man/mergePHBfilePairs.Rd
@@ -9,7 +9,10 @@
   xlsx file to csv.
 }
 \usage{
-  mergePHBfilePairs(inputPath = ".", outputPath = ".")
+  mergePHBfilePairs(inputPath = ".", outputPath = ".",
+                     timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
+                     desiredtz = "", configtz = NULL,
+                     timeformatName = "timeformat")
 }
 \arguments{
   \item{inputPath}{
@@ -20,6 +23,24 @@
   \item{outputPath}{
     Character, path to store the merge output
   }
+  \item{timeformat}{
+    Character, timestamp format.
+  }
+   \item{desiredtz}{
+    Character, timezone name where the accelerometer was worn. Timezone names are
+    expected to be the timezone database names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+  }
+  \item{configtz}{
+    Character, timezone name where the accelerometer was configured. Leave NULL
+    if equal to desiredtz. Timezone names are expected to be the timezone database
+    names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+  }
+  \item{timeformatName}{
+    Character, name of timeformat variable to print in error message when
+    timeformat is incorrect, of use to GGIR where argument names can differ.
+  }
 }
 \value{
   Function does not output values. Instead it stores the merged data as file in the location

diff --git a/man/readActiGraphCount.Rd b/man/readActiGraphCount.Rd
@@ -7,8 +7,9 @@
   Reads ActiGraph Count data file. Currently a variety of csv format are facilitated.
 }
 \usage{
-  readActiGraphCount(filename = NULL,
-                            timeformat = "\%m/\%d/\%Y \%H:\%M:\%S", tz = "",
+  readActiGraphCount(filename = NULL, 
+                            timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
+                            desiredtz = "", configtz = NULL,
                             timeformatName = "timeformat")
 }
 \arguments{
@@ -18,8 +19,16 @@
   \item{timeformat}{
     Character, timestamp format.
   }
-  \item{tz}{
-    Character, timezone name from the timezone database names.
+  \item{desiredtz}{
+    Character, timezone name where the accelerometer was worn. Timezone names are
+    expected to be the timezone database names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+  }
+  \item{configtz}{
+    Character, timezone name where the accelerometer was configured. Leave NULL
+    if equal to desiredtz. Timezone names are expected to be the timezone database
+    names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
   }
   \item{timeformatName}{
     Character, name of timeformat variable to print in error message when

diff --git a/man/readActicalCount.Rd b/man/readActicalCount.Rd
@@ -8,7 +8,8 @@
 }
 \usage{
   readActicalCount(filename = NULL,
-                            timeformat = "\%m/\%d/\%Y \%H:\%M:\%S", tz = "",
+                            timeformat = "\%m/\%d/\%Y \%H:\%M:\%S",
+                            desiredtz = "", configtz = NULL,
                             timeformatName = "timeformat")
 }
 \arguments{
@@ -18,8 +19,16 @@
   \item{timeformat}{
     Character, timestemp format.
   }
-  \item{tz}{
-    Character, timezone name from the timezone database names.
+   \item{desiredtz}{
+    Character, timezone name where the accelerometer was worn. Timezone names are
+    expected to be the timezone database names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+  }
+  \item{configtz}{
+    Character, timezone name where the accelerometer was configured. Leave NULL
+    if equal to desiredtz. Timezone names are expected to be the timezone database
+    names, e.g. Europe/London. See also:
+    https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
   }
   \item{timeformatName}{
     Character, name of timeformat variable to print in error message when