Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose historical endpoint #212

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ export(search_stn_name)
export(search_stn_number)
export(sym)
export(syms)
export(ws_daily_flows)
export(ws_daily_levels)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,UQ)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# tidyhydat 0.7.0.9000
- add historical webservice functions `ws_daily_flows` and `ws_daily_levels` (#211)

# tidyhydat 0.7.0
- bump minimum R version to 4.2.0
- dropped httr in favour of httr2
Expand Down
134 changes: 134 additions & 0 deletions R/historical-webservice.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#' Download historical flow and level data from the ECCC web service
#'
#' Functions to retrieve historical flow and levels data from ECCC web service. This data is
#' the same as HYDAT data but provides the convenience of not having to download
#' the HYDAT database. This function is useful when a smaller amount of data is needed. If
#' you need lots of data, consider using HYDAT and the `hy_` family of functions
#'
#' @param station_number Water Survey of Canada station number.
#' @param start_date Accepts YYYY-MM-DD.
#' If only start date is supplied (i.e. YYYY-MM-DD) values are returned from the start of that day.
#' Defaults to 365 days before current date.
#' @param end_date Accepts either YYYY-MM-DD.
#' If only a date is supplied (i.e. YYYY-MM-DD) values are returned from the end of that day.
#' Defaults to current date.
#'
#'
#' @format A tibble with 6 variables:
#' \describe{
#' \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
#' \item{Date}{Observation date and time. Formatted as a POSIXct class as UTC for consistency.}
#' \item{Parameter}{Type of parameter}
#' \item{Value}{Value of the measurement.}
#' \item{Symbol}{future use}
#' }
#'
#' @seealso hy_daily_flows
#' @examples
#' \dontrun{
#'
#' flow_data <- ws_daily_flows(
#' station_number = c("08NL071", "08NM174")
#' )
#'
#' level_data <- ws_daily_level(
#' station_number = c("08NL071", "08NM174")
#' )
#' }
#' @export
ws_daily_flows <- function(
station_number,
start_date = Sys.Date() - 365,
end_date = Sys.Date()) {

get_historical_data(
station_number = station_number,
parameters = "flow",
start_date = start_date,
end_date = end_date
)
}

#' @rdname ws_daily_flows
#' @export
ws_daily_levels <- function(
station_number,
start_date = Sys.Date() - 365,
end_date = Sys.Date()) {

get_historical_data(
station_number = station_number,
parameters = "level",
start_date = start_date,
end_date = end_date
)
}


get_historical_data <- function(
station_number,
parameters = "flow",
start_date,
end_date) {
parameters <- match.arg(parameters, choices = c("level", "flow"))

## Build link for GET
baseurl <- "https://wateroffice.ec.gc.ca/services/daily_data/csv/inline?"

query_url <- construct_url(
venue = "historical",
baseurl,
station_number,
parameters,
start_date,
end_date
)

## Get data
req <- httr2::request(query_url)
req <- tidyhydat_agent(req)
resp <- httr2::req_perform(req)

## Give webservice some time
Sys.sleep(1)


## Check the respstatus
httr2::resp_check_status(resp)


if (httr2::resp_headers(resp)$`Content-Type` != "text/csv; charset=utf-8") {
stop("Response is not a csv file")
}

## Turn it into a tibble and specify correct column classes
csv_df <- readr::read_csv(
I(httr2::resp_body_string(resp)),
col_types = "cDcdc",
)


## Check here to see if csv_df has any data in it
if (nrow(csv_df) == 0) {
stop(c("No data exists for this station query during the period chosen"))
}

## Rename columns to reflect tidyhydat naming
colnames(csv_df) <- c("STATION_NUMBER", "Date", "Parameter", "Value", "Symbol")

## What stations were missed?
differ <- setdiff(unique(station_number), unique(csv_df$STATION_NUMBER))
if (length(differ) != 0) {
if (length(differ) <= 10) {
message("The following station(s) were not retrieved: ", paste0(differ, sep = " "))
message("Check station number for typos or if it is a valid station in the network")
} else {
message("More than 10 stations from the initial query were not returned. Ensure realtime and active status are correctly specified.")
}
} else {
message("All station successfully retrieved")
}

## Return it
csv_df
}
51 changes: 7 additions & 44 deletions R/realtime-webservice.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,58 +83,21 @@ realtime_ws <- function(station_number,
)
}

if (!is.numeric(parameters)) stop("parameters should be a number", call. = FALSE)

if (inherits(start_date, "Date")) start_date <- paste0(start_date, " 00:00:00")
if (inherits(end_date, "Date")) end_date <- paste0(end_date, " 23:59:59")


if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", start_date)) {
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}

if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", end_date)) {
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}


if (!is.null(start_date) & !is.null(end_date)) {
if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) {
stop(
"start_date is after end_date. Try swapping values.",
call. = FALSE
)
}
}

## Check date is in the right format
if (is.na(as.Date(start_date, format = "%Y-%m-%d")) | is.na(as.Date(end_date, format = "%Y-%m-%d"))) {
stop("Invalid date format. Dates need to be in YYYY-MM-DD format")
}
validate_params(parameters, start_date, end_date)

## Build link for GET
baseurl <- "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?"


station_string <- paste0("stations[]=", station_number, collapse = "&")
parameters_string <- paste0("parameters[]=", parameters, collapse = "&")
date_string <- paste0(
"start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19)
)

## paste them all together
query_url <- paste0(
query_url <- construct_url(
venue = "realtime",
baseurl,
station_string, "&",
parameters_string, "&",
date_string
station_number,
parameters,
start_date,
end_date
)

## Get data
Expand Down
72 changes: 72 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,76 @@ tidyhydat_perform <- function(req, ...) {
req <- httr2::req_retry(req, max_tries = 5)
req <- httr2::req_progress(req)
httr2::req_perform(req, ...)
}

validate_params <- function(parameters, start_date, end_date) {


if (!is.numeric(parameters)) stop("parameters should be a number", call. = FALSE)


if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", start_date)) {
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}

if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", end_date)) {
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}


if (!is.null(start_date) & !is.null(end_date)) {
if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) {
stop(
"start_date is after end_date. Try swapping values.",
call. = FALSE
)
}
}

## Check date is in the right format
if (is.na(as.Date(start_date, format = "%Y-%m-%d")) | is.na(as.Date(end_date, format = "%Y-%m-%d"))) {
stop("Invalid date format. Dates need to be in YYYY-MM-DD format")
}

invisible(TRUE)
}

construct_url <- function(
venue = "realtime",
baseurl,
station_number,
parameters,
start_date,
end_date
) {
station_string <- paste0("stations[]=", station_number, collapse = "&")
parameters_string <- paste0("parameters[]=", parameters, collapse = "&")
if (venue == "realtime") {
date_string <- paste0(
"start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19)
)
}

if (venue == "historical"){
date_string <- paste0(
"start_date=", start_date,
"&end_date=", end_date
)
}


## paste them all together
paste0(
baseurl,
station_string, "&",
parameters_string, "&",
date_string
)
}
61 changes: 61 additions & 0 deletions man/ws_daily_flows.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading