predictions_summary.Rmd

---
title: "Predictions"
author: "Jeff Walker"
date: "1/30/2015"
output:
  pdf_document:
    toc: yes
  html_document:
    number_sections: yes
    toc: yes
---

```{r, echo=FALSE}
# Global knitr defaults: hide code, warnings and messages for every chunk
# unless a chunk header overrides them.
knitr::opts_chunk$set(warning=FALSE, echo=FALSE, message=FALSE)
```

```{r libraries, message=FALSE}
library(jsonlite)
library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(DataCombine)
library(RPostgreSQL)
library(devtools)
library(gridExtra)
library(conteStreamTemperature)
# Use the black-and-white ggplot2 theme for every figure in this report.
theme_set(theme_bw())

# connect to database
# Each connection setting can be overridden with an environment variable so
# that credentials do not have to be edited into this file; the fallbacks
# reproduce the original development settings.
# NOTE(review): the fallback password is still stored in plain text here;
# remove it once CONTE_DB_PASSWORD is set wherever this report is knit.
db <- src_postgres(
  dbname = Sys.getenv('CONTE_DB_NAME', unset = 'conte_dev'),
  host = Sys.getenv('CONTE_DB_HOST', unset = 'felek.cns.umass.edu'),
  user = Sys.getenv('CONTE_DB_USER', unset = 'conte'),
  password = Sys.getenv('CONTE_DB_PASSWORD', unset = 'conte')
)

# load data
# The .RData file presumably supplies the objects used below without being
# created in this document (tempDataSync, springFallBPs, featureid_huc8,
# featureid_lat_lon, coef.list, cov.list) - TODO confirm.
load(Sys.getenv(
  'CONTE_PREDICTIONS_RDATA',
  unset = '/conte/home/dhocking/conteStreamTemperature_northeast/localData/db_pull_for_predictions.RData'
))
```

# Get Data

```{r get-locations, message=FALSE}
# Lazy query: one row per location with its agency name attached and a
# combined 'site' identifier of the form <agency_name>_<location_name>.
qry_locations <- left_join(
  tbl(db, 'locations') %>%
    rename(location_id=id, location_name=name, featureid=catchment_id) %>%
    select(location_id, location_name, latitude, longitude, featureid, agency_id),
  tbl(db, 'agencies') %>%
    rename(agency_name=name, agency_id=id) %>%
    select(agency_name, agency_id),
  by = 'agency_id'
) %>%
  select(location_id, location_name, featureid, latitude, longitude, agency_name) %>%
  mutate(site=concat(agency_name, '_', location_name))

# Run the query and pull the result into a local data frame.
df_locations <- collect(qry_locations)

# Stop knitting if any site identifier occurs more than once.
stopifnot(sum(duplicated(df_locations$site))==0)

head(df_locations)
```

Select a location for making predictions.

```{r choose-location}
# Pick the location to predict for and keep the id of its catchment.
location <- df_locations %>% filter(location_name=='MAPleas55')
catches <- location$featureid
print(catches)
```

Retrieve Daymet data for the selected catchment.

```{r get-daymet}
# Lazy query against the 'daymet' table: attach the site name of each
# catchment, keep only the selected catchment, and derive mean air
# temperature as the midpoint of tmax and tmin.
# NOTE(review): `==` assumes `catches` holds exactly one featureid; the
# commented-out `%in%` filter is the multi-catchment form.
qry_daymet <- tbl(db, 'daymet') %>%
  left_join(select(qry_locations, site, featureid), by='featureid') %>%
#   filter(featureid %in% catches) %>%
  filter(featureid == catches) %>%
  mutate(airTemp = (tmax + tmin)/2)

# Run the query, pull the rows into memory and print a column summary.
climateData <- collect(qry_daymet)
summary(climateData)
```

Get the covariates for the selected catchment

```{r get-covariates}
# Lazy query for the covariate rows of the selected catchment.
# NOTE(review): as with the daymet query, `==` assumes a single featureid.
qry_covariates <- tbl(db, 'covariates') %>%
#   filter(featureid %in% catches)
  filter(featureid == catches)

# Pull the long-format rows and spread them so that every value of
# `variable` becomes its own column.
df_covariates_long <- collect(qry_covariates)
df_covariates_wide <- tidyr::spread(df_covariates_long, variable, value)

# which zone? upstream or downstream? - maybe elevation should be downstream all others upstream
# Only rows whose zone is "upstream" are carried forward.
df_covariates_upstream <- filter(df_covariates_wide, zone=="upstream")
glimpse(df_covariates_upstream)
```

Get spring/fall breakpoints

```{r spring-fall-bps}
# Store site as character; it is used as a join key against the climate data
# in the next chunk.
springFallBPs$site <- as.character(springFallBPs$site)

# Mean breakpoints over all rows of springFallBPs, ignoring missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mean.spring.bp <- mean(springFallBPs$finalSpringBP, na.rm = TRUE)
mean.fall.bp <- mean(springFallBPs$finalFallBP, na.rm = TRUE)
print(c("Spring"=mean.spring.bp, "Fall"=mean.fall.bp))
```

Join climate data, covariates, observed stream temperature, huc8, lat/lon, spring/fall breakpoints and add dOY. Then filter dataset to be within spring/fall breakpoints.

```{r merge-all}
# Build the prediction input table: start from the daily climate rows and
# attach covariates, observed temperature, HUC8 and lat/lon, then restrict
# the rows to the spring/fall window.
fullDataSync <- climateData %>%
  left_join(df_covariates_upstream, by=c('featureid')) %>%
  # observed stream temperature, matched on date and site
  left_join(select(tempDataSync, date, site, temp), by = c('date', 'site')) %>%
  left_join(featureid_huc8, by = c('featureid')) %>%
  left_join(featureid_lat_lon, by = c('featureid')) %>%
  # calendar year, needed as a join key for the breakpoints
  mutate(year = as.numeric(format(date, "%Y"))) %>%
  left_join(springFallBPs, by = c('site', 'year')) %>%
  # day of year, 1-based (yday counts from 0)
  mutate(dOY = strptime(date, "%Y-%m-%d")$yday+1) %>%
  rename(huc = huc8) %>%
  # keep rows inside the site/year breakpoints, or rows where either
  # breakpoint is missing (`&` binds tighter than `|`)
  filter(dOY >= finalSpringBP & dOY <= finalFallBP | is.na(finalSpringBP) | is.na(finalFallBP)) %>%
  # NOTE(review): this second filter applies the mean breakpoints to every
  # row, including rows that already passed their own site/year breakpoints,
  # so a site-specific window wider than the mean window is truncated -
  # confirm this is intended.
  filter(dOY >= mean.spring.bp & dOY <= mean.fall.bp)
summary(fullDataSync)
```

Rename some columns to match old variable names

```{r rename-data-sync}
# Switch the covariate columns to the older variable names.
fullDataSync <- rename(
  fullDataSync,
  Forest = forest,
  ReachElevationM = elev_nalcc,
  SurficialCoarseC = surfcoarse,
  TotDASqKM = AreaSqKM,
  Latitude = latitude,
  Longitude = longitude,
  CONUSWetland = fwswetlands
)

# Derived column: allonnet multiplied by the drainage area.
fullDataSync <- mutate(fullDataSync, ImpoundmentsAllSqKM = allonnet*TotDASqKM)
```

Sort `fullDataSync` by site, year and dOY. Then compute lagged airTemp and prcp.

```{r add-lagged, message=FALSE}
# Sort by site, year and day of year so that consecutive rows within a site
# are in chronological order before the lagged columns are built.
fullDataSync <- fullDataSync[order(fullDataSync$site,fullDataSync$year,fullDataSync$dOY),]

# Row counter in sorted order. seq_len() gives an empty vector for an empty
# data frame, whereas 1:length(x) would give c(1, 0) and fail on assignment.
fullDataSync$count <- seq_len(nrow(fullDataSync))

# NOTE(review): rows outside the spring/fall window were removed earlier, so
# the row before the first day of a season is presumably the last day of the
# previous year's season - confirm that lagging across that gap is intended.

# airTemp
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -1, NewVar='airTempLagged1')
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -2, NewVar='airTempLagged2')

# prcp
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -1, NewVar='prcpLagged1')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -2, NewVar='prcpLagged2')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -3, NewVar='prcpLagged3')

summary(fullDataSync)
```

Standardize covariates and climate data. Note that stdCovs computes the mean/stdev of each variable from the original data frame used to fit the model, and not the new prediction data frame. However, the mean/stdev are computed from columns that have duplicated values for each catchment. So it seems like they would be weighted more heavily towards catchments with more data. Not sure if this is intentional.

```{r standardize}
# Columns handed to stdCovs() for standardization.
var.names <- c(
  "Latitude", "Longitude",
  "airTemp", "airTempLagged1", "airTempLagged2",
  "prcp", "prcpLagged1", "prcpLagged2", "prcpLagged3",
  "dOY",
  "Forest", "Herbacious", "Agriculture", "Developed",
  "TotDASqKM", "ReachElevationM", "ImpoundmentsAllSqKM",
  "HydrologicGroupAB", "SurficialCoarseC", "CONUSWetland", "ReachSlopePCNT",
  "srad", "dayl", "swe"
)

# Add all-NA placeholder columns for the variables listed in var.names that
# are filled with NA here, and copy huc into HUC8.
fullDataSync <- mutate(
  fullDataSync,
  Developed = NA,
  Herbacious = NA,
  Agriculture = NA,
  HydrologicGroupAB=NA,
  ReachSlopePCNT=NA,
  HUC8=huc
)

fullDataSyncS <- stdCovs(x = fullDataSync, y = tempDataSync, var.names = var.names)
summary(fullDataSyncS)
```

Add interaction terms

```{r add-interaction}
fullDataSyncS_interact <- addInteractions(fullDataSyncS)
# Summarise only the columns that addInteractions() added.
summary(fullDataSyncS_interact[, setdiff(names(fullDataSyncS_interact), names(fullDataSyncS))])
```

Add deployment index

```{r add-deploy-index}
fullDataSyncS_deploy <- indexDeployments(fullDataSyncS_interact, regional = TRUE)
# Summarise only the columns that indexDeployments() added.
summary(fullDataSyncS_deploy[, setdiff(names(fullDataSyncS_deploy), names(fullDataSyncS_interact))])
```

# Run Predictions

```{r run-pred, message=FALSE}
# Apply the fitted model to the prepared inputs; the result carries the
# `tempPredicted` column that the following chunks plot and summarise.
fullDataSyncS_pred <- predictTemp(data = fullDataSyncS_deploy, coef.list = coef.list, cov.list = cov.list)
```

```{r plot-pred, message=FALSE}
# Predicted temperature (line) against observed temperature (red points)
# for 2005 through 2007.
ggplot(filter(fullDataSyncS_pred, year %in% 2005:2007), aes(date)) +
  geom_line(aes(y=tempPredicted)) +
  geom_point(aes(y=temp), color='red') +
  labs(y='Temp (degC)')
```

```{r plot-scatter}
# Observed versus predicted temperature on days with an observation; the
# dashed line is geom_abline()'s default line and the smooth is a linear fit.
ggplot(filter(fullDataSyncS_pred, !is.na(temp)), aes(temp, tempPredicted)) +
  geom_point() +
  geom_abline(linetype=2) +
  geom_smooth(method='lm') +
  labs(x='Observed Temp (degC)', y='Predicted Temp (degC)')
```

Join the predicted temperature back with un-standardized input data and compute mean predicted temperature.

```{r}
# Attach the predicted temperature to the un-standardized data, matching on
# catchment and date.
fullDataSync <- left_join(fullDataSync, select(fullDataSyncS_pred, featureid, date, tempPredicted), by = c("featureid", "date"))

# Mean predicted temperature over all rows, ignoring missing predictions.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
mean.pred <- mean(fullDataSync$tempPredicted, na.rm = TRUE)
print(mean.pred)
```

Compute derived metrics.

```{r derived-metrics, message=FALSE}
# deriveMetrics() is defined outside this document (presumably in
# conteStreamTemperature - verify); glimpse() prints a compact preview of
# whatever it returns.
derived.metrics <- deriveMetrics(fullDataSync)
glimpse(derived.metrics)
```


# Prediction Function

The prediction function `predictTemp` takes three arguments:

- `data`: dataframe of the prediction input data (standardized climate and covariate variables)
- `coef.list`: list of fitted coefficients for each covariate
- `cov.list`: list of variables used for each nested model 'level'

```{r, eval=FALSE}
# Same call as in the run-pred chunk; shown for reference only, because this
# chunk is not evaluated.
fullDataSyncS_pred <- predictTemp(data = fullDataSyncS_deploy, coef.list = coef.list, cov.list = cov.list)
```

```{r str-coef-cov}
# Print the structure of the two model objects; coef.list is truncated to
# two levels of nesting.
str(coef.list, max.level=2)
str(cov.list)
```

The `coef.list` object contains multiple data frames with the mean, sd, low/high values of each coefficient. For example, the fixed effects parameters are:

```{r coef-fixed}
# Print the fixed-effect coefficient table.
coef.list$B.fixed
```

Here is the source of the `predictTemp` function. The sections below walk through it step by step.

```{r, eval=FALSE}
# Predict stream temperature from standardized inputs and fitted coefficients.
#
# Args:
#   data:      data frame of prediction inputs; must contain the columns named
#              in cov.list and a `temp` column of observed temperature.
#   data.fit:  not referenced in the body; kept so existing calls still work.
#   coef.list: list of coefficient tables (B.fixed, B.site, B.huc, B.year,
#              B.ar1).
#   cov.list:  list of covariate names per level (fixed.ef, site.ef, huc.ef,
#              year.ef).
#
# Returns:
#   The data frame built by prepPredictDF() with added columns `trend`,
#   `prev.temp`, `prev.trend`, `prev.err` and `tempPredicted`.
predictTemp <- function(data, data.fit = tempDataSyncS, coef.list, cov.list) {
  # Wide coefficient tables; only their column names are needed below.
  B.site <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "site")
  B.huc <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "huc")
  B.year <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "year")

  # Add the coefficient columns for each level to the input rows.
  df <- prepPredictDF(data = data, coef.list = coef.list, cov.list = cov.list, var.name = "site")
  df <- prepPredictDF(data = df, coef.list = coef.list, cov.list = cov.list, var.name = "huc")
  df <- prepPredictDF(data = df, coef.list = coef.list, cov.list = cov.list, var.name = "year")
  df <- prepPredictDF(data = df, coef.list = coef.list, cov.list = cov.list, var.name = "ar1")

  # Trend = fixed effects + site effects + huc effects + year effects. Each
  # conditional term multiplies the covariate columns element-wise by the
  # matching coefficient columns and sums across columns.
  # NOTE(review): the fixed-effect term reads `data`, the others read `df`;
  # this assumes prepPredictDF() keeps row count and order - confirm.
  df$trend <- as.vector(coef.list$B.fixed$mean %*% t(as.matrix(select(data, one_of(cov.list$fixed.ef))))) +
    rowSums(as.matrix(select(df, one_of(cov.list$site.ef))) * as.matrix(select(df, one_of(names(B.site[-1]))))) +
    rowSums(as.matrix(select(df, one_of(cov.list$huc.ef))) * as.matrix(select(df, one_of(names(B.huc[-1]))))) +
    rowSums(as.matrix(select(df, one_of(cov.list$year.ef))) * as.matrix(select(df, one_of(names(B.year[-1])))))

  # Add B.ar1 to predictions
  # The previous row's observed temperature and trend are taken with
  # dplyr::lag(), which always returns a vector as long as its input; the
  # manual index `(2:nrow(data)) - 1` failed for fewer than two rows and
  # depended on `data` and `df` having the same number of rows.
  # NOTE(review): the lag is by row position over the whole table, so the
  # first row of a site takes its "previous" values from the row above it,
  # whatever site or year that row belongs to - confirm this is intended.
  df <- mutate(df, prev.temp = dplyr::lag(temp))
  df <- mutate(df, prev.trend = dplyr::lag(trend))
  df <- mutate(df, prev.err = prev.temp - prev.trend)
  df <- mutate(df, tempPredicted = trend)

  # Where a lagged error is available, add the autoregressive correction.
  has.prev.err <- which(!is.na(df$prev.err))
  df$tempPredicted[has.prev.err] <- df$trend[has.prev.err] +
    df$B.ar1[has.prev.err] * df$prev.err[has.prev.err]

  df
}
```

## Conditional Coefficients

First prepare the conditional coefficients.

The site coefficients are unique to each site with observed data.

```{r prep-coef-site}
# Wide table of site coefficients: one row per site, one column per
# coefficient (column names get the suffix ".B.site").
B.site <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "site")
head(B.site)
```

The huc coefficients are unique to each huc.

```{r prep-coef-huc}
# Wide table of huc coefficients: one row per huc, one column per
# coefficient (column names get the suffix ".B.huc").
B.huc <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "huc")
head(B.huc)
```

The year coefficients are unique to each year.

```{r prep-coef-year}
# Wide table of year coefficients: one row per year, one column per
# coefficient (column names get the suffix ".B.year").
B.year <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "year")
head(B.year)
```

The ar1 coefficients are unique to each site with data.

```{r prep-coef-ar1}
# For "ar1", prepConditionalCoef() returns coef.list$B.ar1 unchanged (no
# reshaping or renaming).
B.ar1 <- prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "ar1")
head(B.ar1)
```

## Prepare Input Values

We then add the input values for each set of coefficients (site, huc, year, ar1) to the dataframe.

```{r prep-df, warning=FALSE}
data <- fullDataSyncS_deploy

# Add one level at a time and preview only the columns each step added.
df1 <- prepPredictDF(data = data, coef.list = coef.list, cov.list = cov.list, var.name = "site")
head(df1[, setdiff(names(df1), names(data))])

df2 <- prepPredictDF(data = df1, coef.list = coef.list, cov.list = cov.list, var.name = "huc")
head(df2[, setdiff(names(df2), names(df1))])

df3 <- prepPredictDF(data = df2, coef.list = coef.list, cov.list = cov.list, var.name = "year")
head(df3[, setdiff(names(df3), names(df2))])

df4 <- prepPredictDF(data = df3, coef.list = coef.list, cov.list = cov.list, var.name = "ar1")
setdiff(names(df4), names(df3))
head(df4[, setdiff(names(df4), names(df3))])
```


## Compute Predicted Temperature

To compute the predicted temperatures, we take the linear sum of each set of factors using matrix multiplication. The plot below shows the predicted temperature time series over all years.

```{r df-trend, warning=FALSE}
df <- df4

# Contribution of each set of effects, computed separately and then added in
# the order fixed + site + huc + year.
trend.fixed <- as.vector(coef.list$B.fixed$mean %*% t(as.matrix(select(data, one_of(cov.list$fixed.ef)))))
trend.site <- rowSums(as.matrix(select(df, one_of(cov.list$site.ef))) * as.matrix(select(df, one_of(names(B.site[-1])))))
trend.huc <- rowSums(as.matrix(select(df, one_of(cov.list$huc.ef))) * as.matrix(select(df, one_of(names(B.huc[-1])))))
trend.year <- rowSums(as.matrix(select(df, one_of(cov.list$year.ef))) * as.matrix(select(df, one_of(names(B.year[-1])))))

df$trend <- trend.fixed + trend.site + trend.huc + trend.year

ggplot(df, aes(date, trend)) + geom_line()
```

The final predicted temperature is adjusted using the autoregressive residual term (`B.ar1`). The plot below shows the change in predicted temperature due to the autoregressive residuals which ranges from -2 to +2 degC.

```{r df-predict-ar1, warning=FALSE}
# compute lagged obs temp
df <- mutate(df, prev.temp = c(NA, temp[(2:(nrow(data))) -1]))
# compute lagged predicted trend
df <- mutate(df, prev.trend = c(NA, trend[(2:nrow(data)) - 1]))
# compute lagged error
df <- mutate(df, prev.err = prev.temp - prev.trend)
# copy trend to predicted temp
df <- mutate(df, tempPredicted = trend)
# add error term for days with lagged observed data
df[which(!is.na(df$prev.err)), ]$tempPredicted <- df[which(!is.na(df$prev.err)), ]$trend + 
  df[which(!is.na(df$prev.err)), ]$B.ar1 *
  df[which(!is.na(df$prev.err)), ]$prev.err
ggplot(df, aes(date)) + 
  geom_line(aes(y=tempPredicted-trend))
```

## prepConditionalCoef() Function

```{r, eval=FALSE}
# Return the coefficient table for one level of the model.
#
# Args:
#   coef.list: list holding a table named "B.<var.name>".
#   cov.list:  list holding the covariate names in "<var.name>.ef".
#   var.name:  level to prepare ("ar1" or a grouping column such as "site").
#
# Returns:
#   For "ar1", the table as stored. Otherwise a wide table with one row per
#   value of `var.name` and one column per coefficient, ordered as in
#   cov.list and suffixed with ".B.<var.name>".
prepConditionalCoef <- function(coef.list, cov.list, var.name) {
  coef.long <- coef.list[[paste0("B.", var.name)]]

  # The ar1 table is used as stored.
  if (var.name == "ar1") {
    return(coef.long)
  }

  # Long to wide: one row per group, one column per coefficient.
  coef.wide <- dcast(coef.long, formula = as.formula(paste0(var.name, " ~ coef")), value.var = "mean")

  # Reorder the columns to match cov.list for the matrix multiplication.
  wanted <- c(var.name, cov.list[[paste0(var.name, ".ef")]])
  coef.wide <- dplyr::select(coef.wide, one_of(wanted))

  # Suffix the coefficient columns so they can be told apart from covariates.
  names(coef.wide) <- c(names(coef.wide[1]), paste0(names(coef.wide[-1]), ".B.", var.name))

  return(coef.wide)
}
```

```{r}
# First rows of the wide site-coefficient table.
head(prepConditionalCoef(coef.list = coef.list, cov.list = cov.list, var.name = "site"))
```

```{r}
# Step-by-step version of prepConditionalCoef() for var.name = 'site'.
var.name <- 'site'
f <- paste0(var.name, " ~ coef")
head(coef.list[['B.site']])

# reshape to one row per site, one column per coefficient, mean as the value
B <- dcast(coef.list[[paste0("B.", var.name)]], formula = as.formula(f), value.var = "mean") # convert long to wide
head(B)

# order the columns as in cov.list so they line up for the matrix multiplication
mat_names <- c(var.name, cov.list[[paste0(var.name, ".ef")]])
B <- dplyr::select(B, one_of(mat_names))

# suffix the coefficient columns so they can be told apart from the covariates
names(B) <- c(names(B[1]), paste0(names(B[-1]), ".B.", var.name)) # first column (site) keeps its name
names(B)
B.site <- B
```

Add the site covariates to the data frame

```{r}
data <- fullDataSyncS_deploy
df <- prepPredictDF(data = data, coef.list = coef.list, cov.list = cov.list, var.name = "site")
# Preview only the columns that prepPredictDF() added.
head(df[, setdiff(names(df), names(data))])
```

Create two matrices with columns for covariates and coefficients

```{r}
# Covariate names and the matching coefficient column names.
cov.list$site.ef
names(B.site[-1])

# Matrix of site covariate values, one row per row of df.
mat_cov_site <- as.matrix(select(df, one_of(cov.list$site.ef)))
head(mat_cov_site)
dim(mat_cov_site)
# Matrix of site coefficient values taken from the same rows of df.
mat_coef_site <- as.matrix(select(df, one_of(names(B.site[-1]))))
head(mat_coef_site)
dim(mat_coef_site)
```

Multiply the two matrices element-wise and sum across each row

```{r}
# Element-wise product of the covariate and coefficient matrices built in the
# previous chunk, summed across columns: one site-effect value per row.
value_site <- rowSums(mat_cov_site * mat_coef_site)
length(value_site)
head(value_site)
```

# Model Details

## Model Theory

This equation defines the predicted temperature without the autoregressive residual term.

$$\omega_{s,h,d,y} = X^0 B^0 + X_h^{huc} B_h^{huc} + X_{s,h}^{site} B_{s,h}^{site} + X_y^{year} B_y^{year}$$

where $\omega_{s,h,d,y}$ is the expected temperature without residual error terms of site $s$, within HUC $h$, on julian day $d$ within year $y$. This value is a linear combination of four terms:

- $X^0 B^0$: fixed effects including the intercept
- $X_h^{huc} B_h^{huc}$: effects for HUC $h$
- $X_{s, h}^{site} B_{s, h}^{site}$: effects for site $s$
- $X_y^{year} B_y^{year}$: effects for year $y$

Each of these terms produces a vector of length $n_{d}$. 

## Sets of Coefficients

### Fixed Effects

The fixed effect variables include:

```{r}
# Name, mean and standard deviation of each fixed-effect coefficient.
select(coef.list$B.fixed, coef, mean, sd)
```

This plot shows the mean +/- 1 stdev for each effect excluding the intercept.

```{r plot-B-fixed}
# Bar chart of the fixed-effect means with +/- 1 sd error bars, ordered from
# largest to smallest mean; the intercept is left out.
coef.list$B.fixed %>% select(coef, mean, sd) %>%
  arrange(desc(mean)) %>%
  # freeze the sorted order as the factor level order for the x axis
  mutate(coef=ordered(coef, levels=coef)) %>%
  filter(coef!='intercept') %>%
  ggplot(aes(coef, mean)) +
  # `yintercept` is the argument name; the abbreviation `yint` is not matched
  # by current ggplot2, which then draws no reference line
  geom_hline(yintercept=0) +
  geom_bar(stat='identity', fill='grey50') +
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=0) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
  labs(x='', y='Mean +/- 1 St Dev')
```

Only one of these variables (`airTemp.TotDASqKM`) varies by day; the rest are constant for each site/catchment.

```{r}
# First rows of the fixed-effect input columns.
head(select(data, one_of(cov.list$fixed.ef)))
```

Use matrix multiplication to compute the contribution of each fixed effect on each day.

```{r}
# Scale each fixed-effect input column by its coefficient mean.
# NOTE(review): the coefficients are applied by position, so this assumes
# coef.list$B.fixed is in the same order as cov.list$fixed.ef - confirm.
x_fixed <- as.matrix(select(data, one_of(cov.list$fixed.ef))) %*% diag(coef.list$B.fixed$mean)
colnames(x_fixed) <- cov.list$fixed.ef

# Long format (date, factor, value) for plotting one panel per effect.
df_fixed <- x_fixed %>%
  as.data.frame %>%
  mutate(date = df$date) %>%
  gather(factor, value, -date)

ggplot(df_fixed, aes(date, value)) +
  geom_line() +
  facet_wrap(~factor, scales='free_y')
```

### HUC Effects

The huc effects include:

```{r}
# HUC coefficient means in wide form: one row per huc, one column per
# coefficient.
spread(select(coef.list$B.huc, huc, coef, mean), coef, mean)
```

For the selected catchment, which is in HUC `r df$huc %>% unique`, the parameters are:

```{r}
# Coefficients of the huc that the selected catchment belongs to.
select(filter(coef.list$B.huc, huc == unique(df$huc)), huc, coef, mean, sd)
```

This plot shows the mean +/- 1 stdev for each effect within HUC `r df$huc %>% unique`.

```{r plot-B-huc}
# Bar chart of the selected huc's coefficient means with +/- 1 sd error bars,
# ordered from largest to smallest mean.
coef.list$B.huc %>%
  filter(huc==df$huc %>% unique) %>%
  select(huc, coef, mean, sd) %>%
  arrange(desc(mean)) %>%
  # freeze the sorted order as the factor level order for the x axis
  mutate(coef=ordered(coef, levels=coef)) %>%
  filter(coef!='intercept') %>%
  ggplot(aes(coef, mean)) +
  # `yintercept` is the argument name; the abbreviation `yint` is not matched
  # by current ggplot2, which then draws no reference line
  geom_hline(yintercept=0) +
  geom_bar(stat='identity', fill='grey50') +
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=0) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
  labs(x='', y='Mean +/- 1 St Dev', title='HUC Effects')
```

All of these variables vary by day, except for the intercept, which is always 1.

```{r}
# First rows of the huc-effect input columns.
head(select(data, one_of(cov.list$huc.ef)))
```

Use matrix multiplication to compute the contribution of each HUC effect on each day.

```{r}
# Coefficients of the selected huc.
B_huc <- coef.list$B.huc %>%
  filter(huc==df$huc %>% unique) %>%
  select(huc, coef, mean, sd)

# The product below pairs coefficients with covariate columns by position, so
# put the coefficient rows in the same order as cov.list$huc.ef and stop if a
# covariate has no coefficient.
huc_coef_order <- match(cov.list$huc.ef, as.character(B_huc$coef))
stopifnot(!anyNA(huc_coef_order))
B_huc <- B_huc[huc_coef_order, ]

# Scale each huc-effect input column by its coefficient mean. `nrow` is given
# so that a single coefficient still yields a 1 x 1 matrix rather than an
# identity matrix of that size.
x_huc <- as.matrix(select(data, one_of(cov.list$huc.ef))) %*% diag(B_huc$mean, nrow=nrow(B_huc))
colnames(x_huc) <- cov.list$huc.ef

# Long format (date, factor, value) for plotting one panel per effect.
df_huc <- as.data.frame(x_huc)
df_huc$date <- df$date
df_huc <- gather(df_huc, factor, value, -date)

ggplot(df_huc, aes(date, value)) +
  geom_line() +
  facet_wrap(~factor, scales='free_y')
```


### Site Effects

The site effects include:

```{r}
# Site coefficient means in wide form: one row per site, one column per
# coefficient.
spread(select(coef.list$B.site, site, coef, mean), coef, mean)
```

For the selected site, which is `r df$site %>% unique`, the parameters are:

```{r}
# Coefficients of the selected site.
select(filter(coef.list$B.site, site == unique(df$site)), site, coef, mean, sd)
```

This plot shows the mean +/- 1 stdev for each effect for site `r df$site %>% unique`.

```{r plot-B-site}
# Bar chart of the selected site's coefficient means with +/- 1 sd error
# bars, ordered from largest to smallest mean.
coef.list$B.site %>%
  filter(site==df$site %>% unique) %>%
  select(site, coef, mean, sd) %>%
  arrange(desc(mean)) %>%
  # freeze the sorted order as the factor level order for the x axis
  mutate(coef=ordered(coef, levels=coef)) %>%
  filter(coef!='intercept') %>%
  ggplot(aes(coef, mean)) +
  # `yintercept` is the argument name; the abbreviation `yint` is not matched
  # by current ggplot2, which then draws no reference line
  geom_hline(yintercept=0) +
  geom_bar(stat='identity', fill='grey50') +
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=0) +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
  labs(x='', y='Mean +/- 1 St Dev', title='Site Effects')
```

All of these variables vary by day, except for the site intercept, which is always 1.

```{r}
# First rows of the site-effect input columns.
head(select(data, one_of(cov.list$site.ef)))
```

Use matrix multiplication to compute the contribution of each site effect on each day.

```{r}
# Coefficients of the selected site.
B_site <- coef.list$B.site %>%
  filter(site==df$site %>% unique) %>%
  select(site, coef, mean, sd)

# The product below pairs coefficients with covariate columns by position, so
# put the coefficient rows in the same order as cov.list$site.ef and stop if
# a covariate has no coefficient.
site_coef_order <- match(cov.list$site.ef, as.character(B_site$coef))
stopifnot(!anyNA(site_coef_order))
B_site <- B_site[site_coef_order, ]

# Scale each site-effect input column by its coefficient mean. `nrow` is
# given so that a single coefficient still yields a 1 x 1 matrix rather than
# an identity matrix of that size.
x_site <- as.matrix(select(data, one_of(cov.list$site.ef))) %*% diag(B_site$mean, nrow=nrow(B_site))
colnames(x_site) <- cov.list$site.ef

# Long format (date, factor, value) for plotting one panel per effect.
df_site <- as.data.frame(x_site)
df_site$date <- df$date
df_site <- gather(df_site, factor, value, -date)

ggplot(df_site, aes(date, value)) +
  geom_line() +
  facet_wrap(~factor, scales='free_y')
```


### Year Effects

The year effects include:

```{r}
# Year coefficient means in wide form: one row per year, one column per
# coefficient.
spread(select(coef.list$B.year, year, coef, mean), coef, mean)
```

This plot shows the mean of each year coefficient by year.

```{r plot-B-year}
# One line per coefficient showing its mean in each year; `year` is converted
# through character so that it plots as a number.
ggplot(
  select(coef.list$B.year, year, coef, mean),
  aes(as.numeric(as.character(year)), mean, color=coef)
) +
  geom_line() +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
  labs(x='Year', title='Year Effects')
```

All of these variables vary by day, except for the intercept, which is always 1.

```{r}
# First rows of the year-effect input columns.
head(select(data, one_of(cov.list$year.ef)))
```

Use element-wise multiplication to compute the contribution of each year effect on each day.

```{r}
# Rows of df with the year coefficient columns added.
year_input <- prepPredictDF(data = df, coef.list = coef.list, cov.list = cov.list, var.name = "year")

# The year coefficients differ from row to row, so each covariate column is
# multiplied element-wise by its matching coefficient column.
year_cov <- as.matrix(select(year_input, one_of(cov.list$year.ef)))
year_coef <- as.matrix(select(year_input, one_of(names(B.year[-1]))))
x_year <- year_cov * year_coef
colnames(x_year) <- cov.list$year.ef

# Long format (date, factor, value) for plotting one panel per effect.
df_year <- as.data.frame(x_year)
df_year$date <- df$date
df_year <- gather(df_year, factor, value, -date)

ggplot(df_year, aes(date, value)) +
  geom_line() +
  facet_wrap(~factor, scales='free_y')
```

Curious to see how variable the cubic spline is for each year.

```{r}
# Year coefficients as a named list: one element per year, each a numeric
# vector of coefficient means named by coefficient.
lst.year <- coef.list$B.year %>%
  select(year, coef, mean) %>%
  split(f=.$year) %>%
  lapply(FUN=function(x) {
    y <- x$mean
    names(y) <- x$coef
    y
  })

# Days of the year over which the yearly curves are evaluated.
spline.doy <- 90:311

# The inputs (intercept, standardized dOY and its square and cube) do not
# depend on the year, so they are built and standardized once instead of once
# per year.
spline.design <- data.frame(intercept.year=1, dOY=spline.doy)
spline.design <- stdCovs(x = spline.design, y = tempDataSync, var.names = c('dOY'))
spline.design <- mutate(spline.design, dOY2 = dOY^2, dOY3 = dOY^3)

# For each year, scale the input columns by that year's coefficients and sum
# across columns to get the year effect on each day.
x.year <- lapply(names(lst.year), function(yr) {
  coef <- lst.year[[yr]]
  # `nrow` keeps diag() correct when there is only one coefficient
  x <- as.matrix(spline.design[, names(coef)]) %*% diag(coef, nrow=length(coef))
  colnames(x) <- names(coef)
  x <- as.data.frame(x)
  data.frame(year=yr, dOY=spline.doy, value=rowSums(x))
})

# Stack the years and convert the year label to a number.
x.year <- do.call(rbind, x.year) %>%
  mutate(year=as.numeric(as.character(year)))


```{r}
# One curve per year, coloured by five-year group.
# NOTE(review): cut() uses right-closed intervals by default, so a year of
# exactly 1990, or any year after 2015, gets an NA group - confirm the data
# range.
ggplot(
  mutate(x.year, year_group=cut(year, breaks=seq(1990, 2015, by=5))),
  aes(dOY, value, color=year_group, group=year)
) +
  geom_line() +
  scale_color_brewer(type = 'seq', palette=16)
```


## Sum All Effects

This figure shows the contributions from each set of factors. The black line is the overall sum representing the predicted temperature for this site (excluding the autoregressive error term).

```{r}
# Daily total of each set of effects, one column per set.
y <- data.frame(
  date=df$date,
  fixed=rowSums(x_fixed),
  huc=rowSums(x_huc),
  site=rowSums(x_site),
  year=rowSums(x_year)
)
# Long format: one row per date and set of effects.
y <- gather(y, var, value, -date)

# Coloured lines are the individual sets; the black line is their sum.
ggplot(y, aes(date, value, color=var)) +
  stat_summary(fun.y=sum, geom='line', color='black', size=1) +
  geom_line()
```

# Example Predictions for 2006

This section focuses on the predictions for one site (`r location$site`), which is in catchment (`r location$featureid`) and HUC8 (`r data$huc %>% unique`), and one year (2006). This figure shows the contributions from each set of factors. The black lines are the sum of all lines within each panel.

```{r, fig.width=6, fig.height=12}
# Plot the contribution of each factor (coloured lines) and their daily sum
# (black line) for one calendar year.
#   effects: long data frame with columns date, factor and value
#   title:   panel title
#   yr:      calendar year to show
plot_effects_for_year <- function(effects, title, yr = 2006) {
  filter(effects, lubridate::year(date)==yr) %>%
    ggplot(aes(date, value, color=factor)) +
    stat_summary(fun.y=sum, geom='line', color='black', size=1) +
    geom_line() +
    labs(x='', y='value', title=title)
}

p.fixed <- plot_effects_for_year(df_fixed, 'Fixed Effects')
p.year <- plot_effects_for_year(df_year, 'Year Effects')
p.huc <- plot_effects_for_year(df_huc, 'HUC Effects')
p.site <- plot_effects_for_year(df_site, 'Site Effects')

# Stack the four panels in a single column.
grid.arrange(p.fixed, p.year, p.huc, p.site, ncol=1)
```

This figure takes the sum of each set of factors (black line in each panel above) and computes the overall predicted temperature (shown in this figure as the black line). This does not include the autoregressive residual term.

```{r}
# Contribution of each set of effects in 2006 (coloured lines) and their sum
# (black line).
ggplot(filter(y, lubridate::year(date)==2006), aes(date, value, color=var)) +
  stat_summary(fun.y=sum, geom='line', color='black', size=1) +
  geom_line()
```