diff --git a/4c_additMetadComments_rubricUpdate.r b/4c_additMetadComments_rubricUpdate.r
index 6b17504..057f80a 100644
--- a/4c_additMetadComments_rubricUpdate.r
+++ b/4c_additMetadComments_rubricUpdate.r
@@ -61,6 +61,15 @@ sql <- paste0("SELECT Reviewer.EGT_ID, Reviewer.response, Reviewer.date_complete
 FROM Reviewer WHERE (((Reviewer.EGT_ID)= ", ElementNames$EGT_ID, " ));")
 reviewerData <- dbGetQuery(cn, sql)
+
+sql <- paste0("SELECT FinalSppList.Scientific_Name, FinalSppList.Common_Name, FinalSppList.ELEMENT_GLOBAL_ID, ",
+              "ModelCycle.EGT_ID, ModelCycle.model_cycle, SpeciesWorkFlow.cutecode, SpeciesWorkFlow.model_type, ",
+              "SpeciesWorkFlow.modeled, SpeciesWorkFlow.alternate_method, SpeciesWorkFlow.existing_model ",
+              "FROM (FinalSppList INNER JOIN ModelCycle ON FinalSppList.ID = ModelCycle.final_spp_list_ID) ",
+              "INNER JOIN SpeciesWorkFlow ON ModelCycle.ID = SpeciesWorkFlow.model_cycle_ID ",
+              "WHERE (((FinalSppList.ELEMENT_GLOBAL_ID)= ", ElementNames$EGT_ID, "));")
+modelCycleData <- dbGetQuery(cn, sql)
+
 dbDisconnect(cn)
 rm(cn)
@@ -128,6 +137,27 @@ sql <- paste0("update lkpSpeciesRubric set process_review = '", revUpdate$rAttri
 "', process_reviewNotes = '", revUpdate$rComments, "' where EGT_ID = ", ElementNames$EGT_ID, " ;")
 dbExecute(db, statement = sql)
+
+## iterative
+# NOTE: "interative" below matches the column spelling in lkpSpeciesRubric
+iterMatrix <- data.frame("iAttribute" = c("C","A"),
+                         "iComments" = c("Model not re-run with new or modified data.",
+                                         "Model was re-run with new or modified data."))
+nCycles <- nrow(modelCycleData)
+maxCycle <- max(modelCycleData$model_cycle)
+if(nCycles > 1){
+  if(nCycles == 2 & "Both" %in% modelCycleData$model_type){
+    iterAtt <- "C"
+  # unlist so the check works whether the flags come back as logicals or 0/1
+  } else if(any(as.logical(unlist(modelCycleData[modelCycleData$model_cycle == maxCycle, c("alternate_method","existing_model")])), na.rm = TRUE)){
+    iterAtt <- "C"
+  } else {
+    iterAtt <- "A"
+  }
+} else {
+  # a single cycle means the model has never been re-run
+  iterAtt <- "C"
+}
+
+iterUpdate <- iterMatrix[match(iterAtt, iterMatrix$iAttribute),]
+sql <- paste0("update lkpSpeciesRubric set interative = '", iterUpdate$iAttribute,
+              "', interativeNotes = '", iterUpdate$iComments,
+              "' where EGT_ID = ", ElementNames$EGT_ID, " ;")
+dbExecute(db, statement = sql)
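+## Illustration only (hypothetical values, kept commented out so nothing
+## extra runs): a species whose second cycle merely added the other model
+## type resolves to "C" because the model itself was not re-run.
+# demo <- data.frame(model_cycle = c(1, 2), model_type = c("Both", "Both"),
+#                    alternate_method = c(0, 0), existing_model = c(0, 0))
+# nrow(demo) == 2 & "Both" %in% demo$model_type   # TRUE -> iterAtt "C"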
 
 ## clean up ----
 dbDisconnect(db)
diff --git a/MetadataEval_knitr.rnw b/MetadataEval_knitr.rnw
index 8c3a5be..2a1a9d4 100644
--- a/MetadataEval_knitr.rnw
+++ b/MetadataEval_knitr.rnw
@@ -331,6 +331,7 @@ Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\end
 \textbf{Model Evaluation and Intended Use} \\\\
 All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} \\
+
 \smallskip
 \pagebreak
@@ -544,14 +545,14 @@ NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Spe
 \bibitem{LiuEtAl2015} Liu, C., G. Newell, and M. White. 2015. On the selection of thresholds for predicting species occurrence with presence-only data. Ecology and Evolution 6:337–348.
-  \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. (In press). The development and delivery of species distribution models to inform decision-making. BioScience.
+  \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. 2019. The development and delivery of species distribution models to inform decision-making. BioScience 69:544-557.
 \end{thebibliography}
 \pagebreak
 \noindent
-\begin{table}
-\caption{Appendix 1. Descriptions for environmental variables included in model.}
+Appendix 1. Descriptions for environmental variables included in model.
+\begin{table}[H]
 \resizebox{\textwidth}{!}{
 <<echo=FALSE, results='asis'>>=
 # For variable descriptions, get variable name, source, description (3 columns)
diff --git a/postReview_0_user_run_SDM.R b/postReview_0_user_run_SDM.R
new file mode 100644
index 0000000..86852bd
--- /dev/null
+++ b/postReview_0_user_run_SDM.R
@@ -0,0 +1,42 @@
+# File: postReview_0_user_run_SDM.R
+# Purpose: recreate metadata after model review and export
+# in the Mobi project
+
+library(here)
+rm(list=ls())
+loc_scripts <- here()
+
+#######
+### loop it
+######
+rootPath <- file.path("H:","spp_models", "_test")
+
+fldrs <- list.dirs(rootPath, recursive = FALSE)
+
+# if you want to run fewer than the full list of folders
+# subset here.
+#fldrs <- fldrs[c(3,6)]
+#
+
+# each species folder is named with its species code;
+# basename() is safer than indexing the result of strsplit()
+sppVec <- basename(fldrs)
+sppVec
+
+for(sv in seq_along(sppVec)){
+
+  loc_model <- fldrs[[sv]]
+  model_species <- sppVec[[sv]]
+  load(file.path(loc_model, "outputs","rdata",paste0(model_species,"_runSDM_paths.Rdata")))
+  # the vars we need
+  fn_arg_vars <- c("project_overview","model_comments","metaData_comments","modeller","project_blurb","modelrun_meta_data")
+  for(arg in fn_arg_vars)
+    assign(arg, fn_args[[arg]])
+
+  nm_refBoundaries <- file.path(loc_scripts,"_data","other_spatial","feature", "US_States.shp")
+  nm_db_file <- file.path(loc_scripts,"_data", "databases", "SDM_lookupAndTracking.sqlite")
+
+  source(file.path(loc_scripts,"postReview_createMetadata.r"))
+
+}
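+
+# Usage sketch (hypothetical folder name): with rootPath pointing at
+# H:/spp_models/_test and a species folder "myspcode", one pass of the
+# loop loads myspcode/outputs/rdata/myspcode_runSDM_paths.Rdata, restores
+# the saved knitr arguments from fn_args, and knits the post-review
+# metadata PDF via postReview_createMetadata.r.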
diff --git a/postReview_MetadataEval_knitr.rnw b/postReview_MetadataEval_knitr.rnw
new file mode 100644
index 0000000..4fb83f2
--- /dev/null
+++ b/postReview_MetadataEval_knitr.rnw
@@ -0,0 +1,634 @@
+%This knitr document is called by the knit2pdf call in postReview_createMetadata.r
+\documentclass{article}
+\usepackage[utf8]{inputenc}
+\usepackage{geometry}
+\usepackage{fancyhdr} %for headers,footers
+\usepackage{underscore} %needed if any text has underscores
+\usepackage{rotating}
+\usepackage[super,comma]{natbib}
+\usepackage{hyperref}
+\usepackage{graphicx}
+\hypersetup{
+    colorlinks=true,
+    linkcolor=blue,
+    filecolor=magenta,
+    urlcolor=cyan,
+}
+\usepackage{caption} %for managing table captions
+\usepackage[table,xcdraw]{xcolor}
+\usepackage{multirow}
+\usepackage{array}
+\usepackage{float}
+\newcolumntype{P}[1]{>{\centering\arraybackslash}p{#1}}
+\usepackage[normalem]{ulem}
+\useunder{\uline}{\ul}{}
+\usepackage{tikz}
+\usetikzlibrary{shapes.geometric,calc}
+
+\newcommand\starscore[2]{
+\pgfmathsetmacro\pgfxa{#1+1}
+\tikzstyle{scorestars}=[star, star points=5, star point ratio=2.25, draw,inner sep=0.15em,anchor=outer point 3]
+\begin{tikzpicture}[baseline]
+  \foreach \i in {1,...,#2} {
+    \pgfmathparse{(\i<=#1?"yellow":"gray")}
+    \edef\starcolor{\pgfmathresult}
+    \draw (\i*1em,0) node[name=star\i,scorestars,fill=\starcolor] {};
+  }
+  \pgfmathparse{(#1>int(#1)?int(#1+1):0)}
+  \let\partstar=\pgfmathresult
+  \ifnum\partstar>0
+    \pgfmathsetmacro\starpart{#1-(int(#1))}
+    \path [clip] ($(star\partstar.outer point 3)!(star\partstar.outer point 2)!(star\partstar.outer point 4)$) rectangle
+    ($(star\partstar.outer point 2 |- star\partstar.outer point 1)!\starpart!(star\partstar.outer point 1 -| star\partstar.outer point 5)$);
+    \fill (\partstar*1em,0) node[scorestars,fill=yellow] {};
+  \fi
+\end{tikzpicture}
+}
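+% Usage sketch: \starscore{3.5}{5} draws five stars with three and a half of
+% them filled; argument #1 is the (possibly fractional) score and #2 the
+% total number of stars. Table 4 below calls it with the mean reviewer rating.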
+
+
+
+\geometry{letterpaper, top=0.45in, bottom=0.75in, left=0.75in, right=0.75in}
+\pagestyle{fancy} \fancyhf{} \renewcommand\headrulewidth{0pt} %strip default header/footer stuff
+%add footers
+\cfoot{
+\small %small font. A double backslash is a newline in fancyhdr
+Species distribution model for \Sexpr{as.character(ElementNames$CommName)} (\textit{\Sexpr{as.character(ElementNames$SciName)}}).
+}
+\rfoot{p. \thepage}
+
+
+\normalsize %return the font to normal
+
+\begin{document}
+
+\noindent
+\begin{minipage}[b]{4.75in} %everything in this minipage will be adjacent, left of the thermometer
+  \LARGE \textit{\Sexpr{as.character(ElementNames[[1]])}} \\
+  \normalsize Species Distribution Model (SDM) assessment metrics and metadata \\
+  Common name: \href{\Sexpr{as.character(NSurl)}}{\Sexpr{as.character(ElementNames[[2]])}} \\
+  Grank: \Sexpr{ paste(as.character(ElementNames[[6]])," - ",as.character(grank_desc[[2]]), sep="")}\\
+  Date: \Sexpr{format(Sys.Date(), "%d %b %Y")} \\
+  Code: \Sexpr{as.character(ElementNames$Code)} (EGT_ID: \Sexpr{as.character(ElementNames$EGT_ID)})
+\end{minipage} \hfill
+\begin{minipage}[b]{2in} %minipage for thermometer
+<<thermometer1, include=FALSE, echo=FALSE>>=
+  par(mar=c(0.9,0.2,0.2,0.2))
+  if (exists("tss.summ")) {
+    temp <- tss.summ$mean
+    thermTemp <- vector("list")
+    if (temp < .50){
+      thermTemp <- c("red", "poor")
+    } else if (temp < .80){
+      thermTemp <- c("yellow","fair")
+    } else {
+      thermTemp <- c("green", "good") }
+    symbols(1, 1, thermometers=cbind(0.5, 1, temp), inches=.5, fg = thermTemp[[1]],
+            xaxt = "n", yaxt = "n", ann = FALSE, bty = "n", pin = c(1.2,1.2) )
+    text (1,1, thermTemp[[2]],
+          adj = c(0.5,4), cex = .75, col = "black", xpd=NA)
+    text (1,1, paste("TSS=",format(round(temp,digits=2)),sep=""),
+          adj = c(0.5,6), cex = .75, col = "black", xpd=NA)
+  } else {
+    plot(1,1, col = "white", axes = FALSE)
+    box("outer","dotted") #show the outline of the fig box when debugging
+    text (1,1, "No evaluation",
+          adj = c(0.5,4), cex = .75, col = "black", xpd=NA)
+  }
+
+@
+  \begin{center}
+  \includegraphics{figure/thermometer1-1.pdf} \\ %place it
+  validation success \end{center}
+\end{minipage}
+
+\smallskip
+\hrule
+\medskip
+\noindent
+\Sexpr{as.character(project_overview)}
+\smallskip
+\noindent
+This SDM incorporates the number of known and background locations indicated in Table 1, modeled with the random forests routine {\protect\NoHyper\cite{breiman2001, iverson2004}\protect\endNoHyper} in the R statistical environment {\protect\NoHyper\cite{liaw2002, r}\protect\endNoHyper}. We validated the model by jackknifing (also called leave-one-out {\protect\NoHyper\cite{fielding1997, fielding2002, pearson2007}\protect\endNoHyper}) by \Sexpr{tolower(as.character(group$JackknType))} for a total of \Sexpr{length(group$vals)} groups. The statistics in Table 2 report the mean and variance of validation statistics for these jackknifing runs.\\
+\medskip
+\small
+\begin{minipage}[t]{3in}
+
+\smallskip %dummy first line to align with next minipage
+
+
+Table 1. Input statistics. Presence points are derived from polygon-based location information or from point-based observations. Groups describe groupings of points based on polygon data or spatial grouping of observations. Background points are placed throughout the model area, excluding known species locations.
+\smallskip
+\begin{center}
+<<echo=FALSE, results='asis'>>=
+  summ.table <- data.frame(Name=c("Presence points","Groups","Background points"),
+                           Number=c(nrow(subset(df.full, pres == 1)),
+                                    numEOs,
+                                    nrow(subset(df.full, pres == 0))
+                           ))
+  print(xtable(summ.table),
+        floating = FALSE, include.rownames=FALSE)
+@
+\end{center}
+
+\medskip
+
+Table 2. Validation statistics for jackknife trials. Overall Accuracy =
+Correct Classification Rate, TSS = True Skill Statistic, AUC =
+area under the ROC curve {\protect\NoHyper\cite{allouche2006, vaughan2005,
+fielding2002}\protect\endNoHyper}.
+\smallskip
+
+\begin{center}
+<<echo=FALSE, results='asis'>>=
+  if (exists("tss.summ")) {
+    summ.table <- data.frame(Name=c("Overall Accuracy", "Specificity", "Sensitivity",
+                                    "TSS", "Kappa", "AUC"),
+                             Mean=c(OvAc.summ$mean, specif.summ$mean,sensit.summ$mean,
+                                    tss.summ$mean,Kappa.unw.summ$mean,
+                                    auc.summ$mean),
+                             SD=c(OvAc.summ$sd, specif.summ$sd,sensit.summ$sd,
+                                  tss.summ$sd,Kappa.unw.summ$sd,
+                                  auc.summ$sd),
+                             SEM=c(OvAc.summ$sem, specif.summ$sem,sensit.summ$sem,
+                                   tss.summ$sem,Kappa.unw.summ$sem,
+                                   auc.summ$sem))
+  } else {
+    summ.table <- data.frame(Name=c("Overall Accuracy", "Specificity", "Sensitivity",
+                                    "TSS", "Kappa", "AUC"),
+                             Mean = rep(NA, 6), SD = rep(NA, 6), SEM = rep(NA, 6))
+  }
+  print(xtable(summ.table),
+        floating=FALSE, include.rownames=FALSE)
+@
+\end{center}
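+% Note on the *.summ objects used in Table 2: each is a per-statistic summary
+% across the jackknife runs with elements mean, sd, and sem. A hedged
+% reconstruction of the pattern, for a hypothetical vector v of per-group
+% TSS values:
+%   tss.summ <- list(mean = mean(v), sd = sd(v), sem = sd(v)/sqrt(length(v)))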
+
+\medskip
+\normalsize
+<<echo=FALSE>>=
+if (exists("n.var")) {
+  txt <- paste0("Validation runs used ",n.var," environmental
+    variables, the most important of ",OriginalNumberOfEnvars,"
+    variables (top ",(1-envarPctile)*100," percent).
+    Each tree was built with ",trRes[[1]]$mtry," variables
+    tried at each split (mtry) and ",trRes[[1]]$ntree," trees built.")
+} else {
+  txt <- "Validation was not possible for this model (too few EOs)."
+}
+@
+
+<<echo=FALSE, results='asis'>>=
+cat(txt)
+@
+The final model was built using \Sexpr{rf.full$ntree} trees, all presence
+and background points, with an mtry of \Sexpr{rf.full$mtry}, and \Sexpr{length(EnvVars$impVal)} environmental variables.
+\small
+\begin{center}
+<<ROCplot, include=FALSE, echo=FALSE>>=
+par(mar=c(2.8,2.5,.5,10), #bottom, left, top, right
+    tcl=-0.1, #tic length
+    cex=0.6, #text size
+    mgp=c(1.6,0.4,0) #placement of axis title, labels, line
+    )
+if (exists("perf")) {
+  plot(perf,lwd=2,
+       avg="threshold", colorize = TRUE,
+       #print.cutoffs.at = c(rf.full.ctoff[2], cutval.rf[2]),
+       #text.adj=c(-0.6,1.5), points.pch=19, points.cex=0.8, text.cex=0.8,
+       xlab="Avg. false positive rate", ylab="Avg. true positive rate",
+       colorkey.relwidth = 0.5,
+       colorize.palette=rainbow(256,start=3/6, end=0), colorkey.line = 1,
+       colorkey = FALSE
+       )
+
+  # set the color palette
+  rl.colors <- rev(rainbow(256,start=3/6, end=0))
+  # find the min and max of the cutoffs, as used in the ROC plot
+  # for some reason perf gives some values over one, which confuses the legend. Set it manually.
+  #rl.max.alpha <- max(unlist(perf@alpha.values))
+  rl.max.alpha <- 1
+  #rl.min.alpha <- min(unlist(perf@alpha.values))
+  rl.min.alpha <- 0
+  # get the y min and max of the ROC plot
+  rl.max.y <- max(axTicks(4))
+  rl.min.y <- min(axTicks(4))
+  # interpolate the cutoffs to the y axis
+  rl.alpha.ticks <- approxfun(c(rl.min.y, rl.max.y),
+                              c(rl.min.alpha, rl.max.alpha))(axTicks(4))
+  # set up a vector the length of colors ranging from min to max values
+  rl.col.cutoffs <- rev(seq(rl.min.alpha,rl.max.alpha, length=length( rl.colors )))
+  # create a function to do the interpolation in later commands
+  rl.alpha2y <- approxfun(c(min(rl.alpha.ticks), max(rl.alpha.ticks)),
+                          c(rl.min.y,rl.max.y))
+  # place the axis, using the correct labeling scheme
+  axis(at=rl.alpha2y(rl.alpha.ticks),labels=round((rl.alpha.ticks),2), side=4, line=3.5)
+  # set up definition for what to display and then apply to y breaks and colors
+  rl.display.bool <- (rl.col.cutoffs >= min(rl.alpha.ticks) &
+                        rl.col.cutoffs < max(rl.alpha.ticks))
+  rl.y.lower <- rl.alpha2y(rl.col.cutoffs)[rl.display.bool]
+  rl.colors <- rl.colors[rl.display.bool]
+  rl.y.width <- rl.y.lower[2] - rl.y.lower[1]
+  rl.y.upper <- rl.y.lower + rl.y.width
+  # manually define x locations way off graph to minimize confusion
+  rl.x.left <- 1.3
+  rl.x.right <- 1.32
+  # place the bar, then the legend label
+  rect(rl.x.left, rl.y.lower, rl.x.right, rl.y.upper, col=rl.colors, border=rl.colors, xpd=NA)
+  mtext("cutoff", side=1, at = c(1.35), line = 0, cex=0.6)
+  mtext("legend", side=1, at = c(1.35), line = 1, cex=0.6)
+} else {
+  plot(0.5,0.5,lwd=0, col = "white", xlim = c(0,1), ylim = c(0,1),
+       xlab="Avg. false positive rate", ylab="Avg. true positive rate")
+  text(0.5,0.5, "No evaluation")
+}
+
+#clean up; anchor the pattern so only the rl. helper objects are removed
+rm(list=ls(pattern="^rl\\."))
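+
+# The block above rebuilds ROCR's cutoff legend by hand: approxfun() maps the
+# 0-1 cutoff range onto the y-axis tick positions, rect() paints the color
+# ramp outside the plot region (xpd=NA permits drawing there), and mtext()
+# labels it. A minimal sketch of the same mapping idea, usable after any plot:
+#   f <- approxfun(c(0, 1), range(axTicks(4)))  # cutoff -> y coordinate
+#   f(0.5)                                      # y position for cutoff 0.5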
+
+@
+\includegraphics{figure/ROCplot-1.pdf} %place it
+\end{center}
+Figure 1. ROC plot for all \Sexpr{length(group$vals)} validation runs,
+averaged along cutoffs.
+
+\end{minipage}%
+\hfill \begin{minipage}[t]{3.5in}
+
+\smallskip %dummy first line to align with previous minipage
+
+<<importanceFig, include=FALSE, echo=FALSE>>=
+par(mar=c(3.2,2.5,.25,0.1), #bottom, left, top, right
+    tcl=-0.1, #tic length
+    mgp=c(1.3,0.4,0), #placement of axis title, labels, line
+    bty="l"
+    )
+  #get the order for the importance charts
+  ord <- rev(order(EnvVars$impVal, decreasing = TRUE)[1:length(EnvVars$impVal)])
+  xmin.i <- min(EnvVars$impVal)
+  #create importance dot chart
+  dotchart(EnvVars$impVal[ord], xlab = expression("lower" %->% "greater"),
+           xlim = c(xmin.i, max(EnvVars$impVal)), labels = EnvVars$fullName[ord],
+           cex = 0.62 #character size
+           )
+  mtext("importance", side = 1, line = 2, cex = 0.62)
+@
+\begin{center}
+\includegraphics{figure/importanceFig-1.pdf} %place it
+\end{center}
+Figure 2. Relative importance of each environmental variable based on the full
+model using all sites as input. Importance values (mean decrease in accuracy) are extracted from the randomForest{\protect\NoHyper\cite{liaw2002}\protect\endNoHyper} function. See Appendix 1 for variable descriptions.
+\end{minipage}%
+
+\normalsize
+\pagebreak
+
+<<pPlotFig, include=FALSE, echo=FALSE>>=
+par(tcl=-0.2, #tic length
+    cex=0.6, #text size
+    mgp=c(1.6,0.4,0) #placement of axis title, labels, line
+    )
+
+# layout(matrix(c(17,2,4,6,8,17,1,3,5,7,17,10,12,14,16,17,9,11,13,15),
+#               nrow = 4, ncol = 5, byrow = TRUE),
+#        widths = c(0.15,1,1,1,1),heights=c(1,3,1,3))
+layout(matrix(c(19,2,4,6,20,19,1,3,5,20,19,8,10,12,20,19,7,9,11,20,19,14,16,18,20,19,13,15,17,20),
+              nrow = 6, ncol = 5, byrow = TRUE),
+       widths = c(0.05,1,1,1,0.1),heights=c(1,4,1,4,1,4))
+
+pres.dat <- subset(df.full, pres==1)
+abs.dat <- subset(df.full, pres==0)
+
+for (plotpi in 1:length(pPlots)){
+  par(mar=c(3,2,0,0.5))
+  if(is.character(pPlots[[plotpi]]$x)){
+    barplot(pPlots[[plotpi]]$y, width=rep(1, length(pPlots[[plotpi]]$y)), col="grey",
+            xlab = pPlots[[plotpi]]$fname, ylab = NA,
+            names.arg=pPlots[[plotpi]]$x, space=0.1,
+            cex.names=0.7, las=2)
+    plot(1,1,axes=FALSE, type="n", xlab=NA, ylab=NA) #skip density plots if pPlot is barplot
+  } else {
+    plot(pPlots[[plotpi]]$x, pPlots[[plotpi]]$y,
+         type = "l",
+         xlab = pPlots[[plotpi]]$fname, ylab=NA)
+    pres.dens <- density(pres.dat[,pPlots[[plotpi]]$gridName])
+    abs.dens <- density(abs.dat[,pPlots[[plotpi]]$gridName])
+    par(mar=c(0,2,0.5,0.5))
+    plot(pres.dens, xlim=c(min(pPlots[[plotpi]]$x),
+                           max(pPlots[[plotpi]]$x)),
+         ylim=c(0,max(c(abs.dens$y,pres.dens$y))),
+         main=NA,xlab=NA,ylab=NA,
+         axes=FALSE, col="blue", lwd=2
+         )
+    lines(abs.dens, col="red")
+  }
+}
+  mtext("log of fraction of votes", side = 2, line = -1, outer=TRUE, cex = 0.7)
+@
+\includegraphics{figure/pPlotFig-1.pdf} \\ %place them, then line break
+\small
+Figure 3. Partial dependence plots for the \Sexpr{as.character(length(pPlots))} environmental variables with the most influence on the model. Each plot shows the effect of the variable on the probability of appropriate habitat with the effects of the other variables removed {\protect\NoHyper\cite{liaw2002}\protect\endNoHyper}. The x-axis covers the range of values for the variable assessed; the y-axis represents the strength of the relationship between the variable and the model response. Peaks in the black line indicate where this variable had the strongest influence on predicting appropriate habitat. Decreasing lines from left to right show a negative relationship overall; increasing lines, a positive one. The distribution of each category (thin red = Background points, thick blue = Presence points) is depicted at the top margin. See Appendix 1 for variable descriptions.
+\normalsize
+
+\medskip
+\medskip
+\noindent
+Species distribution model outputs display the probability (0-1) of a location (i.e., stream reach or raster cell) having environmental conditions similar to those at known presence locations. No model will ever depict sites where a targeted element will occur with certainty; it can \textit{only} depict locations it interprets as appropriate habitat for the targeted element. The delineation of suitable habitats is made by the selection of a threshold value, where locations with values above the threshold are designated as likely suitable habitat, and those with values below the threshold may be unsuitable. Threshold values are often statistically calculated. SDMs can be used in many ways, and the depiction of appropriate habitat should vary with the intended use. For targeting field surveys, an SDM may be used to refine the search area; users should always employ additional GIS tools to further direct search efforts. A lower threshold depicting more area may be appropriate to use in this case. For a more conservative depiction of suitable habitat that shows less area, a higher threshold may be more appropriate. Different thresholds for this model (full model) are described in Table 3.
+
+\medskip
+\noindent
+\begin{minipage}{\linewidth} %keep table header with table
+\small
+Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\endNoHyper} calculated from the final model. The Value column reports the threshold; Groups indicates the percentage (number in brackets) of groups within which at least one point was predicted as suitable habitat; Pts indicates the percentage of presence points predicted as having suitable habitat. Total numbers of groups and presence points used in the final model are reported in Table 1.
+\smallskip
+\begin{center}
+<<echo=FALSE, results='asis'>>=
+  tbl <- sdm.thresh.table
+  #tbl$Citation <- gsub("(^.*)","\\\\cite{\\1}",sdm.thresh.table$Citation)
+  print(xtable(tbl, digits = 3, align = c("r","p{2in}","r","r","r","p{2.3in}")),
+        floating=FALSE, include.rownames=FALSE)
+@
+\end{center}
+\end{minipage}
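+% Hedged example of applying a Table 3 threshold downstream (R; "habitat" is
+% a hypothetical name, the other objects exist in this project):
+%   habitat <- ras >= sdm.thresh.table$Value[1]
+% reclassifies the continuous prediction raster into a binary
+% suitable/unsuitable map using the first listed threshold.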
+
+\normalsize
+\bigskip
+\noindent
+\textbf{Model Evaluation and Intended Use} \\\\
+All SDMs are sensitive to data inputs and methodological choices. Table 4 summarizes ratings by reviewers of the model output. Table 5 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)}
+
+\bigskip
+
+\noindent Table 4. Expert review results. The following is a summary of the reviews received to date from NatureServe Network biologists. Biologists assigned an overall score to the model (1 = poorest performance; 5 = highest performance) and also had an opportunity to make specific suggestions related to modeling extent. Summaries of overall scores are reported here.
+\begin{table}[H]
+  \small
+  \centering
+  %\begin{tabular}{p{4cm}p{2cm}p{2cm}p{2cm}p{2cm}}
+  \begin{tabular}{c c c c c }
+  Mean Star Rating
+  & Number of reviews
+  & Max rating
+  & Min rating
+  & Median rating
+\\ \hline
+  \starscore{\Sexpr{meanRating}}{5}(\Sexpr{meanRating})
+  & \Sexpr{numReviewers}
+  & \Sexpr{maxRating}
+  & \Sexpr{minRating}
+  & \Sexpr{medianRating}
+\\ \hline
+\end{tabular}
+\end{table}
+\smallskip
+\pagebreak
+
+
+\noindent Table 5. Model evaluation results based on Sofaer et al. 2019. Scores fall into three classes: ideal, acceptable, or interpret with caution.
+\normalsize
+\begin{table}[H]
+\small
+\centering
+\begin{tabular}{m{2.8cm}m{5cm}P{2cm}m{5.7cm}}
+\hline
+Category & Metric & Score & Notes
+\\ \hline
+\multirow{3}{*}{Species Data}
+  & Presence data quality
+  & \Sexpr{as.character(sdm.modeluse$spdata_dataqual)}
+  & \Sexpr{as.character(sdm.modeluse$spdata_dataqualNotes)}
+\\ \cline{2-4}
+  & Absence/Background Data
+  & \Sexpr{as.character(sdm.modeluse$spdata_abs)}
+  & \Sexpr{as.character(sdm.modeluse$spdata_absNotes)}
+\\ \cline{2-4}
+  & Evaluation Data
+  & \Sexpr{as.character(sdm.modeluse$spdata_eval)}
+  & \Sexpr{as.character(sdm.modeluse$spdata_evalNotes)}
+\\ \hline
+\multirow{2}{2.8cm}{Environmental Predictors}
+  & Ecological and predictive relevance
+  & \Sexpr{as.character(sdm.modeluse$envvar_relevance)}
+  & \Sexpr{as.character(sdm.modeluse$envvar_relevanceNotes)}
+\\ \cline{2-4}
+  & Spatial and temporal alignment
+  & \Sexpr{as.character(sdm.modeluse$envvar_align)}
+  & \Sexpr{as.character(sdm.modeluse$envvar_alignNotes)}
+\\ \hline
+\multirow{5}{*}{Modeling Process}
+  & Algorithm choice
+  & \Sexpr{as.character(sdm.modeluse$process_algo)}
+  & \Sexpr{as.character(sdm.modeluse$process_algoNotes)}
+\\ \cline{2-4}
+  & Sensitivity
+  & \Sexpr{as.character(sdm.modeluse$process_sens)}
+  & \Sexpr{as.character(sdm.modeluse$process_sensNotes)}
+\\ \cline{2-4}
+  & Statistical rigor
+  & \Sexpr{as.character(sdm.modeluse$process_rigor)}
+  & \Sexpr{as.character(sdm.modeluse$process_rigorNotes)}
+\\ \cline{2-4}
+  & Performance
+  & \Sexpr{as.character(sdm.modeluse$process_perform)}
+  & \Sexpr{as.character(sdm.modeluse$process_performNotes)}
+\\ \cline{2-4}
+  & Model review
+  & \Sexpr{as.character(sdm.modeluse$process_review)}
+  & \Sexpr{as.character(sdm.modeluse$process_reviewNotes)}
+\\ \hline
+\multirow{3}{*}{Model Products}
+  & Mapped products
+  & \Sexpr{as.character(sdm.modeluse$products_mapped)}
+  & \Sexpr{as.character(sdm.modeluse$products_mappedNotes)}
+\\ \cline{2-4}
+  & Interpretation support products
+  & \Sexpr{as.character(sdm.modeluse$products_support)}
+  & \Sexpr{as.character(sdm.modeluse$products_supportNotes)}
+\\ \cline{2-4}
+  & Reproducibility
+  & \Sexpr{as.character(sdm.modeluse$products_repo)}
+  & \Sexpr{as.character(sdm.modeluse$products_repoNotes)}
+\\ \hline
+  & Iterative
+  & \Sexpr{as.character(sdm.modeluse$interative)}
+  & \Sexpr{as.character(sdm.modeluse$interativeNotes)}
+\\ \hline
+\end{tabular}
+\end{table}
+
+\medskip
+\noindent
+<<echo=FALSE, results='asis'>>=
+if(nrow(sdm.customComments.subset) > 0){
+  cat("\\textbf{Model Comments} \\\\")
+  cat(sdm.customComments.subset$comments)
+}
+@
+
+\pagebreak
+\medskip
+\begin{center}
+
+<<mapFig, include=FALSE, echo=FALSE, dev='png'>>=
+#par(mar=c(0,0,4.5,0), xpd = F, cex=0.9, adj = 0.05)
+nclr <- 5
+clrs <- brewer.pal('Blues',n=nclr)
+
+# figure out size of study area, expand if less than 889km across,
+# which is 1:5,000,000 when figure is 7 inches wide (which it is here)
+bbox <- bb(studyAreaExtent)
+studyAreaWidth <- bbox$xmax - bbox$xmin
+studyAreaHeight <- bbox$ymax - bbox$ymin
+if(studyAreaWidth < 889000){
+  bbox <- bb(bbox, width = 889000, relative = FALSE)
+  }
+if(studyAreaHeight < 889000){
+  bbox <- bb(bbox, height = 889000, relative = FALSE)
+  }
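+
+# Arithmetic behind the 889 km cutoff: a 7 in wide figure at 1:5,000,000
+# spans 7 * 5,000,000 in = 35,000,000 in; at 2.54 cm per inch that is
+# 88,900,000 cm, i.e. 889 km (assumes the raster CRS units are meters).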
+
+
+tmap_options(max.raster = c("plot" = 300000, "view" = 100000))
+tmap_mode("plot")
+###get the basemap
+# for basemap options see http://leaflet-extras.github.io/leaflet-providers/preview/
+# for native options provided by read_osm, see ?OpenStreetMap::openmap
+## this is stamen-toner
+#basetiles <- read_osm(studyAreaExtent, type = "stamen-toner", ext = 1.1)
+
+## this is Esri.WorldGrayCanvas
+#mtype <- 'https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}.png?'
+
+## this is CartoDB.Positron
+mtype <- 'https://a.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png'
+basetiles <- read_osm(bbox, type = mtype, ext = 1.1)
+# plot it
+qtm(basetiles) +
+  tm_shape(ras) +
+  tm_raster(palette = clrs, title = "modeled suitability",
+            labels = c("Low Habitat Suitability", rep(" ", nclr-2), "High Habitat Suitability")) +
+  tm_shape(referenceBoundaries) +
+  tm_borders(col = "grey", lwd = 1) +
+  tm_shape(studyAreaExtent) +
+  tm_borders(col = "red", lwd = 2) +
+  tm_compass(north = 0, type = "arrow", position = c("left","bottom")) +
+  tm_scale_bar()
+
+@
+\includegraphics{figure/mapFig-1.png}
+\end{center}
+Figure 4. A generalized view of the model predictions throughout the modeled area. State boundaries are depicted as a thin gray line. The modeled area is outlined in red. Basemap: CartoDB.Positron (\copyright \href{https://www.openstreetmap.org/copyright}{OpenStreetMap} contributors; \copyright \href{https://carto.com/attributions}{CARTO}).
+
+\normalsize
+\pagebreak
+
+This distribution model would not have been possible without data sharing among organizations. Other data sets and sources may have been evaluated, but this final model includes data from these sources:
+\begin{itemize}
+  \setlength{\itemsep}{0pt}
+  \setlength{\parskip}{0pt}
+  \setlength{\parsep}{0pt}
+<<echo=FALSE, results='asis'>>=
+for(i in 1:length(sdm.dataSources$ProgramName)){
+  x <- paste("\\item ", sdm.dataSources$ProgramName[[i]], "\n", sep = "")
+  # gsub, not sub: escape every ampersand, not just the first - a special character in latex
+  y <- gsub("&", "\\\\&", x)
+  cat(y)
+}
+@
+\end{itemize}
+
+\medskip
+\noindent
+This model was built using a methodology developed through collaboration among the Florida Natural Areas Inventory, the New York Natural Heritage Program, the Pennsylvania Natural Heritage Program, and the Virginia Natural Heritage Program, all member programs of the NatureServe Network. It is one of a suite of species distribution models developed using the same methods, scripts, and environmental data sets. Our goal was to be consistent and transparent in our methodology, validation, and output.
+
+\medskip
+\noindent
+\setlength{\fboxsep}{5pt}
+\fbox{
+\begin{minipage}[c]{0.2\linewidth}
+\includegraphics[width=1.0\linewidth]{figure/NatureServeLogo}%png logo file at repository root
+\end{minipage}%
+\begin{minipage}[c]{0.75\linewidth}
+Please cite this document and its associated SDM as: \\
+NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Species distribution model for \Sexpr{as.character(ElementNames$CommName)} (\textit{\Sexpr{as.character(ElementNames$SciName)}}). Created on \Sexpr{format(Sys.Date(), "%d %b %Y")}. Arlington, VA with Network partners from VA, PA, and NY.
+\end{minipage}
+}
+
+\medskip
+\noindent
+\textbf{References}
+\small
+\renewcommand{\refname}{\vskip -40 pt} %kill the header on the bibliography
+\begin{thebibliography}{99}\setlength{\itemsep}{-1pt}
+  \bibstyle{biblatex}
+  \bibitem{breiman2001} Breiman, L. 2001. Random forests. Machine Learning 45:5-32.
+  \bibitem{iverson2004} Iverson, L. R., A. M. Prasad, and A. Liaw. 2004.
+  New machine learning tools for predictive vegetation mapping after
+  climate change: Bagging and Random Forest perform better than Regression
+  Tree Analysis. Landscape ecology of trees and forests. Proceedings of the
+  twelfth annual IALE (UK) conference, Cirencester, UK, 21-24 June 2004 317-320.
+  \bibitem{liaw2002} Liaw, A. and M. Wiener. 2002. Classification and
+  regression by randomForest. R News 2:18-22. Version \Sexpr{packageDescription("randomForest")$Version}.
+  \bibitem{r} R Core Team. 2016. R: A language and environment for statistical computing.
+  R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. \Sexpr{R.version.string}.
+  \bibitem{fielding1997} Fielding, A. H. and J. F. Bell. 1997.
+  A review of methods for the assessment of prediction errors in
+  conservation presence/absence models. Environmental Conservation 24:38-49.
+  \bibitem{fielding2002} Fielding, A. H. 2002. What are the appropriate
+  characteristics of an accuracy measure? Pages 271-280 in Predicting Species
+  Occurrences, issues of accuracy and scale. J. M. Scott, P. J. Heglund, M.
+  L. Morrison, J. B. Haufler, M. G. Raphael, W. A. Wall, F. B. Samson, eds. Island Press, Washington.
+  \bibitem{pearson2007} Pearson, R. G. 2007. Species Distribution Modeling for
+  Conservation Educators and Practitioners. Synthesis.
+  American Museum of Natural History. Available at http://ncep.amnh.org.
+  \bibitem{allouche2006} Allouche, O., A. Tsoar, and R. Kadmon. 2006.
+  Assessing the accuracy of species distribution models: prevalence,
+  kappa and the true skill statistic (TSS). Journal of Applied Ecology 43:1223-1232.
+  \bibitem{vaughan2005} Vaughan, I. P. and S. J. Ormerod. 2005. The continuing
+  challenges of testing species distribution models.
+  Journal of Applied Ecology 42:720-730.
+  \bibitem{sing2005} Sing, T., O. Sander, N. Beerenwinkel, and T. Lengauer. 2005.
+  ROCR: visualizing classifier performance in R. Bioinformatics
+  21(20):3940-3941.
+  \bibitem{LiuEtAl2005} Liu, C., P. M. Berry, T. P. Dawson, and R. G. Pearson. 2005.
+  Selecting thresholds of occurrence in the prediction of species distributions.
+  Ecography 28:385–393.
+  \bibitem{LiuEtAl2015} Liu, C., G. Newell, and M. White. 2015. On the selection of
+  thresholds for predicting species occurrence with presence-only data. Ecology and
+  Evolution 6:337–348.
+  \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. 2019. The development and delivery of species distribution models to inform decision-making. BioScience 69:544-557.
+\end{thebibliography}
+
+\pagebreak
+\noindent
+
+Appendix 1. Descriptions for environmental variables included in model.
+\begin{table}[H]
+\resizebox{\textwidth}{!}{
+<<echo=FALSE, results='asis'>>=
+# For variable descriptions, get variable name, source, description (3 columns)
+addtorow <- list()
+addtorow$pos <- list()
+addtorow$pos[[1]] <- c(0)
+addtorow$command <- c(paste("\\hline \n",sep=""))
+print(xtable(sdm.var.info, label = NULL),
+      include.rownames=FALSE,
+      sanitize.text = force, add.to.row = addtorow, floating=FALSE)
+@
+}
+\end{table}
+\medskip
+
+Appendix 2. Model details for reproducibility
+\begin{itemize}
+  \setlength{\itemsep}{0pt}
+  \setlength{\parskip}{0pt}
+  \setlength{\parsep}{0pt}
+  \item{All R scripts are available at \href{https://github.com/HeritageNetwork/Regional_SDM}{github}}
+  \item{The repository version (repo head) used for this run was: \Sexpr{modelrun_meta_data$repo_head}}
+  \item{The model run name was: \Sexpr{model_run_name}}
+  \item{Validation metrics requiring a threshold use MTP (minimum training presence)}
+  \item{R version: \Sexpr{modelrun_meta_data$r_version}}
+  \item{Random seed for full randomForest model: \Sexpr{modelrun_meta_data$seed}}
+  \item{randomForest mtry: \Sexpr{rf.full$mtry}}
+  \item{randomForest ntrees: \Sexpr{rf.full$ntree}}
+\end{itemize}
+
+\end{document}
diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r
new file mode 100644
index 0000000..e30d2ba
--- /dev/null
+++ b/postReview_createMetadata.r
@@ -0,0 +1,264 @@
+# File: postReview_createMetadata.r
+# Purpose: to summarize validation data and other information about the
+# model and write it to a pdf. This pdf should accompany ALL sharing/showing
+# of the SDM map.
+
+# For knitr to work, you need MiKTeX installed. See http://miktex.org/
+
+# load libraries ----
+library(ROCR) #July 2010: order matters, see http://finzi.psych.upenn.edu/Rhelp10/2009-February/189936.html
+library(randomForest)
+library(knitr)
+library(raster)
+library(maptools)
+library(sf)
+library(RColorBrewer)
+library(rasterVis)
+library(RSQLite)
+library(xtable)
+library(stringi)
+library(tables)
+library(here) # here() is used below to locate the tracker connection string
+
+library(tmap)
+library(tmaptools)
+library(OpenStreetMap)
+
+
+### find and load model data ----
+
+#setwd(loc_model)
+#dir.create(paste0(model_species,"/outputs/metadata"), recursive = TRUE, showWarnings = FALSE)
+dir.create(file.path(loc_model, "/outputs/metadata"), recursive = TRUE, showWarnings = FALSE)
+#setwd(paste0(model_species,"/outputs"))
+
+model_run_name <- modelrun_meta_data$model_run_name
+load(file.path(loc_model, "outputs","rdata",paste0(model_run_name,".RData")))
+
+# rdatfiles <- list.files(file.path(loc_model, "outputs","rdata"), pattern = ".Rdata")
+# rdatfile <- rdatfiles[[length(rdatfiles)]]
+# load(file.path(loc_model, "outputs","rdata",rdatfile))
+# model_run_name <- strsplit(rdatfile, split = "\\.")[[1]][[1]]
+
+# get background poly data for the map (study area, reference boundaries)
+studyAreaExtent <- st_read(file.path(loc_model,"inputs","model_input",paste0(model_run_name, "_studyArea.gpkg")), quiet = T)
+referenceBoundaries <- st_read(nm_refBoundaries, quiet = TRUE) # name of state boundaries file
+
+r <- list.files(path = file.path(loc_model,"outputs","model_predictions"),
+                pattern = ".tif$",
+                full.names=FALSE)
+fileName <- r[gsub(".tif", "", r) == model_run_name]
+ras <- raster(file.path(loc_model,"outputs","model_predictions", fileName))
+
+# project to match raster, just in case
+studyAreaExtent <- st_transform(studyAreaExtent, as.character(ras@crs))
+referenceBoundaries <- st_transform(referenceBoundaries, as.character(ras@crs))
+
+## Get Program and Data Sources info ----
+op <- options("useFancyQuotes")
+options(useFancyQuotes = FALSE)
+
+db <- dbConnect(SQLite(),dbname=nm_db_file)
+SQLquery <- paste("Select lkpModelers.ProgramName, lkpModelers.FullOrganizationName, ",
+                  "lkpModelers.City, lkpModelers.State, lkpSpecies.sp_code ",
+                  "FROM lkpModelers ",
+                  "INNER JOIN lkpSpecies ON lkpModelers.ModelerID=lkpSpecies.ModelerID ",
+                  "WHERE lkpSpecies.sp_code='", model_species, "'; ", sep="")
+sdm.modeler <- dbGetQuery(db, statement = SQLquery)
+# NOTE: the use column should be populated with 1/0 for sources of data used
+SQLquery <- paste("SELECT sp.sp_code, sr.ProgramName, sr.State ",
+                  "FROM lkpSpecies as sp ",
+                  "INNER JOIN mapDataSourcesToSpp as mp ON mp.EGT_ID=sp.EGT_ID ",
+                  "INNER JOIN lkpDataSources as sr ON mp.DataSourcesID=sr.DataSourcesID ",
+                  "WHERE sp.sp_code ='", model_species, "' ",
+                  # "AND mp.use = 1 ",
+                  "; ", sep="")
+sdm.dataSources <- dbGetQuery(db, statement = SQLquery)
+sdm.dataSources <- sdm.dataSources[order(sdm.dataSources$ProgramName),]
+
+SQLquery <- paste("SELECT model_end_time date, egt_id, metadata_comments comments",
+                  " FROM tblModelResults ",
+                  "WHERE model_run_name ='", model_run_name, "'; ", sep="")
+sdm.customComments <- dbGetQuery(db, statement = SQLquery)
+# assume you want the most recently entered comments, if there are multiple entries
+if(nrow(sdm.customComments) > 1) {
+  sdm.customComments <- sdm.customComments[order(sdm.customComments$date, decreasing = TRUE),]
+  sdm.customComments.subset <- sdm.customComments[1,]
+} else {
+  sdm.customComments.subset <- sdm.customComments
+}
+
+## Get threshold information ----
+SQLquery <- paste("Select ElemCode, dateTime, cutCode, cutValue, capturedEOs, capturedPts ",
+                  "FROM tblModelResultsCutoffs ",
+                  "WHERE model_run_name ='", model_run_name, "'; ", sep="")
+sdm.thresholds <- dbGetQuery(db, statement = SQLquery)
+# filter to only most recent
+#uniqueTimes <- unique(sdm.thresholds$dateTime)
+#mostRecent <- uniqueTimes[order(uniqueTimes, decreasing = TRUE)][[1]]
+#sdm.thresholds <- sdm.thresholds[sdm.thresholds$dateTime == mostRecent,]
+
+# get info about thresholds
+SQLquery <- paste("SELECT cutCode, cutFullName, cutDescription, cutCitationShort, cutCitationFull, sortOrder ",
+                  "FROM lkpThresholdTypes ",
+                  "WHERE cutCode IN (",
+                  toString(sQuote(sdm.thresholds$cutCode)),
+                  ");", sep = "")
+sdm.thresh.info <- dbGetQuery(db, statement = SQLquery)
+
+sdm.thresh.merge <- merge(sdm.thresholds, sdm.thresh.info)
+#sort it
+sdm.thresh.merge <- sdm.thresh.merge[order(sdm.thresh.merge$sortOrder),]
+sdm.thresh.table <- sdm.thresh.merge[,c("cutFullName", "cutValue",
+                                        "capturedEOs", "capturedPts", "cutDescription")]
+names(sdm.thresh.table) <- c("Threshold", "Value", "Groups","Pts","Description")
+sdm.thresh.table$Groups <- paste(round(sdm.thresh.table$Groups/numEOs*100, 1),
+                                 "(",sdm.thresh.table$Groups, ")", sep="")
+# sdm.thresh.table$Polys <- paste(round(sdm.thresh.table$Polys/numPys*100, 1),
+#                                 "(",sdm.thresh.table$Polys, ")", sep="")
+numPts <- nrow(subset(df.full, pres == 1))
+# paste() keeps the column character so xtable prints it as-is
+sdm.thresh.table$Pts <- paste(round(sdm.thresh.table$Pts/numPts*100, 1),
+                              sep="")
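+# e.g., with numEOs = 20 and a threshold capturing 18 groups and 450 of 500
+# presence points, the Groups cell renders as "90(18)" and Pts as "90"
+# (hypothetical numbers).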
+
+## get grank definition ----
+SQLquery <- paste0("SELECT rank, rankname FROM lkpRankDefinitions where rank = '",ElementNames$rounded_g_rank,"';")
+grank_desc <- dbGetQuery(db, SQLquery)
+
+# make a url to NatureServe Explorer
+NSurl <- paste("http://explorer.natureserve.org/servlet/NatureServe?searchName=",gsub(" ", "+", ElementNames[[1]], fixed=TRUE), sep="")
+
+## Get env. var lookup table ----
+SQLquery <- paste0("SELECT gridName g from tblModelResultsVarsUsed where model_run_name = '",
+                   model_run_name, "' and inFinalModel = 1;")
+var_names <- dbGetQuery(db, SQLquery)$g
+SQLquery <- paste("SELECT fullName, description ",
+                  "FROM lkpEnvVars ",
+                  "WHERE gridName COLLATE NOCASE IN (",
+                  toString(sQuote(var_names)),
+                  ") ORDER BY fullName;", sep = "")
+sdm.var.info <- dbGetQuery(db, statement = SQLquery)
+names(sdm.var.info) <- c("Variable Name","Variable Description")
+
+# escape symbols for latex
+ls <- c("&","%","$","#","_","{","}")
+for (l in ls) {
+  sdm.var.info$`Variable Name` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Name`, fixed = T)
+  sdm.var.info$`Variable Description` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Description`, fixed = T)
+}
+# replace degree symbols
+for (l in 1:length(sdm.var.info$`Variable Description`)) {
+  new.desc <- stri_escape_unicode(sdm.var.info$`Variable Description`[l])
+  if (grepl("\\u00b0",new.desc, fixed = T))
+    sdm.var.info$`Variable Description`[l] <- gsub("\\u00b0", "$^\\circ$", new.desc, fixed = T)
+}
+# put descriptions in parboxes for multiple lines
+sdm.var.info$`Variable Description` <- paste0("\\parbox{20cm}{",sdm.var.info$`Variable Description`,"}")
+
+
+# model review data from Tracking DB
+fn <- here("_data","databases", "mobi_tracker_connection_string_short.dsn")
+cn <- dbConnect(odbc::odbc(), .connection_string = readChar(fn, file.info(fn)$size))
+
+sql <- paste0("SELECT UserID, rating, comment
+    FROM ModelReviewToolOverallFeedback
+    WHERE (((ModelReviewToolOverallFeedback.cutecode)= '", ElementNames$Code, "' ));")
+reviewData <- dbGetQuery(cn, sql)
+
+# get info about revisions:
+  # if on a cycle greater than 1 (but not a 'both' species with a cycle of 2), then count as revised
+  # if HUCs are being removed, count as revised
+dbDisconnect(cn)
+rm(cn)
+
+numReviewers <- nrow(reviewData)
+if(numReviewers == 0){
+  numReviewersPhrase <- ""
+  meanRating <- "-"
+  minRating <- "-"
+  maxRating <- "-"
+  medianRating <- "-"
+  # revMatrix is still needed below when no reviews came in;
+  # only the "C" row will be selected in that case
+  revMatrix <- data.frame(
+    "rAttribute" = c("C", "Cr","A","I"),
+    "rComments" = c(
+      "Model was not reviewed by a regional, taxonomic expert.",
+      "Model review indicates possible issues with this model.",
+      "Model was reviewed by a regional, taxonomic expert.",
+      "Model reviewed by a regional, taxonomic expert and given high marks."
+    ))
+} else if (numReviewers == 1){
+  numReviewersPhrase <- paste0(" (",numReviewers," reviewer)")
+  meanRating <- reviewData$rating
+  minRating <- "-"
+  maxRating <- "-"
+  medianRating <- "-"
+  revMatrix <- data.frame(
+    "rAttribute" = c("C", "Cr","A","I"),
+    "rComments" = c(
+      "Model was not reviewed by a regional, taxonomic expert.",
+      paste0("Model review indicates possible issues with this model.",numReviewersPhrase),
+      paste0("Model was reviewed by a regional, taxonomic expert.",numReviewersPhrase),
+      paste0("Model reviewed by a regional, taxonomic expert and given high marks.",numReviewersPhrase)
+    ))
+} else {
+  numReviewersPhrase <- paste0(" (",numReviewers," reviewers)")
+  meanRating <- round(mean(reviewData$rating), digits = 1)
+  minRating <- min(reviewData$rating)
+  maxRating <- max(reviewData$rating)
+  medianRating <- median(reviewData$rating)
+  revMatrix <- data.frame(
+    "rAttribute" = c("C", "Cr","A","I"),
+    "rComments" = c(
+      "Model was not reviewed by regional, taxonomic experts.",
+      paste0("Model review indicates possible issues with this model.",numReviewersPhrase),
+      paste0("Model was reviewed by regional, taxonomic experts.",numReviewersPhrase),
+      paste0("Model reviewed by regional, taxonomic experts and given high marks.",numReviewersPhrase)
+    ))
+}
+
+revAtt <- ifelse(nrow(reviewData) == 0 , "C",
+                 ifelse(meanRating < 2.5, "Cr",
+                        ifelse(meanRating < 3.5, "A", "I")))
+revUpdate <- revMatrix[match(revAtt, revMatrix$rAttribute),]
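+# e.g., three reviews scoring 4, 4, and 5 give meanRating 4.3; that is above
+# both cutpoints, so revAtt is "I" ("given high marks"). A single review of 2
+# gives meanRating 2 and revAtt "Cr" (hypothetical ratings).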
+
+## get Model Evaluation and Use data ----
+# NOTE: "interative" is the column spelling in lkpSpeciesRubric, kept as-is
+SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ",
+                  "FROM lkpSpeciesRubric ",
+                  "WHERE sp_code ='", model_species, "'; ", sep="")
+sdm.modeluse <- dbGetQuery(db, statement = SQLquery)
+
+sdm.modeluse$process_review <- as.character(revUpdate$rAttribute)
+sdm.modeluse$process_reviewNotes <- as.character(revUpdate$rComments)
+
+
+sdm.modeluse[is.na(sdm.modeluse)] <- " "
+sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal"
+sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable"
+sdm.modeluse[sdm.modeluse=="C"] <- "\\cellcolor[HTML]{FD6864} Interpret with Caution"
+
+# fix greater than and less than symbols in the rubric table
+sdm.modeluse$process_performNotes <- gsub(">=","$\\\\geq$", sdm.modeluse$process_performNotes)
+sdm.modeluse$process_performNotes <- gsub("<","$<$ ", sdm.modeluse$process_performNotes)
+
+## Run knitr and create metadata ----
+
+# might need to run this twice. First time through tex builds the reference
+# list, second time through it can then number the refs in the doc.
+setwd(file.path(loc_model, "outputs","metadata"))
+#copy the logo locally [spent way too long getting latex to read from G:]
+dir.create("figure", showWarnings = FALSE)
+file.copy(paste0(loc_scripts,"/NatureServeLogo.png"), "figure/NatureServeLogo.png")
+
+knit2pdf(file.path(loc_scripts,"postReview_MetadataEval_knitr.rnw"),
+         output=paste0(model_run_name, "_metadata.tex"))
+
+# delete .log, .aux etc. if the pdf was created successfully
+fn_ext <- c(".log",".aux",".out",".tex")
+fn_ext <- paste0("_metadata", fn_ext)
+if (file.exists(paste(model_run_name, "_metadata.pdf",sep=""))){
+  for(i in 1:length(fn_ext)){
+    fn <- paste(model_run_name, fn_ext[i],sep="")
+    if (file.exists(fn)){
+      file.remove(fn)
+    }
+  }
+}
+# delete the figure folder
+unlink("figure", recursive = TRUE)
+
+setwd(loc_scripts)
+
+## clean up ----
+dbDisconnect(db)
+options(op)