From 5cfc8ba9c6469f12fea44c6369587f79dece1f7c Mon Sep 17 00:00:00 2001 From: ChristopherTracey Date: Tue, 3 Sep 2019 16:58:14 -0400 Subject: [PATCH 01/11] proof of concept for star system --- MetadataEval_knitr.rnw | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/MetadataEval_knitr.rnw b/MetadataEval_knitr.rnw index 8c3a5be..78123f5 100644 --- a/MetadataEval_knitr.rnw +++ b/MetadataEval_knitr.rnw @@ -22,6 +22,31 @@ \newcolumntype{P}[1]{>{\centering\arraybackslash}p{#1}} \usepackage[normalem]{ulem} \useunder{\uline}{\ul}{} +\usepackage{tikz} +\usetikzlibrary{shapes.geometric,calc} + +\newcommand\score[2]{ +\pgfmathsetmacro\pgfxa{#1+1} +\tikzstyle{scorestars}=[star, star points=5, star point ratio=2.25, draw,inner sep=0.15em,anchor=outer point 3] +\begin{tikzpicture}[baseline] + \foreach \i in {1,...,#2} { + \pgfmathparse{(\i<=#1?"yellow":"gray")} + \edef\starcolor{\pgfmathresult} + \draw (\i*1em,0) node[name=star\i,scorestars,fill=\starcolor] {}; + } + \pgfmathparse{(#1>int(#1)?int(#1+1):0} + \let\partstar=\pgfmathresult + \ifnum\partstar>0 + \pgfmathsetmacro\starpart{#1-(int(#1))} + \path [clip] ($(star\partstar.outer point 3)!(star\partstar.outer point 2)!(star\partstar.outer point 4)$) rectangle + ($(star\partstar.outer point 2 |- star\partstar.outer point 1)!\starpart!(star\partstar.outer point 1 -| star\partstar.outer point 5)$); + \fill (\partstar*1em,0) node[scorestars,fill=yellow] {}; + \fi + +,\end{tikzpicture} +} + + \geometry{letterpaper, top=0.45in, bottom=0.75in, left=0.75in, right=0.75in} \pagestyle{fancy} \fancyhf{} \renewcommand\headrulewidth{0pt} %strip default header/footer stuff @@ -331,6 +356,18 @@ Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\end \textbf{Model Evaluation and Intended Use} \\\\ All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} \\ +\underline{Expert Review}\\ +The following is a summary of the reviews by NatureServe Network biologists received to date. Something somethng about 4/5 star reviews versus 3 star, versus 1-2 stars. +\begin{table}[H] + \small + \centering + \begin{tabular}{p{5cm}p{3cm}p{5cm}} + Star Rating & Reviewing States & Notes \\ + \hline + \score{4.4}{5} based on X reviews & PA, NY, VA & Some Notes \\ + \hline +\end{tabular} +\end{table} \smallskip \pagebreak From 2ecc0ee49a65df058a35a8ce8b32c5141fc87af5 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Fri, 4 Oct 2019 09:00:15 -0400 Subject: [PATCH 02/11] make new rnw, revert original rnw --- MetadataEval_knitr.rnw | 38 +- MetadataEval_knitr_postReview.rnw | 625 ++++++++++++++++++++++++++++++ 2 files changed, 626 insertions(+), 37 deletions(-) create mode 100644 MetadataEval_knitr_postReview.rnw diff --git a/MetadataEval_knitr.rnw b/MetadataEval_knitr.rnw index 78123f5..b3879c6 100644 --- a/MetadataEval_knitr.rnw +++ b/MetadataEval_knitr.rnw @@ -22,31 +22,6 @@ \newcolumntype{P}[1]{>{\centering\arraybackslash}p{#1}} \usepackage[normalem]{ulem} \useunder{\uline}{\ul}{} -\usepackage{tikz} -\usetikzlibrary{shapes.geometric,calc} - -\newcommand\score[2]{ -\pgfmathsetmacro\pgfxa{#1+1} -\tikzstyle{scorestars}=[star, star points=5, star point ratio=2.25, draw,inner sep=0.15em,anchor=outer point 3] -\begin{tikzpicture}[baseline] - \foreach \i in {1,...,#2} { - \pgfmathparse{(\i<=#1?"yellow":"gray")} - \edef\starcolor{\pgfmathresult} - \draw (\i*1em,0) node[name=star\i,scorestars,fill=\starcolor] {}; - } - \pgfmathparse{(#1>int(#1)?int(#1+1):0} - \let\partstar=\pgfmathresult - \ifnum\partstar>0 - \pgfmathsetmacro\starpart{#1-(int(#1))} - \path [clip] ($(star\partstar.outer point 3)!(star\partstar.outer point 2)!(star\partstar.outer point 4)$) rectangle - ($(star\partstar.outer point 2 |- star\partstar.outer point 1)!\starpart!(star\partstar.outer point 1 -| star\partstar.outer point 5)$); - \fill (\partstar*1em,0) node[scorestars,fill=yellow] {}; - \fi - -,\end{tikzpicture} -} - - \geometry{letterpaper, top=0.45in, bottom=0.75in, left=0.75in, right=0.75in} \pagestyle{fancy} \fancyhf{} \renewcommand\headrulewidth{0pt} %strip default header/footer stuff @@ -356,18 +331,7 @@ Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\end \textbf{Model Evaluation and Intended Use} \\\\ All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} \\ -\underline{Expert Review}\\ -The following is a summary of the reviews by NatureServe Network biologists received to date. Something somethng about 4/5 star reviews versus 3 star, versus 1-2 stars. -\begin{table}[H] - \small - \centering - \begin{tabular}{p{5cm}p{3cm}p{5cm}} - Star Rating & Reviewing States & Notes \\ - \hline - \score{4.4}{5} based on X reviews & PA, NY, VA & Some Notes \\ - \hline -\end{tabular} -\end{table} + \smallskip \pagebreak diff --git a/MetadataEval_knitr_postReview.rnw b/MetadataEval_knitr_postReview.rnw new file mode 100644 index 0000000..78123f5 --- /dev/null +++ b/MetadataEval_knitr_postReview.rnw @@ -0,0 +1,625 @@ +%This knitr document is called by the knit2pdf call in 5_createMetadata.r +\documentclass{article} +\usepackage[utf8]{inputenc} +\usepackage{geometry} +\usepackage{fancyhdr} %for headers,footers +\usepackage{underscore} %needed if any text has underscores +\usepackage{rotating} +\usepackage[super,comma]{natbib} +\usepackage{hyperref} +\usepackage{graphicx} +\hypersetup{ + colorlinks=true, + linkcolor=blue, + filecolor=magenta, + urlcolor=cyan, +} +\usepackage{caption} %for managing table captions +\usepackage[table,xcdraw]{xcolor} +\usepackage{multirow} +\usepackage{array} +\usepackage{float} +\newcolumntype{P}[1]{>{\centering\arraybackslash}p{#1}} +\usepackage[normalem]{ulem} +\useunder{\uline}{\ul}{} +\usepackage{tikz} +\usetikzlibrary{shapes.geometric,calc} + +\newcommand\score[2]{ +\pgfmathsetmacro\pgfxa{#1+1} +\tikzstyle{scorestars}=[star, star points=5, star point ratio=2.25, draw,inner sep=0.15em,anchor=outer point 3] +\begin{tikzpicture}[baseline] + \foreach \i in {1,...,#2} { + \pgfmathparse{(\i<=#1?"yellow":"gray")} + \edef\starcolor{\pgfmathresult} + \draw (\i*1em,0) node[name=star\i,scorestars,fill=\starcolor] {}; + } + \pgfmathparse{(#1>int(#1)?int(#1+1):0} + \let\partstar=\pgfmathresult + \ifnum\partstar>0 + \pgfmathsetmacro\starpart{#1-(int(#1))} + \path [clip] ($(star\partstar.outer point 3)!(star\partstar.outer point 2)!(star\partstar.outer point 4)$) rectangle + ($(star\partstar.outer point 2 |- star\partstar.outer point 1)!\starpart!(star\partstar.outer point 1 -| star\partstar.outer point 5)$); + \fill (\partstar*1em,0) node[scorestars,fill=yellow] {}; + \fi + +,\end{tikzpicture} +} + + + +\geometry{letterpaper, top=0.45in, bottom=0.75in, left=0.75in, right=0.75in} +\pagestyle{fancy} \fancyhf{} \renewcommand\headrulewidth{0pt} %strip default header/footer stuff +%add footers +\cfoot{ +\small %small font. The double slashes is newline in fancyhdr +Species distribution model for \Sexpr{as.character(ElementNames$CommName)} (\textit{\Sexpr{as.character(ElementNames$SciName)}}). +} +\rfoot{p. \thepage} + + +\normalsize %return the font to normal + +\begin{document} + +\noindent +\begin{minipage}[b]{4.75in} %everything in this minipage will be adjacent, left of the thermometer + \LARGE \textit{\Sexpr{as.character(ElementNames[[1]])}} \\ + \normalsize Species Distribution Model (SDM) assessment metrics and metadata \\ + Common name: \href{\Sexpr{as.character(NSurl)}}{\Sexpr{as.character(ElementNames[[2]])}} \\ + Grank: \Sexpr{ paste(as.character(ElementNames[[6]])," - ",as.character(grank_desc[[2]]), sep="")}\\ + Date: \Sexpr{format(Sys.Date(), "%d %b %Y")} \\ + Code: \Sexpr{as.character(ElementNames$Code)} (EGT_ID: \Sexpr{as.character(ElementNames$EGT_ID)}) +\end{minipage} \hfill +\begin{minipage}[b]{2in} %minipage for thermometer +<>= + par(mar=c(0.9,0.2,0.2,0.2)) + if (exists("tss.summ")) { + temp <- tss.summ$mean + thermTemp <- vector("list") + if (temp < .50){ + thermTemp <- c("red", "poor") + } else if (temp < .80){ + thermTemp <- c("yellow","fair") + } else { + thermTemp <- c("green", "good") } + symbols(1, 1, thermometers=cbind(0.5, 1, temp), inches=.5, fg = thermTemp[[1]], + xaxt = "n", yaxt = "n", ann = FALSE, bty = "n", pin = c(1.2,1.2) ) + text (1,1, thermTemp[[2]], + adj = c(0.5,4), cex = .75, col = "black", xpd=NA) + text (1,1, paste("TSS=",format(round(temp,digits=2)),sep=""), + adj = c(0.5,6), cex = .75, col = "black", xpd=NA) + } else { + plot(1,1, col = "white", axes = FALSE) + box("outer","dotted") #show the outline of the fig box when debugging + text (1,1, "No evaluation", + adj = c(0.5,4), cex = .75, col = "black", xpd=NA) + } + +@ + \begin{center} + \includegraphics{figure/thermometer1-1.pdf} \\ %place it + validation success \end{center} +\end{minipage} + +\smallskip +\hrule +\medskip +\noindent +\Sexpr{as.character(project_overview)} +\smallskip +\noindent +This SDM incorporates the number of known and background locations indicated in Table 1, modeled with the random forests routine {\protect\NoHyper\cite{breiman2001, iverson2004}\protect\endNoHyper} in the R statistical environment {\protect\NoHyper\cite{liaw2002, r}\protect\endNoHyper}. We validated the model by jackknifing (also called leave-one-out {\protect\NoHyper\cite{fielding1997, fielding2002, pearson2007}\protect\endNoHyper}) by \Sexpr{tolower(as.character(group$JackknType))} for a total of \Sexpr{length(group$vals)} groups. The statistics in Table 2 report the mean and variance of validation statistics for these jackknifing runs.\\ +\medskip +\small +\begin{minipage}[t]{3in} + +\smallskip %dummy first line to align with next minipage + + +Table 1. Input statistics. Presence points are points placed in polygon-based location information or point-based observations. Groups describe groupings of points based on polygon data or spatial grouping of observations. Background points are placed throughout model area excluding known species locations. +\smallskip +\begin{center} +<>= + summ.table <- data.frame(Name=c("Presence points","Groups","Background points"), + Number=c(nrow(subset(df.full, pres == 1)), + numEOs, + nrow(subset(df.full, pres == 0)) + )) + print(xtable(summ.table), + floating = FALSE, include.rownames=FALSE) +@ +\end{center} + +\medskip + +Table 2. Validation statistics for jackknife trials. Overall Accuracy = +Correct Classification Rate, TSS = True Skill Statistic, AUC = +area under the ROC curve {\protect\NoHyper\cite{allouche2006, vaughan2005, +fielding2002}\protect\endNoHyper}. +\smallskip + +\begin{center} +<>= + if (exists("tss.summ")) { + summ.table <- data.frame(Name=c("Overall Accuracy", "Specificity", "Sensitivity", + "TSS", "Kappa", "AUC"), + Mean=c(OvAc.summ$mean, specif.summ$mean,sensit.summ$mean, + tss.summ$mean,Kappa.unw.summ$mean, + auc.summ$mean), + SD=c(OvAc.summ$sd, specif.summ$sd,sensit.summ$sd, + tss.summ$sd,Kappa.unw.summ$sd, + auc.summ$sd), + SEM=c(OvAc.summ$sem, specif.summ$sem,sensit.summ$sem, + tss.summ$sem,Kappa.unw.summ$sem, + auc.summ$sem)) + } else { + summ.table <- data.frame(Name=c("Overall Accuracy", "Specificity", "Sensitivity", + "TSS", "Kappa", "AUC"), + Mean = rep(NA, 6), SD = rep(NA, 6), SEM = rep(NA, 6)) + } + print(xtable(summ.table), + floating=FALSE, include.rownames=FALSE) +@ +\end{center} + +\medskip +\normalsize + <>= +if (exists("n.var")) { + txt <- paste0("Validation runs used ",n.var," environmental + variables, the most important of ",OriginalNumberOfEnvars," + variables (top ",(1-envarPctile)*100," percent). + Each tree was built with ",trRes[[1]]$mtry," variables + tried at each split (mtry) and ",trRes[[1]]$ntree," trees built.") +} else { + txt <- "Validation was not possible for this model (too few EOs)." +} +@ + +<>= +cat(txt) +@ +The final model was built using \Sexpr{rf.full$ntree} trees, all presence +and background points, with an mtry of \Sexpr{rf.full$mtry}, and \Sexpr{length(EnvVars$impVal)} environmental variables. +\small +\begin{center} +<>= +par(mar=c(2.8,2.5,.5,10), #bottom, left, top, right + tcl=-0.1, #tic length + cex=0.6, #text size + mgp=c(1.6,0.4,0) #placement of axis title, labels, line + ) +if (exists("perf")) { + plot(perf,lwd=2, + avg="threshold", colorize = TRUE, + #print.cutoffs.at = c(rf.full.ctoff[2], cutval.rf[2]), + #text.adj=c(-0.6,1.5), points.pch=19, points.cex=0.8, text.cex=0.8, + xlab="Avg. false positive rate", ylab="Avg. true positive rate", + colorkey.relwidth = 0.5, + colorize.palette=rainbow(256,start=3/6, end=0), colorkey.line = 1, + colorkey = FALSE + ) + + # set the color palette + rl.colors <- rev(rainbow(256,start=3/6, end=0)) + # find the min and max of the cutoffs, as used in the ROC plot + # for some reason perf gives some values over one, which confuses legend. Set it manually. + #rl.max.alpha <- max(unlist(perf@alpha.values)) + rl.max.alpha <- 1 + #rl.min.alpha <- min(unlist(perf@alpha.values)) + rl.min.alpha <- 0 + # get the y min and max of the ROC plot + rl.max.y <- max(axTicks(4)) + rl.min.y <- min(axTicks(4)) + # interpolate the cutoffs to the y axis + rl.alpha.ticks <- approxfun(c(rl.min.y, rl.max.y), + c(rl.min.alpha, rl.max.alpha))(axTicks(4)) + # set up a vector the length of colors ranging from min to max values + rl.col.cutoffs <- rev(seq(rl.min.alpha,rl.max.alpha, length=length( rl.colors ))) + # create a function to do the interpolation in later commands + rl.alpha2y <- approxfun(c(min(rl.alpha.ticks), max(rl.alpha.ticks)), + c(rl.min.y,rl.max.y)) + # place the axis, using the correct labeling scheme + axis(at=rl.alpha2y(rl.alpha.ticks),labels=round((rl.alpha.ticks),2), side=4, line=3.5) + # set up definition for what to display and then apply to y breaks and colors + rl.display.bool <- (rl.col.cutoffs >= min(rl.alpha.ticks) & + rl.col.cutoffs < max(rl.alpha.ticks)) + rl.y.lower <- rl.alpha2y(rl.col.cutoffs)[rl.display.bool] + rl.colors <- rl.colors[rl.display.bool] + rl.y.width <- rl.y.lower[2] - rl.y.lower[1] + rl.y.upper <- rl.y.lower + rl.y.width + # manually define x locations way off graph to minimize confusion + rl.x.left <- 1.3 + rl.x.right <- 1.32 + # place the bar, then the legend label + rect(rl.x.left, rl.y.lower, rl.x.right, rl.y.upper, col=rl.colors, border=rl.colors, xpd=NA) + mtext("cutoff", side=1, at = c(1.35), line = 0, cex=0.6) + mtext("legend", side=1, at = c(1.35), line = 1, cex=0.6) +} else { + plot(0.5,0.5,lwd=0, col = "white", xlim = c(0,1), ylim = c(0,1), + xlab="Avg. false positive rate", ylab="Avg. true positive rate") + text(0.5,0.5, "No evaluation") +} + + #clean up +rm(list=ls(pattern="rl.")) + +@ +\includegraphics{figure/ROCplot-1.pdf} %place it +\end{center} +Figure 1. ROC plot for all \Sexpr{length(group$vals)} validation runs, +averaged along cutoffs. + +\end{minipage}% +\hfill \begin{minipage}[t]{3.5in} + +\smallskip %dummy first line to align with previous minipage + +<>= +par(mar=c(3.2,2.5,.25,0.1), #bottom, left, top, right + tcl=-0.1, #tic length + mgp=c(1.3,0.4,0), #placement of axis title, labels, line + bty="l" + ) + #get the order for the importance charts + ord <- rev(order(EnvVars$impVal, decreasing = TRUE)[1:length(EnvVars$impVal)]) + xmin.i <- min(EnvVars$impVal) + #create importance dot chart + dotchart(EnvVars$impVal[ord], xlab = expression("lower" %->% "greater"), + xlim = c(xmin.i, max(EnvVars$impVal)), labels = EnvVars$fullName[ord], + cex = 0.62 #character size + ) + mtext("importance", side = 1, line = 2, cex = 0.62) +@ +\begin{center} +\includegraphics{figure/importanceFig-1.pdf} %place it +\end{center} +Figure 2. Relative importance of each environmental variable based on the full +model using all sites as input. Importance values (mean decrease in accuracy) are extracted from the randomForest{\protect\NoHyper\cite{liaw2002}\protect\endNoHyper} function. See Appendix 1 for variable descriptions. +\end{minipage}% + +\normalsize +\pagebreak + +<>= +par(tcl=-0.2, #tic length + cex=0.6, #text size + mgp=c(1.6,0.4,0) #placement of axis title, labels, line + ) + +# layout(matrix(c(17,2,4,6,8,17,1,3,5,7,17,10,12,14,16,17,9,11,13,15), +# nrow = 4, ncol = 5, byrow = TRUE), +# widths = c(0.15,1,1,1,1),heights=c(1,3,1,3)) +layout(matrix(c(19,2,4,6,20,19,1,3,5,20,19,8,10,12,20,19,7,9,11,20,19,14,16,18,20,19,13,15,17,20), + nrow = 6, ncol = 5, byrow = TRUE), + widths = c(0.05,1,1,1,0.1),heights=c(1,4,1,4,1,4)) + +pres.dat <- subset(df.full, pres==1) +abs.dat <- subset(df.full, pres==0) + +for (plotpi in 1:length(pPlots)){ + par(mar=c(3,2,0,0.5)) + if(is.character(pPlots[[plotpi]]$x)){ + barplot(pPlots[[plotpi]]$y, width=rep(1, length(pPlots[[plotpi]]$y)), col="grey", + xlab = pPlots[[plotpi]]$fname, ylab = NA, + names.arg=pPlots[[plotpi]]$x, space=0.1, + cex.names=0.7, las=2) + plot(1,1,axes=FALSE, type="n", xlab=NA, ylab=NA) #skip density plots if pPlot is barplot + } else { + plot(pPlots[[plotpi]]$x, pPlots[[plotpi]]$y, + type = "l", + xlab = pPlots[[plotpi]]$fname, ylab=NA) + pres.dens <- density(pres.dat[,pPlots[[plotpi]]$gridName]) + abs.dens <- density(abs.dat[,pPlots[[plotpi]]$gridName]) + par(mar=c(0,2,0.5,0.5)) + plot(pres.dens, xlim=c(min(pPlots[[plotpi]]$x), + max(pPlots[[plotpi]]$x)), + ylim=c(0,max(c(abs.dens$y,pres.dens$y))), + main=NA,xlab=NA,ylab=NA, + axes=FALSE, col="blue", lwd=2 + ) + lines(abs.dens, col="red") + } +} + mtext("log of fraction of votes", side = 2, line = -1, outer=TRUE, cex = 0.7) +@ +\includegraphics{figure/pPlotFig-1.pdf} \\ %place them, then line break +\small +Figure 3. Partial dependence plots for the \Sexpr{as.character(length(pPlots))} environmental variables with the most influence on the model. Each plot shows the effect of the variable on the probability of appropriate habitat with the effects of the other variables removed {\protect\NoHyper\cite{liaw2002}\protect\endNoHyper}. The x-axis covers the range of values for the variable assessed; the y-axis represents the effect between the variable and model response. Peaks in the black line indicate where this variable had the strongest influence on predicting appropriate habitat. Decreasing lines from left to right show a negative relationship overall; increasing lines, positive. The distribution of each category (thin red = Background points, thick blue = Presence points) is depicted at the top margin. See Appendix 1 for variable descriptions. +\normalsize + +\medskip +\medskip +\noindent +Species distribution model outputs display the probability (0-1) of a location (i.e. stream reach or raster cell) having similar environmental conditions in comparison to known presence locations. No model will ever depict sites where a targeted element will occur with certainty, it can \textit{only} depict locations it interprets as appropriate habitat for the targeted element. The delineation of suitable habitats is made by the selection of a threshold value, where locations with values above the threshold are designated as likely suitable habitat, and those with values below the threshold may be unsuitable. Threshold values are often statistically calculated. SDMs can be used in many ways and the depiction of appropriate habitat should be varied depending on intended use. For targeting field surveys, an SDM may be used to refine the search area; users should always employ additional GIS tools to further direct search efforts. A lower threshold depicting more area may be appropriate to use in this case. For a more conservative depiction of suitable habitat that shows less area, a higher threshold may be more appropriate. Different thresholds for this model (full model) are described in Table 3. + +\medskip +\noindent +\begin{minipage}{\linewidth} %keep table header with table +\small +Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\endNoHyper} calculated from the final model. The Value column reports the threshold; Groups indicates the percentage (number in brackets) of groups within which at least one point was predicted as suitable habitat; Pts indicates the percentage of PR points predicted having suitable habitat. Total numbers of groups and presence points used in the final model are reported in Table 1. +\smallskip +\begin{center} +<>= + tbl <- sdm.thresh.table + #tbl$Citation <- gsub("(^.*)","\\\\cite{\\1}",sdm.thresh.table$Citation) + print(xtable(tbl, digits = 3, align = c("r","p{2in}","r","r","r","p{2.3in}")), + floating=FALSE, include.rownames=FALSE) +@ +\end{center} +\end{minipage} + +\normalsize +\bigskip +\noindent +\textbf{Model Evaluation and Intended Use} \\\\ +All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} +\\ +\underline{Expert Review}\\ +The following is a summary of the reviews by NatureServe Network biologists received to date. Something somethng about 4/5 star reviews versus 3 star, versus 1-2 stars. +\begin{table}[H] + \small + \centering + \begin{tabular}{p{5cm}p{3cm}p{5cm}} + Star Rating & Reviewing States & Notes \\ + \hline + \score{4.4}{5} based on X reviews & PA, NY, VA & Some Notes \\ + \hline +\end{tabular} +\end{table} +\smallskip +\pagebreak + + +\noindent Table 4. Model evaluation results based on Sofaer et al. 2019. Scores can be attributed as ideal, acceptable, or interpret with caution. +\normalsize +\begin{table}[H] +\small +\centering +\begin{tabular}{m{2.8cm}m{5cm}P{2cm}m{5.7cm}} +\hline +Category & Metric & Score & Notes +\\ \hline +\multirow{3}{*}{Species Data} + & Presence data quality + & \Sexpr{as.character(sdm.modeluse$spdata_dataqual)} + & \Sexpr{as.character(sdm.modeluse$spdata_dataqualNotes)} +\\ \cline{2-4} + & Absence/Background Data + & \Sexpr{as.character(sdm.modeluse$spdata_abs)} + & \Sexpr{as.character(sdm.modeluse$spdata_absNotes)} +\\ \cline{2-4} + & Evaluation Data + & \Sexpr{as.character(sdm.modeluse$spdata_eval)} + & \Sexpr{as.character(sdm.modeluse$spdata_evalNotes)} +\\ \hline +\multirow{2}{2.8cm}{Environmental Predictors} + & Ecological and predictive relevance + & \Sexpr{as.character(sdm.modeluse$envvar_relevance)} + & \Sexpr{as.character(sdm.modeluse$envvar_relevanceNotes)} +\\ \cline{2-4} + & Spatial and temporal alignment + & \Sexpr{as.character(sdm.modeluse$envvar_align)} + & \Sexpr{as.character(sdm.modeluse$envvar_alignNotes)} +\\ \hline +\multirow{5}{*}{Modeling Process} + & Algorithm choice + & \Sexpr{as.character(sdm.modeluse$process_algo)} + & \Sexpr{as.character(sdm.modeluse$process_algoNotes)} +\\ \cline{2-4} + & Sensitivity + & \Sexpr{as.character(sdm.modeluse$process_sens)} + & \Sexpr{as.character(sdm.modeluse$process_sensNotes)} +\\ \cline{2-4} + & Statistical rigor + & \Sexpr{as.character(sdm.modeluse$process_rigor)} + & \Sexpr{as.character(sdm.modeluse$process_rigorNotes)} +\\ \cline{2-4} + & Performance + & \Sexpr{as.character(sdm.modeluse$process_perform)} + & \Sexpr{as.character(sdm.modeluse$process_performNotes)} +\\ \cline{2-4} + & Model review + & \Sexpr{as.character(sdm.modeluse$process_review)} + & \Sexpr{as.character(sdm.modeluse$process_reviewNotes)} +\\ \hline +\multirow{3}{*}{Model Products} + & Mapped products + & \Sexpr{as.character(sdm.modeluse$products_mapped)} + & \Sexpr{as.character(sdm.modeluse$products_mappedNotes)} +\\ \cline{2-4} + & Interpretation support products + & \Sexpr{as.character(sdm.modeluse$products_support)} + & \Sexpr{as.character(sdm.modeluse$products_supportNotes)} +\\ \cline{2-4} + & Reproducibility + & \Sexpr{as.character(sdm.modeluse$products_repo)} + & \Sexpr{as.character(sdm.modeluse$products_repoNotes)} +\\ \hline + & Iterative + & \Sexpr{as.character(sdm.modeluse$interative)} + & \Sexpr{as.character(sdm.modeluse$interativeNotes)} +\\ \hline +\end{tabular} +\end{table} + +\medskip +\noindent +<>= +if(nrow(sdm.customComments.subset) > 0){ + cat("\\textbf{Model Comments} \\\\") + cat(sdm.customComments.subset$comments) +} +@ + +\pagebreak +\medskip +\begin{center} + +<>= +#par(mar=c(0,0,4.5,0), xpd = F, cex=0.9, adj = 0.05) +nclr <- 5 +clrs <- brewer.pal('Blues',n=nclr) + +# figure out size of study area, expand if less than 889km across, +# which is 1;5,000,000 when figure is 7 inches wide (which it is here) +bbox <- bb(studyAreaExtent) +studyAreaWidth <- bbox$xmax - bbox$xmin +studyAreaHeight <- bbox$ymax - bbox$ymin +if(studyAreaWidth < 889000){ + bbox <- bb(bbox, width = 889000, relative = FALSE) + } +if(studyAreaHeight < 889000){ + bbox <- bb(bbox, height = 889000, relative = FALSE) + } + + +tmap_options(max.raster = c("plot" = 300000, "view" = 100000)) +tmap_mode("plot") +###get the basemap +# for basemap options see http://leaflet-extras.github.io/leaflet-providers/preview/ +# for native options provided by read_osm, see ?OpenStreetMap::openmap +## this is stamen-toner +#basetiles <- read_osm(studyAreaExtent, type = "stamen-toner", ext = 1.1) + +## this is Esri.WorldGrayCanvas +#mtype <- 'https://server.arcgisonline.com/ArcGIS/rest/services/Canvas/World_Light_Gray_Base/MapServer/tile/{z}/{y}/{x}.png?' + +## this is CartoDB.Positron +mtype <- 'https://a.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png' +basetiles <- read_osm(bbox, type = mtype, ext = 1.1) +# plot it +qtm(basetiles) + + tm_shape(ras) + + tm_raster(palette = clrs, title = "modeled suitability", + labels = c("Low Habitat Suitability", rep(" ", nclr-2), "High Habitat Suitability")) + tm_shape(referenceBoundaries) + + tm_borders(col = "grey", lwd = 1) + + tm_shape(studyAreaExtent) + + tm_borders(col = "red", lwd = 2) + + tm_compass(north = 0, type = "arrow", position = c("left","bottom")) + + tm_scale_bar() + +@ +\includegraphics{figure/mapFig-1.png} +\end{center} +Figure 4. A generalized view of the model predictions throughout the modeled area. State boundaries are depicted as a thin gray line. The modeled area is outlined in red. Basemap: CartoDB.Positron (\copyright \href{https://www.openstreetmap.org/copyright}{OpenStreetMap}, contributors: \copyright \href{https://carto.com/attributions}{CARTO}). + +\normalsize +\pagebreak + +This distribution model would not have been possible without data sharing among organizations. Other data sets and sources may have been evaluated, but this final model includes data from these sources: +\begin{itemize} + \setlength{\itemsep}{0pt} + \setlength{\parskip}{0pt} + \setlength{\parsep}{0pt} +<>= +for(i in 1:length(sdm.dataSources$ProgramName)){ + x <- paste("\\item ", sdm.dataSources$ProgramName[[i]], "\n", sep = "") + y <- sub("&", "\\\\&", x) #escape ampersands if there are any - special character in latex + cat(y) +} +@ +\end{itemize} + +\medskip +\noindent +This model was built using a methodology developed through collaboration among the Florida Natural Areas Inventory, the New York Natural Heritage Program, the Pennsylvania Natural Heritage Program, and the Virginia Natural Heritage Program, all member programs of the NatureServe Network. It is one of a suite of species distribution models developed using the same methods, scripts, and environmental data sets. Our goal was to be consistent and transparent in our methodology, validation, and output. + +\medskip +\noindent +\setlength{\fboxsep}{5pt} +\fbox{ +\begin{minipage}[c]{0.2\linewidth} +\includegraphics[width=1.0\linewidth]{../../../../../NatureServeLogo}%png logo file at repository root +\end{minipage}% +\begin{minipage}[c]{0.75\linewidth} +Please cite this document and its associated SDM as: \\ +NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Species distribution model for \Sexpr{as.character(ElementNames$CommName)} (\textit{\Sexpr{as.character(ElementNames$SciName)}}). Created on \Sexpr{format(Sys.Date(), "%d %b %Y")}. Arlington, VA with Network partners from VA, PA, and NY. +\end{minipage} +} + +\medskip +\noindent +\textbf{References} +\small +\renewcommand{\refname}{\vskip -40 pt} %kill the header on the bibliography +\begin{thebibliography}{99}\setlength{\itemsep}{-1pt} + \bibstyle{biblatex} + \bibitem{breiman2001} Breiman, L. 2001. Random forests. Machine Learning 45:5-32. + \bibitem{iverson2004} Iverson, L. R., A. M. Prasad, and A. Liaw. 2004. + New machine learning tools for predictive vegetation mapping after + climate change: Bagging and Random Forest perform better than Regression + Tree Analysis. Landscape ecology of trees and forests.Proceedings of the + twelfth annual IALE (UK) conference, Cirencester, UK, 21-24 June 2004 317-320. + \bibitem{liaw2002} Liaw, A. and M. Wiener. 2002. Classification and + regression by randomForest. R News 2:18-22. Version \Sexpr{packageDescription("randomForest")$Version}. + \bibitem{r} R Core Team. 2016. R: A language and environment for statistical computing. + R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/. \Sexpr{R.version.string}. + \bibitem{fielding1997} Fielding, A. H. and J. F. Bell. 1997. + A review of methods for the assessment of prediction errors in + conservation presence/absence models. Environmental Conservation 24:38-49. + \bibitem{fielding2002} Fielding, A. H. 2002. What are the appropriate + characteristics of an accuracy measure? Pages 271-280 in Predicting Species + Occurrences, issues of accuracy and scale. J. M. Scott, P. J. Helglund, M. + L. Morrison, J. B. Haufler, M. G. Raphael, W. A. Wall, F. B. Samson, eds. Island Press, Washington. + \bibitem{pearson2007} Pearson, R.G. 2007. Species Distribution Modeling for + Conservation Educators and Practitioners. Synthesis. + American Museum of Natural History. Available at http://ncep.amnh.org. + \bibitem{allouche2006} Allouche, O., A. Tsoar, and R. Kadmon. 2006. + Assessing the accuracy of species distribution models: prevalence, + kappa and the true skill statistic (TSS). Journal of Applied Ecology 43:1223-1232. + \bibitem{vaughan2005} Vaughan, I. P. and S. J. Ormerod. 2005. The continuing + challenges of testing species distribution models. + Journal of Applied Ecology 42:720-730. + \bibitem{sing2005} Sing, T., O. Sander, N. Beerenwinkel, T. Lengauer. 2005. + ROCR: visualizing classifier performance in R. Bioinformatics + 21(20):3940-3941. + \bibitem{LiuEtAl2005} Liu, C., P. M. Berry, T. P. Dawson, and R. G. Pearson. 2005. + Selecting thresholds of occurrence in the prediction of species distributions. + Ecography 28:385–393. + \bibitem{LiuEtAl2015} Liu, C., G. Newell, and M. White. 2015. On the selection of + thresholds for predicting species occurrence with presence-only data. Ecology and + Evolution 6:337–348. + \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. (In press). The development and delivery of species distribution models to inform decision-making. BioScience. +\end{thebibliography} + +\pagebreak +\noindent + +\begin{table} +\caption{Appendix 1. Descriptions for environmental variables included in model.} +\resizebox{\textwidth}{!}{ +<>= + # For variable descriptions, get variable name, source, description (3 columns) +addtorow <- list() +addtorow$pos <- list() +addtorow$pos[[1]] <- c(0) +addtorow$command <- c(paste("\\hline \n",sep="")) +print(xtable(sdm.var.info, label = NULL), + include.rownames=FALSE, + sanitize.text = force, add.to.row = addtorow, floating=FALSE) +@ +} +\end{table} +\medskip + +Appendix 2. Model details for reproducibility + +\begin{itemize} + \setlength{\itemsep}{0pt} + \setlength{\parskip}{0pt} + \setlength{\parsep}{0pt} + \item{All R Scripts are available at \href{https://github.com/HeritageNetwork/Regional_SDM}{github}} + \item{The repository version (repo head) used for this run was: \Sexpr{modelrun_meta_data$repo_head}} + \item{The model run name was: \Sexpr{model_run_name}} + \item{Validation metrics requiring a threshold use MTP (minimum training presence)} + \item{R version: \Sexpr{modelrun_meta_data$r_version}} + \item{Random seed for full randomForest model: \Sexpr{modelrun_meta_data$seed}} + \item{randomForest mtry: \Sexpr{rf.full$mtry}} + \item{randomForest ntrees: \Sexpr{rf.full$ntree}} +\end{itemize} + +\end{document} + + From 50237604007bfa759a63d08dd0119ed746922202 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Fri, 4 Oct 2019 09:33:22 -0400 Subject: [PATCH 03/11] change file names --- ...w.rnw => postReview_MetadataEval_knitr.rnw | 0 postReview_createMetadata.r | 208 ++++++++++++++++++ 2 files changed, 208 insertions(+) rename MetadataEval_knitr_postReview.rnw => postReview_MetadataEval_knitr.rnw (100%) create mode 100644 postReview_createMetadata.r diff --git a/MetadataEval_knitr_postReview.rnw b/postReview_MetadataEval_knitr.rnw similarity index 100% rename from MetadataEval_knitr_postReview.rnw rename to postReview_MetadataEval_knitr.rnw diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r new file mode 100644 index 0000000..bda6aa9 --- /dev/null +++ b/postReview_createMetadata.r @@ -0,0 +1,208 @@ +# File: 5_createMetadata.r +# Purpose: to summarize validation data and other information about the +# model and write it to a pdf. This pdf should accompany ALL sharing/showing +# of the SDM map. + +# For knitr to work, you need MikTex installed. See http://miktex.org/ + +# load libraries ---- +library(ROCR) #July 2010: order matters, see http://finzi.psych.upenn.edu/Rhelp10/2009-February/189936.html +library(randomForest) +library(knitr) +library(raster) +library(maptools) +library(sf) +library(RColorBrewer) +library(rasterVis) +library(RSQLite) +library(xtable) +library(stringi) +library(tables) + +library(tmap) +library(tmaptools) +library(OpenStreetMap) + + +### find and load model data ---- +## three lines need your attention. The one directly below (loc_scripts), +## about line 35 where you choose which Rdata file to use, +## and about line 46 where you choose which record to use + +### temp debugging +loc_model <- "G:/tim/_Regional_SDM/_data/species" +loc_envVars <- gsub("F:","G:",loc_envVars) +loc_scripts <- "G:/tim/_Regional_SDM" +nm_refBoundaries <- gsub("F:","G:",nm_refBoundaries) +nm_db_file <- gsub("F:","G:",nm_db_file) +#### + +setwd(loc_model) +dir.create(paste0(model_species,"/outputs/metadata"), recursive = T, showWarnings = F) +setwd(paste0(model_species,"/outputs")) +load(paste0("rdata/", modelrun_meta_data$model_run_name,".Rdata")) + +# get background poly data for the map (study area, reference boundaries) +studyAreaExtent <- st_read(here("_data","species",model_species,"inputs","model_input",paste0(model_run_name, "_studyArea.gpkg")), quiet = T) +referenceBoundaries <- st_read(nm_refBoundaries, quiet = T) # name of state boundaries file + +r <- dir(path = "model_predictions", pattern = ".tif$",full.names=FALSE) +fileName <- r[gsub(".tif", "", r) == model_run_name] +ras <- raster(paste0("model_predictions/", fileName)) + +# project to match raster, just in case +studyAreaExtent <- st_transform(studyAreaExtent, as.character(ras@crs)) +referenceBoundaries <- st_transform(referenceBoundaries, as.character(ras@crs)) + +## Get Program and Data Sources info ---- +op <- options("useFancyQuotes") +options(useFancyQuotes = FALSE) + +db <- dbConnect(SQLite(),dbname=nm_db_file) +SQLquery <- paste("Select lkpModelers.ProgramName, lkpModelers.FullOrganizationName, ", + "lkpModelers.City, lkpModelers.State, lkpSpecies.sp_code ", + "FROM lkpModelers ", + "INNER JOIN lkpSpecies ON lkpModelers.ModelerID=lkpSpecies.ModelerID ", + "WHERE lkpSpecies.sp_code='", model_species, "'; ", sep="") +sdm.modeler <- dbGetQuery(db, statement = SQLquery) +# NOTE: use column should be populated with 1/0 for sources of data used +SQLquery <- paste("SELECT sp.sp_code, sr.ProgramName, sr.State ", + "FROM lkpSpecies as sp ", + "INNER JOIN mapDataSourcesToSpp as mp ON mp.EGT_ID=sp.EGT_ID ", + "INNER JOIN lkpDataSources as sr ON mp.DataSourcesID=sr.DataSourcesID ", + # "WHERE mp.use = 1 ", + "AND sp.sp_code ='", model_species, "'; ", sep="") +sdm.dataSources <- dbGetQuery(db, statement = SQLquery) +sdm.dataSources <- sdm.dataSources[order(sdm.dataSources$ProgramName),] + +SQLquery <- paste("SELECT model_end_time date, egt_id, metadata_comments comments", + " FROM tblModelResults ", + "WHERE model_run_name ='", model_run_name, "'; ", sep="") +sdm.customComments <- dbGetQuery(db, statement = SQLquery) +# assume you want the most recently entered comments, if there are multiple entries +if(nrow(sdm.customComments) > 1) { + sdm.customComments <- sdm.customComments[order(sdm.customComments$date, decreasing = TRUE),] + sdm.customComments.subset <- sdm.customComments[1,] +} else { + sdm.customComments.subset <- sdm.customComments +} + +## Get threshold information ---- +SQLquery <- paste("Select ElemCode, dateTime, cutCode, cutValue, capturedEOs, capturedPts ", + "FROM tblModelResultsCutoffs ", + "WHERE model_run_name ='", model_run_name, "'; ", sep="") +sdm.thresholds <- dbGetQuery(db, statement = SQLquery) +# filter to only most recent +#uniqueTimes <- unique(sdm.thresholds$dateTime) +#mostRecent <- uniqueTimes[order(uniqueTimes, decreasing = TRUE)][[1]] +#sdm.thresholds <- sdm.thresholds[sdm.thresholds$dateTime == mostRecent,] + +# get info about thresholds +SQLquery <- paste("SELECT cutCode, cutFullName, cutDescription, cutCitationShort, cutCitationFull, sortOrder ", + "FROM lkpThresholdTypes ", + "WHERE cutCode IN (", + toString(sQuote(sdm.thresholds$cutCode)), + ");", sep = "") +sdm.thresh.info <- dbGetQuery(db, statement = SQLquery) + +sdm.thresh.merge <- merge(sdm.thresholds, sdm.thresh.info) +#sort it +sdm.thresh.merge <- sdm.thresh.merge[order(sdm.thresh.merge$sortOrder),] +sdm.thresh.table <- sdm.thresh.merge[,c("cutFullName", "cutValue", + "capturedEOs", "capturedPts", "cutDescription")] +names(sdm.thresh.table) <- c("Threshold", "Value", "Groups","Pts","Description") +sdm.thresh.table$Groups <- paste(round(sdm.thresh.table$Groups/numEOs*100, 1), + "(",sdm.thresh.table$Groups, ")", sep="") +# sdm.thresh.table$Polys <- paste(round(sdm.thresh.table$Polys/numPys*100, 1), +# "(",sdm.thresh.table$Polys, ")", sep="") +numPts <- nrow(subset(df.full, pres == 1)) +sdm.thresh.table$Pts <- paste(round(sdm.thresh.table$Pts/numPts*100, 1), + sep="") + +## get grank definition ---- +SQLquery <- paste0("SELECT rank, rankname FROM lkpRankDefinitions where rank = '",ElementNames$rounded_g_rank,"';", sep="") +grank_desc <- dbGetQuery(db, SQLquery) + +# make a url to NatureServe Explorer +NSurl <- paste("http://explorer.natureserve.org/servlet/NatureServe?searchName=",gsub(" ", "+", ElementNames[[1]], fixed=TRUE), sep="") + +## get Model Evaluation and Use data ---- +SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ", + "FROM lkpSpeciesRubric ", + "WHERE sp_code ='", model_species, "'; ", sep="") +sdm.modeluse <- dbGetQuery(db, statement = SQLquery) +sdm.modeluse[is.na(sdm.modeluse)] <- " " +sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal" +sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable" +sdm.modeluse[sdm.modeluse=="C"] <- "\\cellcolor[HTML]{FD6864} Interpret with Caution" + +## Get env. var lookup table ---- +SQLquery <- paste0("SELECT gridName g from tblModelResultsVarsUsed where model_run_name = '", + model_run_name, "' and inFinalModel = 1;") +var_names <- dbGetQuery(db, SQLquery)$g +SQLquery <- paste("SELECT fullName, description ", + "FROM lkpEnvVars ", + "WHERE gridName COLLATE NOCASE IN (", + toString(sQuote(var_names)), + ") ORDER BY fullName;", sep = "") +sdm.var.info <- dbGetQuery(db, statement = SQLquery) +names(sdm.var.info) <- c("Variable Name","Variable Description") + +# escape symbols for latex +ls <- c("&","%","$","#","_","{","}") +for (l in ls) { + sdm.var.info$`Variable Name` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Name`, fixed = T) + sdm.var.info$`Variable Description` <- gsub(l, paste0("\\",l), sdm.var.info$`Variable Description`, fixed = T) +} +# replace degree symbols +for (l in 1:length(sdm.var.info$`Variable Description`)) { + new.desc <- stri_escape_unicode(sdm.var.info$`Variable Description`[l]) + if (grepl("\\u00b0",new.desc, fixed = T)) + sdm.var.info$`Variable Description`[l] <- gsub("\\u00b0", "$^\\circ$", new.desc, fixed = T) +} +# put descriptions in parboxes for multiple lines +sdm.var.info$`Variable Description` <- paste0("\\parbox{20cm}{",sdm.var.info$`Variable Description`,"}") + +# fix greater than and less than symbol in rubric table +sdm.modeluse$process_performNotes <- gsub(">=","$\\\\geq$", sdm.modeluse$process_performNotes) +sdm.modeluse$process_performNotes <- gsub("<","$<$ ", sdm.modeluse$process_performNotes) + + +## Run knitr and create metadata ---- + +# writing to the same folder as a grid might cause problems. +# if errors check that first +# more explanation: tex looks for and uses aux files, which are also used +# by esri. If there's a non-tex aux file, knitr bails. + +# Also, might need to run this twice. First time through tex builds the reference +# list, second time through it can then number the refs in the doc. + +setwd("metadata") +# knit2pdf errors for some reason...just knit then call directly +#knit(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) +knit2pdf(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) +knit2pdf(paste(loc_scripts,"MetadataEval_knitr_postReview.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) +#call <- paste0("pdflatex -interaction=nonstopmode ",model_run_name , ".tex") +# call <- paste0("pdflatex -halt-on-error -interaction=nonstopmode ",model_run_name , ".tex") # this stops execution if there is an error. Not really necessary +#system(call) +#system(call) # 2nd run to apply citation numbers + + +# delete .txt, .log etc if pdf is created successfully. +fn_ext <- c(".log",".aux",".out") +if (file.exists(paste(model_run_name, ".pdf",sep=""))){ + #setInternet2(TRUE) + #download.file(fileURL ,destfile,method="auto") + for(i in 1:NROW(fn_ext)){ + fn <- paste(model_run_name, fn_ext[i],sep="") + if (file.exists(fn)){ + file.remove(fn) + } + } +} + + +## clean up ---- +dbDisconnect(db) +options(op) From 2b87d3fe175f3c64636610e5cefa7e6ad9706ce6 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Fri, 4 Oct 2019 16:17:45 -0400 Subject: [PATCH 04/11] rebric table and star table, first version --- postReview_MetadataEval_knitr.rnw | 26 ++++++++---- postReview_createMetadata.r | 66 +++++++++++++++++++++++++------ 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/postReview_MetadataEval_knitr.rnw b/postReview_MetadataEval_knitr.rnw index 78123f5..48dcc73 100644 --- a/postReview_MetadataEval_knitr.rnw +++ b/postReview_MetadataEval_knitr.rnw @@ -25,7 +25,7 @@ \usepackage{tikz} \usetikzlibrary{shapes.geometric,calc} -\newcommand\score[2]{ +\newcommand\starscore[2]{ \pgfmathsetmacro\pgfxa{#1+1} \tikzstyle{scorestars}=[star, star points=5, star point ratio=2.25, draw,inner sep=0.15em,anchor=outer point 3] \begin{tikzpicture}[baseline] @@ -355,17 +355,27 @@ Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\end \noindent \textbf{Model Evaluation and Intended Use} \\\\ All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} -\\ + +\bigskip \underline{Expert Review}\\ -The following is a summary of the reviews by NatureServe Network biologists received to date. Something somethng about 4/5 star reviews versus 3 star, versus 1-2 stars. +The following is a summary of the reviews by NatureServe Network biologists received to date. Biologists assigned an overall score to the model (1 = poorest performance; 5 = highest performance) and also had an opportunity to make specific suggestions related to modeling extent. Summaries of overall scores are reported here. \begin{table}[H] \small \centering - \begin{tabular}{p{5cm}p{3cm}p{5cm}} - Star Rating & Reviewing States & Notes \\ - \hline - \score{4.4}{5} based on X reviews & PA, NY, VA & Some Notes \\ - \hline + %\begin{tabular}{p{4cm}p{2cm}p{2cm}p{2cm}p{2cm}} + \begin{tabular}{c c c c c } + Mean Star Rating + & Number of reviews + & Max rating + & Min rating + & Median rating +\\ \hline + \starscore{\Sexpr{meanRating}}{5}(\Sexpr{meanRating}) + & \Sexpr{numReviewers} + & \Sexpr{maxRating} + & \Sexpr{minRating} + & \Sexpr{medianRating} +\\ \hline \end{tabular} \end{table} \smallskip diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r index bda6aa9..85bef5a 100644 --- a/postReview_createMetadata.r +++ b/postReview_createMetadata.r @@ -127,14 +127,6 @@ grank_desc <- dbGetQuery(db, SQLquery) NSurl <- paste("http://explorer.natureserve.org/servlet/NatureServe?searchName=",gsub(" ", "+", ElementNames[[1]], fixed=TRUE), sep="") ## get Model Evaluation and Use data ---- -SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ", - "FROM lkpSpeciesRubric ", - "WHERE sp_code ='", model_species, "'; ", sep="") -sdm.modeluse <- dbGetQuery(db, statement = SQLquery) -sdm.modeluse[is.na(sdm.modeluse)] <- " " -sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal" -sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable" -sdm.modeluse[sdm.modeluse=="C"] <- "\\cellcolor[HTML]{FD6864} Interpret with Caution" ## Get env. var lookup table ---- SQLquery <- paste0("SELECT gridName g from tblModelResultsVarsUsed where model_run_name = '", @@ -163,11 +155,63 @@ for (l in 1:length(sdm.var.info$`Variable Description`)) { # put descriptions in parboxes for multiple lines sdm.var.info$`Variable Description` <- paste0("\\parbox{20cm}{",sdm.var.info$`Variable Description`,"}") + +# model review data from Tracking DB +fn <- here("_data","databases", "mobi_tracker_connection_string_short.dsn") +cn <- dbConnect(odbc::odbc(), .connection_string = readChar(fn, file.info(fn)$size)) + +sql <- paste0("SELECT UserID, rating, comment + FROM ModelReviewToolOverallFeedback + WHERE (((ModelReviewToolOverallFeedback.cutecode)= '", ElementNames$Code, "' ));") +reviewData <- dbGetQuery(cn, sql) + +# get info about revisions: + # if on a cycle greater than 1 (but not a 'both' species with a cycle of 2), then count as revised + # if HUCs are getting been removed, count as revised + + +dbDisconnect(cn) +rm(cn) + +meanRating <- mean(reviewData$rating) +minRating <- min(reviewData$rating) +maxRating <- max(reviewData$rating) +medianRating <- median(reviewData$rating) +numReviewers <- nrow(reviewData) +numReviewersPhrase <- ifelse(numReviewers == 0, "", + ifelse(numReviewers == 1, paste0(" (",numReviewers," reviewer)"), + paste0(" (",numReviewers," reviewers)"))) + +anotherNumReviewersPhrase <- ifelse(numReviewers == 0, "", + ifelse(numReviewers == 1, paste0("Based on ",numReviewers," review."), + paste0("Based on ",numReviewers," reviewers"))) + +revMatrix <- data.frame( + "rAttribute" = c("C", "Cr","A","I"), + "rComments" = c( + "Model was not reviewed by regional, taxonomic experts.", + paste0("Model review indicates possible issues with this model.",numReviewersPhrase), + paste0("Model was reviewed by regional, taxonomic experts.",numReviewersPhrase), + paste0("Model reviewed by regional, taxonomic experts with high marks.",numReviewersPhrase) + )) +revAtt <- ifelse(nrow(reviewData) == 0 , "C", + ifelse(meanRating < 2.5, "Cr", + ifelse(meanRating < 3.5, "A", "I"))) +revUpdate <- revMatrix[match(revAtt, revMatrix$rAttribute),] + +sdm.modeluse$process_review <- as.character(revUpdate$rAttribute) +sdm.modeluse$process_reviewNotes <- as.character(revUpdate$rComments) + + +sdm.modeluse[is.na(sdm.modeluse)] <- " " +sdm.modeluse[sdm.modeluse=="I"] <- "\\cellcolor[HTML]{9AFF99} Ideal" +sdm.modeluse[sdm.modeluse=="A"] <- "\\cellcolor[HTML]{FFFFC7} Acceptable" +sdm.modeluse[sdm.modeluse=="C"] <- "\\cellcolor[HTML]{FD6864} Interpret with Caution" + # fix greater than and less than symbol in rubric table sdm.modeluse$process_performNotes <- gsub(">=","$\\\\geq$", sdm.modeluse$process_performNotes) sdm.modeluse$process_performNotes <- gsub("<","$<$ ", sdm.modeluse$process_performNotes) - ## Run knitr and create metadata ---- # writing to the same folder as a grid might cause problems. @@ -181,8 +225,8 @@ sdm.modeluse$process_performNotes <- gsub("<","$<$ ", sdm.modeluse$process_perfo setwd("metadata") # knit2pdf errors for some reason...just knit then call directly #knit(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) -knit2pdf(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) -knit2pdf(paste(loc_scripts,"MetadataEval_knitr_postReview.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) +#knit2pdf(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) +knit2pdf(paste(loc_scripts,"postReview_MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) #call <- paste0("pdflatex -interaction=nonstopmode ",model_run_name , ".tex") # call <- paste0("pdflatex -halt-on-error -interaction=nonstopmode ",model_run_name , ".tex") # this stops execution if there is an error. Not really necessary #system(call) From c7662946c596d5681138e935863f05106169d6f2 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Mon, 7 Oct 2019 09:20:31 -0400 Subject: [PATCH 05/11] fix Appendix labeling and placment --- MetadataEval_knitr.rnw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MetadataEval_knitr.rnw b/MetadataEval_knitr.rnw index b3879c6..056000e 100644 --- a/MetadataEval_knitr.rnw +++ b/MetadataEval_knitr.rnw @@ -551,8 +551,8 @@ NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Spe \pagebreak \noindent -\begin{table} -\caption{Appendix 1. Descriptions for environmental variables included in model.} +Appendix 1. Descriptions for environmental variables included in model. +\begin{table}[H] \resizebox{\textwidth}{!}{ <>= # For variable descriptions, get variable name, source, description (3 columns) From 62bb8355e1d54ddf259a392687bc5a9fadb84d7c Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Mon, 7 Oct 2019 09:21:26 -0400 Subject: [PATCH 06/11] model review table labeling --- postReview_MetadataEval_knitr.rnw | 13 ++++++------- postReview_createMetadata.r | 6 ++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/postReview_MetadataEval_knitr.rnw b/postReview_MetadataEval_knitr.rnw index 48dcc73..65ca40e 100644 --- a/postReview_MetadataEval_knitr.rnw +++ b/postReview_MetadataEval_knitr.rnw @@ -354,11 +354,11 @@ Table 3. Thresholds {\protect\NoHyper\cite{LiuEtAl2005, LiuEtAl2015}\protect\end \bigskip \noindent \textbf{Model Evaluation and Intended Use} \\\\ -All SDMs are sensitive to data inputs and methodological choices. Table 4 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} +All SDMs are sensitive to data inputs and methodological choices. Table 4 summarizes ratings by reviewers of the model output. Table 5 presents scoring of modeling factors based on the model evaluation rubric presented in Sofaer et al. 2019 {\protect\NoHyper\cite{SofaerEtAl2019}\protect\endNoHyper}. \Sexpr{as.character(project_blurb)} \bigskip -\underline{Expert Review}\\ -The following is a summary of the reviews by NatureServe Network biologists received to date. Biologists assigned an overall score to the model (1 = poorest performance; 5 = highest performance) and also had an opportunity to make specific suggestions related to modeling extent. Summaries of overall scores are reported here. + +\noindent Table 4. Expert review results. The following is a summary of the reviews by NatureServe Network biologists received to date. Biologists assigned an overall score to the model (1 = poorest performance; 5 = highest performance) and also had an opportunity to make specific suggestions related to modeling extent. Summaries of overall scores are reported here. \begin{table}[H] \small \centering @@ -382,7 +382,7 @@ The following is a summary of the reviews by NatureServe Network biologists rece \pagebreak -\noindent Table 4. Model evaluation results based on Sofaer et al. 2019. Scores can be attributed as ideal, acceptable, or interpret with caution. +\noindent Table 5. Model evaluation results based on Sofaer et al. 2019. Scores can be attributed as ideal, acceptable, or interpret with caution. \normalsize \begin{table}[H] \small @@ -597,8 +597,8 @@ NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Spe \pagebreak \noindent -\begin{table} -\caption{Appendix 1. Descriptions for environmental variables included in model.} +Appendix 1. Descriptions for environmental variables included in model. +\begin{table}[H] \resizebox{\textwidth}{!}{ <>= # For variable descriptions, get variable name, source, description (3 columns) @@ -615,7 +615,6 @@ print(xtable(sdm.var.info, label = NULL), \medskip Appendix 2. Model details for reproducibility - \begin{itemize} \setlength{\itemsep}{0pt} \setlength{\parskip}{0pt} diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r index 85bef5a..f3f6974 100644 --- a/postReview_createMetadata.r +++ b/postReview_createMetadata.r @@ -199,6 +199,12 @@ revAtt <- ifelse(nrow(reviewData) == 0 , "C", ifelse(meanRating < 3.5, "A", "I"))) revUpdate <- revMatrix[match(revAtt, revMatrix$rAttribute),] +## get Model Evaluation and Use data ---- +SQLquery <- paste("Select spdata_dataqual, spdata_abs, spdata_eval, envvar_relevance, envvar_align, process_algo, process_sens, process_rigor, process_perform, process_review, products_mapped, products_support, products_repo, interative, spdata_dataqualNotes, spdata_absNotes, spdata_evalNotes, envvar_relevanceNotes, envvar_alignNotes, process_algoNotes, process_sensNotes, process_rigorNotes, process_performNotes, process_reviewNotes, products_mappedNotes, products_supportNotes, products_repoNotes, interativeNotes ", + "FROM lkpSpeciesRubric ", + "WHERE sp_code ='", model_species, "'; ", sep="") +sdm.modeluse <- dbGetQuery(db, statement = SQLquery) + sdm.modeluse$process_review <- as.character(revUpdate$rAttribute) sdm.modeluse$process_reviewNotes <- as.character(revUpdate$rComments) From 12e48d750c9c9d8d9e4e332c66698251f86104c3 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Mon, 7 Oct 2019 13:35:41 -0400 Subject: [PATCH 07/11] handle iterative portion of rubric --- 4c_additMetadComments_rubricUpdate.r | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/4c_additMetadComments_rubricUpdate.r b/4c_additMetadComments_rubricUpdate.r index 6b17504..057f80a 100644 --- a/4c_additMetadComments_rubricUpdate.r +++ b/4c_additMetadComments_rubricUpdate.r @@ -61,6 +61,15 @@ sql <- paste0("SELECT Reviewer.EGT_ID, Reviewer.response, Reviewer.date_complete FROM Reviewer WHERE (((Reviewer.EGT_ID)= ", ElementNames$EGT_ID, " ));") reviewerData <- dbGetQuery(cn, sql) + +sql <- paste0("SELECT FinalSppList.Scientific_Name, FinalSppList.Common_Name, FinalSppList.ELEMENT_GLOBAL_ID, ", + "ModelCycle.EGT_ID, ModelCycle.model_cycle, SpeciesWorkFlow.cutecode, SpeciesWorkFlow.model_type, ", + "SpeciesWorkFlow.modeled, SpeciesWorkFlow.alternate_method, SpeciesWorkFlow.existing_model ", + "FROM (FinalSppList INNER JOIN ModelCycle ON FinalSppList.ID = ModelCycle.final_spp_list_ID) ", + "INNER JOIN SpeciesWorkFlow ON ModelCycle.ID = SpeciesWorkFlow.model_cycle_ID ", + "WHERE (((FinalSppList.ELEMENT_GLOBAL_ID)= ", ElementNames$EGT_ID, "));") +modelCycleData <- dbGetQuery(cn, sql) + dbDisconnect(cn) rm(cn) @@ -128,6 +137,27 @@ sql <- paste0("update lkpSpeciesRubric set process_review = '", revUpdate$rAttri "', process_reviewNotes = '", revUpdate$rComments, "' where EGT_ID = ", ElementNames$EGT_ID, " ;") dbExecute(db, statement = sql) +## iterative +iterMatrix <- data.frame("iAttribute" = c("C","A"), + "iComments" = c("Model not re-run with new or modified data.", + "Model was re-run with new or modified data.")) +nCycles <- nrow(modelCycleData) +maxCycle <- max(modelCycleData$model_cycle) +if(nCycles > 1){ + if(nCycles == 2 & "Both" %in% modelCycleData$model_type){ + iterAtt <- "C" + } else if(TRUE %in% modelCycleData[modelCycleData$model_cycle == maxCycle, c("alternate_method","existing_model")]){ + iterAtt <- "C" + } else { + iterAtt <- "A" + } +} + +iterUpdate <- iterMatrix[match(iterAtt, iterMatrix$iAttribute),] +sql <- paste0("update lkpSpeciesRubric set interative = '", iterUpdate$iAttribute, + "', interativeNotes = '", iterUpdate$iComments, + "' where EGT_ID = ", ElementNames$EGT_ID, " ;") +dbExecute(db, statement = sql) ## clean up ---- dbDisconnect(db) From 661d49747423cecbaa3618a0270f71291d1e2cf8 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Mon, 7 Oct 2019 13:36:34 -0400 Subject: [PATCH 08/11] better language for zero and single reviewers --- postReview_createMetadata.r | 58 +++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r index f3f6974..79602f2 100644 --- a/postReview_createMetadata.r +++ b/postReview_createMetadata.r @@ -168,32 +168,46 @@ reviewData <- dbGetQuery(cn, sql) # get info about revisions: # if on a cycle greater than 1 (but not a 'both' species with a cycle of 2), then count as revised # if HUCs are getting been removed, count as revised - - dbDisconnect(cn) rm(cn) -meanRating <- mean(reviewData$rating) -minRating <- min(reviewData$rating) -maxRating <- max(reviewData$rating) -medianRating <- median(reviewData$rating) numReviewers <- nrow(reviewData) -numReviewersPhrase <- ifelse(numReviewers == 0, "", - ifelse(numReviewers == 1, paste0(" (",numReviewers," reviewer)"), - paste0(" (",numReviewers," reviewers)"))) - -anotherNumReviewersPhrase <- ifelse(numReviewers == 0, "", - ifelse(numReviewers == 1, paste0("Based on ",numReviewers," review."), - paste0("Based on ",numReviewers," reviewers"))) - -revMatrix <- data.frame( - "rAttribute" = c("C", "Cr","A","I"), - "rComments" = c( - "Model was not reviewed by regional, taxonomic experts.", - paste0("Model review indicates possible issues with this model.",numReviewersPhrase), - paste0("Model was reviewed by regional, taxonomic experts.",numReviewersPhrase), - paste0("Model reviewed by regional, taxonomic experts with high marks.",numReviewersPhrase) - )) +if(numReviewers == 0){ + numReviewersPhrase = "" + meanRating <- "-" + minRating <- "-" + maxRating <- "-" + medianRating <- "-" +} else if (numReviewers == 1){ + numReviewersPhrase <- paste0(" (",numReviewers," reviewer)") + meanRating <- reviewData$rating + minRating <- "-" + maxRating <- "-" + medianRating <- "-" + revMatrix <- data.frame( + "rAttribute" = c("C", "Cr","A","I"), + "rComments" = c( + "Model was not reviewed by regional, taxonomic experts.", + paste0("Model review indicates possible issues with this model.",numReviewersPhrase), + paste0("Model was reviewed by a regional, taxonomic expert.",numReviewersPhrase), + paste0("Model reviewed by a regional, taxonomic expert and given high marks.",numReviewersPhrase) + )) +} else { + numReviewersPhrase <- paste0(" (",numReviewers," reviewers)") + meanRating <- mean(reviewData$rating) + minRating <- min(reviewData$rating) + maxRating <- max(reviewData$rating) + medianRating <- median(reviewData$rating) + revMatrix <- data.frame( + "rAttribute" = c("C", "Cr","A","I"), + "rComments" = c( + "Model was not reviewed by regional, taxonomic experts.", + paste0("Model review indicates possible issues with this model.",numReviewersPhrase), + paste0("Model was reviewed by regional, taxonomic experts.",numReviewersPhrase), + paste0("Model reviewed by regional, taxonomic experts and given high marks.",numReviewersPhrase) + )) +} + revAtt <- ifelse(nrow(reviewData) == 0 , "C", ifelse(meanRating < 2.5, "Cr", ifelse(meanRating < 3.5, "A", "I"))) From b60dbcfca3d7fa940577f58ac5b03c57e264fd5d Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Tue, 29 Oct 2019 07:48:45 -0400 Subject: [PATCH 09/11] update Sofaer citation from in-press --- MetadataEval_knitr.rnw | 2 +- postReview_MetadataEval_knitr.rnw | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MetadataEval_knitr.rnw b/MetadataEval_knitr.rnw index 056000e..2a1a9d4 100644 --- a/MetadataEval_knitr.rnw +++ b/MetadataEval_knitr.rnw @@ -545,7 +545,7 @@ NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Spe \bibitem{LiuEtAl2015} Liu, C., G. Newell, and M. White. 2015. On the selection of thresholds for predicting species occurrence with presence-only data. Ecology and Evolution 6:337–348. - \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. (In press). The development and delivery of species distribution models to inform decision-making. BioScience. + \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. 2019. The development and delivery of species distribution models to inform decision-making. BioScience 69:544-557. \end{thebibliography} \pagebreak diff --git a/postReview_MetadataEval_knitr.rnw b/postReview_MetadataEval_knitr.rnw index 65ca40e..cd2bc1d 100644 --- a/postReview_MetadataEval_knitr.rnw +++ b/postReview_MetadataEval_knitr.rnw @@ -591,7 +591,7 @@ NatureServe and Heritage Network Partners. \Sexpr{format(Sys.Date(), "%Y")}. Spe \bibitem{LiuEtAl2015} Liu, C., G. Newell, and M. White. 2015. On the selection of thresholds for predicting species occurrence with presence-only data. Ecology and Evolution 6:337–348. - \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. (In press). The development and delivery of species distribution models to inform decision-making. BioScience. + \bibitem{SofaerEtAl2019} Sofaer, H. R., C. S. Jarnevich1, I. S. Pearse, R. L. Smyth, S. Auer, G. L. Cook, T. C. Edwards, Jr., G. F. Guala, T. G. Howard, J. T. Morisette, and H. Hamilton. 2019. The development and delivery of species distribution models to inform decision-making. BioScience. BioScience 69:544-557. \end{thebibliography} \pagebreak From 3762188135c2773c6fd861c4cff8517957b3a0e2 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Tue, 29 Oct 2019 08:02:57 -0400 Subject: [PATCH 10/11] plural to singular typo --- postReview_createMetadata.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r index 79602f2..685d0c0 100644 --- a/postReview_createMetadata.r +++ b/postReview_createMetadata.r @@ -187,7 +187,7 @@ if(numReviewers == 0){ revMatrix <- data.frame( "rAttribute" = c("C", "Cr","A","I"), "rComments" = c( - "Model was not reviewed by regional, taxonomic experts.", + "Model was not reviewed by a regional, taxonomic expert.", paste0("Model review indicates possible issues with this model.",numReviewersPhrase), paste0("Model was reviewed by a regional, taxonomic expert.",numReviewersPhrase), paste0("Model reviewed by a regional, taxonomic expert and given high marks.",numReviewersPhrase) From 4391fcf2e8936c0530bf592226b9d38309d92374 Mon Sep 17 00:00:00 2001 From: Tim Howard Date: Tue, 29 Oct 2019 13:33:12 -0400 Subject: [PATCH 11/11] loop post-review metadata, plus tweaks --- postReview_0_user_run_SDM.R | 42 +++++++++++++++++ postReview_MetadataEval_knitr.rnw | 2 +- postReview_createMetadata.r | 76 ++++++++++++++----------------- 3 files changed, 77 insertions(+), 43 deletions(-) create mode 100644 postReview_0_user_run_SDM.R diff --git a/postReview_0_user_run_SDM.R b/postReview_0_user_run_SDM.R new file mode 100644 index 0000000..86852bd --- /dev/null +++ b/postReview_0_user_run_SDM.R @@ -0,0 +1,42 @@ +# File: postReview_0_user_run_SDM.r +# Purpose: recreate metadata after model review and export +# in the Mobi project + +library(here) +rm(list=ls()) +loc_scripts <- here() + +####### +### loop it +###### +rootPath <- file.path("H:","spp_models", "_test") + +fldrs <- list.dirs(rootPath, recursive = FALSE) + +# if you want to run fewer than the full list of folders +# subset here. +#fldrs <- fldrs[c(3,6)] +# + + + +sppVec <- unlist(lapply(strsplit(fldrs, split = "/"), FUN = function(x) x[[4]])) +sppVec + +for(sv in 1:length(sppVec)){ + + loc_model <- fldrs[[sv]] + model_species <- sppVec[[sv]] + load(file.path(loc_model, "outputs","rdata",paste0(model_species,"_runSDM_paths.Rdata"))) + # the vars we need + fn_arg_vars <- c("project_overview","model_comments","metaData_comments","modeller","project_blurb","modelrun_meta_data") + for(arg in fn_arg_vars) + assign(arg, fn_args[[arg]]) + + nm_refBoundaries <- file.path(loc_scripts,"_data","other_spatial","feature", "US_States.shp") + nm_db_file <- file.path(loc_scripts,"_data", "databases", "SDM_lookupAndTracking.sqlite") + + source(file.path(loc_scripts,"postReview_createMetadata.r")) + +} + diff --git a/postReview_MetadataEval_knitr.rnw b/postReview_MetadataEval_knitr.rnw index cd2bc1d..4fb83f2 100644 --- a/postReview_MetadataEval_knitr.rnw +++ b/postReview_MetadataEval_knitr.rnw @@ -541,7 +541,7 @@ This model was built using a methodology developed through collaboration among t \setlength{\fboxsep}{5pt} \fbox{ \begin{minipage}[c]{0.2\linewidth} -\includegraphics[width=1.0\linewidth]{../../../../../NatureServeLogo}%png logo file at repository root +\includegraphics[width=1.0\linewidth]{figure/NatureServeLogo}%png logo file at repository root \end{minipage}% \begin{minipage}[c]{0.75\linewidth} Please cite this document and its associated SDM as: \\ diff --git a/postReview_createMetadata.r b/postReview_createMetadata.r index 685d0c0..e30d2ba 100644 --- a/postReview_createMetadata.r +++ b/postReview_createMetadata.r @@ -25,30 +25,29 @@ library(OpenStreetMap) ### find and load model data ---- -## three lines need your attention. The one directly below (loc_scripts), -## about line 35 where you choose which Rdata file to use, -## and about line 46 where you choose which record to use - -### temp debugging -loc_model <- "G:/tim/_Regional_SDM/_data/species" -loc_envVars <- gsub("F:","G:",loc_envVars) -loc_scripts <- "G:/tim/_Regional_SDM" -nm_refBoundaries <- gsub("F:","G:",nm_refBoundaries) -nm_db_file <- gsub("F:","G:",nm_db_file) -#### - -setwd(loc_model) -dir.create(paste0(model_species,"/outputs/metadata"), recursive = T, showWarnings = F) -setwd(paste0(model_species,"/outputs")) -load(paste0("rdata/", modelrun_meta_data$model_run_name,".Rdata")) + +#setwd(loc_model) +#dir.create(paste0(model_species,"/outputs/metadata"), recursive = TRUE, showWarnings = FALSE) +dir.create(file.path(loc_model, "/outputs/metadata"), recursive = TRUE, showWarnings = FALSE) +#setwd(paste0(model_species,"/outputs")) + +model_run_name <- modelrun_meta_data$model_run_name +load(file.path(loc_model, "outputs","rdata",paste0(model_run_name,".RData"))) + +# rdatfiles <- list.files(file.path(loc_model, "outputs","rdata"), pattern = ".Rdata") +# rdatfile <- rdatfiles[[length(rdatfiles)]] +# load(file.path(loc_model, "outputs","rdata",rdatfile)) +# model_run_name <- strsplit(rdatfile, split = "\\.")[[1]][[1]] # get background poly data for the map (study area, reference boundaries) -studyAreaExtent <- st_read(here("_data","species",model_species,"inputs","model_input",paste0(model_run_name, "_studyArea.gpkg")), quiet = T) -referenceBoundaries <- st_read(nm_refBoundaries, quiet = T) # name of state boundaries file +studyAreaExtent <- st_read(file.path(loc_model,"inputs","model_input",paste0(model_run_name, "_studyArea.gpkg")), quiet = T) +referenceBoundaries <- st_read(nm_refBoundaries, quiet = TRUE) # name of state boundaries file -r <- dir(path = "model_predictions", pattern = ".tif$",full.names=FALSE) +r <- list.files(path = file.path(loc_model,"outputs","model_predictions"), + pattern = ".tif$", + full.names=FALSE) fileName <- r[gsub(".tif", "", r) == model_run_name] -ras <- raster(paste0("model_predictions/", fileName)) +ras <- raster(file.path(loc_model,"outputs","model_predictions", fileName)) # project to match raster, just in case studyAreaExtent <- st_transform(studyAreaExtent, as.character(ras@crs)) @@ -194,7 +193,7 @@ if(numReviewers == 0){ )) } else { numReviewersPhrase <- paste0(" (",numReviewers," reviewers)") - meanRating <- mean(reviewData$rating) + meanRating <- round(mean(reviewData$rating), digits = 1) minRating <- min(reviewData$rating) maxRating <- max(reviewData$rating) medianRating <- median(reviewData$rating) @@ -234,38 +233,31 @@ sdm.modeluse$process_performNotes <- gsub("<","$<$ ", sdm.modeluse$process_perfo ## Run knitr and create metadata ---- -# writing to the same folder as a grid might cause problems. -# if errors check that first -# more explanation: tex looks for and uses aux files, which are also used -# by esri. If there's a non-tex aux file, knitr bails. - -# Also, might need to run this twice. First time through tex builds the reference +# might need to run this twice. First time through tex builds the reference # list, second time through it can then number the refs in the doc. +setwd(file.path(loc_model, "outputs","metadata")) +#copy the logo locally [spent way too long getting latex to read from G:] +dir.create("figure", showWarnings = FALSE) +file.copy(paste0(loc_scripts,"/NatureServeLogo.png"), "figure/NatureServeLogo.png") -setwd("metadata") -# knit2pdf errors for some reason...just knit then call directly -#knit(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) -#knit2pdf(paste(loc_scripts,"MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) -knit2pdf(paste(loc_scripts,"postReview_MetadataEval_knitr.rnw",sep="/"), output=paste(model_run_name, ".tex",sep="")) -#call <- paste0("pdflatex -interaction=nonstopmode ",model_run_name , ".tex") -# call <- paste0("pdflatex -halt-on-error -interaction=nonstopmode ",model_run_name , ".tex") # this stops execution if there is an error. Not really necessary -#system(call) -#system(call) # 2nd run to apply citation numbers - +knit2pdf(file.path(loc_scripts,"postReview_MetadataEval_knitr.rnw"), + output=paste0(model_run_name, "_metadata.tex")) # delete .txt, .log etc if pdf is created successfully. -fn_ext <- c(".log",".aux",".out") -if (file.exists(paste(model_run_name, ".pdf",sep=""))){ - #setInternet2(TRUE) - #download.file(fileURL ,destfile,method="auto") - for(i in 1:NROW(fn_ext)){ +fn_ext <- c(".log",".aux",".out",".tex") +fn_ext <- paste0("_metadata", fn_ext) +if (file.exists(paste(model_run_name, "_metadata.pdf",sep=""))){ + for(i in 1:length(fn_ext)){ fn <- paste(model_run_name, fn_ext[i],sep="") if (file.exists(fn)){ file.remove(fn) } } } +# delete the figure folder +unlink("figure", recursive = TRUE) +setwd(loc_scripts) ## clean up ---- dbDisconnect(db)