From 2b5ddbeb074f1ac9e97c3cc298daabaadf438e8c Mon Sep 17 00:00:00 2001 From: "Carter T. Butts" Date: Fri, 16 Jun 2023 22:02:21 -0700 Subject: [PATCH] Various tweaks needed to get the package to CRAN, but not substantively important. --- DESCRIPTION | 7 ++++--- README.md | 12 +++++++++++- index.md | 12 +++++++++++- man/EGPHazard.Rd | 25 +++++++++++-------------- man/EGPRateEst.Rd | 26 ++++++++++++-------------- man/durations.Rd | 37 +++++++++++++------------------------ man/ergmgp-package.Rd | 3 +++ man/simEGP.Rd | 24 ++++++++++++++++-------- 8 files changed, 81 insertions(+), 65 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6a9f3ac..e627868 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,10 @@ Package: ergmgp Version: 0.1 -Date: 2023-02-27 +Date: 2023-06-09 Title: Tools for Modeling ERGM Generating Processes Authors@R: c( - person("Carter T.", "Butts", role=c("aut","cre"), email="buttsc@uci.edu") + person("Carter T.", "Butts", role=c("aut","cre"), email="buttsc@uci.edu"), + person(family="Statnet Commons", role=c("ctb")) ) Depends: network (>= 1.15), @@ -14,7 +15,7 @@ Imports: statnet.common (>= 4.2.0) LinkingTo: ergm -Description: Tools for continuous time processes with well-defined ERGM equilibria. +Description: Provides tools for simulating draws from continuous time processes with well-defined exponential family random graph (ERGM) equilibria, i.e. ERGM generating processes (EGPs). A number of EGPs are supported, including the families identified in Butts (2023) <doi:10.1080/0022250X.2023.2180001>, as are functions for hazard calculation and timing calibration. License: GPL-3 + file LICENSE URL: https://statnet.org BugReports: https://github.com/statnet/ergmgp/issues diff --git a/README.md b/README.md index 6af7235..20db469 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,17 @@ An overview of supported EGPs and pointers to other help pages can be obtained after loading the package with `help(ergmgp)`. Additional information on EGPs can also be found at the reference below. 
-## Installing from Within R +## Installing from CRAN + +The easiest way to install the package is to use CRAN. From within `R`, simply use + +``` +install.packages("ergmgp") +``` + +which will install `ergmgp` and its dependencies. Calling `library(ergmgp)` will subsequently load the package, and away you go. + +## Installing Directly from GitHub To install from GitHub, first ensure that you have the `devtools` package installed and loaded. Then, type the following: diff --git a/index.md b/index.md index 6af7235..20db469 100644 --- a/index.md +++ b/index.md @@ -4,7 +4,17 @@ An overview of supported EGPs and pointers to other help pages can be obtained after loading the package with `help(ergmgp)`. Additional information on EGPs can also be found at the reference below. -## Installing from Within R +## Installing from CRAN + +The easiest way to install the package is to use CRAN. From within `R`, simply use + +``` +install.packages("ergmgp") +``` + +which will install `ergmgp` and its dependencies. Calling `library(ergmgp)` will subsequently load the package, and away you go. + +## Installing Directly from GitHub To install from GitHub, first ensure that you have the `devtools` package installed and loaded. Then, type the following: diff --git a/man/EGPHazard.Rd b/man/EGPHazard.Rd index 2ef49ed..774c327 100644 --- a/man/EGPHazard.Rd +++ b/man/EGPHazard.Rd @@ -1,17 +1,19 @@ \name{EGPHazard} \alias{EGPHazard} -%- Also NEED an '\alias' for EACH other topic documented here. + \title{ Calculate Transition Hazards for an ERGM Generating Process } + \description{ Given an EGP and an initial state, calculate the transition rates to one or more neighboring states. } + \usage{ EGPHazard(form, coef, toggles = NULL, rate.factor = 1, process = c("LERGM", "CRSAOM", "CI", "DS", "CDCSTERGM", "CFCSTERGM", "CSTERGM", "CTERGM")) } -%- maybe also 'usage' for other objects documented here. 
+ \arguments{ \item{form}{ an ERGM formula for the EGP (or a list with \code{formation} and \code{dissolution} formulas, for \code{CSTERGM} processes). The left-hand side is used as the current state when computing transition rates. @@ -29,6 +31,7 @@ rate or pacing factor (sets the time scale). the ERGM generating process to use. } } + \details{ An ERGM generating process (EGP) is a continuous time graph process with an equilibrium distribution having a known ERGM form. See \code{\link{ergmgp}} for an overview of EGPs, including the specifications supported here. @@ -36,24 +39,23 @@ An ERGM generating process (EGP) is a continuous time graph process with an equi EGP specifications are as per \code{\link{simEGP}}. Transition rates for all currently implemented EGPs follow the specifications of Butts (2023), with the trivial addition of a pacing constant for all families (which simply sets the timescale). } + \value{ a matrix containing the toggles, indicators for whether each event would have been a formation event, and the log event hazards (one row per toggle). } + \references{ Butts, Carter T. (2023). \dQuote{Continuous Time Graph Processes with Known ERGM Equilibria: Contextual Review, Extensions, and Synthesis.} \emph{Journal of Mathematical Sociology}. \doi{10.1080/0022250X.2023.2180001} } + \author{ Carter T. 
Butts \email{buttsc@uci.edu} } -%\note{ -%% ~~further notes~~ -%} - -%% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{ergmgp}} for information on EGPs, \code{\link[ergm]{ergm}} for information on ERGM specifications, \code{\link{simEGP}} } + \examples{ #Simulate a small network with triadic dependence n <- 25 @@ -78,12 +80,7 @@ a <- function(z){(z-min(z))/diff(range(z))} plot(net, edge.col = hsv(a(ldissr[,4])*0.6)) #Blue=fast, red=slow } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory (show via RShowDoc("KEYWORDS")): + \keyword{ graphs } \keyword{ models } -% Use only one keyword per line. -% For non-standard keywords, use \concept instead of \keyword: -% \concept{ ~cpt1 } -% \concept{ ~cpt2 } -% Use only one concept per line. + diff --git a/man/EGPRateEst.Rd b/man/EGPRateEst.Rd index d754153..7f7872e 100644 --- a/man/EGPRateEst.Rd +++ b/man/EGPRateEst.Rd @@ -1,19 +1,21 @@ \name{EGPRateEst} \alias{EGPRateEst} -%- Also NEED an '\alias' for EACH other topic documented here. + \title{ Estimate Event Rates for an ERGM Generating Process } + \description{ Given an EGP, estimate either the expected time required for a specified number of transitions to occur, or the expected number of transitions within a specified time period. } + \usage{ EGPRateEst(formula, coef, process = c("LERGM", "CRSAOM", "CI", "DS", "CDCSTERGM", "CFCSTERGM", "CSTERGM", "CTERGM"), time.target = NULL, event.target = NULL, reps = 25, cores = 1, rate.factor = 1, verbose = FALSE, ...) } -%- maybe also 'usage' for other objects documented here. + \arguments{ \item{formula}{ an ERGM formula for the EGP (or a list with \code{formation} and \code{dissolution} formulas, for \code{CSTERGM} processes). The left-hand side is used as the initial state. @@ -47,6 +49,7 @@ logical; show progress information? additional arguments to \code{\link{simEGP}}. 
} } + \details{ This function can be used to estimate the expected amount of time needed for a specific number of transitions to be realized (in which case \code{event.target} should be supplied) or the expected number of transition events occurring within a specified time period (in which case \code{time.target} should be supplied). Either way, one of \code{time.target} and \code{event.target} must be given. The function works by simulating \code{reps} trajectories (using \code{simEGP}) for the specified time/number of events, and returning the mean outcome (along with some other associated statistics). @@ -54,24 +57,23 @@ A typical use case for this function is to calibrate the simulation time needed Note that, at present, all trajectories have the same starting point (the network on the left-hand side of the input formula). They are hence coupled by the initial condition (despite being otherwise independent). When equilibrium estimates from short sequences are desired, it may be wise to call this function more than once with different starting networks and integrate the results. } + \value{ A vector containing the mean outcome (time or event count), its standard error, the standard deviation of the outcome, and the number of replicates used. } + \references{ Butts, Carter T. (2023). \dQuote{Continuous Time Graph Processes with Known ERGM Equilibria: Contextual Review, Extensions, and Synthesis.} \emph{Journal of Mathematical Sociology}. \doi{10.1080/0022250X.2023.2180001} } + \author{ Carter T. 
Butts \email{buttsc@uci.edu} } -%\note{ -%% ~~further notes~~ -%} - -%% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{ergmgp}} for information on EGPs, \code{\link[ergm]{ergm}} for information on ERGM specifications, \code{\link{simEGP}} } + \examples{ #Simulate a small network with triadic dependence n <- 25 @@ -91,12 +93,8 @@ eevents <- EGPRateEst(net ~ edges + esp(0), coef = co, process = "LERGM", eevents #Expectation should be close to 500 } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory (show via RShowDoc("KEYWORDS")): + \keyword{ graphs } \keyword{ models } -% Use only one keyword per line. -% For non-standard keywords, use \concept instead of \keyword: -% \concept{ ~cpt1 } -% \concept{ ~cpt2 } -% Use only one concept per line. + + diff --git a/man/durations.Rd b/man/durations.Rd index 5b3c66e..3df126e 100644 --- a/man/durations.Rd +++ b/man/durations.Rd @@ -1,16 +1,19 @@ \name{durations} \alias{durations} -%- Also NEED an '\alias' for EACH other topic documented here. + \title{ Obtain Edge Spell Durations from an ERGM Generating Process Trajectory } + \description{ Given an input trajectory (in networkDynamic form, or network form with additional attributes), return the set of all edge durations (along with censoring information, if desired). } + \usage{ durations(net, censor = c("obs", "omit"), return.censoring = TRUE) } -%- maybe also 'usage' for other objects documented here. + + \arguments{ \item{net}{ a \code{network} or \code{networkDynamic} object containing the trajectory information. @@ -22,6 +25,7 @@ how should censoring be handled? (Currently, only returning observed spell leng logical; return censoring information? } } + \details{ This function extracts information on edge spells (periods of time in which edges are present) from the input network, and returns the spell durations (optionally, together with censoring information). 
The durations should not be assumed to be in any particular order; this function is generally invoked to examine duration distributions. @@ -31,31 +35,21 @@ Spells may be left-censored, right-censored, or both. \code{censor=="obs"} resu When using \code{durations} to estimate equilibrium duration distributions, it is important to bear in mind that EGP trajectories stopped by event count are not terminated at a random time, and hence will provide biased estimates. Consider using \code{\link{EGPRateEst}} to calibrate a reasonable simulation time, and sampling with a temporal stopping rule. } + \value{ A vector of spell durations (order not guaranteed), or a matrix containing said durations and censoring indicators (0=uncensored, 1=right-censored, 2=left-censored, and 3=interval censored). -%% ~Describe the value returned -%% If it is a LIST, use -%% \item{comp1 }{Description of 'comp1'} -%% \item{comp2 }{Description of 'comp2'} -%% ... } -%\references{ -%% ~put references to the literature/web site here ~ -%} + \author{ Carter T. 
Butts \email{buttsc@uci.edu} } -%\note{ -%% ~~further notes~~ -%} - -%% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{simEGP}} } + \examples{ -\dontrun{ #Examples are a bit slow, so not automatically run +\donttest{ #Examples are a bit slow, so not automatically run #Generate a simple CD-CSTERGM trajectory; equilibrium mean outdegree #is 2, dissolution rate is 1/3 @@ -76,7 +70,7 @@ net <- simulate(network.initialize(n)~edges, coef=log(2/(n-3))) traj <- simEGP(net~edges, coef=list(formation=log(2/(n-3)*1/3), dissolution=log(1/3)), time=500, process="CDCSTERGM", return.networkDynamic=TRUE, verbose=FALSE) -slice <- traj %t% 499 #Take a slice near the end +slice <- traj \%t\% 499 #Take a slice near the end network.edgecount(slice)/(n-1) #Mean degree apx 2 dur <- durations(traj) #Get durations head(dur) #More of a mix @@ -85,13 +79,8 @@ hist(dur[,1], xlab="Time", main="Duration Distribution") #Visualize } } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory (show via RShowDoc("KEYWORDS")): + \keyword{ manip } \keyword{ graphs } \keyword{ survival } -% Use only one keyword per line. -% For non-standard keywords, use \concept instead of \keyword: -% \concept{ ~cpt1 } -% \concept{ ~cpt2 } -% Use only one concept per line. + diff --git a/man/ergmgp-package.Rd b/man/ergmgp-package.Rd index cd8328c..7aa5d15 100644 --- a/man/ergmgp-package.Rd +++ b/man/ergmgp-package.Rd @@ -3,10 +3,13 @@ \alias{ergmgp-package} \alias{ergmgp} \alias{EGP_init} + \title{Tools for Modeling ERGM Generating Processes} + \description{ Tools for simulation and analysis of continuous time graph processes with equilibria that can be described in exponential family random graph (ERGM) form. 
} + \details{ A random graph \eqn{G} on support \eqn{\mathcal{G}}{Supp(G)} is said to be expressed in exponential family random graph (ERGM) form when its probability mass function (pmf) is written as diff --git a/man/simEGP.Rd b/man/simEGP.Rd index 86c8c21..c6e84fe 100644 --- a/man/simEGP.Rd +++ b/man/simEGP.Rd @@ -1,13 +1,15 @@ \name{simEGP} \alias{simEGP} \alias{simEGPTraj} -%- Also NEED an '\alias' for EACH other topic documented here. + \title{ Simulate Trajectories from an ERGM Generating Process } + \description{ Given an \code{\link[ergm]{ergm}} formula, simulate trajectories from a continuous time graph process having the specified ERGM as a limiting distribution. A number of different processes are supported, and termination may be specified either by phenomenological time or event counts. } + \usage{ simEGP(form, coef, events = 1, time = NULL, rate.factor = 1, time.offset = 0, event.offset = 0, process = c("LERGM", "CRSAOM", @@ -24,7 +26,7 @@ simEGPTraj(form, coef, events = 1, time = NULL, checkpoints = 1, return.changetime = FALSE, return.history = FALSE, verbose = TRUE, trace.interval = 100, statsonly = FALSE, monitor = NULL) } -%- maybe also 'usage' for other objects documented here. + \arguments{ \item{form}{ an \code{\link[ergm]{ergm}} formula defining terms for the EGP; the left-hand side must be a network object, whose properties are used to determine the state space. For the \code{CSTERGM} process, a list containing two such formulas must be used, with named elements \code{formation} (for the formation model) and \code{dissolution} (for the dissolution model). @@ -93,6 +95,7 @@ optionally, an \code{\link[ergm]{ergm}} formula with additional statistics to tr additional arguments (currently unused). } } + \details{ An ERGM generating process (EGP) is a continuous time graph process with an equilibrium distribution having a known ERGM form. See \code{\link{ergmgp}} for an overview of EGPs, including the specifications supported here. 
@@ -106,27 +109,31 @@ Simulation itself follows the discrete event approach described in Butts (2023). To obtain equilibrium graph distributions from an EGP, it is generally (much) more efficient to use the \code{simulate} functions in the \code{\link[ergm]{ergm}} package: they employ MCMC algorithms that are unconstrained by the need to follow realistic trajectories, and that are optimized for rapid mixing. (In particular, note that many systems can become \emph{kinetically trapped}, spending very long periods in metastable states that are far from equilibrium. This can be a real-world phenomenon, but is not always desirable from a computational point of view. Functions such as \code{simEGP} are intended to faithfully reproduce such dynamics, while MCMC algorithms are intended to avoid them.) Comparison of late-phase draws from a \code{simERGMPot} trajectory with equilibrium ERGM draws can be used to evaluate convergence to equilibrium behavior (where desired); alternately, \code{simEGP} can be seeded with ERGM draws to follow trajectories from equilibrated states. Consult the \code{ergm} package documentation for details. } + \value{ For \code{simEGP}, a network object containing the final graph state, with network attributes \code{Time}, \code{Events}, and \code{Potential} listing the time, event count, and ERGM potential at the end of the simulation interval. See above for additional attributes that may be added if history retention is activated. If \code{return.networkDynamic==TRUE}, then the return value is instead a \code{\link[networkDynamic]{networkDynamic}} object containing the event history as edge activity data; be aware that an edge will exist in this object if any corresponding edge is ever active, so the raw graph state should not be used to access the final system state. Instead, use the \code{\link[networkDynamic]{network.extract}} method to query the network state at the desired time point. 
For \code{simERGMPotTraj}, a list containing the simulated trajectories. These are either \emph{network.list} objects containing the networks at each checkpoint (with time, step, and potential attribute as described above), or else matrices of trace statistics (if \code{statsonly==TRUE}). Note that the statistics are in any event included as an attribute to each network list, so the effect of \code{statsonly==TRUE} is simply not to retain the graph states. } + \references{ Butts, Carter T. (2023). \dQuote{Continuous Time Graph Processes with Known ERGM Equilibria: Contextual Review, Extensions, and Synthesis.} \emph{Journal of Mathematical Sociology}. \doi{10.1080/0022250X.2023.2180001} } + \author{ Carter T. Butts \email{buttsc@uci.edu} } + \note{ Using \code{steps} to control trajectory termination will lead to biased samples (sometimes severely so); this is because transitions are not random times. If your goal is to obtain equilibrium draws (or draws en route thereto), use \code{time} to set the stopping point. See \code{\link{EGPRateEst}} for a simple tool for calibrating simulation times. 
} - \seealso{ \code{\link{ergmgp}} for information on EGPs, \code{\link[ergm]{ergm}} for information on ERGM specifications, \code{\link{EGPHazard}}, \code{\link{EGPRateEst}}, \code{\link[networkDynamic]{networkDynamic}} } + \examples{ -\dontrun{ +\donttest{ #Small example of 2-ribbon generation n<-100 set.seed(1331) @@ -143,11 +150,12 @@ sim<-simEGPTraj(net~edges+kstar(2)+nsp(1:2), trajectories = 2, mc.cores = 1, log.sampling = TRUE, process = "LERGM", verbose = TRUE) length(sim)==2 #One entry per simulated trajectory -par(mfrow=c(2,3)) +op<-par(mfrow=c(2,3)) for(i in 1:6) #Show the first trajectory plot(sim[[1]][[i]],main=paste("Time",round(sim[[1]][[i]]\%n\%"Time",2))) summary(sim[[2]]~edges+kstar(2)) #Show selected stats from the second attributes(sim[[1]]) #Show precomputed statistics +par(op) #A simple example with statsonly set.seed(1331) @@ -155,13 +163,13 @@ sim<-simEGPTraj(net~edges+esp(0), coef = c(log(2)-log(n), -1), time = 200, checkpoints = 25, process = "LERGM", statsonly = TRUE, monitor = ~triangle) sim #Note the monitor stat -par(mfrow=c(1,1)) +op<-par(mfrow=c(1,1)) plot(sim[,"Time"], sim[,"edges"], type = "l") #Time by edge count lines(sim[,"Time"], sim[,"esp0"], col = 2) #Add ESP(0)s +par(op) } } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory. \keyword{ models }% use one of RShowDoc("KEYWORDS") \keyword{ graphs }% __ONLY ONE__ keyword per line +