diff --git a/DESCRIPTION b/DESCRIPTION index 936e6ccfa..109b33bf4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,6 +49,7 @@ Suggests: testthat (>= 3.0.2), tergm (>= 4.0.0), ergm.count (>= 4.0), + ergm.tapered (>= 1.0.0), networkDynamic (>= 0.10.1), withr, covr, diff --git a/R/control.ergm.R b/R/control.ergm.R index 564a3d86c..2e473ca71 100644 --- a/R/control.ergm.R +++ b/R/control.ergm.R @@ -128,6 +128,9 @@ #' necessary. Note that this can be very dangerous unless you know #' what you are doing. #' +#' @param MPLE.save.xmat If `TRUE`, the design matrix of change statistics +#' from the pseudo-likelihood computation will be returned. +#' #' @template control_MCMC_prop #' #' @param MCMC.interval Number of proposals between sampled statistics. @@ -275,6 +278,8 @@ #' may increase the target ESS to reduce the MCMC standard error. #' @param MCMLE.metric Method to calculate the loglikelihood approximation. #' See Hummel et al (2010) for an explanation of "lognormal" and "naive". +#' "Kpenalty" specifies a tapered model with a penalty for +#' kurtosis. It is used by the \code{ergm.tapered} package and is not usually specified directly by the user. #' @param MCMLE.method Deprecated. By default, ergm uses \code{trust}, and #' falls back to \code{optim} with Nelder-Mead method when \code{trust} fails. #' @param MCMLE.dampening (logical) Should likelihood dampening be used? @@ -420,6 +425,10 @@ #' for CD. #' #' @param loglik See \code{\link{control.ergm.bridge}} +#' @param MCMC.esteq.exclude.statistics Character string pattern: statistics whose names contain this string will be excluded from +#' the estimating equations. For example, this supports tapered models, which may have terms with non-specific means. +#' @param MCMLE.varweight Numeric multiplier for the covariance term in the loglikelihood, +#' where 0.5 is the "true" value; it can be increased or decreased.
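For orientation, a minimal usage sketch (not part of the patch) of the new user-facing controls documented above; the dataset, model terms, and the "Taper_" name pattern are purely illustrative, and MCMLE.metric="Kpenalty" is normally set by ergm.tapered rather than by hand:

library(ergm)
data(florentine)  # example data shipped with ergm, used only as a stand-in

ctrl <- control.ergm(
  MPLE.save.xmat = TRUE,                     # keep the MPLE design matrix in the fit
  MCMC.esteq.exclude.statistics = "Taper_",  # illustrative name pattern to exclude
  MCMLE.varweight = 0.5                      # the default "true" weight
)
fit <- ergm(flomarriage ~ edges + gwesp(0.25, fixed = TRUE), control = ctrl)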
#' @template term_options #' @template control_MCMC_parallel #' @template seed @@ -478,6 +487,7 @@ control.ergm<-function(drop=TRUE, MPLE.nonident=c("warning","message","error"), MPLE.nonident.tol=1e-10, MPLE.constraints.ignore=FALSE, + MPLE.save.xmat=FALSE, MCMC.prop=trim_env(~sparse), MCMC.prop.weights="default", MCMC.prop.args=list(), @@ -500,6 +510,7 @@ control.ergm<-function(drop=TRUE, MCMC.maxedges=Inf, MCMC.addto.se=TRUE, MCMC.packagenames=c(), + MCMC.esteq.exclude.statistics=NULL, SAN.maxit=4, SAN.nsteps.times=8, @@ -540,12 +551,13 @@ control.ergm<-function(drop=TRUE, MCMLE.min.depfac=2, MCMLE.sampsize.boost.pow=0.5, + MCMLE.varweight=0.5, MCMLE.MCMC.precision=if(startsWith("confidence", MCMLE.termination[1])) 0.1 else 0.005, MCMLE.MCMC.max.ESS.frac=0.1, MCMLE.metric=c("lognormal", "logtaylor", "Median.Likelihood", - "EF.Likelihood", "naive"), + "EF.Likelihood", "Kpenalty", "naive"), MCMLE.method=c("BFGS","Nelder-Mead"), MCMLE.dampening=FALSE, MCMLE.dampening.min.ess=20, @@ -685,6 +697,9 @@ ADAPTIVE_MCMC_CONTROLS <- c("MCMC.effectiveSize", "MCMC.effectiveSize.damp", "MC PARALLEL_MCMC_CONTROLS <- c("parallel","parallel.type","parallel.version.check") OBS_MCMC_CONTROLS <- c("MCMC.base.samplesize", "MCMC.base.effectiveSize", "MCMC.samplesize", "MCMC.effectiveSize", "MCMC.interval", "MCMC.burnin") MPLE_CONTROLS <- c("MPLE.samplesize","MPLE.type","MPLE.maxit") +STATIC_MCMLE_CONTROLS <- c("MCMC.esteq.exclude.statistics", "MCMLE.varweight", + "MCMLE.dampening", "MCMLE.dampening.min.ess", "MCMLE.dampening.level") +STATIC_TAPERING_CONTROLS <- c("MCMLE.kurtosis.prior", "MCMLE.kurtosis.location", "MCMLE.kurtosis.scale", "MCMLE.kurtosis.penalty") remap_algorithm_MCMC_controls <- function(control, algorithm){ CTRLS <- c(SCALABLE_MCMC_CONTROLS, STATIC_MCMC_CONTROLS, ADAPTIVE_MCMC_CONTROLS) %>% keep(startsWith,"MCMC.") %>% substr(6, 10000L) diff --git a/R/control.ergm.bridge.R b/R/control.ergm.bridge.R index 6e5d65102..9e5ece980 100644 --- a/R/control.ergm.bridge.R +++ b/R/control.ergm.bridge.R @@ -41,6 +41,8 @@ #' @template control_MCMC_parallel #' @template seed #' @template control_MCMC_packagenames +#' @param MCMC.esteq.exclude.statistics Character string pattern: statistics whose names contain this string will be excluded from +#' the estimating equations. For example, this supports tapered models, which may have terms with non-specific means. #' @template control_dots #' @return A list with arguments as components. #' @seealso [ergm.bridge.llr()] @@ -69,6 +71,7 @@ control.ergm.bridge<-function(bridge.nsteps=16, # Number of geometric bridges to MCMC.maxedges=Inf, MCMC.packagenames=c(), + MCMC.esteq.exclude.statistics=NULL, term.options=list(), seed=NULL, diff --git a/R/control.simulate.R b/R/control.simulate.R index 9d5668149..a6c54364c 100644 --- a/R/control.simulate.R +++ b/R/control.simulate.R @@ -47,6 +47,8 @@ #' @template term_options #' @template control_MCMC_parallel #' @template control_MCMC_packagenames +#' @param MCMC.esteq.exclude.statistics Character string pattern: statistics whose names contain this string will be excluded from +#' the estimating equations. For example, this supports tapered models, which may have terms with non-specific means. #' @template control_dots #' @return A list with arguments as components. #' @seealso \code{\link{simulate.ergm}}, \code{\link{simulate.formula}}.
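The same exclusion pattern can also be supplied on the simulation side; a hedged sketch (the formula, coefficients, and "Taper_" pattern are illustrative only, not taken from the patch):

library(ergm)
data(florentine)

sims <- simulate(flomarriage ~ edges + gwesp(0.25, fixed = TRUE),
                 coef = c(-1.6, 0.1), nsim = 3,
                 control = control.simulate.formula(
                   MCMC.esteq.exclude.statistics = "Taper_"))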
@@ -79,6 +81,7 @@ control.simulate.formula.ergm<-function(MCMC.burnin=MCMC.interval*16, MCMC.maxedges=Inf, MCMC.packagenames=c(), + MCMC.esteq.exclude.statistics=NULL, MCMC.runtime.traceplot=FALSE, network.output="network", @@ -114,7 +117,7 @@ control.simulate.formula<-control.simulate.formula.ergm #' @description While the others supply a full set of simulation #' settings, `control.simulate.ergm` when passed as a control #' parameter to [simulate.ergm()] allows some settings to be -#' inherited from the ERGM stimation while overriding others. +#' inherited from the ERGM estimation while overriding others. #' #' @export control.simulate.ergm control.simulate.ergm<-function(MCMC.burnin=NULL, @@ -140,6 +143,7 @@ control.simulate.ergm<-function(MCMC.burnin=NULL, MCMC.maxedges=Inf, MCMC.packagenames=NULL, + MCMC.esteq.exclude.statistics=NULL, MCMC.runtime.traceplot=FALSE, network.output="network", diff --git a/R/ergm.CD.fixed.R b/R/ergm.CD.fixed.R index 1f306d841..caf832429 100644 --- a/R/ergm.CD.fixed.R +++ b/R/ergm.CD.fixed.R @@ -122,6 +122,11 @@ ergm.CD.fixed <- function(init, nw, model, mcmc.init <- init finished <- FALSE + control.llik <- list(MCMLE.metric=control$CD.metric, + MCMLE.varweight=control$MCMLE.varweight, MCMLE.dampening=control$CD.dampening, + MCMLE.dampening.min.ess=control$CD.dampening.min.ess, + MCMLE.dampening.level=control$CD.dampening.level) + for(iteration in 1:control$CD.maxit){ if(iteration == control$CD.maxit) finished <- TRUE if(verbose){ @@ -233,10 +238,8 @@ ergm.CD.fixed <- function(init, nw, model, calc.mcmc.se=FALSE, hessianflag=control$main.hessian, method=control$CD.method, - dampening=control$CD.dampening, - dampening.min.ess=control$CD.dampening.min.ess, - dampening.level=control$CD.dampening.level, metric=control$CD.metric, + control.llik=control.llik, steplen=steplen, verbose=verbose, estimateonly=!finished) diff --git a/R/ergm.MCMCse.R b/R/ergm.MCMCse.R index 9d0eae258..c0a570534 100644 --- a/R/ergm.MCMCse.R +++ b/R/ergm.MCMCse.R @@ -24,7 +24,7 @@ #' latter are always 1 for lognormal metric. 
#' @noRd ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs, - H, H.obs, metric = c("IS", "lognormal")) { + H, H.obs, metric = c("IS", "lognormal"), exclude=NULL) { metric <- match.arg(metric) # Transform theta to eta @@ -39,7 +39,7 @@ ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs, av <- colMeans.mcmc.list(statsmatrices) # av <- apply(statsmatrices,2,median) xsims <- sweep.mcmc.list(statsmatrices, av, "-") - gsims <- ergm.estfun(xsims, theta=theta, model=model) + gsims <- ergm.estfun(xsims, theta=theta, model=model, exclude=exclude) xobs <- -av xsims <- xsims[,!offsetmap, drop=FALSE] xsim <- as.matrix(xsims) @@ -49,7 +49,7 @@ ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs, av.obs <- colMeans.mcmc.list(statsmatrices.obs) # av.obs <- apply(statsmatrices.obs, 2, median) xsims.obs <- sweep.mcmc.list(statsmatrices.obs, av.obs,"-") - gsims.obs <- ergm.estfun(xsims.obs, theta=theta, model=model) + gsims.obs <- ergm.estfun(xsims.obs, theta=theta, model=model, exclude=exclude) xsims.obs <- xsims.obs[,!offsetmap, drop=FALSE] xsim.obs <- as.matrix(xsims.obs) gsim.obs <- as.matrix(gsims.obs) @@ -126,6 +126,14 @@ ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs, mc.cov[!novar,!novar] <- mc.cov0 } + if(!is.null(exclude)){ + x.exclude <- match(exclude,names(theta)) + if(!is.na(x.exclude)){ + offsettheta[x.exclude] <- TRUE + } + } + + mc.cov.offset[!offsettheta,!offsettheta] <- mc.cov rownames(mc.cov.offset) <- colnames(mc.cov.offset) <- param_names(model) diff --git a/R/ergm.MCMLE.R b/R/ergm.MCMLE.R index 1d52c995e..25870384a 100644 --- a/R/ergm.MCMLE.R +++ b/R/ergm.MCMLE.R @@ -211,6 +211,11 @@ ergm.MCMLE <- function(init, nw, model, rm(state) } + control.llik <- list() + control.transfer <- c(STATIC_MCMLE_CONTROLS, STATIC_TAPERING_CONTROLS) + for(arg in control.transfer) + if(!is.null(control[[arg]])) + control.llik[arg] <- list(control[[arg]]) for(iteration in 1:control$MCMLE.maxit){ if(verbose){ @@ -292,7 +297,7 @@ ergm.MCMLE <- function(init, nw, model, } # Compute the sample estimating equations and the convergence p-value. - esteqs <- ergm.estfun(statsmatrices, theta=mcmc.init, model=model) + esteqs <- ergm.estfun(statsmatrices, theta=mcmc.init, model=model, exclude=control$MCMC.esteq.exclude.statistics) esteq <- as.matrix(esteqs) if(isTRUE(all.equal(apply(esteq,2,stats::sd), rep(0,ncol(esteq)), check.names=FALSE))&&!all(esteq==0)) stop("Unconstrained MCMC sampling did not mix at all. Optimization cannot continue.") @@ -302,7 +307,7 @@ ergm.MCMLE <- function(init, nw, model, nonident_action = control$MCMLE.nonident, nonvar_action = control$MCMLE.nonvar) - esteqs.obs <- if(obs) ergm.estfun(statsmatrices.obs, theta=mcmc.init, model=model) else NULL + esteqs.obs <- if(obs) ergm.estfun(statsmatrices.obs, theta=mcmc.init, model=model, exclude=control$MCMC.esteq.exclude.statistics) else NULL esteq.obs <- if(obs) as.matrix(esteqs.obs) else NULL # Update the interval to be used. 
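The control.llik transfer added above copies only the static MCMLE and tapering controls that are actually set; a standalone sketch of the idiom in plain R (the names and values here are illustrative, not the package defaults):

ctl <- list(MCMLE.varweight = 0.5, MCMLE.dampening = FALSE, MCMLE.kurtosis.location = NULL)
transfer <- c("MCMLE.varweight", "MCMLE.dampening", "MCMLE.kurtosis.location")
ctl.llik <- list()
for (arg in transfer)
  if (!is.null(ctl[[arg]])) ctl.llik[arg] <- list(ctl[[arg]])
str(ctl.llik)  # the unset kurtosis entry is not carried over; two entries remain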
@@ -411,10 +416,8 @@ ergm.MCMLE <- function(init, nw, model, calc.mcmc.se=control$MCMLE.termination == "precision" || (control$MCMC.addto.se && last.adequate) || iteration == control$MCMLE.maxit, hessianflag=control$main.hessian, method=control$MCMLE.method, - dampening=control$MCMLE.dampening, - dampening.min.ess=control$MCMLE.dampening.min.ess, - dampening.level=control$MCMLE.dampening.level, metric=control$MCMLE.metric, + control.llik=control.llik, steplen=steplen, verbose=verbose, estimateonly=!calc.MCSE) @@ -549,7 +552,7 @@ ergm.MCMLE <- function(init, nw, model, control$MCMC.effectiveSize <- round(control$MCMC.effectiveSize * prec.scl) if(control$MCMC.effectiveSize/control$MCMC.samplesize>control$MCMLE.MCMC.max.ESS.frac) control$MCMC.samplesize <- control$MCMC.effectiveSize/control$MCMLE.MCMC.max.ESS.frac # control$MCMC.samplesize <- round(control$MCMC.samplesize * prec.scl) - message("Increasing target MCMC sample size to ", control$MCMC.samplesize, ", ESS to",control$MCMC.effectiveSize,".") + message("Increasing target MCMC sample size to ", control$MCMC.samplesize, ", ESS to ",control$MCMC.effectiveSize,".") } else { # Fixed-interval sampling control$MCMC.samplesize <- round(control$MCMC.samplesize * prec.scl) control$MCMC.burnin <- round(control$MCMC.burnin * prec.scl) diff --git a/R/ergm.R b/R/ergm.R index 64d37617c..33f79ce3e 100644 --- a/R/ergm.R +++ b/R/ergm.R @@ -754,7 +754,7 @@ ergm <- function(formula, response=NULL, nonvar_action = control$MPLE.nonvar)) ) - initialfit$xmat.full <- NULL # No longer needed but takes up space. + if(!control$MPLE.save.xmat) initialfit$xmat.full <- NULL # No longer needed but takes up space. estimate.desc <- switch(estimate, MPLE = if(MPLE.is.MLE) "Maximum Likelihood" diff --git a/R/ergm.bridge.R b/R/ergm.bridge.R index cc09e4f22..eb120862d 100644 --- a/R/ergm.bridge.R +++ b/R/ergm.bridge.R @@ -154,6 +154,12 @@ ergm.bridge.llr<-function(object, response=NULL, reference=~Bernoulli, constrain ## Miscellaneous settings Dtheta.Du <- (to-from)[!state[[1]]$model$etamap$offsettheta] + if(!is.null(control$MCMC.esteq.exclude.statistics)){ + x.exclude <- match(control$MCMC.esteq.exclude.statistics,names(Dtheta.Du)) + if(!is.na(x.exclude)){ + Dtheta.Du <- Dtheta.Du[-x.exclude] + } + } ## Handle target statistics, if passed. if(!is.null(target.stats)){ @@ -167,8 +173,11 @@ ergm.bridge.llr<-function(object, response=NULL, reference=~Bernoulli, constrain ## Helper function to calculate Dtheta.Du %*% Deta.Dtheta %*% g(y) llrsamp <- function(samp, theta){ - if(is.mcmc.list(samp)) lapply.mcmc.list(ergm.estfun(samp, theta, state[[1]]$model$etamap), `%*%`, Dtheta.Du) - else sum(ergm.estfun(samp, theta, state[[1]]$model$etamap) * Dtheta.Du) + if(is.mcmc.list(samp)){ + lapply.mcmc.list(ergm.estfun(samp, theta, state[[1]]$model$etamap, exclude=control$MCMC.esteq.exclude.statistics), `%*%`, Dtheta.Du) + }else{ + sum(ergm.estfun(samp, theta, state[[1]]$model$etamap, exclude=control$MCMC.esteq.exclude.statistics) * Dtheta.Du) + } } diff --git a/R/ergm.estimate.R b/R/ergm.estimate.R index 9634a41ac..24959cf4b 100644 --- a/R/ergm.estimate.R +++ b/R/ergm.estimate.R @@ -37,10 +37,10 @@ # trace : a non-negative interger specifying how much tracing # information should be printed by the routine; # default=6*'verbose' -# dampening : (logical) should likelihood dampening be used? 
-# dampening.min.ess: effective sample size below which dampening is used -# dampening.level : proportional distance from boundary of the convex hull -# move +# control.llik : A list of control values for the log-likelihood functions. These include: +# MCMLE.dampening : (logical) should likelihood dampening be used? +# MCMLE.dampening.min.ess: effective sample size below which dampening is used +# MCMLE.dampening.level : proportional distance from boundary of the convex hull # estimateonly : whether only the estimates (vs. the estimates and the # standard errors) should be calculated; default=FALSE # @@ -61,9 +61,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, method="Nelder-Mead", calc.mcmc.se=TRUE, hessianflag=TRUE, verbose=FALSE, trace=6*verbose, - dampening=FALSE, - dampening.min.ess=100, - dampening.level=0.1, + control.llik=control.logLik.ergm(), steplen=1, cov.type="normal",# cov.type="robust", estimateonly=FALSE, ...) { @@ -147,9 +145,6 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, # Choose appropriate loglikelihood, gradient, and Hessian functions # depending on metric chosen and also whether obsprocess==TRUE - # Also, choose varweight multiplier for covariance term in loglikelihood - # where 0.5 is the "true" value but this can be increased or decreased - varweight <- 0.5 if (verbose) { message("Using ", metric, " metric (see control.ergm function).") } if (obsprocess) { loglikelihoodfn <- switch(metric, @@ -158,6 +153,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.fun.obs.lognormal, Median.Likelihood=llik.fun.obs.robust, EF.Likelihood=llik.fun.obs.lognormal, + Kpenalty=ergm.tapered::llik.fun.obs.Kpenalty, llik.fun.obs.IS) gradientfn <- switch(metric, Likelihood=llik.grad.obs.IS, @@ -165,6 +161,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.grad.obs.IS, Median.Likelihood=llik.grad.obs.IS, EF.Likelihood=llik.grad.obs.IS, + Kpenalty=ergm.tapered::llik.grad.obs.Kpenalty.numDeriv, llik.grad.obs.IS) Hessianfn <- switch(metric, Likelihood=llik.hessian.obs.IS, @@ -172,6 +169,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.hessian.obs.IS, Median.Likelihood=llik.hessian.obs.IS, EF.Likelihood=llik.hessian.obs.IS, + Kpenalty=ergm.tapered::llik.hessian.obs.Kpenalty.numDeriv, llik.hessian.obs.IS) } else { loglikelihoodfn <- switch(metric, @@ -180,6 +178,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.fun.logtaylor, Median.Likelihood=llik.fun.median, EF.Likelihood=llik.fun.EF, + Kpenalty=ergm.tapered::llik.fun.Kpenalty, llik.fun.IS) gradientfn <- switch(metric, Likelihood=llik.grad.IS, @@ -187,6 +186,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.grad.IS, Median.Likelihood=llik.grad.IS, EF.Likelihood=llik.grad.IS, + Kpenalty=ergm.tapered::llik.grad.Kpenalty, llik.grad.IS) Hessianfn <- switch(metric, Likelihood=llik.hessian.IS, @@ -194,6 +194,7 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, logtaylor=llik.hessian.IS, Median.Likelihood=llik.hessian.IS, EF.Likelihood=llik.hessian.IS, + Kpenalty=llik.hessian.IS, llik.hessian.IS) } @@ -272,12 +273,10 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, parscale=rep(1,length(guess)), minimize=FALSE, xsim=xsim, xsim.obs=xsim.obs, - varweight=varweight, - dampening=dampening, - 
dampening.min.ess=dampening.min.ess, - dampening.level=dampening.level, - eta0=eta0, etamap=etamap.no), - silent=FALSE) + eta0=eta0, etamap=etamap.no, + control.llik=control.llik), + silent=FALSE) + if(inherits(Lout,"try-error")) { message("MLE could not be found. Trying Nelder-Mead...") Lout <- try(optim(par=guess, @@ -288,11 +287,8 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, reltol=nr.reltol), xsim=xsim, xsim.obs=xsim.obs, - varweight=varweight, - dampening=dampening, - dampening.min.ess=dampening.min.ess, - dampening.level=dampening.level, - eta0=eta0, etamap=etamap.no), + eta0=eta0, etamap=etamap.no, + control.llik=control.llik), silent=FALSE) if(inherits(Lout,"try-error")){ message(paste("No direct MLE exists!")) @@ -319,8 +315,8 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, gradienttheta <- llik.grad.IS(theta=Lout$par, xsim=xsim, xsim.obs=xsim.obs, - varweight=varweight, - eta0=eta0, etamap=etamap.no) + eta0=eta0, etamap=etamap.no, + control.llik=control.llik) gradient <- rep(NA, length=length(init)) gradient[!model$etamap$offsettheta] <- gradienttheta # @@ -331,12 +327,11 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, mc.cov <- matrix(NA, length(theta), length(theta)) covar <- NA if(!hessianflag || steplen!=1){ - Lout$hessian <- Hessianfn(theta=Lout$par, + Lout$hessian <- llik.hessian.IS(theta=Lout$par, xsim=xsim.orig, xsim.obs=xsim.orig.obs, - varweight=varweight, - eta0=eta0, etamap=etamap.no - ) + eta0=eta0, etamap=etamap.no, + control.llik=control.llik) } covar <- matrix(NA, ncol=length(theta), nrow=length(theta)) @@ -350,7 +345,8 @@ ergm.estimate<-function(init, model, statsmatrices, statsmatrices.obs=NULL, if(calc.mcmc.se){ if (verbose) message("Starting MCMC s.e. 
computation.") mcse.metric <- - if ((metric == "lognormal" || metric == "Likelihood") && length(model$etamap$curved) == 0) "lognormal" + if ((metric %in% c("lognormal", "Likelihood", "Kpenalty")) + && length(model$etamap$curved) == 0) "lognormal" else "IS" mc.cov <- ergm.MCMCse(model = model, theta = theta, init = init, statsmatrices = statsmatrices, diff --git a/R/ergm.getMCMCsample.R b/R/ergm.getMCMCsample.R index ec992192b..781945356 100644 --- a/R/ergm.getMCMCsample.R +++ b/R/ergm.getMCMCsample.R @@ -239,9 +239,12 @@ ergm_MCMC_sample <- function(state, control, theta=NULL, if(verbose) message("Increasing thinning to ",interval,".") } - esteq <- lapply(sms, function(sm) NVL3(theta, ergm.estfun(sm, ., as.ergm_model(state0[[1]])), sm[,!as.ergm_model(state0[[1]])$etamap$offsetmap,drop=FALSE])) %>% + esteq <- lapply(sms, function(sm) NVL3(theta, ergm.estfun(sm, ., as.ergm_model(state0[[1]]), + exclude=control$MCMC.esteq.exclude.statistics), sm[,!as.ergm_model(state0[[1]])$etamap$offsetmap,drop=FALSE])) %>% lapply.mcmc.list(mcmc, start=1, thin=interval) %>% lapply.mcmc.list(`-`) + if("Taper_Penalty" %in% dimnames(esteq[[1]])[[2]]) {browser()} + if(control.parallel$MCMC.runtime.traceplot){ plot(window(esteq, thin=thin(esteq)*max(1,floor(niter(esteq)/1000))) ,ask=FALSE,smooth=TRUE,density=FALSE) @@ -292,7 +295,8 @@ ergm_MCMC_sample <- function(state, control, theta=NULL, if(!is.null(nws)) nws <- map(outl, "saved") if(control.parallel$MCMC.runtime.traceplot){ - lapply(sms, function(sm) NVL3(theta, ergm.estfun(sm, ., as.ergm_model(state0[[1]])), sm[,!as.ergm_model(state0[[1]])$etamap$offsetmap,drop=FALSE])) %>% lapply(mcmc, start=control.parallel$MCMC.burnin+1, thin=control.parallel$MCMC.interval) %>% as.mcmc.list() %>% window(., thin=thin(.)*max(1,floor(niter(.)/1000))) %>% plot(ask=FALSE,smooth=TRUE,density=FALSE) + lapply(sms, function(sm) NVL3(theta, ergm.estfun(sm, ., as.ergm_model(state0[[1]]), + exclude=control$MCMC.esteq.exclude.statistics), sm[,!as.ergm_model(state0[[1]])$etamap$offsetmap,drop=FALSE])) %>% lapply(mcmc, start=control.parallel$MCMC.burnin+1, thin=control.parallel$MCMC.interval) %>% as.mcmc.list() %>% window(., thin=thin(.)*max(1,floor(niter(.)/1000))) %>% plot(ask=FALSE,smooth=TRUE,density=FALSE) } } diff --git a/R/ergm.llik.R b/R/ergm.llik.R index 6df796649..5b50b970c 100644 --- a/R/ergm.llik.R +++ b/R/ergm.llik.R @@ -32,7 +32,7 @@ # of the 'etamap' # xsim : the matrix of simulated statistics # probs : the probability weight for each row of the stats matrix -# varweight : the weight by which the variance of the base predictions will be +# control.llik$MCMLE.varweight : the weight by which the variance of the base predictions will be # scaled; the name of this param was changed from 'penalty' to better # reflect what this parameter actually is; default=0.5, which is the # "true" weight, in the sense that the lognormal approximation is @@ -60,9 +60,10 @@ ##################################################################################### llik.fun.lognormal <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ + # Convert theta to eta eta <- ergm.eta(theta, etamap) @@ -72,7 +73,7 @@ llik.fun.lognormal <- function(theta, xsim, xsim.obs=NULL, basepred <- xsim %*% etaparam mb <- lweighted.mean(basepred,lrowweights(xsim)) vb <- lweighted.var(basepred,lrowweights(xsim)) - - mb - varweight*vb + - mb - control.llik$MCMLE.varweight*vb } @@ 
-82,9 +83,9 @@ llik.fun.lognormal <- function(theta, xsim, xsim.obs=NULL, ##################################################################################### llik.grad.IS <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ # Obtain canonical parameters incl. offsets and difference from sampled-from eta <- ergm.eta(theta, etamap) @@ -114,9 +115,9 @@ llik.grad.IS <- function(theta, xsim, xsim.obs=NULL, # this is equation (3.5) of Hunter & Handcock (2006) ##################################################################################### llik.hessian.IS <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ # Obtain canonical parameters incl. offsets and difference from sampled-from eta <- ergm.eta(theta, etamap) etaparam <- eta-eta0 @@ -141,9 +142,9 @@ llik.hessian.IS <- function(theta, xsim, xsim.obs=NULL, ##################################################################################### llik.fun.EF <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ eta <- ergm.eta(theta, etamap) etaparam <- eta-eta0 basepred <- xsim %*% etaparam @@ -161,9 +162,9 @@ llik.fun.EF <- function(theta, xsim, xsim.obs=NULL, ##################################################################################### llik.fun.IS <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ # Obtain canonical parameters incl. 
offsets and difference from sampled-from eta <- ergm.eta(theta, etamap) etaparam <- eta-eta0 @@ -179,9 +180,9 @@ llik.fun.IS <- function(theta, xsim, xsim.obs=NULL, ##################################################################################### llik.fun.median <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ # Convert theta to eta eta <- ergm.eta(theta, etamap) @@ -201,27 +202,27 @@ llik.fun.median <- function(theta, xsim, xsim.obs=NULL, # # This is the log-likelihood ratio (and not its negative) # - - (mb + varweight*sdb*sdb) + - (mb + control.llik$MCMLE.varweight*sdb*sdb) } llik.fun.logtaylor <- function(theta, xsim, xsim.obs=NULL, - varweight=0.5, - dampening=FALSE,dampening.min.ess=100, dampening.level=0.1, - eta0, etamap){ + eta0, etamap, + control.llik=control.logLik.ergm(), + ...){ # Convert theta to eta eta <- ergm.eta(theta, etamap) # Calculate approximation to l(eta) - l(eta0) using a lognormal approximation etaparam <- eta-eta0 # - if (dampening) { + if (control.llik$MCMLE.dampening) { #if theta_extended is (almost) outside convex hull don't trust theta - eta_extended <- eta + etaparam*dampening.level + eta_extended <- eta + etaparam*control.llik$MCMLE.dampening.level expon_extended <- xsim %*% (eta_extended - eta0) wts <- exp(expon_extended) ess <- ceiling(sum(wts)^2/sum(wts^2)) # https://xianblog.wordpress.com/2010/09/24/effective-sample-size/ - if(!is.na(ess) && {ess% lapply.mcmc.list(`-`) - if(!is.null(sm.obs)) sm.obs <- ergm.estfun(sm.obs, theta=coef(object), model=object$etamap) %>% lapply.mcmc.list(`-`) + sm <- ergm.estfun(sm, theta=coef(object), model=object$etamap, exclude=object$control$MCMC.esteq.exclude.statistics) %>% lapply.mcmc.list(`-`) + if(!is.null(sm.obs)) sm.obs <- ergm.estfun(sm.obs, theta=coef(object), model=object$etamap, exclude=object$control$MCMC.esteq.exclude.statistics) %>% lapply.mcmc.list(`-`) } } diff --git a/R/parallel.utils.R b/R/parallel.utils.R index d8297a6b6..67af089b4 100644 --- a/R/parallel.utils.R +++ b/R/parallel.utils.R @@ -86,7 +86,7 @@ ergm.MCMC.packagenames <- local({ #' multiple threads run in parallel. This is done using the #' [OpenMP](https://www.openmp.org/) API. #' -#' However, this introduces a nontrivial amont of computational +#' However, this introduces a nontrivial amount of computational #' overhead. See below for a list of the major factors affecting #' whether it is worthwhile.}} #' diff --git a/R/summary.ergm.R b/R/summary.ergm.R index eb7445bd7..e62ec4289 100644 --- a/R/summary.ergm.R +++ b/R/summary.ergm.R @@ -78,7 +78,7 @@ summary.ergm <- function (object, ..., warn(paste0("This object was fit with ", sQuote("ergm"), " version ", objver, " or earlier. Summarizing it with version ", nextver, " or later may return incorrect results or fail.")) } - if("digits" %in% names(list(...))) warn("summary.ergm() no lnger takes a digits= argument.") + if("digits" %in% names(list(...))) warn("summary.ergm() no longer takes a digits= argument.") control <- object$control pseudolikelihood <- object$estimate=="MPLE" independence <- NVL(object$MPLE_is_MLE, is.dyad.independent(object)) diff --git a/R/zzz.R b/R/zzz.R index 2801a5e62..6f3a9b799 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -17,7 +17,7 @@ } .onLoad <- function(libname, pkgname){ - # . is used as a placeholder by stantet.common::NVL3(). + # . is used as a placeholder by statnet.common::NVL3(). 
utils::globalVariables(".") default_options(ergm.eval.loglik=TRUE, diff --git a/man/control.ergm.Rd b/man/control.ergm.Rd index 2caacf260..26fe689b2 100644 --- a/man/control.ergm.Rd +++ b/man/control.ergm.Rd @@ -21,6 +21,7 @@ control.ergm( MPLE.nonident = c("warning", "message", "error"), MPLE.nonident.tol = 1e-10, MPLE.constraints.ignore = FALSE, + MPLE.save.xmat = FALSE, MCMC.prop = trim_env(~sparse), MCMC.prop.weights = "default", MCMC.prop.args = list(), @@ -43,6 +44,7 @@ control.ergm( MCMC.maxedges = Inf, MCMC.addto.se = TRUE, MCMC.packagenames = c(), + MCMC.esteq.exclude.statistics = NULL, SAN.maxit = 4, SAN.nsteps.times = 8, SAN = control.san(term.options = term.options, SAN.maxit = SAN.maxit, SAN.prop.weights @@ -74,11 +76,12 @@ control.ergm( obs.MCMC.impute.default_density = function(nw) 2/network.size(nw), MCMLE.min.depfac = 2, MCMLE.sampsize.boost.pow = 0.5, + MCMLE.varweight = 0.5, MCMLE.MCMC.precision = if (startsWith("confidence", MCMLE.termination[1])) 0.1 else 0.005, MCMLE.MCMC.max.ESS.frac = 0.1, MCMLE.metric = c("lognormal", "logtaylor", "Median.Likelihood", "EF.Likelihood", - "naive"), + "Kpenalty", "naive"), MCMLE.method = c("BFGS", "Nelder-Mead"), MCMLE.dampening = FALSE, MCMLE.dampening.min.ess = 20, @@ -294,6 +297,9 @@ missingness. This can be used to avert evaluating and storing the necessary. Note that this can be very dangerous unless you know what you are doing.} +\item{MPLE.save.xmat}{If \code{TRUE}, the design matrix of change statistics +from the pseudo-likelihood computation will be returned.} + \item{MCMC.prop}{Specifies the proposal (directly) and/or a series of "hints" about the structure of the model being sampled. The specification is in the form of a one-sided formula @@ -388,6 +394,9 @@ algorithm to the estimates' standard errors.} statistic functions in addition to those autodetected. This argument should not be needed outside of very strange setups.} +\item{MCMC.esteq.exclude.statistics}{character string pattern. statistics with names that contain this string will be excluded from +the estimating equations. For example, this supports tapering models which may have terms with non-specific means.} + \item{SAN.maxit}{When \code{target.stats} argument is passed to \code{\link[=ergm]{ergm()}}, the maximum number of attempts to use \code{\link{san}} to obtain a network with statistics close to those specified.} @@ -489,6 +498,9 @@ imputation density when number of non-missing dyads is too low.} \item{MCMLE.min.depfac, MCMLE.sampsize.boost.pow}{When using adaptive MCMC effective size, and methods that increase the MCMC sample size, use \code{MCMLE.sampsize.boost.pow} as the power of the boost amount (relative to the boost of the target effective size), but ensure that sample size is no less than \code{MCMLE.min.depfac} times the target effective size.} +\item{MCMLE.varweight}{numeric multiplier for covariance term in loglikelihood +where 0.5 is the "true" value but this can be increased or decreased.} + \item{MCMLE.MCMC.precision, MCMLE.MCMC.max.ESS.frac}{\code{MCMLE.MCMC.precision} is a vector of upper bounds on the standard errors induced by the MCMC algorithm, expressed as a percentage of the total standard error. The MCMLE algorithm will terminate when the MCMC standard @@ -499,7 +511,9 @@ If effective sample size is used (see \code{MCMC.effectiveSize}), then ergm may increase the target ESS to reduce the MCMC standard error.} \item{MCMLE.metric}{Method to calculate the loglikelihood approximation. 
-See Hummel et al (2010) for an explanation of "lognormal" and "naive".} +See Hummel et al (2010) for an explanation of "lognormal" and "naive". +"Kpenalty" specifies a tapered model with a penalty for +kurtosis. It is used by the \code{ergm.tapered} package and is not usually specified directly by the user.} \item{MCMLE.method}{Deprecated. By default, ergm uses \code{trust}, and falls back to \code{optim} with Nelder-Mead method when \code{trust} fails.} diff --git a/man/control.ergm.bridge.Rd b/man/control.ergm.bridge.Rd index bce01c524..b5e05a773 100644 --- a/man/control.ergm.bridge.Rd +++ b/man/control.ergm.bridge.Rd @@ -26,6 +26,7 @@ control.ergm.bridge( obs.MCMC.prop.args = MCMC.prop.args, MCMC.maxedges = Inf, MCMC.packagenames = c(), + MCMC.esteq.exclude.statistics = NULL, term.options = list(), seed = NULL, parallel = 0, @@ -51,7 +52,7 @@ control.logLik.ergm( obs.MCMC.prop = MCMC.prop, obs.MCMC.prop.weights = MCMC.prop.weights, obs.MCMC.prop.args = MCMC.prop.args, - MCMC.maxedges = NULL, + MCMC.maxedges = Inf, MCMC.packagenames = NULL, term.options = NULL, seed = NULL, @@ -113,6 +114,9 @@ specifying additional arguments to proposal.} statistic functions in addition to those autodetected. This argument should not be needed outside of very strange setups.} +\item{MCMC.esteq.exclude.statistics}{Character string pattern: statistics whose names contain this string will be excluded from +the estimating equations. For example, this supports tapered models, which may have terms with non-specific means.} + \item{term.options}{A list of additional arguments to be passed to term initializers. See \code{\link[=term.options]{? term.options}}.} \item{seed}{Seed value (integer) for the random number generator. See diff --git a/man/control.simulate.ergm.Rd b/man/control.simulate.ergm.Rd index b214a6539..b8a97ea5f 100644 --- a/man/control.simulate.ergm.Rd +++ b/man/control.simulate.ergm.Rd @@ -27,6 +27,7 @@ control.simulate.formula.ergm( MCMC.effectiveSize.order.max = NULL, MCMC.maxedges = Inf, MCMC.packagenames = c(), + MCMC.esteq.exclude.statistics = NULL, MCMC.runtime.traceplot = FALSE, network.output = "network", term.options = NULL, @@ -57,6 +58,7 @@ control.simulate( MCMC.effectiveSize.order.max = NULL, MCMC.maxedges = Inf, MCMC.packagenames = c(), + MCMC.esteq.exclude.statistics = NULL, MCMC.runtime.traceplot = FALSE, network.output = "network", term.options = NULL, @@ -87,6 +89,7 @@ control.simulate.formula( MCMC.effectiveSize.order.max = NULL, MCMC.maxedges = Inf, MCMC.packagenames = c(), + MCMC.esteq.exclude.statistics = NULL, MCMC.runtime.traceplot = FALSE, network.output = "network", term.options = NULL, @@ -118,6 +121,7 @@ control.simulate.ergm( MCMC.effectiveSize.order.max = NULL, MCMC.maxedges = Inf, MCMC.packagenames = NULL, + MCMC.esteq.exclude.statistics = NULL, MCMC.runtime.traceplot = FALSE, network.output = "network", term.options = NULL, @@ -208,6 +212,9 @@ sample size and the variance for the Geweke diagnostic.} statistic functions in addition to those autodetected. This argument should not be needed outside of very strange setups.} +\item{MCMC.esteq.exclude.statistics}{Character string pattern: statistics whose names contain this string will be excluded from +the estimating equations. For example, this supports tapered models, which may have terms with non-specific means.} + \item{MCMC.runtime.traceplot}{Logical: If \code{TRUE}, plot traceplots of the MCMC sample.} @@ -254,7 +261,7 @@ function.
While the others supply a full set of simulation settings, \code{control.simulate.ergm} when passed as a control parameter to \code{\link[=simulate.ergm]{simulate.ergm()}} allows some settings to be -inherited from the ERGM stimation while overriding others. +inherited from the ERGM estimation while overriding others. } \details{ This function is only used within a call to the \code{\link{simulate}} diff --git a/man/ergm-parallel.Rd b/man/ergm-parallel.Rd index cbaef6d46..03f758bd0 100644 --- a/man/ergm-parallel.Rd +++ b/man/ergm-parallel.Rd @@ -114,7 +114,7 @@ sampling by evaluating qualified terms' change statistics in multiple threads run in parallel. This is done using the \href{https://www.openmp.org/}{OpenMP} API. -However, this introduces a nontrivial amont of computational +However, this introduces a nontrivial amount of computational overhead. See below for a list of the major factors affecting whether it is worthwhile.}} diff --git a/man/ergm.estfun.Rd b/man/ergm.estfun.Rd index 5ed1df139..6302603ab 100644 --- a/man/ergm.estfun.Rd +++ b/man/ergm.estfun.Rd @@ -8,15 +8,15 @@ \alias{ergm.estfun.mcmc.list} \title{Compute the Sample Estimating Function Values of an ERGM.} \usage{ -ergm.estfun(stats, theta, model, ...) +ergm.estfun(stats, theta, model, exclude = NULL, ...) -\method{ergm.estfun}{numeric}(stats, theta, model, ...) +\method{ergm.estfun}{numeric}(stats, theta, model, exclude = NULL, ...) -\method{ergm.estfun}{matrix}(stats, theta, model, ...) +\method{ergm.estfun}{matrix}(stats, theta, model, exclude = NULL, ...) -\method{ergm.estfun}{mcmc}(stats, theta, model, ...) +\method{ergm.estfun}{mcmc}(stats, theta, model, exclude = NULL, ...) -\method{ergm.estfun}{mcmc.list}(stats, theta, model, ...) +\method{ergm.estfun}{mcmc.list}(stats, theta, model, exclude = NULL, ...) } \arguments{ \item{stats}{An object representing sample statistics with observed values subtracted out.}
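As a standalone illustration of the effect the new exclude= pattern is meant to have (this is not the package's internal implementation, which is not shown in this diff), dropping matching columns from a matrix of sampled statistics might look like:

# Toy statistics matrix; the column names are made up for illustration.
stats <- matrix(rnorm(30), ncol = 3,
                dimnames = list(NULL, c("edges", "kstar2", "Taper_Penalty")))
exclude <- "Taper"
keep <- !grepl(exclude, colnames(stats), fixed = TRUE)
stats[, keep, drop = FALSE]  # "Taper_Penalty" no longer enters the estimating equations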