From 453c7f3124e3cf2f3642c1a6e60b3ea51107f17b Mon Sep 17 00:00:00 2001
From: Marie-Laure DELIGNETTE-MULLER <mdeligne>
Date: Mon, 20 Nov 2023 10:35:58 +0100
Subject: [PATCH] Add of selectitems in the package

---
 NAMESPACE                                     |  1 +
 NEWS.md                                       |  2 +
 .../techdoc/functions2add => R}/selectitems.R | 36 ++++-----
 .../functions2add => man}/selectitems.Rd      | 76 +++++++++++--------
 share/todolist.md                             |  3 +-
 5 files changed, 65 insertions(+), 53 deletions(-)
 rename {share/techdoc/functions2add => R}/selectitems.R (60%)
 rename {share/techdoc/functions2add => man}/selectitems.Rd (67%)
diff --git a/NAMESPACE b/NAMESPACE
index 06f2be3d..aaf155e7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -63,6 +63,7 @@ export(trendplot)
 export(sensitivityplot)
 export(bmdplot)
 export(selectgroups)
+export(selectitems)
 export(PCAdataplot)
 export(formatdata4DRomics)
 
diff --git a/NEWS.md b/NEWS.md
index 57069379..7639dcd1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,8 @@ NEW FEATURES
 - Add of xlab and ylab to plots from curvesplot() (signal or scaled signal for y-axis) and change the color lab in "scaled signal" in plots from bmdplotwithgradient() when the signal is scaled.
 - Add the possibility (new argument addBMD of curvesplot()) to add points at BMD-BMR values on curvesplots.
 - Add the Peer Community Journal citation.
+- Add the function selectitems(), which provides filters to retain
+only the items associated with the best-estimated BMD values in the DRomics workflow output.
 
 BUG FIXES
 
diff --git a/share/techdoc/functions2add/selectitems.R b/R/selectitems.R
similarity index 60%
rename from share/techdoc/functions2add/selectitems.R
rename to R/selectitems.R
index 64ec39d5..d69afcf5 100644
--- a/share/techdoc/functions2add/selectitems.R
+++ b/R/selectitems.R
@@ -1,65 +1,65 @@
-selectitems <- function(extendedres,
+selectitems <- function(res,
                         BMDfilter = c("definedCI", "finiteCI", "definedBMD", "none"),
                         BMDtype = c("zSD", "xfold")
                         )
 {
-  if (missing(extendedres) | !is.data.frame(extendedres))
+  if (missing(res) | !is.data.frame(res))
     stop("The first argument of selectitems must be a dataframe 
     (see ?selectitems for details).")
   
   BMDfilter <- match.arg(BMDfilter, c("definedCI", "finiteCI", "definedBMD", "none"))
   BMDtype <- match.arg(BMDtype, c("zSD", "xfold"))
-  cnames <- colnames(extendedres)
+  cnames <- colnames(res)
   
   # Definition of the filter to apply 
   if ((BMDtype == "zSD") & (BMDfilter != "none"))
   {  
     if (any(!is.element(c("BMD.zSD"), cnames)))
       stop("The first argument of selectitems must be a dataframe
-      containing at least columns named id and BMD.zSD.")
-    BMD <- extendedres$BMD.zSD
+      containing a column named BMD.zSD.")
+    BMD <- res$BMD.zSD
     if ((BMDfilter ==  "definedCI") | (BMDfilter ==  "finiteCI"))
     {
       if (any(!is.element(c("BMD.zSD.upper", "BMD.zSD.lower"), cnames)) )
       stop("To apply a filter on BMD.zSD confidence intervals, the first argument of selectitems 
-      must be a dataframe containing at least columns named id and BMD.zSD, BMD.zSD.lower, BMD.zSD.upper.")
-      BMDupper <- extendedres$BMD.zSD.upper
-      BMDlower <- extendedres$BMD.zSD.lower
+      must be a dataframe containing columns named BMD.zSD, BMD.zSD.lower, BMD.zSD.upper.")
+      BMDupper <- res$BMD.zSD.upper
+      BMDlower <- res$BMD.zSD.lower
     }
   } else #so if (BMDtype == "xfold")
   if ((BMDtype == "xfold") & (BMDfilter != "none"))
   {
     if (any(!is.element(c("BMD.xfold"), cnames)))
       stop("The first argument of selectitems must be a dataframe
-      containing at least columns named id and BMD.xfold.")
-    BMD <- extendedres$BMD.xfold
+      containing a column named BMD.xfold.")
+    BMD <- res$BMD.xfold
     if ((BMDfilter ==  "definedCI") | (BMDfilter ==  "finiteCI"))
     {
       if (any(!is.element(c("BMD.xfold.upper","BMD.xfold.lower"), cnames)))
         stop("To apply a filter on BMD.xfold confidence intervals, the first argument of selectitems 
-      must be a dataframe containing at least columns named id and BMD.xfold, BMD.xfold.lower, BMD.xfold.upper.")
-      BMDupper <- extendedres$BMD.xfold.upper
-      BMDlower <- extendedres$BMD.xfold.lower
+      must be a dataframe containing columns named BMD.xfold, BMD.xfold.lower, BMD.xfold.upper.")
+      BMDupper <- res$BMD.xfold.upper
+      BMDlower <- res$BMD.xfold.lower
     }
   }
   
   # Filtering
   if (BMDfilter == "definedCI")
   {
-    subextendedres <- extendedres[!is.na(BMD) & !is.na(BMDupper) & !is.na(BMDlower), ]
+    subres <- res[!is.na(BMD) & !is.na(BMDupper) & !is.na(BMDlower), ]
   } else
     if (BMDfilter == "finiteCI")
     {
-      subextendedres <- extendedres[is.finite(BMD) & is.finite(BMDupper) & is.finite(BMDlower), ]
+      subres <- res[is.finite(BMD) & is.finite(BMDupper) & is.finite(BMDlower), ]
     } else
       if (BMDfilter == "definedBMD")
       {
-        subextendedres <- extendedres[!is.na(BMD), ]
+        subres <- res[!is.na(BMD), ]
       } else
         if (BMDfilter == "none")
         {
-          subextendedres <- extendedres
+          subres <- res
         }  
-  return(subextendedres)
+  return(subres)
 }
 
diff --git a/share/techdoc/functions2add/selectitems.Rd b/man/selectitems.Rd
similarity index 67%
rename from share/techdoc/functions2add/selectitems.Rd
rename to man/selectitems.Rd
index cacc9d09..beb5522c 100644
--- a/share/techdoc/functions2add/selectitems.Rd
+++ b/man/selectitems.Rd
@@ -1,10 +1,10 @@
 \name{selectitems}
 \alias{selectitems}
-\title{BMD plot optionally with confidence intervals on BMD}
+\title{Selection of items to be retained in DRomics workflow output}
 
 \description{
 Selection of items (e.g. transcripts, metabolites, ...)
-to keep from the output of the DRomics workflow, for further biological annotation and interpretation.
+to be retained in DRomics workflow output for further biological annotation and interpretation.
 }
 
 \usage{
@@ -14,31 +14,31 @@ selectitems(res,
 }
 
 \arguments{
-\item{res}{the dataframe of results provided by 
+\item{res}{The dataframe of results provided by 
  \code{\link{bmdboot}} or \code{\link{bmdcalc}} (\code{res}) 
- or a subset of this data frame (selected lines). 
+ or a subset of this data frame. 
  
  Even if this function is intended to be used just after the calculation
  of BMD values, before the biological annotation, it can also be used
  within the interpretation workflow, on an extended dataframe
  with additional columns coming for example from the biological annotation of items, 
- and with some lines replicated if their corresponding item has more than one annotation. 
+ and with some lines replicated for items with more than one annotation. 
  
  In any case the dataframe
  must at least contain the column giving the BMD values (\code{BMD.zSD} or \code{BMD.xfold}
- depending of chosen BMDtype), identification of each curve (\code{id}),
- and if \code{BMDfilter} is at \code{"CIdefined"} or \code{"CIfinite"},
+ depending on the chosen BMDtype), identification of each curve (\code{id}),
+ and if \code{BMDfilter} is set to \code{"definedCI"} or \code{"finiteCI"},
  the columns \code{BMD.zSD.lower}, \code{BMD.zSD.upper} or
- \code{BMD.xfold.lower}, \code{BMD.xfold.upper} depending of the argument \code{BMDtype}.}
+ \code{BMD.xfold.lower}, \code{BMD.xfold.upper} depending on the argument \code{BMDtype}.}
 
 \item{BMDfilter}{If not \code{"none"}, the type of filter applied, based on BMD estimation.
 If \code{"definedCI"} (default choice), all items for which point and interval estimates
-of the BMD were successfully calculated are kept 
+of the BMD were successfully calculated are retained 
 (so items for which the bootstrap procedure failed are excluded). 
 If \code{"finiteCI"}, all items for which point and interval estimates of the BMD 
-were successfully calculated and gave values within the range of tested/observed doses.
+were successfully calculated and gave values within the range of tested/observed doses are retained.
 If \code{"definedBMD"}, all items for which the point estimate of the BMD 
-were estimated at a value within the range of tested/observed doses. 
+was estimated at a value within the range of tested/observed doses are retained. 
 }
 
 \item{BMDtype}{The type of BMD used for the previously
@@ -47,25 +47,25 @@ described filtering procedure, \code{"zSD"} (default choice) or \code{"xfold"}.}
 }
 
 \details{
-Using the argument \code{BMDfilter} three filters are proposed to keep, from the modeling 
-DRomics workflow, only the items associated to the best well-estimated BMD values.
-By default we recommend to keep only the items for which the BMD and its
-confidence interval are defined (using \code{"CIdefined"}). 
+Using the argument \code{BMDfilter} three filters are proposed to retain
+only the items associated to the best estimated BMD values.
+By default we recommend to retain only the items for which the BMD and its
+confidence interval are defined (using \code{"definedCI"}) 
 (so excluding items for which the bootstrap procedure failed).
 One can be even more restrictive by 
-keeping items only if the BMD confidence interval is within the range of
+retaining items only if the BMD confidence interval is within the range of
 tested/observed doses (using \code{"CIfinite"}), or less restrictive 
-(using \code{"BMDIdefined"}) only requiring the BMD
-point estimate to be defined within the range of tested/observed doses 
-(if it is not it is coded as \code{NA} in the results dataframe from 
-\code{\link{bmdboot}} or \code{\link{bmdcalc}}.
+(using \code{"definedBMD"}) requiring only that the BMD
+point estimate be defined within the range of tested/observed doses 
+(let us recall that in the \code{\link{bmdcalc}} output, 
+if it is not the case the BMD is coded \code{NA}).
 
 We propose an option \code{"none"} only in case, in the future, we add
 other filters not based on the BMD.
 }
 
 \value{ 
-   a dataframe corresponding to a subset of res given in input, that 
+   A dataframe corresponding to a subset of res given in input, that 
    can be used for biological annotation and further exploration.
 }
 
@@ -80,7 +80,10 @@ Marie-Laure Delignette-Muller
 
 \examples{
 
-# (1) a toy example (a very small subsample of a microarray data set) 
+# (1) a toy example 
+# on a very small subsample of a microarray data set
+# and a very small number of bootstrap iterations 
+# (clearly not sufficient, but it is just for illustration)
 #
 datafilename <- system.file("extdata", "transcripto_very_small_sample.txt",
                             package = "DRomics")
@@ -101,35 +104,42 @@ set.seed(1234) # to get reproducible results with a so small number of iteration
 # !!!! but the run will be longer
 
 ### (1.a) Examples on BMD.xfold (with some undefined BMD.xfold values)
-# plot of BMDs with no filtering
+
+# Plot of BMDs with no filtering
 subres <- selectitems(b$res, BMDfilter = "none")
 bmdplot(subres, BMDtype = "xfold", point.size = 3, add.CI = TRUE)
-# keeping all items with defined BMD point estimate
+
+# Plot of items with defined BMD point estimate
 subres <- selectitems(b$res, BMDtype = "xfold", BMDfilter = "definedBMD")
 bmdplot(subres, BMDtype = "xfold", point.size = 3, add.CI = TRUE)
-# keeping all items with defined BMD point estimate and CI bounds
+
+# Plot of items with defined BMD point estimate and CI bounds
 subres <- selectitems(b$res, BMDtype = "xfold", BMDfilter = "definedCI")
 bmdplot(subres, BMDtype = "xfold", point.size = 3, add.CI = TRUE)
-# keeping all items with finite BMD point estimate and CI bounds
+
+# Plot of items with finite BMD point estimate and CI bounds
 subres <- selectitems(b$res, BMDtype = "xfold", BMDfilter = "finiteCI") 
 bmdplot(subres, BMDtype = "xfold", point.size = 3, add.CI = TRUE)
 
+\donttest{
+
 ### (1.b) Examples on BMD.zSD (with no undefined BMD.zSD values)
-# plot of BMDs with no filtering
+
+# Plot of BMDs with no filtering
 subres <- selectitems(b$res, BMDfilter = "none")
 bmdplot(subres, BMDtype = "zSD", point.size = 3, add.CI = TRUE)
-# keeping all items with defined BMD point estimate
+
+# Plot of items with defined BMD point estimate (same items as without filtering in this example)
 subres <- selectitems(b$res, BMDtype = "zSD", BMDfilter = "definedBMD")
 bmdplot(subres, BMDtype = "zSD", point.size = 3, add.CI = TRUE)
-# keeping all items with defined BMD point estimate and CI bounds
+
+# Plot of items with defined BMD point estimate and CI bounds
 subres <- selectitems(b$res, BMDtype = "zSD", BMDfilter = "definedCI")
 bmdplot(subres, BMDtype = "zSD", point.size = 3, add.CI = TRUE)
-# keeping all items with finite BMD point estimate and CI bounds
+
+# Plot of items with finite BMD point estimate and CI bounds
 subres <- selectitems(b$res, BMDtype = "zSD", BMDfilter = "finiteCI") 
 bmdplot(subres, BMDtype = "zSD", point.size = 3, add.CI = TRUE)
-
-
-\donttest{
 }
 
 }
diff --git a/share/todolist.md b/share/todolist.md
index 5ba8858f..a4036f62 100644
--- a/share/todolist.md
+++ b/share/todolist.md
@@ -17,7 +17,6 @@ of this modification at the launch of the package ? Ajouter un encart en haut de
 1. [X] Add an explanation of minBMD in the vignette, in ?bmdcalcl and in the step 4 of Shiny app. 1 (ML)
 1. [X] Gérer le souci des décimales dans le sensitivityplot (taille de points) : faire un meilleur choix des valeurs à afficher (sur ech log si effectifs très diff) (ML)
 1. [X] Dans les curvesplot, en option, ajouter un point là où la BMD est atteinte (ML - still to add an example in the vignette)
-1. [ ] Ajouter des colonnes à la sortie de DRomics via bmdboot (defined.BMD.zSD, finite.CI.BMD.zSD, ...) (ML) Ellis et Sophie y sont favorables mais je diffère car ça m'embête de les ajouter et de ne pas les prendre en compte dans selectgroups(), alors qu'en pratique il me paraît nécessaire de faire a minima une sélection sur BMDdefined. A REDISCUTER !!!!!!!!!!!!!!!!
 1. [X] Ajouter la publi PCI partout quand elle sera sortie (ML et A)
 1. [X] Mettre un bouton d'aide i à côté du keep all experimental levels dans shiny (appli DRomicsInterpreter, step 2, helplabel2step2 dans global.R) (ML, A)
 1. [ ] Faire en sorte que l'on puisse appliquer plot(f, items = "unseul", BMDoutput = bootstrapfaitjusteaveccetitem) (ML)
@@ -28,7 +27,7 @@ of this modification at the launch of the package ? Ajouter un encart en haut de
 1. [ ] Ajouter des arguments line.alpha et line.size et point.alpha à sensitivityplot(ML)
 1. [ ] Ajouter des options "boxplot" dans BMDsummary de sensitivityplot(A)
 1. [ ] dans les applis shiny et la vignette enlever les fonds gris avec un +theme_bw() quand le theme n'est pas défini dans la fonction (ML puis A)
-1. [ ] Ajouter une fonction selectitems permettant de filtrer les sorties de DRomics notamment sur la base des résultats du bootstrap (par défaut on ne garde 
+1. [X] Ajouter une fonction selectitems permettant de filtrer les sorties de DRomics notamment sur la base des résultats du bootstrap (par défaut on ne garde 
 que les items avec BMD et IC de la BMD définis, pour la BMD-zSD par défaut) - penser à faire de la prog défensive si les utilisateurs ne mettent pas le bootstrap, filtre possible sur autre chose.... (ML)
 1. [ ] revoir l'argument remove.infinite de plot.bmdboot, sa valeur par défaut et son fonctionnement, pour que ce soit harmonisé avec les valeurs par défaut 
 de selectitems (pb avec xfold, vérifier que ça marche bien) ou enlever cette fonctionnalité et considérer que c'est fait avec selectitems et quand ce n'est aps fait gérer graphiquement les infinis