Skip to content

Commit

Permalink
Update readme and add some examples with benchdamic
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Sep 20, 2024
1 parent 19b473f commit 77d57d4
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 0 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ BiocManager::install("waldronlab/MicrobiomeBenchmarkDataAnalyses")
+ R package issues: https://github.com/waldronlab/MicrobiomeBenchmarkData/issues
+ Scripts for preparing the datasets: https://github.com/waldronlab/MicrobiomeBenchmarkDataPrep
+ Code for reproducibility of the analyses: https://waldronlab.io/MicrobiomeBenchmarkDataAnalyses


## TODO

+ Create a custom function for plotting enrichment and positives.
74 changes: 74 additions & 0 deletions inst/scripts/example.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
library(benchdamic)
library(ggpubr)
data("ps_plaque_16S")
data("microbial_metabolism")

# Extract genera from the phyloseq tax_table slot
genera <- phyloseq::tax_table(ps_plaque_16S)[, "GENUS"]
# Genera as rownames of microbial_metabolism data.frame
rownames(microbial_metabolism) <- microbial_metabolism$Genus
# Match OTUs to their metabolism
priorInfo <- data.frame(genera,
"Type" = microbial_metabolism[genera, "Type"])
# Unmatched genera becomes "Unknown"
unknown_metabolism <- is.na(priorInfo$Type)
priorInfo[unknown_metabolism, "Type"] <- "Unknown"
priorInfo$Type <- factor(priorInfo$Type)
# Add a more informative names column
priorInfo[, "newNames"] <- paste0(rownames(priorInfo), priorInfo[, "GENUS"])

# Add some normalization/scaling factors to the phyloseq object
my_norm <- setNormalizations(fun = c("norm_edgeR", "norm_CSS"),
method = c("TMM", "CSS"))
ps_plaque_16S <- runNormalizations(normalization_list = my_norm,
object = ps_plaque_16S)

# Initialize some limma based methods
my_limma <- set_limma(design = ~ 1 + RSID + HMP_BODY_SUBSITE,
coef = "HMP_BODY_SUBSITESupragingival Plaque",
norm = c("TMM", "CSS"))

# Make sure the subject ID variable is a factor
phyloseq::sample_data(ps_plaque_16S)[, "RSID"] <- as.factor(
phyloseq::sample_data(ps_plaque_16S)[["RSID"]])

# Perform DA analysis
Plaque_16S_DA <- runDA(method_list = my_limma, object = ps_plaque_16S)



## Fitlering by adjusted P-value of 0.1 and 0 logFC
enrichment1 <- createEnrichment(
object = Plaque_16S_DA,
priorKnowledge = priorInfo, enrichmentCol = "Type", namesCol = "GENUS",
slot = "pValMat", colName = "adjP", type = "pvalue", direction = "logFC",
threshold_pvalue = 0.1, threshold_logfc = 0, top = NULL, verbose = TRUE
)
p1 <- plotEnrichment(enrichment1, enrichmentCol = "Type")

## Filtering by raw P-value of 0.1 and 0 logFC
enrichment2 <- createEnrichment(
object = Plaque_16S_DA,
priorKnowledge = priorInfo, enrichmentCol = "Type", namesCol = "GENUS",
slot = "pValMat", colName = "rawP", type = "pvalue", direction = "logFC",
threshold_pvalue = 0.1, threshold_logfc = 0, top = NULL, verbose = TRUE
)
p2 <- plotEnrichment(enrichment2, enrichmentCol = "Type")


## Filtering by threshold of p-value of 0? (ignored?) and 0 logFC
enrichment3 <- createEnrichment(
object = Plaque_16S_DA,
priorKnowledge = priorInfo, enrichmentCol = "Type", namesCol = "GENUS",
slot = "statInfo", colName = "logFC", type = "logfc", direction = "logFC",
threshold_pvalue = 0, threshold_logfc = 0, top = NULL, verbose = TRUE
)
p3 <- plotEnrichment(enrichment3, enrichmentCol = "Type")

pL <- list(p1, p2, p3)
ggarrange(plotlist = pL, nrow = 1)





78 changes: 78 additions & 0 deletions inst/scripts/example2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
library(benchdamic)
library(ggpubr)
data("ps_plaque_16S")
data("microbial_metabolism")

# Extract genera from the phyloseq tax_table slot
genera <- phyloseq::tax_table(ps_plaque_16S)[, "GENUS"]
# Genera as rownames of microbial_metabolism data.frame
rownames(microbial_metabolism) <- microbial_metabolism$Genus
# Match OTUs to their metabolism
priorInfo <- data.frame(genera,
"Type" = microbial_metabolism[genera, "Type"])
# Unmatched genera becomes "Unknown"
unknown_metabolism <- is.na(priorInfo$Type)
priorInfo[unknown_metabolism, "Type"] <- "Unknown"
priorInfo$Type <- factor(priorInfo$Type)
# Add a more informative names column
priorInfo[, "newNames"] <- paste0(rownames(priorInfo), priorInfo[, "GENUS"])

# Add some normalization/scaling factors to the phyloseq object
my_norm <- setNormalizations(fun = c("norm_edgeR", "norm_CSS"),
method = c("TMM", "CSS"))
ps_plaque_16S <- runNormalizations(normalization_list = my_norm,
object = ps_plaque_16S)
# Initialize some limma based methods
my_limma <- set_limma(design = ~ 1 + RSID + HMP_BODY_SUBSITE,
coef = "HMP_BODY_SUBSITESupragingival Plaque",
norm = c("TMM", "CSS"))

# Make sure the subject ID variable is a factor
phyloseq::sample_data(ps_plaque_16S)[, "RSID"] <- as.factor(
phyloseq::sample_data(ps_plaque_16S)[["RSID"]])

# Perform DA analysis
Plaque_16S_DA <- runDA(method_list = my_limma, object = ps_plaque_16S)

# Count TPs and FPs, from the top 1 to the top 20 features.
# As direction is supplied, features are ordered by "logFC" absolute values.


## is ranking based on raw pvalue?
positives1 <- createPositives(
object = Plaque_16S_DA,
priorKnowledge = priorInfo, enrichmentCol = "Type",
namesCol = "newNames",
slot = "pValMat", colName = "rawP",
type = "pvalue", direction = "logFC",
threshold_pvalue = 1,
threshold_logfc = 0,
top = 1:20, alternative = "greater",
verbose = FALSE,
TP = list(c("DOWN Abundant", "Anaerobic"), c("UP Abundant", "Aerobic")),
FP = list(c("DOWN Abundant", "Aerobic"), c("UP Abundant", "Anaerobic"))
)
p1 <- plotPositives(positives = positives1)


## Is ranking based on logFC?
positives2 <- createPositives(
object = Plaque_16S_DA,
priorKnowledge = priorInfo, enrichmentCol = "Type",
namesCol = "newNames",
slot = "statInfo", colName = "logFC",
type = "logfc", direction = "logFC",
threshold_pvalue = 1,
threshold_logfc = 0,
top = 1:20, alternative = "greater",
verbose = FALSE,
TP = list(c("DOWN Abundant", "Anaerobic"), c("UP Abundant", "Aerobic")),
FP = list(c("DOWN Abundant", "Aerobic"), c("UP Abundant", "Anaerobic"))
)
p2 <- plotPositives(positives = positives2)


pL <- list(p1, p2)
ggarrange(plotlist = pL, nrow = 1)

sessioninfo::session_info()

0 comments on commit 77d57d4

Please sign in to comment.