Updated vignettes.

SydneyBioX · Jul 27, 2024 · af7438e · af7438e
1 parent dc17141
commit af7438e
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 32 deletions.
diff --git a/man/figures/Fig1_schematic.png b/man/figures/Fig1_schematic.png
diff --git a/man/figures/clustSIGNAL_hex.png b/man/figures/clustSIGNAL_hex.png
diff --git a/vignettes/seqFISH_mouseEmbryo.Rmd → vignettes/1_seqFISH_mouseEmbryo.Rmd b/vignettes/seqFISH_mouseEmbryo.Rmd → vignettes/1_seqFISH_mouseEmbryo.Rmd
@@ -1,9 +1,15 @@
 ---
 title: "Single sample analysis"
+author:
+  - Pratibha Panwar
 date: "`r Sys.Date()`"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Getting started: clustSIGNAL}
+output:
+  BiocStyle::html_document:
+    toc_float: true
+  BiocStyle::pdf_document: default
+package: clustSIGNAL
+vignette: |
+  %\VignetteIndexEntry{clustSIGNAL: Single sample analysis}
   %\VignetteEngine{knitr::knitr}
   %\VignetteEncoding{UTF-8}
 ---
@@ -41,14 +47,13 @@ names(colData(spe))
 
 To run clustSIGNAL, we need the column names of sample and cell labels in the colData dataframe of the spatial experiment object. Here, the cell labels are in the column 'uniqueID' and sample labels are in 'embryo' column. 
 
-# Run clustSIGNAL
+# Running clustSIGNAL
 
 ```{r}
 set.seed(100)
 samples = "embryo"
 cells = "uniqueID"
-dimRed = "PCA"
-res_emb = clustSIGNAL(spe, samples, cells, dimRed, cluster.fun = "leiden", outputs = "a")
+res_emb = clustSIGNAL(spe, samples, cells, outputs = "a")
 ```
 
 The output variable is a list that can contain dataframe of cluster names, matrix of NN neighbours of each cell, final spe object, or a combination of these, depending on the choice of 'outputs' selected.
@@ -66,7 +71,7 @@ spe = res_emb$spe_final
 spe
 ```
 
-# Calculate clustering metrics
+# Calculating clustering metrics
 
 ```{r}
 # calculating silhouette width
@@ -87,7 +92,7 @@ as.data.frame(colData(spe)) %>%
             mean_Entropy = mean(entropy))
 ```
 
-# Visualise clustSIGNAL outputs
+# Visualising clustSIGNAL outputs
 
 ```{r}
 colors = c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C", 
@@ -100,14 +105,14 @@ colors = c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C",
            "#DA5921", "#E1C239", "#9DD84A")
 ```
 
-The entropy measures can be very useful in assessing the tissue composition of samples - homogeneous (low entropy) with domain-like regions, or heterogeneous (high entropy) with uniform distribution of cells.
+## Entropy spread and distribution
 
 ```{r}
 # Histogram of entropy spread
 hst_ent <- as.data.frame(colData(spe)) %>%
   ggplot(aes(entropy)) +
   geom_histogram(binwidth = 0.05) +
-  ggtitle("Entropy spread of regions") +
+  ggtitle("A") +
   labs(x = "Entropy", y = "Number of regions") +
   theme_grey() +
   theme(text = element_text(size = 12))
@@ -120,15 +125,17 @@ spt_ent <- as.data.frame(colData(spe)) %>%
              aes(colour = entropy)) +
   scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
   scale_size_continuous(range = c(0, max(spe$entropy))) +
-  ggtitle("Spatial distribution of region entropy") +
+  ggtitle("B") +
   labs(x = "x-coordinate", y = "y-coordinate") +
   theme_classic() +
   theme(text = element_text(size = 12))
 
 hst_ent + spt_ent
 ```
 
-We can also visualize the spatial plot and entropy distribution of the clusters, which provide spatial context of the cells and their neighbourhoods, as well as the compositions of the neighbouhoods.
+The spread (A) and spatial distribution (B) of region entropy measures can be very useful in assessing the tissue composition of samples - low-entropy regions are more homogeneous with a domain-like structure, whereas high-entropy regions are heterogeneous with a more uniform distribution of cells.
+
+## clustSIGNAL clusters visualisation
 
 ```{r}
 df_ent = as.data.frame(colData(spe))
@@ -139,7 +146,7 @@ spt_clust <- df_ent %>%
              y = -spatialCoords(spe)[, 2])) +
   geom_point(size = 0.5, aes(colour = reCluster)) +
   scale_color_manual(values = colors) +
-  ggtitle("Spatial distribution of clusters") +
+  ggtitle("A") +
   labs(x = "x-coordinate", y = "y-coordinate") +
   guides(color = guide_legend(title = "Clusters", 
                               override.aes = list(size = 3))) +
@@ -162,12 +169,14 @@ box_clust <- df_ent %>%
   ggplot(aes(x = reCluster, y = entropy, fill = reCluster)) +
   geom_boxplot() +
   scale_fill_manual(values = colors_ent) +
-  ggtitle("Entropy ditribution of clusters") +
+  ggtitle("B") +
   labs(x = "clustSIGNAL clusters", y = "Entropy") +
   theme_classic() +
   theme(legend.position = "none",
-        text = element_text(size = 15),
-        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
+        text = element_text(size = 12),
+        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
 
 spt_clust + box_clust + patchwork::plot_layout(guides = "collect", widths = c(1, 2))
 ```
+
+The spatial location (A) and entropy distribution (B) of the clusters provide spatial context of the cluster cells and their neighbourhoods, as well as the composition of those neighbourhoods.
diff --git a/vignettes/MERFISH_mouseHypothalamus.Rmd → vignettes/2_MERFISH_mouseHypothalamus.Rmd b/vignettes/MERFISH_mouseHypothalamus.Rmd → vignettes/2_MERFISH_mouseHypothalamus.Rmd
@@ -1,9 +1,15 @@
 ---
 title: "Multisample analysis"
+author:
+  - Pratibha Panwar
 date: "`r Sys.Date()`"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Getting started: clustSIGNAL}
+output:
+  BiocStyle::html_document:
+    toc_float: true
+  BiocStyle::pdf_document: default
+package: clustSIGNAL
+vignette: |
+  %\VignetteIndexEntry{clustSIGNAL: Multisample analysis}
   %\VignetteEngine{knitr::knitr}
   %\VignetteEncoding{UTF-8}
 ---
@@ -41,22 +47,21 @@ names(colData(spe2))
 
 To run clustSIGNAL, we need the column names of sample and cell IDs in the colData dataframe of the spatial experiment object. Here, the cell IDs are in the column 'Cell_ID' and sample IDs are in 'samples' column.
 
-# Run clustSIGNAL
+# Running clustSIGNAL
 
 ```{r}
 set.seed(101)
 samples = "samples"
 cells = "Cell_ID"
-dimRed = "PCA"
-res_hyp = clustSIGNAL(spe2, samples, cells, dimRed, outputs = "a")
+res_hyp = clustSIGNAL(spe2, samples, cells, outputs = "a")
 ```
 
 ```{r}
 spe2 = res_hyp$spe_final
 spe2
 ```
 
-# Calculate clustering metrics
+# Calculating clustering metrics
 
 ```{r}
 samplesList <- levels(spe2[[samples]])
@@ -89,7 +94,7 @@ as.data.frame(colData(spe2)) %>%
             mean_Entropy = mean(entropy))
 ```
 
-# Visualise clustSIGNAL outputs
+# Visualising clustSIGNAL outputs
 
 ```{r}
 colors = c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C", 
@@ -102,14 +107,14 @@ colors = c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C",
            "#DA5921", "#E1C239", "#9DD84A")
 ```
 
-The entropy measures can be very useful in assessing the tissue composition of samples - homogeneous (low entropy) with domain-like regions, or heterogeneous (high entropy) with uniform distribution of cells.
+## Entropy spread and distribution
 
 ```{r}
 # Histogram of entropy spread
 hst_ent <- as.data.frame(colData(spe2)) %>%
   ggplot(aes(entropy)) +
   geom_histogram(binwidth = 0.05) +
-  ggtitle("Entropy spread of regions") +
+  ggtitle("A") +
   facet_wrap(vars(samples), nrow = 1) +
   labs(x = "Entropy", y = "Number of regions") +
   theme_grey() +
@@ -124,15 +129,18 @@ spt_ent <- as.data.frame(colData(spe2)) %>%
   scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
   scale_size_continuous(range = c(0, max(spe2$entropy))) +
   facet_wrap(vars(samples), scales = "free", nrow = 1) +
-  ggtitle("Spatial distribution of region entropy") +
+  ggtitle("B") +
   labs(x = "x-coordinate", y = "y-coordinate") +
   theme_classic() +
-  theme(text = element_text(size = 12))
+  theme(text = element_text(size = 12),
+        axis.text.x = element_text(angle = 90, vjust = 0.5))
 
 hst_ent / spt_ent
 ```
 
-We can also visualize the spatial plot and entropy distribution of the clusters, which provide spatial context of the cells and their neighbourhoods, as well as the compositions of the neighbouhoods.
+In multisample analysis, the spread (A) and spatial distribution (B) of region entropy measures can be useful in assessing and comparing the tissue structure in the samples.
+
+## clustSIGNAL clusters visualisation
 
 ```{r}
 df_ent = as.data.frame(colData(spe2))
@@ -148,7 +156,8 @@ spt_clust <- df_ent %>%
   guides(color = guide_legend(title = "Clusters", 
                               override.aes = list(size = 3))) +
   theme_classic() +
-  theme(text = element_text(size = 12))
+  theme(text = element_text(size = 12),
+        axis.text.x = element_text(angle = 90, vjust = 0.5))
 
 box_clust = list()
 for (s in samplesList) {
@@ -176,11 +185,13 @@ for (s in samplesList) {
     theme_classic() +
     theme(legend.position = "none",
           text = element_text(size = 12),
-          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
+          axis.text.x = element_text(angle = 90, vjust = 0.5))
 }
 
-spt_clust / (patchwork::wrap_plots(box_clust[1:4], nrow = 1) + 
+spt_clust / (patchwork::wrap_plots(box_clust[1:3], nrow = 1) + 
                plot_layout(axes = "collect")) + 
   plot_layout(guides = "collect") +
   plot_annotation(title = "Spatial and entropy distributions of clusters")
 ```
+
+The spatial location (top) and entropy distribution (bottom) of the clusters can be compared in a multisample analysis, providing spatial context of the cluster cells and their neighbourhood compositions in the different samples.