Skip to content

Commit

Permalink
Merge branch 'main' of github.com:openproblems-bio/task_grn_inference
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 3, 2024
2 parents e6d3d1d + 72aa8b1 commit 25cabcc
Showing 1 changed file with 241 additions and 0 deletions.
241 changes: 241 additions & 0 deletions src/methods/granie/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@

set.seed(42)

library(Seurat)
library(Signac)
library(Matrix)
library(GRaNIEverse)
library(GRaNIE)
library(qs)
library(BSgenome.Hsapiens.UCSC.hg38)
library(EnsDb.Hsapiens.v86)
library(EnsDb.Mmusculus.v79)
library(BSgenome.Mmusculus.UCSC.mm39)
library(dplyr)

## VIASH START
par <- list(
file_rna = "resources_test/grn-benchmark/multiomics_r/rna.rds",
files_atac = "resources_test/grn-benchmark/multiomics_r/atac.rds",
temp_dir = "output/granie/",
preprocessing_clusteringMethod = 1, # Seurat::FindClusters: (1 = original Louvain algorithm, 2 = Louvain algorithm with multilevel refinement, 3 = SLM algorithm, 4 = Leiden algorithm)
preprocessing_clusterResolution = 14, # Typically between 5 and 20
preprocessing_SCT_nDimensions = 50, # Default 50
genomeAssembly = "hg38",
GRaNIE_corMethod = "spearman",
GRaNIE_includeSexChr = TRUE,
GRaNIE_promoterRange = 250000,
GRaNIE_TF_peak.fdr.threshold = 0.2,
GRaNIE_peak_gene.fdr.threshold = 0.2,
GRaNIE_nCores = 4,
peak_gene = "output/granie/peak_gene.csv", # not yet implemented, should I?
prediction= "output/granie/prediction.csv",
useWeightingLinks = FALSE,
forceRerun = FALSE
)

print(par)
# meta <- list(
# functionality_name = "my_method_r"
# )
## VIASH END

#### STANDARD ASSIGNMENTS ###
file_seurat = "seurat_granie.qs"
outputDir = par$temp_dir

if (!dir.exists(outputDir)) {
dir.create(outputDir, recursive = TRUE)
}

setwd(outputDir)


#########################
# Downloading resources #
#########################
file_hocomoco_v12 = "https://s3.embl.de/zaugg-web/GRaNIE/TFBS/hg38/PWMScan_HOCOMOCOv12_H12INVIVO.tar.gz"

destfile <- "PWMScan_HOCOMOCOv12_H12INVIVO.tar.gz"

if (!file.exists(destfile)) {

options(timeout = 1200)
download.file(file_hocomoco_v12, destfile)
}

# Define the directory to extract the files to
exdir <- "PWMScan_HOCOMOCOv12_H12INVIVO"

GRaNIE_TFBSFolder = paste0(exdir, "/PWMScan_HOCOMOCOv12/H12INVIVO")

if (!file.exists(GRaNIE_TFBSFolder)) {
untar(destfile, exdir = exdir)
}

if (par$genomeAssembly == "hg38"){
file_RNA_URL = "https://s3.embl.de/zaugg-web/GRaNIEverse/features_RNA_hg38.tsv.gz"

} else if (par$genomeAssembly == "mm10") {
file_RNA_URL = "https://s3.embl.de/zaugg-web/GRaNIEverse/features_RNA_mm10.tsv.gz"
}

file_RNA <- paste0("features_RNA_", par$genomeAssembly, ".tsv.gz")
if (!file.exists(file_RNA)) {
options(timeout = 1200)
download.file(file_RNA_URL, file_RNA)
}


###################
# Preprocess data #
###################

if (par.l$forceRerun | !file.exists(file_seurat)) {

# Sparse matrix
rna.m = readRDS(par$file_rna)

seurat_object <- CreateSeuratObject(count = rna.m, project = "PBMC", min.cells = 1, min.features = 1, assay = "RNA")

# RangedSummarizedExperiment
atac = readRDS(par$file_atac)

# Extract counts and metadata from the RangedSummarizedExperiment
atac_counts <- assays(atac)$counts

rownames(atac_counts) = paste0(seqnames(rowRanges(atac)) %>% as.character(), ":", start(rowRanges(atac)), "-", end(rowRanges(atac)))

# Create a ChromatinAssay
chrom_assay <- CreateChromatinAssay(
counts = atac_counts,
sep = c(":", "-"),
genome = 'hg38',
fragments = NULL,
min.cells = 1,
min.features = 1,
colData = DataFrame(colData(atac))
)

if (par$genomeAssembly == "hg38"){
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)

} else if (par$genomeAssembly == "mm10") {
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
}

seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- par$genomeAssembly
Annotation(chrom_assay) <- annotations

# Unify cells
# Identify the common cells between the RNA and ATAC assays
common_cells <- intersect(colnames(seurat_object[["RNA"]]), colnames(chrom_assay))

# Subset the Seurat object to include only the common cells
chrom_assay <- subset(chrom_assay, cells = common_cells)

seurat_object[["peaks"]] = chrom_assay

qs::qsave(seurat_object, "seurat_granie.qs")

} else {

seurat_object = qs::qread(file_seurat)

}

output_seuratProcessed = paste0(outputDir, "/seuratObject.qs")
if (!file.exists(output_seuratProcessed)) {
prepareData = TRUE
} else {
prepareData = FALSE
}


###################
# Preprocess data #
###################

# Take output from preprocessing steps
GRaNIE_file_peaks = paste0(outputDir, "/atac.pseudobulkFromClusters_res", par$preprocessing_clusterResolution, "_mean.tsv.gz")
GRaNIE_file_rna = paste0(outputDir, "/rna.pseudobulkFromClusters_res", par$preprocessing_clusterResolution, "_mean.tsv.gz")
GRaNIE_file_metadata = paste0(outputDir, "/metadata_res", par$preprocessing_clusterResolution, "_mean.tsv.gz")

if (file.exists(GRaNIE_file_peaks) & file.exists(GRaNIE_file_metadata) & file.exists(GRaNIE_file_rna) & !par.l$forceRerun) {

cat("Preprocessing skipped because all files alreadx exist anf forceRerun = FALSE.")

} else {

seurat_object = prepareSeuratData_GRaNIE(seurat_object,
outputDir = par$outputDir,
file_RNA_features = file_RNA,
assayName_RNA_raw = "RNA", assayName_ATAC = "peaks",
prepareData = prepareData,
SCT_nDimensions = par$preprocessing_SCT_nDimensions,
dimensionsToIgnore_LSI_ATAC = 1,
pseudobulk_source = "cluster",
countAggregation = "mean",
clusteringAlgorithm = par$preprocessing_clusteringMethod,
clusterResolutions = par$preprocessing_clusterResolution,
saveSeuratObject = TRUE)

}



##############
# Run GRaNIE #
##############

GRN = runGRaNIE(
dir_output = par$temp_dir,
datasetName = "undescribed",
GRaNIE_file_peaks,
GRaNIE_file_rna,
GRaNIE_file_metadata,
TFBS_source = "custom",
TFBS_folder = GRaNIE_TFBSFolder,
genomeAssembly = par$genomeAssembly,
normalization_peaks = "none",
idColumn_peaks = "peakID",
normalization_rna = "none",
idColumn_RNA = "ENSEMBL",
includeSexChr = par$GRaNIE_includeSexChr,
minCV = 0,
minNormalizedMean_peaks = NULL,
minNormalizedMean_RNA = NULL,
minSizePeaks = 5,
corMethod = par$GRaNIE_corMethod,
promoterRange = par$GRaNIE_promoterRange,
useGCCorrection = FALSE,
TF_peak.fdr.threshold = par$GRaNIE_TF_peak.fdr.threshold,
peak_gene.fdr.threshold = par$GRaNIE_peak_gene.fdr.threshold,
runTFClassification = FALSE,
runNetworkAnalyses = FALSE,
nCores = par$GRaNIE_nCores,
forceRerun = TRUE
)

# Post-process GRN
connections.df = getGRNConnections(GRN,
include_TF_gene_correlations = TRUE,
include_peakMetadata = TRUE,
include_TFMetadata = TRUE,
include_geneMetadata = TRUE)

final.df = connections.df %>%
dplyr::select(TF.name, gene.name, TF_gene.r) %>%
dplyr::rename(source = TF.name, target = gene.name)

if (par$useWeightingLinks) {
final.df = dplyr::mutate(final.df, weight = abs(.data$TF_gene.r))
} else {
final.df = dplyr::mutate(final.df, weight = 1)
}

final.df %>%
dplyr::select(source, target, weight) %>%
readr::write_csv(par$prediction)

0 comments on commit 25cabcc

Please sign in to comment.