Skip to content

Commit

Permalink
figr image is created
Browse files Browse the repository at this point in the history
  • Loading branch information
matin authored and matin committed Jul 19, 2024
1 parent 57a8fea commit e7915ca
Show file tree
Hide file tree
Showing 10 changed files with 420 additions and 91 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.DS_Store
resources/
output/
bin*
Expand Down
39 changes: 39 additions & 0 deletions dockerfiles/figr/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Image providing the R environment used by the FigR method.
# A single build stage is enough: the previous file repeated the whole stage
# twice, which only produced a discarded, unnamed first stage.
FROM ghcr.io/openproblems-bio/base_r:1.0.2

# System libraries required to compile the R packages installed below
RUN apt-get update && apt-get install -y \
    libcurl4-openssl-dev \
    libxml2-dev \
    libssl-dev \
    && rm -rf /var/lib/apt/lists/*

# CRAN packages, plus the two installers needed for the non-CRAN packages.
# Each R command is kept on one line: a RUN instruction ends at the first
# newline that is not preceded by a backslash.
RUN R -e "install.packages(c('dplyr', 'FNN', 'doParallel', 'BiocManager', 'remotes'), repos='https://cran.rstudio.com/')"

# chromVAR and the hg38 genome are Bioconductor packages, not CRAN packages
RUN R -e "BiocManager::install(c('chromVAR', 'BSgenome.Hsapiens.UCSC.hg38'), ask = FALSE, update = FALSE)"

# BuenColors and FigR are distributed through GitHub only
# NOTE(review): repository slugs taken from the packages' READMEs — confirm.
RUN R -e "remotes::install_github(c('caleblareau/BuenColors', 'buenrostrolab/FigR'), upgrade = 'never')"

# install.packages() only warns when a package cannot be installed, so verify
# explicitly and fail the build if anything is missing.
RUN R -e "pkgs = c('dplyr', 'FNN', 'chromVAR', 'doParallel', 'BuenColors', 'FigR', 'BSgenome.Hsapiens.UCSC.hg38'); missing = setdiff(pkgs, rownames(installed.packages())); if (length(missing) > 0) stop('Missing R packages: ', paste(missing, collapse = ', '))"

# Set the working directory
WORKDIR /workspace

# Default command
CMD ["R"]

30 changes: 30 additions & 0 deletions scripts/run_grn_inference.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Launch the benchmark workflow on Seqera Platform (`tw launch`).
# Writes the run parameters to /tmp/params.yaml and publishes results under a
# timestamped directory so consecutive runs do not overwrite each other.

# Stop on the first error or unset variable so a failed params write never
# results in a launch with stale or incomplete parameters.
set -euo pipefail

RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
# No trailing slash: the paths below add their own separator, and a trailing
# slash here would produce "datasets//neurips-2023-data/..." URIs.
resources_dir="s3://openproblems-data/resources/perturbation_prediction/datasets"
publish_dir="s3://openproblems-data/resources/perturbation_prediction/results/${RUN_ID}"

# The heredoc delimiter is unquoted on purpose: $resources_dir and
# $publish_dir are expanded while the file is written.
cat > /tmp/params.yaml << HERE
param_list:
  - id: neurips-2023-data
    de_train_h5ad: "$resources_dir/neurips-2023-data/de_train.h5ad"
    de_test_h5ad: "$resources_dir/neurips-2023-data/de_test.h5ad"
    id_map: "$resources_dir/neurips-2023-data/id_map.csv"
    layer: clipped_sign_log10_pval
  # - id: neurips-2023-kaggle
  #   de_train_h5ad: "$resources_dir/neurips-2023-kaggle/de_train.h5ad"
  #   de_test_h5ad: "$resources_dir/neurips-2023-kaggle/de_test.h5ad"
  #   id_map: "$resources_dir/neurips-2023-kaggle/id_map.csv"
  #   layer: sign_log10_pval
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

tw launch openproblems-bio/task_perturbation_prediction \
  --revision build/main \
  --pull-latest \
  --main-script target/nextflow/workflows/run_benchmark/main.nf \
  --workspace 53907369739130 \
  --compute-env 6TeIFgV5OY4pJCk8I0bfOh \
  --params-file /tmp/params.yaml \
  --config src/common/nextflow_helpers/labels_tw.config
89 changes: 89 additions & 0 deletions src/common/nextflow_helpers/labels_tw.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Process-scope defaults and label-based resource presets for this config.
process {
// Every process is submitted through the 'awsbatch' executor.
executor = 'awsbatch'

// Default disk space, used unless a label below overrides it
disk = 50.GB

// Retry only while this is attempt 1 or 2 AND the task exited with status 137
// (the status the original author associates with memory issues);
// any other failure is ignored.
errorStrategy = { task.attempt < 3 && task.exitStatus in (137) ? 'retry' : 'ignore' }
maxRetries = 3
// Read by get_memory() further down in this file; a null/empty value makes
// get_memory() return the requested amount unchanged (no cap).
maxMemory = null

// Resource labels: CPU presets
withLabel: lowcpu { cpus = 5 }
withLabel: midcpu { cpus = 15 }
withLabel: highcpu { cpus = 30 }
// Memory presets: both memory and disk are multiplied by the attempt number,
// so each retry asks for more than the previous attempt.
withLabel: lowmem {
memory = { get_memory( 20.GB * task.attempt ) }
disk = { 50.GB * task.attempt }
}
withLabel: midmem {
memory = { get_memory( 50.GB * task.attempt ) }
disk = { 100.GB * task.attempt }
}
withLabel: highmem {
memory = { get_memory( 100.GB * task.attempt ) }
disk = { 200.GB * task.attempt }
}
withLabel: veryhighmem {
memory = { get_memory( 200.GB * task.attempt ) }
disk = { 400.GB * task.attempt }
}
// Shared-memory presets: --shm-size is set to 5%, 10% or 25% of
// task.memory.mega, formatted with no decimals.
// NOTE(review): the value is emitted without a unit suffix — confirm how the
// executor interprets a bare number.
withLabel: lowsharedmem {
containerOptions = { "--shm-size ${String.format("%.0f",task.memory.mega * 0.05)}" }
}
withLabel: midsharedmem {
containerOptions = { "--shm-size ${String.format("%.0f",task.memory.mega * 0.1)}" }
}
withLabel: highsharedmem {
containerOptions = { "--shm-size ${String.format("%.0f",task.memory.mega * 0.25)}" }
}
// GPU presets: fixed memory/cpu/accelerator requests (not scaled per attempt
// and not passed through get_memory()).
withLabel: gpu {
memory = 100.GB
cpus = 20
accelerator = 1
}
withLabel: midgpu { // aiming for g4dn.12xlarge
memory = 150.GB
cpus = 40
accelerator = 4
}
withLabel: highgpu { // aiming for g4dn.metal
memory = 300.GB
cpus = 80
accelerator = 8
}
// Make sure any process whose name matches '.*publishStatesProc' gets enough
// disk space and memory
withName:'.*publishStatesProc' {
memory = '16GB'
disk = '100GB'
}
}

/**
 * Limit a requested memory amount to `process.maxMemory`.
 *
 * - When `process.maxMemory` is missing or empty, the request is returned
 *   unchanged.
 * - On the attempt whose number equals `process.maxRetries`, the configured
 *   maximum itself is returned.
 * - Otherwise the request is returned, unless it exceeds the maximum, in
 *   which case the maximum is returned.
 *
 * @param to_compare requested memory (callers in this file pass
 *                   `<n>.GB * task.attempt`)
 * @return the memory amount to request for the task
 *
 * Any exception raised while evaluating the limit prints a diagnostic and
 * terminates the JVM with exit status 1.
 */
def get_memory(to_compare) {
  // No cap configured: nothing to compare against.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function — confirm it
    // resolves in this scope when a maxMemory cap is actually configured.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // compareTo only guarantees the sign of its result, so test `> 0`
    // instead of `== 1`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which always
      // fell through to the catch block and aborted the run.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

// Execution trace: enabled for every run and written to trace.txt inside
// params.publish_dir; overwrite = true replaces an existing trace file
// instead of failing on it.
trace {
enabled = true
overwrite = true
file = "${params.publish_dir}/trace.txt"
}
38 changes: 0 additions & 38 deletions src/methods/figr/Singularity.def

This file was deleted.

41 changes: 0 additions & 41 deletions src/methods/figr/_script.py

This file was deleted.

12 changes: 8 additions & 4 deletions src/methods/figr/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ functionality:
- name: dummpy
type: string
required: false
required: true
- name: --multiomics_atac
type: file
example: resources/grn-benchmark/multiomics_r/atac.rds
Expand All @@ -31,6 +32,7 @@ functionality:
- name: dummpy
type: string
required: false
required: true
- name: --prediction
__merge__: ../../api/file_prediction.yaml
required: true
Expand All @@ -47,10 +49,12 @@ functionality:
- name: topic
type: string
required: false
required: true

- name: --temp_dir
type: string
direction: input
default: 'output/figr'
- name: --num_workers
type: integer
direction: input
Expand All @@ -66,10 +70,10 @@ functionality:

platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_r:1.0.2
setup:
- type: r
packages: [dplyr, FNN, chromVAR, doParallel, BuenColors, FigR, BSgenome.Hsapiens.UCSC.hg38]
image: janursa/figr:19-08-2024
# setup:
# - type: r
# packages: [dplyr, FNN, chromVAR, doParallel, BuenColors, FigR, BSgenome.Hsapiens.UCSC.hg38]

- type: native
- type: nextflow
Expand Down
2 changes: 2 additions & 0 deletions src/methods/figr/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Build the FigR component as a Docker-backed executable with viash, then run
# it once on the multiomics resources.

# Abort on the first failure so a failed build never runs a stale executable
# left in bin_figr/ by an earlier build.
set -euo pipefail

viash build src/methods/figr/config.vsh.yaml -p docker -o bin_figr

bin_figr/figr \
  --multiomics_rna resources/grn-benchmark/multiomics_r/rna.rds \
  --multiomics_atac resources/grn-benchmark/multiomics_r/atac.rds \
  --prediction bin_figr/prediction.csv \
  --cell_topic resources/grn-benchmark/supp/cell_topic.csv
16 changes: 8 additions & 8 deletions src/methods/figr/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ library(dplyr)
library(FNN)
library(chromVAR)
library(doParallel)
library(BuenColors)
library(FigR)
library(BSgenome.Hsapiens.UCSC.hg38)

Expand Down Expand Up @@ -33,7 +32,8 @@ cellknn_func <- function(par) {
print(dim(cellkNN))
saveRDS(cellkNN, paste0(par$temp_dir, "cellkNN.rds"))
}

print(par)
cellknn_func(par)

## Step1: Peak-gene association testing
peak_gene_func <- function(par){
Expand Down Expand Up @@ -144,9 +144,9 @@ filter_figr_grn <- function(par) {
write.csv(figr_grn, file = par$prediction, row.names = FALSE)
}

cellknn_func(par)
peak_gene_func(par)
dorc_genes_func(par)
tf_gene_association_func(par)
extract_peak_gene_func(par)
filter_figr_grn(par)

# peak_gene_func(par)
# dorc_genes_func(par)
# tf_gene_association_func(par)
# extract_peak_gene_func(par)
# filter_figr_grn(par)
Loading

0 comments on commit e7915ca

Please sign in to comment.