Skip to content

Commit

Permalink
Merge pull request #15 from openproblems-bio/jalil
Browse files Browse the repository at this point in the history
process multiomics workflow updated
  • Loading branch information
janursa authored Oct 5, 2024
2 parents 885a27d + bfc18ad commit a32b952
Show file tree
Hide file tree
Showing 18 changed files with 227 additions and 139 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
29 changes: 29 additions & 0 deletions scripts/run_process_multiomics_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch the process_multiomics Nextflow workflow with the docker profile.
#
# The script writes a params file to params/process_multiomics.yaml and then
# runs the workflow from the current directory (`nextflow run .`), so it is
# expected to be invoked from the repository root.

# Stop on the first failing command, unset variable, or failed pipeline stage,
# so nextflow is never started against a missing or stale params file.
set -euo pipefail

RUN_ID="process_multiomics"
# To run against the S3 bucket instead, use (no trailing slash, because the
# paths below are joined with "/"):
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
params_file="params/${RUN_ID}.yaml"

# The redirect below fails if the params directory does not exist yet.
mkdir -p "$(dirname "${params_file}")"

# NOTE(review): the YAML nesting is reconstructed from a copy of this script
# that had lost its indentation -- confirm that output_state and publish_dir
# belong at the top level rather than inside the param_list entry.
cat > "${params_file}" << HERE
param_list:
  - id: process_multiomics
    multiome_counts: "${resources_dir}/datasets_raw/multiome_counts.h5ad"
output_state: "state.yaml"
publish_dir: "${publish_dir}"
HERE


# Alternative launch through the tw CLI (PowerShell line continuations), kept
# for reference:
# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_inference.git `
#   --revision build/main --pull-latest `
#   --main-script target/nextflow/workflows/process_multiomics/main.nf `
#   --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
#   --params-file ./params/process_multiomics.yaml `
#   --config src/common/nextflow_helpers/labels_tw.config


nextflow run . \
  -main-script target/nextflow/workflows/process_multiomics/main.nf \
  -profile docker -with-trace -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${params_file}"
28 changes: 28 additions & 0 deletions scripts/run_process_perturbation_dataset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Launch the process_perturbation Nextflow workflow with the docker profile.
#
# The script writes a params file to params/process_perturbation.yaml and then
# runs the workflow from the current directory (`nextflow run .`), so it is
# expected to be invoked from the repository root.

# Stop on the first failing command, unset variable, or failed pipeline stage,
# so nextflow is never started against a missing or stale params file.
set -euo pipefail

RUN_ID="process_perturbation"
# No trailing slash: the paths below are joined with "/", and a trailing slash
# here would produce "grn//results/..." and "grn//datasets_raw/...", which are
# different S3 keys from the single-slash paths.
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="${resources_dir}/results/${RUN_ID}"
params_file="params/${RUN_ID}.yaml"

# The redirect below fails if the params directory does not exist yet.
mkdir -p "$(dirname "${params_file}")"

# NOTE(review): the YAML nesting is reconstructed from a copy of this script
# that had lost its indentation -- confirm that output_state and publish_dir
# belong at the top level rather than inside the param_list entry.
# The id previously read "test_process_perturatbion" (misspelled).
cat > "${params_file}" << HERE
param_list:
  - id: test_process_perturbation
    perturbation_counts: "${resources_dir}/datasets_raw/perturbation_counts.h5ad"
output_state: "state.yaml"
publish_dir: "${publish_dir}"
HERE


# Alternative launch through the tw CLI (PowerShell line continuations), kept
# for reference:
# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_inference.git `
#   --revision build/main --pull-latest `
#   --main-script target/nextflow/workflows/process_perturbation/main.nf `
#   --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
#   --params-file ./params/process_perturbation.yaml `
#   --config src/common/nextflow_helpers/labels_tw.config


nextflow run . \
  -main-script target/nextflow/workflows/process_perturbation/main.nf \
  -profile docker -with-trace -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${params_file}"
28 changes: 0 additions & 28 deletions scripts/run_process_perturbation_tw.sh

This file was deleted.

1 change: 1 addition & 0 deletions src/methods/multi_omics/figr/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dir.create(par$temp_dir, recursive = TRUE, showWarnings = TRUE)
atac = readRDS(par$multiomics_atac_r)
rna = readRDS(par$multiomics_rna_r)


colnames(atac) <- gsub("-", "", colnames(atac))
colnames(rna) <- gsub("-", "", colnames(rna))

Expand Down
40 changes: 0 additions & 40 deletions src/process_data/multiomics/batch_correction/script.py

This file was deleted.

15 changes: 2 additions & 13 deletions src/process_data/multiomics/format_data/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,15 @@ functionality:
arguments:
- name: --multiome_counts
type: file
required: false
required: true
direction: input
example: resources/datasets_raw/multiome_counts.h5ad

- name: --multiomics_rna
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna.h5ad
- name: --multiomics_rna_d0
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna_d0.h5ad

- name: --multiomics_rna_d0_hvg
type: file
required: false
direction: output
example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad

- name: --multiomics_atac
type: file
required: false
Expand Down
27 changes: 9 additions & 18 deletions src/process_data/multiomics/format_data/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import anndata as ad
import scanpy as sc
import numpy as np
## VIASH START
par = {
# 'multiome_counts': 'resources/datasets_raw/multiome_counts.h5ad',
Expand All @@ -26,22 +27,14 @@
multiomics_rna = multiomics[:,multiomics.var.feature_types=='Gene Expression']
multiomics_rna.var = multiomics_rna.var[['gene_ids', 'interval']]

def high_coverage(adata):
threshold = 0.1
mask = adata.X!=0
mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
adata.obs['high_coverage'] = mask_obs
adata.var['high_coverage'] = mask_var
high_coverage(multiomics_rna)

# hvgs
var = sc.pp.highly_variable_genes(multiomics_rna, flavor='seurat_v3', n_top_genes=7000, inplace=False)
multiomics_rna.var['highly_variable'] = var.highly_variable

# subset to donor 0
multiomics_rna_d0 = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', :]
multiomics_rna_d0_hvg = multiomics_rna[multiomics_rna.obs.donor_id=='donor_0', multiomics_rna.var.highly_variable]
# def high_coverage(adata):
# threshold = 0.1
# mask = adata.X!=0
# mask_obs = (np.sum(mask, axis=1).A.flatten()/mask.shape[1])>threshold
# mask_var = (np.sum(mask, axis=0).A.flatten()/mask.shape[0])>threshold
# adata.obs['high_coverage'] = mask_obs
# adata.var['high_coverage'] = mask_var
# high_coverage(multiomics_rna)
#------ ATAC
multiomics_atac = multiomics[:,multiomics.var.feature_types=='Peaks']
multiomics_atac.var = multiomics_atac.var[[]]
Expand All @@ -62,6 +55,4 @@ def high_coverage(adata):
multiomics_atac.obs['donor_id'] = multiomics_atac.obs['donor_id'].map(donor_map)

multiomics_rna.write(par['multiomics_rna'])
multiomics_rna_h0.write(par['multiomics_rna_h0'])
multiomics_rna_h0_hvg.write(par['multiomics_rna_h0_hvg'])
multiomics_atac.write(par['multiomics_atac'])
33 changes: 14 additions & 19 deletions src/process_data/multiomics/format_resources_r/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,45 @@ functionality:
arguments:
- name: --rna_matrix
type: file
required: false
required: true
direction: input
default: output/scRNA/X_matrix.mtx

example: output/scRNA/X_matrix.mtx
- name: --atac_matrix
type: file
required: false
required: true
direction: input
default: output/scATAC/X_matrix.mtx

example: output/scATAC/X_matrix.mtx
- name: --rna_gene_annot
type: file
required: false
required: true
direction: input
default: output/scRNA/annotation_gene.csv

example: output/scRNA/annotation_gene.csv
- name: --rna_cell_annot
type: file
required: false
required: true
direction: input
default: output/scRNA/annotation_cell.csv

example: output/scRNA/annotation_cell.csv
- name: --atac_peak_annot
type: file
required: false
required: true
direction: input
default: output/scATAC/annotation_gene.csv

example: output/scATAC/annotation_gene.csv
- name: --atac_cell_annot
type: file
required: false
required: true
direction: input
default: output/scATAC/annotation_cell.csv
example: output/scATAC/annotation_cell.csv

- name: --rna_rds
type: file
required: false
direction: output
default: resources/grn-benchmark/multiomics_r/rna.rds
example: resources/grn-benchmark/multiomics_r/rna.rds
- name: --atac_rds
type: file
required: false
direction: output
default: resources/grn-benchmark/multiomics_r/atac.rds
example: resources/grn-benchmark/multiomics_r/atac.rds



Expand Down
1 change: 1 addition & 0 deletions src/process_data/multiomics/format_resources_r/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ annotation_peak_filtered <- annotation_peak[filter_indices, ]
# Filter the rows in X
X_filtered <- X[filter_indices, ]


# Create the SummarizedExperiment object with the filtered data
atac <- SummarizedExperiment(assays = list(counts = X_filtered),
rowRanges = GRanges(annotation_peak_filtered$seqname,
Expand Down
21 changes: 10 additions & 11 deletions src/process_data/multiomics/multiome_matrix/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,50 @@ functionality:
arguments:
- name: --multiomics_rna
type: file
required: false
required: true
direction: input
default: resources/grn-benchmark/multiomics_rna.h5ad
example: resources/grn-benchmark/multiomics_rna.h5ad

- name: --multiomics_atac
type: file
required: false
required: true
direction: input
default: resources/grn-benchmark/multiomics_atac.h5ad
example: resources/grn-benchmark/multiomics_atac.h5ad

- name: --rna_matrix
type: file
required: false
direction: output
default: output/scRNA/X_matrix.mtx

example: output/scRNA/X_matrix.mtx
- name: --atac_matrix
type: file
required: false
direction: output
default: output/scATAC/X_matrix.mtx
example: output/scATAC/X_matrix.mtx

- name: --rna_gene_annot
type: file
required: false
direction: output
default: output/scRNA/annotation_gene.csv
example: output/scRNA/annotation_gene.csv

- name: --rna_cell_annot
type: file
required: false
direction: output
default: output/scRNA/annotation_cell.csv
example: output/scRNA/annotation_cell.csv

- name: --atac_peak_annot
type: file
required: false
direction: output
default: output/scATAC/annotation_gene.csv
example: output/scATAC/annotation_gene.csv

- name: --atac_cell_annot
type: file
required: false
direction: output
default: output/scATAC/annotation_cell.csv
example: output/scATAC/annotation_cell.csv
resources:
- type: python_script
path: script.py
Expand Down
45 changes: 45 additions & 0 deletions src/process_data/multiomics/subset_hvg/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

# Viash component config for `subset_hvg` in the `multiomics` namespace.
# Takes the formatted multiomics RNA and ATAC files and declares two output
# files; the processing itself lives in script.py.
functionality:
  name: subset_hvg
  namespace: "multiomics"
  info:
    label: subset_hvg
    summary: "Receives multiomics data and subsets it for hvg"
  arguments:
    # Inputs: both are mandatory.
    - name: --multiomics_rna
      type: file
      required: true
      direction: input
      example: resources/grn-benchmark/multiomics_rna.h5ad
    - name: --multiomics_atac
      type: file
      required: true
      direction: input
      example: resources/grn-benchmark/multiomics_atac.h5ad

    # Outputs: both are optional.
    # NOTE(review): "d0" presumably means donor 0 and "hvg" highly variable
    # genes -- confirm against script.py.
    - name: --multiomics_rna_d0_hvg
      type: file
      required: false
      direction: output
      example: resources/grn-benchmark/multiomics_rna_d0_hvg.h5ad
    - name: --multiomics_atac_d0
      type: file
      required: false
      direction: output
      example: resources/grn-benchmark/multiomics_atac_d0.h5ad

  resources:
    - type: python_script
      path: script.py

platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
    setup:
      # Extra Python package installed on top of the base image.
      - type: python
        packages: [scikit-misc]
  - type: native
  - type: nextflow
    directives:
      label: [midtime, midmem, midcpu]
Loading

0 comments on commit a32b952

Please sign in to comment.