Skip to content

Commit

Permalink
workflow reduced to no on the fly models
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 13, 2024
1 parent b9385f2 commit 7862520
Show file tree
Hide file tree
Showing 11 changed files with 533 additions and 410 deletions.
434 changes: 245 additions & 189 deletions runs.ipynb

Large diffs are not rendered by default.

119 changes: 119 additions & 0 deletions scripts/repo/run_grn_evaluation copy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/bin/bash
#
# Writes the parameter YAML (${param_file}) consumed by the
# run_grn_evaluation Nextflow workflow: one param_list entry per GRN model
# and per control, followed by output_state / publish_dir.

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
# reg_type=${1} #GB, ridge
viash ns build --parallel
reg_type=ridge

RUN_ID="grn_evaluation_all_${reg_type}"
resources_dir="s3://openproblems-data/resources/grn"
# resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"

subsample=-2
max_workers=10
layer=scgen_pearson
metric_ids="[regression_1, regression_2]"

param_file="./params/${RUN_ID}.yaml"

# Inferred GRN models to evaluate; each name is also the stem of its
# prediction file under ${grn_models_folder}.
grn_names=(
  "scglue"
  "scenicplus"
  "celloracle"
  "granie"
  "figr"
  "collectri"
  "genie3"
  "grnboost2"
  "ppcor"
  "portia"
)

# Start writing to the YAML file. The params directory is created first so
# the redirection below cannot fail on a checkout where it does not exist.
mkdir -p "${param_file%/*}"
cat > "$param_file" << HERE
param_list:
HERE

#######################################
# Appends one param_list entry for an inferred GRN model to the params YAML.
# The entry is written as a YAML list item ("  - id: ...") with every other
# key indented beneath it, so that it nests under the top-level param_list.
# Globals:   param_file, reg_type, metric_ids, resources_dir, subsample,
#            max_workers, layer, grn_models_folder (all read)
# Arguments: $1 - method id; also the stem of the prediction CSV
# Outputs:   appends 12 lines to ${param_file}
#######################################
append_entry() {
  local method_id=$1
  cat >> "$param_file" << HERE
  - id: ${reg_type}_${method_id}
    metric_ids: ${metric_ids}
    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
    multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
    reg_type: $reg_type
    method_id: $method_id
    subsample: $subsample
    max_workers: $max_workers
    tf_all: ${resources_dir}/prior/tf_all.csv
    layer: ${layer}
    consensus: ${resources_dir}/prior/consensus-num-regulators.json
    prediction: ${grn_models_folder}/${method_id}.csv
HERE
}

#######################################
# Appends one param_list entry for a control / baseline method to the params
# YAML. The entry is written as a YAML list item with every other key
# indented beneath it, so that it nests under the top-level param_list.
# Globals:   param_file, reg_type, metric_ids, resources_dir, subsample,
#            max_workers, layer (all read)
# Arguments: $1 - method id
#            $2 - causal (may be empty)
#            $3 - corr_method (may be empty)
#            $4 - cell_type_specific; "false" when omitted
#            $5 - metacell; "false" when omitted
#            $6 - impute; "false" when omitted
# Outputs:   appends 17 lines to ${param_file}
#######################################
append_entry_control() {
  local method_id=$1
  local causal=${2-}
  local corr_method=${3-}
  # Default only when the argument is absent; an explicitly passed empty
  # string is still written out as-is.
  local cell_type_specific=${4-false}
  local metacell=${5-false}
  local impute=${6-false}
  # NOTE(review): prediction is fixed to collectri.csv for every control,
  # presumably as a placeholder input - confirm against the workflow.
  cat >> "$param_file" << HERE
  - id: ${reg_type}_${method_id}
    metric_ids: ${metric_ids}
    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
    multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
    reg_type: $reg_type
    method_id: $method_id
    subsample: $subsample
    max_workers: $max_workers
    tf_all: ${resources_dir}/prior/tf_all.csv
    layer: ${layer}
    consensus: ${resources_dir}/prior/consensus-num-regulators.json
    causal: ${causal}
    corr_method: ${corr_method}
    prediction: ${resources_dir}/grn_models/collectri.csv
    cell_type_specific: ${cell_type_specific}
    metacell: ${metacell}
    impute: ${impute}
HERE
}

# Add one entry per inferred GRN model listed in grn_names.
for grn_name in "${grn_names[@]}"; do
  append_entry "$grn_name"
done

## controls
# Arguments after the method id: causal, corr_method, cell_type_specific,
# metacell, impute. All six are passed explicitly so no key is left empty
# by accident.
append_entry_control "negative_control" "" "" "false" "false" "false"
append_entry_control "positive_control" "" "" "false" "false" "false"
append_entry_control "baseline_pearson" "false" "pearson" "false" "false" "false"
append_entry_control "baseline_dotproduct" "false" "dotproduct" "false" "false" "false"
append_entry_control "baseline_dotproduct_causal" "true" "dotproduct" "false" "false" "false"
append_entry_control "baseline_dotproduct_causal_cell_type" "true" "dotproduct" "true" "false" "false"
append_entry_control "baseline_dotproduct_causal_metacell" "true" "dotproduct" "false" "true" "false"
append_entry_control "baseline_dotproduct_causal_impute" "true" "dotproduct" "false" "false" "true"
append_entry_control "baseline_corr_causal_spearman" "true" "spearman" "false" "false" "false"


# Append the remaining output_state and publish_dir to the YAML file
cat >> "$param_file" << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# nextflow run . \
# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}
# subl resources/results/grn_evaluation_all_ridge/scores.yaml

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_inference.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/grn_evaluation_so_ridge.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


55 changes: 55 additions & 0 deletions scripts/run_baselines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Produces the baseline and positive-control GRN predictions under
# resources/grn_models/baselines by running the control-method components
# through `viash run`, one after another.

#######################################
# Runs the baseline_corr component once.
# Arguments: $1 - output file stem (written as <stem>.csv)
#            $2 - value for --causal
#            $3 - value for --corr_method
#            $4 - value for --cell_type_specific
#            $5 - value for --metacell
#            $6 - value for --impute
#######################################
run_baseline_corr() {
  local stem=$1 is_causal=$2 corr=$3 per_cell_type=$4 use_metacell=$5 use_impute=$6
  viash run src/control_methods/baseline_corr/config.vsh.yaml -- \
    --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
    --tf_all resources/prior/tf_all.csv \
    --causal "$is_causal" \
    --corr_method "$corr" \
    --cell_type_specific "$per_cell_type" \
    --metacell "$use_metacell" \
    --impute "$use_impute" \
    --prediction "resources/grn_models/baselines/${stem}.csv"
}

echo "baseline pearson"
run_baseline_corr baseline_pearson false pearson false false false

echo "baseline dotproduct"
run_baseline_corr baseline_dotproduct false dotproduct false false false

echo "baseline dotproduct causal"
run_baseline_corr baseline_dotproduct_causal true dotproduct false false false

echo "baseline causal cell type"
run_baseline_corr baseline_dotproduct_causal_celltype true dotproduct true false false

echo "baseline dotproduct causal metacell"
run_baseline_corr baseline_dotproduct_causal_metacell true dotproduct false true false

# The positive control takes the perturbation data as an extra input and has
# none of the correlation switches, so it is invoked directly.
echo "positive control"
viash run src/control_methods/positive_control/config.vsh.yaml -- \
  --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
  --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
  --tf_all resources/prior/tf_all.csv \
  --prediction resources/grn_models/baselines/positive_control.csv
74 changes: 28 additions & 46 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ viash ns build --parallel
reg_type=ridge

RUN_ID="grn_evaluation_all_${reg_type}"
resources_dir="s3://openproblems-data/resources/grn"
# resources_dir="./resources"
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"

subsample=-2
max_workers=10
layer=scgen_pearson
metric_ids="[regression_1, regression_2]"
metric_ids="[regression_1]"

param_file="./params/${RUN_ID}.yaml"

Expand All @@ -30,6 +30,15 @@ grn_names=(
"ppcor"
"portia"
)

baseline_models=(
baseline_pearson
baseline_dotproduct
baseline_dotproduct_causal
baseline_dotproduct_causal_celltype
baseline_dotproduct_causal_metacell
positive_control
)
# Start writing to the YAML file
cat > $param_file << HERE
param_list:
Expand All @@ -48,63 +57,36 @@ append_entry() {
tf_all: ${resources_dir}/prior/tf_all.csv
layer: ${layer}
consensus: ${resources_dir}/prior/consensus-num-regulators.json
prediction: ${grn_models_folder}/$1.csv
prediction: ${2}/$1.csv
HERE
}

append_entry_control() {
cat >> $param_file << HERE
- id: ${reg_type}_${1}
metric_ids: ${metric_ids}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
reg_type: $reg_type
method_id: $1
subsample: $subsample
max_workers: $max_workers
tf_all: ${resources_dir}/prior/tf_all.csv
layer: ${layer}
consensus: ${resources_dir}/prior/consensus-num-regulators.json
causal: ${2}
corr_method: ${3}
prediction: ${resources_dir}/grn_models/collectri.csv
cell_type_specific: ${4}
metacell: ${5}
impute: ${6}
HERE

}
# folder=${grn_models_folder}
# # Loop through grn_names and layers
# for grn_name in "${grn_names[@]}"; do
# append_entry "$grn_name" "$folder"
# done

Loop through grn_names and layers
for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name"
folder=${grn_models_folder}/baselines
for grn_name in "${baseline_models[@]}"; do
append_entry "$grn_name" "$folder"
done

## controls
append_entry_control "negative_control" "" "" "false" "false" "false"
append_entry_control "positive_control" "" "" "false" "false" "false"
append_entry_control "baseline_pearson" "false" "pearson" "false" "false" "false"
append_entry_control "baseline_dotproduct" "false" "dotproduct" "false" "false" "false"
append_entry_control "baseline_dotproduct_causal" "true" "dotproduct" "false" "false" "false"
append_entry_control "baseline_dotproduct_causal_cell_type" "true" "dotproduct" "true" "false" "false"
append_entry_control "baseline_dotproduct_causal_metacell" "true" "dotproduct" "false" "true" "false"
append_entry_control "baseline_dotproduct_causal_impute" "true" "dotproduct" "false" "false" "true"
append_entry_control "baseline_corr_causal_spearman" "true" "spearman"


# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# nextflow run . \
# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}
# subl resources/results/grn_evaluation_all_ridge/scores.yaml
nextflow run . \
-main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}
subl resources/results/grn_evaluation_all_ridge/scores.yaml

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_inference.git `
Expand Down
13 changes: 8 additions & 5 deletions src/control_methods/baseline_corr/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import scanpy as sc
from tqdm import tqdm
from scipy.stats import spearmanr
from sklearn.preprocessing import StandardScaler

## VIASH START
par = {
Expand All @@ -14,7 +15,8 @@

## VIASH END

def select_top_links(net, par):
def process_links(net, par):
net = net[net.source!=net.target]
net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
return net
Expand All @@ -24,9 +26,11 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
for group in tqdm(np.unique(groups), desc="Processing groups"):
X_sub = X[groups == group, :]
if method == "dotproduct":
net = X_sub.T.dot(X_sub)
X_sub = StandardScaler().fit_transform(X_sub)
net = np.dot(X_sub.T, X_sub) / X_sub.shape[0]
elif method == "pearson":
net = np.corrcoef(X_sub.T)
# net = pd.DataFrame(X_sub).transpose().corr().values.to_numpy()
net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0)
elif method == "spearman":
net = spearmanr(X_sub).statistic
Expand All @@ -41,7 +45,7 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net.rename(columns={'index': 'target'}, inplace=True)

net = select_top_links(net, par)
net = process_links(net, par)
net['cell_type'] = group
if i==0:
grn = net
Expand All @@ -53,8 +57,7 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
if par['cell_type_specific']==False:
grn.drop(columns=['cell_type'], inplace=True)
grn = grn.groupby(['source', 'target']).mean().reset_index()
net = select_top_links(net, par)

grn = process_links(grn, par)
return grn
print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
Expand Down
5 changes: 5 additions & 0 deletions src/control_methods/positive_control/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ functionality:
required: true
direction: input
example: resources_test/grn-benchmark/perturbation_data.h5ad
- name: --layer
type: string
direction: input
required: false
default: scgen_pearson

resources:
- type: python_script
Expand Down
8 changes: 8 additions & 0 deletions src/control_methods/positive_control/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ def create_positive_control(X: np.ndarray, groups: np.ndarray):

pivoted_net = pivoted_net.rename(columns={'index': 'target'})
pivoted_net = pivoted_net[pivoted_net['weight'] != 0]


def process_links(net, par):
    """Drop self-links and keep the strongest links of a network.

    Rows whose ``source`` equals ``target`` are removed, the remaining rows
    are ordered by absolute ``weight`` (largest first), and only the first
    ``par['max_n_links']`` rows are returned with a fresh 0..n-1 index.
    """
    without_self_links = net[net.source != net.target]
    # Rank on |weight| so strong negative links are kept alongside positive ones.
    ranking = without_self_links['weight'].abs().sort_values(ascending=False)
    ranked = without_self_links.reindex(ranking.index)
    return ranked.head(par['max_n_links']).reset_index(drop=True)
pivoted_net = process_links(pivoted_net, par)
print('Saving')
pivoted_net.to_csv(par["prediction"])

2 changes: 1 addition & 1 deletion src/exp_analysis/test.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
viash run src/exp_analysis/config.vsh.yaml -- \
--perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
--prediction output/baseline_corr.csv \
--prediction resources/grn_models/genie3.csv \

Loading

0 comments on commit 7862520

Please sign in to comment.