diff --git a/runs.ipynb b/runs.ipynb
index df7e78d1d..4b2c295c0 100644
--- a/runs.ipynb
+++ b/runs.ipynb
@@ -2592,7 +2592,7 @@
},
{
"cell_type": "code",
- "execution_count": 104,
+ "execution_count": 112,
"metadata": {},
"outputs": [
{
@@ -2600,6 +2600,7 @@
"output_type": "stream",
"text": [
"download: s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge/scores.yaml to resources/results/grn_evaluation_all_ridge/scores.yaml\n",
+ "download: s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge/metric_configs.yaml to resources/results/grn_evaluation_all_ridge/metric_configs.yaml\n",
"download: s3://openproblems-data/resources/grn/results/grn_evaluation_all_ridge/trace.txt to resources/results/grn_evaluation_all_ridge/trace.txt\n"
]
}
@@ -2610,329 +2611,381 @@
},
{
"cell_type": "code",
- "execution_count": 106,
+ "execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "
\n",
+ "\n",
" \n",
" \n",
" | \n",
- " ex(False)_tf(-1) | \n",
- " ex(True)_tf(-1) | \n",
- " static-theta-0.0 | \n",
- " static-theta-0.5 | \n",
+ " ex(False)_tf(-1) | \n",
+ " ex(True)_tf(-1) | \n",
+ " static-theta-0.0 | \n",
+ " static-theta-0.5 | \n",
"
\n",
" \n",
" \n",
" \n",
- " negative_control | \n",
- " -0.035493 | \n",
- " -0.034779 | \n",
- " 0.379416 | \n",
- " 0.504639 | \n",
+ " collectri | \n",
+ " -0.100238 | \n",
+ " -0.211182 | \n",
+ " 0.489316 | \n",
+ " 0.514896 | \n",
"
\n",
" \n",
- " baseline_pearson | \n",
- " -0.100238 | \n",
- " -0.211182 | \n",
- " 0.489316 | \n",
- " 0.514896 | \n",
+ " ppcor | \n",
+ " 0.027029 | \n",
+ " 0.018207 | \n",
+ " 0.224514 | \n",
+ " 0.526332 | \n",
"
\n",
" \n",
- " baseline_dotproduct | \n",
- " -0.100238 | \n",
- " -0.211182 | \n",
- " 0.489316 | \n",
- " 0.514896 | \n",
+ " celloracle | \n",
+ " 0.171687 | \n",
+ " 0.235811 | \n",
+ " 0.432617 | \n",
+ " 0.536534 | \n",
"
\n",
" \n",
- " baseline_pearson_causal | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " baseline_dotproduct_causal | \n",
+ " 0.072327 | \n",
+ " 0.392805 | \n",
+ " 0.171613 | \n",
+ " 0.533977 | \n",
"
\n",
" \n",
- " baseline_dotproduct_causal | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " baseline_dotproduct_causal_metacell | \n",
+ " -0.522164 | \n",
+ " 0.225392 | \n",
+ " 0.402120 | \n",
+ " 0.525323 | \n",
"
\n",
" \n",
- " baseline_dotproduct_causal_cell_type | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " scglue | \n",
+ " 0.245670 | \n",
+ " 0.289934 | \n",
+ " 0.810389 | \n",
+ " 0.599267 | \n",
"
\n",
" \n",
- " baseline_dotproduct_causal_metacell | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
+ " scenicplus | \n",
+ " 0.301834 | \n",
+ " 0.392452 | \n",
+ " 0.698092 | \n",
+ " 0.596971 | \n",
"
\n",
" \n",
- " positive_control | \n",
- " 0.628303 | \n",
- " 0.629964 | \n",
- " 0.683244 | \n",
- " 0.741396 | \n",
+ " baseline_pearson | \n",
+ " 0.012680 | \n",
+ " 0.062178 | \n",
+ " 0.302822 | \n",
+ " 0.512141 | \n",
"
\n",
" \n",
- " collectri | \n",
- " -0.100238 | \n",
- " -0.211182 | \n",
- " 0.489316 | \n",
- " 0.514896 | \n",
+ " genie3 | \n",
+ " 0.200146 | \n",
+ " 0.335431 | \n",
+ " 0.827109 | \n",
+ " 0.582196 | \n",
"
\n",
" \n",
- " granie | \n",
- " 0.108554 | \n",
- " 0.209125 | \n",
- " 0.356784 | \n",
- " 0.526008 | \n",
+ " grnboost2 | \n",
+ " 0.264538 | \n",
+ " 0.426411 | \n",
+ " 0.830384 | \n",
+ " 0.584299 | \n",
"
\n",
" \n",
- " figr | \n",
- " 0.154044 | \n",
- " 0.220225 | \n",
- " 0.680781 | \n",
- " 0.565727 | \n",
+ " figr | \n",
+ " 0.154044 | \n",
+ " 0.220225 | \n",
+ " 0.680781 | \n",
+ " 0.565727 | \n",
"
\n",
" \n",
- " celloracle | \n",
- " 0.208249 | \n",
- " 0.258602 | \n",
- " 0.432617 | \n",
- " 0.536534 | \n",
+ " portia | \n",
+ " 0.013737 | \n",
+ " 0.033267 | \n",
+ " 0.491804 | \n",
+ " 0.537863 | \n",
"
\n",
" \n",
- " scglue | \n",
- " 0.245670 | \n",
- " 0.289934 | \n",
- " 0.810389 | \n",
- " 0.599267 | \n",
+ " granie | \n",
+ " 0.108554 | \n",
+ " 0.209125 | \n",
+ " 0.356784 | \n",
+ " 0.526008 | \n",
"
\n",
" \n",
- " scenicplus | \n",
- " 0.301834 | \n",
- " 0.392452 | \n",
- " 0.698092 | \n",
- " 0.596971 | \n",
+ " negative_control | \n",
+ " -0.014667 | \n",
+ " -0.014700 | \n",
+ " 0.182615 | \n",
+ " 0.495925 | \n",
"
\n",
" \n",
- " portia | \n",
- " 0.013737 | \n",
- " 0.033267 | \n",
- " 0.491804 | \n",
- " 0.537863 | \n",
+ " baseline_dotproduct_causal_cell_type | \n",
+ " 0.072550 | \n",
+ " 0.357757 | \n",
+ " 0.171613 | \n",
+ " 0.531513 | \n",
"
\n",
" \n",
- " ppcor | \n",
- " 0.027029 | \n",
- " 0.018207 | \n",
- " 0.224514 | \n",
- " 0.526332 | \n",
+ " baseline_corr_causal_spearman | \n",
+ " -0.100238 | \n",
+ " -0.211182 | \n",
+ " 0.489316 | \n",
+ " 0.514896 | \n",
"
\n",
" \n",
- " grnboost2 | \n",
- " 0.264538 | \n",
- " 0.426411 | \n",
- " 0.830384 | \n",
- " 0.584299 | \n",
- "
\n",
- " \n",
- " genie3 | \n",
- " 0.200146 | \n",
- " 0.335431 | \n",
- " 0.827109 | \n",
- " 0.582196 | \n",
+ " baseline_dotproduct | \n",
+ " 0.068719 | \n",
+ " 0.345421 | \n",
+ " 0.470081 | \n",
+ " 0.527163 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 106,
+ "execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "models_all = ['negative_control', 'baseline_pearson', 'baseline_dotproduct', 'baseline_pearson_causal', 'baseline_dotproduct_causal', 'baseline_dotproduct_causal_cell_type', 'baseline_dotproduct_causal_metacell', 'positive_control', 'collectri','granie', 'figr', 'celloracle', 'scglue', 'scenicplus', 'portia','ppcor', 'grnboost2', 'genie3']\n",
+ "\n",
+ "models_all = ['negative_control', 'baseline_pearson', 'baseline_dotproduct', \n",
+ " 'baseline_pearson_causal', 'baseline_dotproduct_causal', 'baseline_dotproduct_causal_cell_type', \n",
+ " 'baseline_dotproduct_causal_metacell', 'positive_control', 'collectri','granie', 'figr', 'celloracle', \n",
+ " 'scglue', 'scenicplus', 'portia','ppcor', 'grnboost2', 'genie3']\n",
+ "models_all = ['negative_control', 'baseline_pearson', 'baseline_dotproduct', \n",
+ " 'baseline_dotproduct_causal', 'baseline_dotproduct_causal_cell_type', \n",
+ " 'baseline_dotproduct_causal_metacell', 'positive_control']\n",
"def extract_data(data, reg='reg1', dataset_id='scgen_pearson'):\n",
" i = 0\n",
" for entry in data:\n",
@@ -2962,8 +3015,11 @@
"result_file = f'{base_folder}/scores.yaml'\n",
"with open(result_file, 'r') as file:\n",
" data = yaml.safe_load(file)\n",
- "df_reg1 = extract_data(data, reg='reg1').reindex(models_all).drop(columns=['Mean'])\n",
- "df_reg2 = extract_data(data, reg='reg2').reindex(models_all).drop(columns=['Mean'])\n",
+ "# df_reg1 = extract_data(data, reg='reg1').reindex(models_all).drop(columns=['Mean'])\n",
+ "# df_reg2 = extract_data(data, reg='reg2').reindex(models_all).drop(columns=['Mean'])\n",
+ "\n",
+ "df_reg1 = extract_data(data, reg='reg1').drop(columns=['Mean'])\n",
+ "df_reg2 = extract_data(data, reg='reg2').drop(columns=['Mean'])\n",
"df_all = pd.concat([df_reg1, df_reg2], axis=1).fillna(0)\n",
"# df_all[df_all<0]=0\n",
"# df_all = (df_all-df_all.min(axis=0))/(df_all.max(axis=0)-df_all.min(axis=0))\n",
diff --git a/scripts/repo/run_grn_evaluation copy.sh b/scripts/repo/run_grn_evaluation copy.sh
new file mode 100644
index 000000000..aec5d1f82
--- /dev/null
+++ b/scripts/repo/run_grn_evaluation copy.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
+# reg_type=${1} #GB, ridge
+viash ns build --parallel
+reg_type=ridge
+
+RUN_ID="grn_evaluation_all_${reg_type}"
+resources_dir="s3://openproblems-data/resources/grn"
+# resources_dir="./resources"
+publish_dir="${resources_dir}/results/${RUN_ID}"
+grn_models_folder="${resources_dir}/grn_models"
+
+subsample=-2
+max_workers=10
+layer=scgen_pearson
+metric_ids="[regression_1, regression_2]"
+
+param_file="./params/${RUN_ID}.yaml"
+
+grn_names=(
+ "scglue"
+ "scenicplus"
+ "celloracle"
+ "granie"
+ "figr"
+ "collectri"
+ "genie3"
+ "grnboost2"
+ "ppcor"
+ "portia"
+ )
+# Start writing to the YAML file
+cat > $param_file << HERE
+param_list:
+HERE
+
+append_entry() {
+ cat >> $param_file << HERE
+ - id: ${reg_type}_${1}
+ metric_ids: ${metric_ids}
+ perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
+ multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
+ reg_type: $reg_type
+ method_id: $1
+ subsample: $subsample
+ max_workers: $max_workers
+ tf_all: ${resources_dir}/prior/tf_all.csv
+ layer: ${layer}
+ consensus: ${resources_dir}/prior/consensus-num-regulators.json
+ prediction: ${grn_models_folder}/$1.csv
+HERE
+}
+
+append_entry_control() {
+ cat >> $param_file << HERE
+ - id: ${reg_type}_${1}
+ metric_ids: ${metric_ids}
+ perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
+ multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
+ reg_type: $reg_type
+ method_id: $1
+ subsample: $subsample
+ max_workers: $max_workers
+ tf_all: ${resources_dir}/prior/tf_all.csv
+ layer: ${layer}
+ consensus: ${resources_dir}/prior/consensus-num-regulators.json
+ causal: ${2}
+ corr_method: ${3}
+ prediction: ${resources_dir}/grn_models/collectri.csv
+ cell_type_specific: ${4}
+ metacell: ${5}
+ impute: ${6}
+HERE
+
+}
+
+# Loop through grn_names and layers
+for grn_name in "${grn_names[@]}"; do
+ append_entry "$grn_name"
+done
+
+## controls -- arguments: method_id causal corr_method cell_type_specific metacell impute
+append_entry_control "negative_control" "" "" "false" "false" "false"
+append_entry_control "positive_control" "" "" "false" "false" "false"
+append_entry_control "baseline_pearson" "false" "pearson" "false" "false" "false"
+append_entry_control "baseline_dotproduct" "false" "dotproduct" "false" "false" "false"
+append_entry_control "baseline_dotproduct_causal" "true" "dotproduct" "false" "false" "false"
+append_entry_control "baseline_dotproduct_causal_cell_type" "true" "dotproduct" "true" "false" "false"
+append_entry_control "baseline_dotproduct_causal_metacell" "true" "dotproduct" "false" "true" "false"
+append_entry_control "baseline_dotproduct_causal_impute" "true" "dotproduct" "false" "false" "true"
+append_entry_control "baseline_corr_causal_spearman" "true" "spearman" "false" "false" "false"
+
+
+# Append the remaining output_state and publish_dir to the YAML file
+cat >> $param_file << HERE
+output_state: "state.yaml"
+publish_dir: "$publish_dir"
+HERE
+
+# nextflow run . \
+# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
+# -profile docker \
+# -with-trace \
+# -c src/common/nextflow_helpers/labels_ci.config \
+# -params-file ${param_file}
+# subl resources/results/grn_evaluation_all_ridge/scores.yaml
+
+# ./tw-windows-x86_64.exe launch `
+# https://github.com/openproblems-bio/task_grn_inference.git `
+# --revision build/main `
+# --pull-latest `
+# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
+# --workspace 53907369739130 `
+# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
+# --params-file ./params/grn_evaluation_so_ridge.yaml `
+# --config src/common/nextflow_helpers/labels_tw.config
+
+
diff --git a/scripts/run_baselines.sh b/scripts/run_baselines.sh
new file mode 100644
index 000000000..4d32eb58f
--- /dev/null
+++ b/scripts/run_baselines.sh
@@ -0,0 +1,55 @@
+echo "baseline pearson"
+viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --causal false \
+ --corr_method pearson \
+ --cell_type_specific false \
+ --metacell false \
+ --impute false \
+ --prediction resources/grn_models/baselines/baseline_pearson.csv
+
+echo "baseline dotproduct"
+viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --causal false \
+ --corr_method dotproduct \
+ --cell_type_specific false \
+ --metacell false \
+ --impute false \
+ --prediction resources/grn_models/baselines/baseline_dotproduct.csv
+
+echo "baseline dotproduct causal"
+viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --causal true \
+ --corr_method dotproduct \
+ --cell_type_specific false \
+ --metacell false \
+ --impute false \
+ --prediction resources/grn_models/baselines/baseline_dotproduct_causal.csv
+
+echo "baseline causal cell type"
+viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --causal true \
+ --corr_method dotproduct \
+ --cell_type_specific true \
+ --metacell false \
+ --impute false \
+ --prediction resources/grn_models/baselines/baseline_dotproduct_causal_celltype.csv
+
+echo "baseline dotproduct causal metacell"
+viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --causal true \
+ --corr_method dotproduct \
+ --cell_type_specific false \
+ --metacell true \
+ --impute false \
+ --prediction resources/grn_models/baselines/baseline_dotproduct_causal_metacell.csv
+
+echo "positive control"
+viash run src/control_methods/positive_control/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+ --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
+ --tf_all resources/prior/tf_all.csv \
+ --prediction resources/grn_models/baselines/positive_control.csv
\ No newline at end of file
diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh
index aec5d1f82..b63d20122 100644
--- a/scripts/run_grn_evaluation.sh
+++ b/scripts/run_grn_evaluation.sh
@@ -6,15 +6,15 @@ viash ns build --parallel
reg_type=ridge
RUN_ID="grn_evaluation_all_${reg_type}"
-resources_dir="s3://openproblems-data/resources/grn"
-# resources_dir="./resources"
+# resources_dir="s3://openproblems-data/resources/grn"
+resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"
subsample=-2
max_workers=10
layer=scgen_pearson
-metric_ids="[regression_1, regression_2]"
+metric_ids="[regression_1]"
param_file="./params/${RUN_ID}.yaml"
@@ -30,6 +30,15 @@ grn_names=(
"ppcor"
"portia"
)
+
+baseline_models=(
+ baseline_pearson
+ baseline_dotproduct
+ baseline_dotproduct_causal
+ baseline_dotproduct_causal_celltype
+ baseline_dotproduct_causal_metacell
+ positive_control
+ )
# Start writing to the YAML file
cat > $param_file << HERE
param_list:
@@ -48,49 +57,22 @@ append_entry() {
tf_all: ${resources_dir}/prior/tf_all.csv
layer: ${layer}
consensus: ${resources_dir}/prior/consensus-num-regulators.json
- prediction: ${grn_models_folder}/$1.csv
+ prediction: ${2}/$1.csv
HERE
}
-append_entry_control() {
- cat >> $param_file << HERE
- - id: ${reg_type}_${1}
- metric_ids: ${metric_ids}
- perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
- multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
- reg_type: $reg_type
- method_id: $1
- subsample: $subsample
- max_workers: $max_workers
- tf_all: ${resources_dir}/prior/tf_all.csv
- layer: ${layer}
- consensus: ${resources_dir}/prior/consensus-num-regulators.json
- causal: ${2}
- corr_method: ${3}
- prediction: ${resources_dir}/grn_models/collectri.csv
- cell_type_specific: ${4}
- metacell: ${5}
- impute: ${6}
-HERE
-}
+# folder=${grn_models_folder}
+# # Loop through grn_names and layers
+# for grn_name in "${grn_names[@]}"; do
+# append_entry "$grn_name" "$folder"
+# done
-Loop through grn_names and layers
-for grn_name in "${grn_names[@]}"; do
- append_entry "$grn_name"
+folder=${grn_models_folder}/baselines
+for grn_name in "${baseline_models[@]}"; do
+ append_entry "$grn_name" "$folder"
done
-## controls
-append_entry_control "negative_control" "" "" "false" "false" "false"
-append_entry_control "positive_control" "" "" "false" "false" "false"
-append_entry_control "baseline_pearson" "false" "pearson" "false" "false" "false"
-append_entry_control "baseline_dotproduct" "false" "dotproduct" "false" "false" "false"
-append_entry_control "baseline_dotproduct_causal" "true" "dotproduct" "false" "false" "false"
-append_entry_control "baseline_dotproduct_causal_cell_type" "true" "dotproduct" "true" "false" "false"
-append_entry_control "baseline_dotproduct_causal_metacell" "true" "dotproduct" "false" "true" "false"
-append_entry_control "baseline_dotproduct_causal_impute" "true" "dotproduct" "false" "false" "true"
-append_entry_control "baseline_corr_causal_spearman" "true" "spearman"
-
# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
@@ -98,13 +80,13 @@ output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE
-# nextflow run . \
-# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
-# -profile docker \
-# -with-trace \
-# -c src/common/nextflow_helpers/labels_ci.config \
-# -params-file ${param_file}
-# subl resources/results/grn_evaluation_all_ridge/scores.yaml
+nextflow run . \
+ -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
+ -profile docker \
+ -with-trace \
+ -c src/common/nextflow_helpers/labels_ci.config \
+ -params-file ${param_file}
+subl resources/results/grn_evaluation_all_ridge/scores.yaml
# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_inference.git `
diff --git a/src/control_methods/baseline_corr/script.py b/src/control_methods/baseline_corr/script.py
index ec867657f..50b332806 100644
--- a/src/control_methods/baseline_corr/script.py
+++ b/src/control_methods/baseline_corr/script.py
@@ -5,6 +5,7 @@
import scanpy as sc
from tqdm import tqdm
from scipy.stats import spearmanr
+from sklearn.preprocessing import StandardScaler
## VIASH START
par = {
@@ -14,7 +15,8 @@
## VIASH END
-def select_top_links(net, par):
+def process_links(net, par):
+ net = net[net.source!=net.target]
net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
return net
@@ -24,9 +26,11 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
for group in tqdm(np.unique(groups), desc="Processing groups"):
X_sub = X[groups == group, :]
if method == "dotproduct":
- net = X_sub.T.dot(X_sub)
+ X_sub = StandardScaler().fit_transform(X_sub)
+ net = np.dot(X_sub.T, X_sub) / X_sub.shape[0]
elif method == "pearson":
net = np.corrcoef(X_sub.T)
+ # net = pd.DataFrame(X_sub).corr().to_numpy()
net = np.nan_to_num(net, nan=0.0, posinf=0.0, neginf=0.0)
elif method == "spearman":
net = spearmanr(X_sub).statistic
@@ -41,7 +45,7 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
net = net.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net.rename(columns={'index': 'target'}, inplace=True)
- net = select_top_links(net, par)
+ net = process_links(net, par)
net['cell_type'] = group
if i==0:
grn = net
@@ -53,8 +57,7 @@ def create_corr_net(X: np.ndarray, groups: np.ndarray, method="pearson"):
if par['cell_type_specific']==False:
grn.drop(columns=['cell_type'], inplace=True)
grn = grn.groupby(['source', 'target']).mean().reset_index()
- net = select_top_links(net, par)
-
+ grn = process_links(grn, par)
return grn
print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
diff --git a/src/control_methods/positive_control/config.vsh.yaml b/src/control_methods/positive_control/config.vsh.yaml
index 5a3c237c7..38031acb4 100644
--- a/src/control_methods/positive_control/config.vsh.yaml
+++ b/src/control_methods/positive_control/config.vsh.yaml
@@ -13,6 +13,11 @@ functionality:
required: true
direction: input
example: resources_test/grn-benchmark/perturbation_data.h5ad
+ - name: --layer
+ type: string
+ direction: input
+ required: false
+ default: scgen_pearson
resources:
- type: python_script
diff --git a/src/control_methods/positive_control/script.py b/src/control_methods/positive_control/script.py
index 8f1b715ce..c3457a699 100644
--- a/src/control_methods/positive_control/script.py
+++ b/src/control_methods/positive_control/script.py
@@ -42,6 +42,14 @@ def create_positive_control(X: np.ndarray, groups: np.ndarray):
pivoted_net = pivoted_net.rename(columns={'index': 'target'})
pivoted_net = pivoted_net[pivoted_net['weight'] != 0]
+
+
+def process_links(net, par):
+ net = net[net.source!=net.target]
+ net_sorted = net.reindex(net['weight'].abs().sort_values(ascending=False).index)
+ net = net_sorted.head(par['max_n_links']).reset_index(drop=True)
+ return net
+pivoted_net = process_links(pivoted_net, par)
print('Saving')
pivoted_net.to_csv(par["prediction"])
diff --git a/src/exp_analysis/test.sh b/src/exp_analysis/test.sh
index 67cba6c60..36dbed0be 100644
--- a/src/exp_analysis/test.sh
+++ b/src/exp_analysis/test.sh
@@ -1,4 +1,4 @@
viash run src/exp_analysis/config.vsh.yaml -- \
--perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
- --prediction output/baseline_corr.csv \
+ --prediction resources/grn_models/genie3.csv \
diff --git a/src/workflows/run_grn_evaluation/config.vsh.yaml b/src/workflows/run_grn_evaluation/config.vsh.yaml
index 9dd8b6c40..26259f5c1 100644
--- a/src/workflows/run_grn_evaluation/config.vsh.yaml
+++ b/src/workflows/run_grn_evaluation/config.vsh.yaml
@@ -49,30 +49,30 @@ functionality:
required: false
direction: input
default: resources/prior/consensus.json
- - name: --causal
- type: boolean
- required: false
- direction: input
- - name: --corr_method
- type: string
- required: false
- direction: input
- default: pearson
- - name: --cell_type_specific
- type: boolean
- required: false
- direction: input
- default: false
- - name: --metacell
- type: boolean
- required: false
- direction: input
- default: false
- - name: --impute
- type: boolean
- required: false
- direction: input
- default: false
+ # - name: --causal
+ # type: boolean
+ # required: false
+ # direction: input
+ # - name: --corr_method
+ # type: string
+ # required: false
+ # direction: input
+ # default: pearson
+ # - name: --cell_type_specific
+ # type: boolean
+ # required: false
+ # direction: input
+ # default: false
+ # - name: --metacell
+ # type: boolean
+ # required: false
+ # direction: input
+ # default: false
+ # - name: --impute
+ # type: boolean
+ # required: false
+ # direction: input
+ # default: false
- name: Outputs
arguments:
diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf
index 0ed83daf6..928051386 100644
--- a/src/workflows/run_grn_evaluation/main.nf
+++ b/src/workflows/run_grn_evaluation/main.nf
@@ -26,22 +26,22 @@ workflow run_wf {
[id, state + ["_meta": [join_id: id]]]
}
- | positive_control.run(
- runIf: { id, state ->
- state.method_id == 'positive_control'
- },
- fromState: [
- perturbation_data: "perturbation_data",
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all"
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
+ // | positive_control.run(
+ // runIf: { id, state ->
+ // state.method_id == 'positive_control'
+ // },
+ // fromState: [
+ // perturbation_data: "perturbation_data",
+ // multiomics_rna: "multiomics_rna",
+ // layer: "layer",
+ // tf_all: "tf_all"
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
| baseline_corr.run(
runIf: { id, state ->
state.method_id == 'baseline_pearson'
@@ -63,128 +63,22 @@ workflow run_wf {
]
}
)
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific",
- metacell: "metacell",
- impute: "impute"
-
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific",
- metacell: "metacell",
- impute: "impute"
-
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_cell_type'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific",
- metacell: "metacell",
- impute: "impute"
-
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_metacell'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific",
- metacell: "metacell",
- impute: "impute"
-
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
- | baseline_corr.run(
- runIf: { id, state ->
- state.method_id == 'baseline_dotproduct_causal_impute'
- },
- fromState: [
- multiomics_rna: "multiomics_rna",
- layer: "layer",
- tf_all: "tf_all",
- causal: "causal",
- corr_method: "corr_method",
- cell_type_specific: "cell_type_specific",
- metacell: "metacell",
- impute: "impute"
-
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
-
- | negative_control.run(
- runIf: { id, state ->
- state.method_id == 'negative_control'
- },
- fromState: [
- perturbation_data: "perturbation_data",
- multiomics_rna: "multiomics_rna",
- tf_all: "tf_all",
- ],
- toState: {id, output, state ->
- state + [
- prediction: output.prediction
- ]
- }
- )
+ // | negative_control.run(
+ // runIf: { id, state ->
+ // state.method_id == 'negative_control'
+ // },
+ // fromState: [
+ // perturbation_data: "perturbation_data",
+ // multiomics_rna: "multiomics_rna",
+ // tf_all: "tf_all",
+ // ],
+ // toState: {id, output, state ->
+ // state + [
+ // prediction: output.prediction
+ // ]
+ // }
+ // )
// run all metrics
| runEach(
diff --git a/test.sh b/test.sh
index b4665d8d6..fde2fae79 100644
--- a/test.sh
+++ b/test.sh
@@ -1,16 +1,17 @@
-viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
- --tf_all resources/prior/tf_all.csv \
- --causal true \
- --prediction output/baseline_causal.csv
+# viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
+# --tf_all resources/prior/tf_all.csv \
+# --causal true \
+# --prediction output/baseline_causal.csv
-viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
- --tf_all resources/prior/tf_all.csv \
- --prediction output/baseline_causal.csv \
- --score output/score_causal.h5ad
+# viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
+# --tf_all resources/prior/tf_all.csv \
+# --prediction output/baseline_causal.csv \
+# --score output/score_causal.h5ad
viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
--tf_all resources/prior/tf_all.csv \
--causal false \
+ --corr_method pearson \
--prediction output/baseline_noncausal.csv
viash run src/metrics/regression_1/config.vsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \