Fixed bug in the run_grn_evaluation workflow

openproblems-bio · Aug 11, 2024 · ba24f2e · ba24f2e
1 parent 1a657f7
commit ba24f2e
Show file tree

Hide file tree

Showing 12 changed files with 323 additions and 33 deletions.
diff --git a/params/celloracle_test.yaml b/params/celloracle_test.yaml
@@ -5,4 +5,4 @@ param_list:
     num_workers: 20
     temp_dir: ./tmp/celloracle
 output_state: "state.yaml"
-publish_dir: "s3://openproblems-data/resources/grn/results/celloracle_test"
+publish_dir: "./output/celloracle_test"
diff --git a/params/process_perturbation.yaml b/params/process_perturbation.yaml
@@ -0,0 +1,6 @@
+param_list:  # one mapping per parameter set
+  - id: test_process_perturatbion  # NOTE(review): "perturatbion" looks like a typo of "perturbation" — confirm before renaming
+    perturbation_counts: s3://openproblems-data/resources/grn/datasets_raw/perturbation_counts.h5ad
+
+output_state: "state.yaml"
+publish_dir: "s3://openproblems-data/resources/grn/results/process_perturbation"
diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml
@@ -0,0 +1,273 @@
+param_list:  # matches the output of scripts/run_grn_evaluation_tw.sh with RUN_ID="subsample_200_ridge"
+  - id: pearson_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+output_state: "state.yaml"
+publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge"
diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh
@@ -4,14 +4,15 @@
 
 
 RUN_ID="subsample_200_ridge"
-resources_dir="s3://openproblems-data/resources/grn/"
+resources_dir="s3://openproblems-data/resources/grn"
 # resources_dir="resources/"
-
 publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
 reg_type=ridge
 subsample=200
 max_workers=20
 
+param_file="./params/${RUN_ID}.yaml"
+
 grn_names=(
     "celloracle"
     "scenicplus"
@@ -22,14 +23,14 @@ grn_names=(
 layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")
 
 # Start writing to the YAML file
-cat > ./params/params_${RUN_ID}.yaml << HERE
+cat > $param_file << HERE
 param_list:
 HERE
 
 # Nested loops to iterate over grn_names and layers
 for grn_name in "${grn_names[@]}"; do
   for layer in "${layers[@]}"; do
-    cat >> ./params/params_${RUN_ID}.yaml << HERE
+    cat >> $param_file << HERE
   - id: ${layer}_${grn_name}
     perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
     layer: ${layer}
@@ -46,7 +47,7 @@ done
 
 # append negative control
 # grn_name="negative_control"
-# cat >> ./params/params_${RUN_ID}.yaml << HERE
+# cat >> $param_file << HERE
 #   - id: ${layer}_${grn_name}
 #     perturbation_data: ${perturbation_data}
 #     layer: ${layer}
@@ -61,7 +62,7 @@ done
 # # append the positive controls
 # grn_name="positive_control"
 # for layer in "${layers[@]}"; do
-#   cat >> ./params/params_${RUN_ID}.yaml << HERE
+#   cat >> $param_file << HERE
 #   - id: ${layer}_${grn_name}
 #     perturbation_data: ${perturbation_data}
 #     layer: ${layer}
@@ -75,18 +76,25 @@ done
 # done
 
 # Append the remaining output_state and publish_dir to the YAML file
-cat >> ./params/params_${RUN_ID}.yaml << HERE
+cat >> $param_file << HERE
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
 HERE
 
 
-# nextflow run . \
-#   -main-script  target/nextflow/workflows/run_grn_evaluation/main.nf \
-#   -profile docker \
-#   -with-trace \
-#   -c src/common/nextflow_helpers/labels_ci.config \
-#    -params-file ./params/params_${RUN_ID}.yaml
-
-
-# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_benchmark.git --revision build/main --pull-latest --main-script target/nextflow/workflows/run_grn_evaluation/main.nf --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh --params-file ./params/params.yaml --config src/common/nextflow_helpers/labels_tw.config
+nextflow run . \
+  -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
+  -profile docker \
+  -with-trace \
+  -c src/common/nextflow_helpers/labels_ci.config \
+  -params-file "${param_file}"  # parameter file written earlier in this script; quoted to prevent word splitting
+
+# ./tw-windows-x86_64.exe launch `
+#     https://github.com/openproblems-bio/task_grn_benchmark.git `
+#     --revision build/main `
+#     --pull-latest `
+#     --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
+#     --workspace 53907369739130 `
+#     --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
+#     --params-file ./params/subsample_200_ridge.yaml `
+#     --config src/common/nextflow_helpers/labels_tw.config
diff --git a/scripts/run_grn_inference.sh b/scripts/run_grn_inference.sh
@@ -5,8 +5,10 @@
 
 RUN_ID="celloracle_test"
 # resources_dir="s3://openproblems-data/resources_test/grn/"
+# publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
+
 resources_dir="./resources_test"
-publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
+publish_dir="./output/${RUN_ID}"
 num_workers=20