Skip to content

Commit

Permalink
bug in workflow run grn evaluation fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 11, 2024
1 parent 1a657f7 commit ba24f2e
Show file tree
Hide file tree
Showing 12 changed files with 323 additions and 33 deletions.
2 changes: 1 addition & 1 deletion params/celloracle_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ param_list:
num_workers: 20
temp_dir: ./tmp/celloracle
output_state: "state.yaml"
publish_dir: "s3://openproblems-data/resources/grn/results/celloracle_test"
publish_dir: "./output/celloracle_test"
6 changes: 6 additions & 0 deletions params/process_perturbation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Parameter set for a single process_perturbation run.
param_list:
  # NOTE(review): the id is spelled "perturatbion" (sic). It is left untouched
  # in case existing outputs or state files are keyed on it; confirm before
  # renaming.
  - id: test_process_perturatbion
    # Input .h5ad file read from S3. The value must be the bare URI: a
    # trailing comma is not a separator here, it becomes part of the plain
    # scalar and the resulting path does not exist.
    perturbation_counts: s3://openproblems-data/resources/grn/datasets_raw/perturbation_counts.h5ad

# Settings shared by every entry in param_list.
output_state: "state.yaml"
publish_dir: "s3://openproblems-data/resources/grn/results/process_perturbation"
273 changes: 273 additions & 0 deletions params/subsample_200_ridge.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# Parameter sets for the "subsample_200_ridge" GRN evaluation run.
#
# One entry per (GRN model, layer) pair: 5 models x 6 layers = 30 entries.
# Every entry uses reg_type "ridge", subsample 200 and max_workers 20; only
# `id`, `layer`, `prediction` and `method_id` vary.
#
# NOTE(review): this layout matches what scripts/run_grn_evaluation_tw.sh
# writes for RUN_ID="subsample_200_ridge" -- presumably the file is generated;
# confirm before hand-editing.
param_list:
  # --- celloracle -----------------------------------------------------------
  - id: pearson_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  - id: lognorm_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  - id: scgen_pearson_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  - id: scgen_lognorm_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  - id: seurat_pearson_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  - id: seurat_lognorm_celloracle
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/celloracle.csv"
    reg_type: ridge
    method_id: celloracle
    subsample: 200
    max_workers: 20

  # --- scenicplus -----------------------------------------------------------
  - id: pearson_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  - id: lognorm_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  - id: scgen_pearson_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  - id: scgen_lognorm_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  - id: seurat_pearson_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  - id: seurat_lognorm_scenicplus
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scenicplus.csv"
    reg_type: ridge
    method_id: scenicplus
    subsample: 200
    max_workers: 20

  # --- figr -----------------------------------------------------------------
  - id: pearson_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  - id: lognorm_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  - id: scgen_pearson_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  - id: scgen_lognorm_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  - id: seurat_pearson_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  - id: seurat_lognorm_figr
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/figr.csv"
    reg_type: ridge
    method_id: figr
    subsample: 200
    max_workers: 20

  # --- granie ---------------------------------------------------------------
  - id: pearson_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  - id: lognorm_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  - id: scgen_pearson_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  - id: scgen_lognorm_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  - id: seurat_pearson_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  - id: seurat_lognorm_granie
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/granie.csv"
    reg_type: ridge
    method_id: granie
    subsample: 200
    max_workers: 20

  # --- scglue ---------------------------------------------------------------
  - id: pearson_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

  - id: lognorm_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

  - id: scgen_pearson_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

  - id: scgen_lognorm_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: scgen_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

  - id: seurat_pearson_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_pearson
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

  - id: seurat_lognorm_scglue
    perturbation_data: "s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad"
    layer: seurat_lognorm
    prediction: "s3://openproblems-data/resources/grn/grn_models/scglue.csv"
    reg_type: ridge
    method_id: scglue
    subsample: 200
    max_workers: 20

# Settings shared by every entry in param_list.
output_state: "state.yaml"
publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge"
40 changes: 24 additions & 16 deletions scripts/run_grn_evaluation_tw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@


RUN_ID="subsample_200_ridge"
resources_dir="s3://openproblems-data/resources/grn/"
resources_dir="s3://openproblems-data/resources/grn"
# resources_dir="resources/"

publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
reg_type=ridge
subsample=200
max_workers=20

param_file="./params/${RUN_ID}.yaml"

grn_names=(
"celloracle"
"scenicplus"
Expand All @@ -22,14 +23,14 @@ grn_names=(
layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")

# Start writing to the YAML file
cat > ./params/params_${RUN_ID}.yaml << HERE
cat > $param_file << HERE
param_list:
HERE

# Nested loops to iterate over grn_names and layers
for grn_name in "${grn_names[@]}"; do
for layer in "${layers[@]}"; do
cat >> ./params/params_${RUN_ID}.yaml << HERE
cat >> $param_file << HERE
- id: ${layer}_${grn_name}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
layer: ${layer}
Expand All @@ -46,7 +47,7 @@ done

# append negative control
# grn_name="negative_control"
# cat >> ./params/params_${RUN_ID}.yaml << HERE
# cat >> $param_file << HERE
# - id: ${layer}_${grn_name}
# perturbation_data: ${perturbation_data}
# layer: ${layer}
Expand All @@ -61,7 +62,7 @@ done
# # append the positive controls
# grn_name="positive_control"
# for layer in "${layers[@]}"; do
# cat >> ./params/params_${RUN_ID}.yaml << HERE
# cat >> $param_file << HERE
# - id: ${layer}_${grn_name}
# perturbation_data: ${perturbation_data}
# layer: ${layer}
Expand All @@ -75,18 +76,25 @@ done
# done

# Append the remaining output_state and publish_dir to the YAML file
cat >> ./params/params_${RUN_ID}.yaml << HERE
cat >> $param_file << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE


# nextflow run . \
# -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ./params/params_${RUN_ID}.yaml


# ./tw-windows-x86_64.exe launch https://github.com/openproblems-bio/task_grn_benchmark.git --revision build/main --pull-latest --main-script target/nextflow/workflows/run_grn_evaluation/main.nf --workspace 53907369739130 --compute-env 6TeIFgV5OY4pJCk8I0bfOh --params-file ./params/params.yaml --config src/common/nextflow_helpers/labels_tw.config
nextflow run . \
-main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_benchmark.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/subsample_200_ridge.yaml `
# --config src/common/nextflow_helpers/labels_tw.config
4 changes: 3 additions & 1 deletion scripts/run_grn_inference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

RUN_ID="celloracle_test"
# resources_dir="s3://openproblems-data/resources_test/grn/"
# publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"

resources_dir="./resources_test"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
publish_dir="./output/${RUN_ID}"
num_workers=20


Expand Down
Loading

0 comments on commit ba24f2e

Please sign in to comment.