From b5a6f0adfe22c99bac27bab6b06b54852b82dec3 Mon Sep 17 00:00:00 2001 From: jalil Date: Fri, 13 Sep 2024 21:38:43 +0200 Subject: [PATCH] onedaytime try --- scripts/run_baselines.sh | 92 ++++++++++--------- scripts/run_benchmark_single_omics.sh | 2 +- src/control_methods/baseline_corr/script.py | 30 ++---- src/exp_analysis/test.sh | 2 +- .../single_omics/ennet/config.vsh.yaml | 2 +- src/methods/single_omics/pidc/config.vsh.yaml | 2 +- .../single_omics/scsgl/config.vsh.yaml | 4 +- .../single_omics/tigress/config.vsh.yaml | 2 +- src/metrics/regression_1/test.sh | 2 +- .../config.vsh.yaml | 2 +- 10 files changed, 66 insertions(+), 74 deletions(-) diff --git a/scripts/run_baselines.sh b/scripts/run_baselines.sh index 6108cc35e..d6c7a69a4 100644 --- a/scripts/run_baselines.sh +++ b/scripts/run_baselines.sh @@ -1,53 +1,63 @@ -echo "negative control" -viash run src/control_methods/negative_control/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ - --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ - --tf_all resources/prior/tf_all.csv \ - --prediction resources/grn_models/baselines/negative_control.csv +# echo "negative control" +# viash run src/control_methods/negative_control/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --prediction resources/grn_models/baselines/negative_control.csv -echo "baseline pearson" -viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ - --tf_all resources/prior/tf_all.csv \ - --causal false \ - --corr_method pearson \ - --cell_type_specific false \ - --metacell false \ - --impute false \ - --prediction resources/grn_models/baselines/baseline_pearson.csv +# echo "baseline pearson" +# viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --causal false \ +# --corr_method pearson \ +# --cell_type_specific false \ +# --metacell false \ +# --impute false \ +# --prediction resources/grn_models/baselines/baseline_pearson.csv -echo "baseline pearson causal" -viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ - --tf_all resources/prior/tf_all.csv \ - --causal true \ - --corr_method pearson \ - --cell_type_specific false \ - --metacell false \ - --impute false \ - --prediction resources/grn_models/baselines/baseline_pearson_causal.csv +# echo "baseline pearson causal" +# viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --causal true \ +# --corr_method pearson \ +# --cell_type_specific false \ +# --metacell false \ +# --impute false \ +# --prediction resources/grn_models/baselines/baseline_pearson_causal.csv -echo "baseline causal cell type" -viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ - --tf_all resources/prior/tf_all.csv \ - --causal true \ - --corr_method pearson \ - --cell_type_specific true \ - --metacell false \ - --impute false \ - --prediction resources/grn_models/baselines/baseline_pearson_causal_celltype.csv +# echo "baseline causal cell type" +# viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --causal true \ +# --corr_method pearson \ +# --cell_type_specific true \ +# --metacell false \ +# --impute false \ +# --prediction resources/grn_models/baselines/baseline_pearson_causal_celltype.csv + +# echo "baseline pearson causal metacell" +# viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --causal true \ +# --corr_method pearson \ +# --cell_type_specific false \ +# --metacell true \ +# --impute false \ +# --prediction resources/grn_models/baselines/baseline_pearson_causal_metacell.csv -echo "baseline pearson causal metacell" +echo "baseline pearson causal imputation" viash run src/control_methods/baseline_corr/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ --tf_all resources/prior/tf_all.csv \ --causal true \ --corr_method pearson \ --cell_type_specific false \ - --metacell true \ - --impute false \ - --prediction resources/grn_models/baselines/baseline_pearson_causal_metacell.csv + --metacell false \ + --impute true \ + --prediction resources/grn_models/baselines/baseline_pearson_causal_impute.csv -echo "positive control" -viash run src/control_methods/positive_control/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ - --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ - --tf_all resources/prior/tf_all.csv \ - --prediction resources/grn_models/baselines/positive_control.csv \ No newline at end of file +# echo "positive control" +# viash run src/control_methods/positive_control/config.vsh.yaml -- --multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \ +# --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ +# --tf_all resources/prior/tf_all.csv \ +# --prediction resources/grn_models/baselines/positive_control.csv \ No newline at end of file diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh index b13dbb62c..3b8e39d51 100644 --- a/scripts/run_benchmark_single_omics.sh +++ b/scripts/run_benchmark_single_omics.sh @@ -3,7 +3,7 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" RUN_ID="single_omics_inference" # resources_dir="./resources_test/" -resources_dir="s3://openproblems-data/resources/grn" +resources_dir="s3://openproblems-data/resources_test/grn" publish_dir="${resources_dir}/results/${RUN_ID}" diff --git a/src/control_methods/baseline_corr/script.py b/src/control_methods/baseline_corr/script.py index 258d04924..0be6e540f 100644 --- a/src/control_methods/baseline_corr/script.py +++ b/src/control_methods/baseline_corr/script.py @@ -93,34 +93,16 @@ def create_meta_cells(df, n_cells=15): if par['impute']: print("imputing") - # import magic - # import scprep + import magic + import scprep - # magic_operator = magic.MAGIC() + magic_operator = magic.MAGIC() - # multiomics_rna = magic_operator.fit_transform(multiomics_rna) - from sklearn.impute import KNNImputer - import numpy as np - - print("Imputing with KNN") - - # Convert to dense if the matrix is sparse - if sc.sparse.issparse(multiomics_rna.X): - multiomics_rna_dense = multiomics_rna.X.toarray() - else: - multiomics_rna_dense = multiomics_rna.X - - # Apply KNN imputation - knn_imputer = KNNImputer(n_neighbors=5) # You can adjust the number of neighbors - multiomics_rna_imputed = knn_imputer.fit_transform(multiomics_rna_dense) - - # Update the AnnData object with the imputed values - multiomics_rna.X = multiomics_rna_imputed - print('zero ration: ', (multiomics_rna.X==0).sum()/multiomics_rna.size) + multiomics_rna = magic_operator.fit_transform(multiomics_rna) + + print('zero ration: ', (multiomics_rna.X==0).sum()/multiomics_rna.X.size) print('Create corr net') net = create_corr_net(multiomics_rna.X, groups, par['corr_method']) - - print('Output GRN') net.to_csv(par['prediction']) diff --git a/src/exp_analysis/test.sh b/src/exp_analysis/test.sh index 36dbed0be..004b91d18 100644 --- a/src/exp_analysis/test.sh +++ b/src/exp_analysis/test.sh @@ -1,4 +1,4 @@ viash run src/exp_analysis/config.vsh.yaml -- \ --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ - --prediction resources/grn_models/genie3.csv \ + --prediction resources/grn_models/baselines/positive_control.csv \ diff --git a/src/methods/single_omics/ennet/config.vsh.yaml b/src/methods/single_omics/ennet/config.vsh.yaml index d8dc49a99..e799e62e0 100644 --- a/src/methods/single_omics/ennet/config.vsh.yaml +++ b/src/methods/single_omics/ennet/config.vsh.yaml @@ -34,4 +34,4 @@ platforms: - type: native - type: nextflow directives: - label: [onedaytime,midmem,midcpu] + label: [onedaytime, highmem, midcpu] diff --git a/src/methods/single_omics/pidc/config.vsh.yaml b/src/methods/single_omics/pidc/config.vsh.yaml index 4923edbe6..a82483795 100644 --- a/src/methods/single_omics/pidc/config.vsh.yaml +++ b/src/methods/single_omics/pidc/config.vsh.yaml @@ -22,4 +22,4 @@ platforms: - type: native - type: nextflow directives: - label: [onedaytime, midmem,midcpu] + label: [onedaytime, highmem,midcpu] diff --git a/src/methods/single_omics/scsgl/config.vsh.yaml b/src/methods/single_omics/scsgl/config.vsh.yaml index 467c7bdac..ff7dd66b8 100644 --- a/src/methods/single_omics/scsgl/config.vsh.yaml +++ b/src/methods/single_omics/scsgl/config.vsh.yaml @@ -18,8 +18,8 @@ platforms: image: apassemi/scsgl setup: - type: python - packages: [ anndata, numba==0.53.1, scipy==1.6.3, pandas==1.2.4, rpy2==3.4.4, numpy==1.20.2, scikit-learn==0.24.1, PyYAML==6.0.2 ] + packages: [ anndata, numba==0.53.1, scipy==1.6.3, pandas==1.2.4, rpy2==3.4.4, numpy==1.20.2, scikit-learn==0.24.1, PyYAML==6.0.2, pysrc3==0.1.3 ] - type: native - type: nextflow directives: - label: [onedaytime, midmem,midcpu] + label: [onedaytime, highmem,midcpu] diff --git a/src/methods/single_omics/tigress/config.vsh.yaml b/src/methods/single_omics/tigress/config.vsh.yaml index b4d53854b..b9506cdbf 100644 --- a/src/methods/single_omics/tigress/config.vsh.yaml +++ b/src/methods/single_omics/tigress/config.vsh.yaml @@ -30,4 +30,4 @@ platforms: - type: native - type: nextflow directives: - label: [onedaytime, midmem, highcpu] + label: [onedaytime, highmem, highcpu] diff --git a/src/metrics/regression_1/test.sh b/src/metrics/regression_1/test.sh index 80550ae03..e793c500e 100644 --- a/src/metrics/regression_1/test.sh +++ b/src/metrics/regression_1/test.sh @@ -1,5 +1,5 @@ viash run src/metrics/regression_1/config.vsh.yaml -- \ --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \ --tf_all resources/prior/tf_all.csv \ - --prediction output/baseline_corr.csv \ + --prediction resources/grn_models/baselines/baseline_pearson_causal_impute.csv \ --score output/score.h5ad \ No newline at end of file diff --git a/src/workflows/run_benchmark_single_omics/config.vsh.yaml b/src/workflows/run_benchmark_single_omics/config.vsh.yaml index d31d923da..aa6261341 100644 --- a/src/workflows/run_benchmark_single_omics/config.vsh.yaml +++ b/src/workflows/run_benchmark_single_omics/config.vsh.yaml @@ -101,4 +101,4 @@ functionality: platforms: - type: nextflow directives: - label: [ threedaystime, midmem, highcpu, gpu] + label: [ onedaytime, highmem, highcpu, gpu]