From aa3da45ad18f1c24ee24aee91bac4f2295acf4b0 Mon Sep 17 00:00:00 2001 From: Jalil Nourisa Date: Tue, 17 Sep 2024 08:14:23 +0200 Subject: [PATCH] reg1 fixed --- runs.ipynb | 93 +++++++++++++------ scripts/run_grn_evaluation.sh | 50 +++++----- src/metrics/regression_1/script.py | 6 +- .../run_grn_evaluation/config.vsh.yaml | 3 - 4 files changed, 91 insertions(+), 61 deletions(-) diff --git a/runs.ipynb b/runs.ipynb index fe24e8384..fec7f1843 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -3816,64 +3816,104 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "tag = \"celltype_donor_0_subset_ridge\"\n", + "!aws s3 sync s3://openproblems-data/resources/grn/results/{tag} resources/results/{tag}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 ex(False)_tf(-1)ex(True)_tf(-1)static-theta-0.0static-theta-0.5Rankstatic-theta-0.0static-theta-0.5Rank
grnboost20.2656090.4032310.8310480.5883931grnboost20.4784480.5848241
portia0.0146770.0353050.5804740.5352892positive_control0.4069630.5962132
portia0.3255340.4927254
pearson_causal0.3370340.5439063
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 42, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "\n", + "base_folder = f'resources/results/{tag}'\n", "models_all = ['negative_control', 'baseline_pearson', \n", " 'baseline_pearson_causal', 'baseline_pearson_causal_celltype', \n", " 'baseline_pearson_causal_metacell', 'positive_control', 'collectri','granie', 'figr', 'celloracle', \n", @@ -3903,8 +3943,7 @@ " return df_reg\n", "import yaml\n", "import pandas as pd\n", - "# base_folder = 'resources/results/grn_evaluation_all_ridge/'\n", - "base_folder = 'resources/results/single_omics_inference'\n", + "\n", "\n", "result_file = f'{base_folder}/scores.yaml'\n", "with open(result_file, 'r') as file:\n", @@ -3915,7 +3954,7 @@ "else:\n", " df_reg1 = extract_data(data, reg='reg1').drop(columns=['Mean'])\n", " df_reg2 = extract_data(data, reg='reg2').drop(columns=['Mean'])\n", - "df_all = pd.concat([df_reg1,df_reg2], axis=1).fillna(0)\n", + "df_all = pd.concat([df_reg1, df_reg2], axis=1).fillna(0)\n", "df_all[df_all<0]=0\n", "df_all_n = (df_all-df_all.min(axis=0))/(df_all.max(axis=0)-df_all.min(axis=0))\n", "df_all['Rank'] = df_all_n.mean(axis=1).rank(ascending=False).astype(int)\n", diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh index f975f0922..52a15aad3 100644 --- a/scripts/run_grn_evaluation.sh +++ b/scripts/run_grn_evaluation.sh @@ -5,11 +5,11 @@ # viash ns build --parallel reg_type=ridge -RUN_ID="grn_evaluation_all_${reg_type}" +RUN_ID="celltype_donor_0_subset_${reg_type}" resources_dir="s3://openproblems-data/resources/grn" # resources_dir="./resources" publish_dir="${resources_dir}/results/${RUN_ID}" -grn_models_folder="${resources_dir}/grn_models" +grn_models_folder="${resources_dir}/grn_models/donor_0_celltype" subsample=-2 max_workers=10 @@ -19,26 +19,20 @@ metric_ids="[regression_1, regression_2]" param_file="./params/${RUN_ID}.yaml" grn_names=( - "scglue" - "scenicplus" - "celloracle" - "granie" - "figr" - "collectri" - "genie3" + # "scglue" + # "scenicplus" + # "celloracle" + # "granie" + # "figr" + # "collectri" + # "genie3" "grnboost2" - "ppcor" + # "ppcor" "portia" + "positive_control" + "pearson_causal" ) -baseline_models=( - baseline_pearson - baseline_pearson_causal - baseline_pearson_causal_celltype - baseline_pearson_causal_metacell - baseline_pearson_causal_impute - positive_control - ) # Start writing to the YAML file cat > $param_file << HERE param_list: @@ -57,23 +51,15 @@ append_entry() { tf_all: ${resources_dir}/prior/tf_all.csv layer: ${layer} consensus: ${resources_dir}/prior/consensus-num-regulators.json - prediction: ${2}/$1.csv + prediction: ${grn_models_folder}/$1.csv HERE } - -folder=${grn_models_folder} # Loop through grn_names and layers for grn_name in "${grn_names[@]}"; do - append_entry "$grn_name" "$folder" + append_entry "$grn_name" done -folder=${grn_models_folder}/baselines -for grn_name in "${baseline_models[@]}"; do - append_entry "$grn_name" "$folder" -done - - # Append the remaining output_state and publish_dir to the YAML file cat >> $param_file << HERE output_state: "state.yaml" @@ -99,3 +85,11 @@ HERE # --config src/common/nextflow_helpers/labels_tw.config +./tw launch https://github.com/openproblems-bio/task_grn_inference \ + --revision build/main \ + --pull-latest \ + --main-script target/nextflow/workflows/run_grn_evaluation/main.nf \ + --workspace 53907369739130 \ + --compute-env 6TeIFgV5OY4pJCk8I0bfOh \ + --params-file ${param_file} \ + --config src/common/nextflow_helpers/labels_tw.config diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py index a4196072b..ee7947e7d 100644 --- a/src/metrics/regression_1/script.py +++ b/src/metrics/regression_1/script.py @@ -19,9 +19,9 @@ 'max_workers': 4, } ## VIASH END -meta = { - "resources_dir":'src/metrics/regression_1/' -} +# meta = { +# "resources_dir":'src/metrics/regression_1/' +# } sys.path.append(meta["resources_dir"]) from main import main diff --git a/src/workflows/run_grn_evaluation/config.vsh.yaml b/src/workflows/run_grn_evaluation/config.vsh.yaml index d0ee881bf..2b1d7e3f8 100644 --- a/src/workflows/run_grn_evaluation/config.vsh.yaml +++ b/src/workflows/run_grn_evaluation/config.vsh.yaml @@ -12,9 +12,6 @@ functionality: - name: --perturbation_data type: file direction: input - - name: --multiomics_rna - type: file - direction: input - name: --layer type: string direction: input