Skip to content

Commit

Permalink
baseline models are structured. util added
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 19, 2024
1 parent 79a6f65 commit 928aaaf
Show file tree
Hide file tree
Showing 11 changed files with 152 additions and 339 deletions.
Binary file added AWSCLIV2.pkg
Binary file not shown.
95 changes: 91 additions & 4 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,28 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting PyYAML\n",
" Downloading PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (2.1 kB)\n",
"Downloading PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl (171 kB)\n",
"Installing collected packages: PyYAML\n",
"Successfully installed PyYAML-6.0.2\n"
]
}
],
"source": [
"%pip install PyYAML==6.0.2"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -110,9 +131,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"AnnData object with n_obs × n_vars = 8133 × 22787\n",
" obs: 'cell_type', 'donor_id'\n",
" var: 'gene_ids', 'interval', 'mean', 'std'\n",
" uns: 'log1p'\n",
" layers: 'lognorm'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
Expand All @@ -126,7 +162,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Baseline scores on donor 0. Normalized, non cell type specific"
"### Baseline scores on donor 0. Normalized, non-cell-type-specific, full"
]
},
{
Expand Down Expand Up @@ -357,6 +393,57 @@
"!aws s3 sync resources/grn-benchmark s3://openproblems-data/resources/grn/grn-benchmark --delete"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"delete: resources/grn-benchmark/multiomics_rna_qc.h5ad\n",
"download: s3://openproblems-data/resources/grn/grn-benchmark/multiomics_rna.h5ad to resources/grn-benchmark/multiomics_rna.h5ad\n",
"download: s3://openproblems-data/resources/grn/grn-benchmark/multiomics_rna_0.h5ad to resources/grn-benchmark/multiomics_rna_0.h5ad\n"
]
}
],
"source": [
"!aws s3 sync s3://openproblems-data/resources/grn/grn-benchmark resources/grn-benchmark --delete"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# To be categorized"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if par['metacell']:\n",
"    print('metacell')\n",
"    group_keys = ['cell_type', 'donor_id']\n",
"\n",
"    def create_meta_cells(df, n_cells=15):\n",
"        # Sum each run of n_cells consecutive rows of df into one meta cell;\n",
"        # the final run of a frame may contain fewer than n_cells rows.\n",
"        meta_x = []\n",
"        for i in range(0, df.shape[0], n_cells):\n",
"            meta_x.append(df.iloc[i:i+n_cells, :].sum(axis=0).values)\n",
"        return pd.DataFrame(meta_x, columns=df.columns)\n",
"\n",
"    # NOTE(review): .todense() assumes X is a scipy sparse matrix - confirm for other inputs.\n",
"    adata_df = pd.DataFrame(multiomics_rna.X.todense(), columns=multiomics_rna.var_names)\n",
"    for key in group_keys:\n",
"        adata_df[key] = multiomics_rna.obs[key].values\n",
"\n",
"    # Pool the expression columns only. The label columns must be dropped before\n",
"    # summing: they are not numeric, and leaving them in would also put them into X\n",
"    # and make them collide with the group keys when obs is built.\n",
"    meta_frames, meta_labels = [], []\n",
"    for labels, group in adata_df.groupby(group_keys, observed=True):\n",
"        meta = create_meta_cells(group.drop(columns=group_keys))\n",
"        meta_frames.append(meta)\n",
"        # one (cell_type, donor_id) label pair per meta cell produced for this group\n",
"        meta_labels.extend([labels] * meta.shape[0])\n",
"\n",
"    X = pd.concat(meta_frames, ignore_index=True).values\n",
"    var = pd.DataFrame(index=multiomics_rna.var_names)\n",
"    obs = pd.DataFrame(meta_labels, columns=group_keys)\n",
"    multiomics_rna = ad.AnnData(X=X, obs=obs, var=var)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
5 changes: 1 addition & 4 deletions src/api/comp_method.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,8 @@ functionality:
default: false
description: subset rna seq data to only 7000 hvgs to reduce dimensionality




resources:
- path: ../utils/util.py
- path: /src/utils/util.py

test_resources:
- type: python_script
Expand Down
45 changes: 0 additions & 45 deletions src/control_methods/baseline_corr/config.vsh.yaml

This file was deleted.

118 changes: 0 additions & 118 deletions src/control_methods/baseline_corr/script.py

This file was deleted.

5 changes: 0 additions & 5 deletions src/control_methods/baseline_corr/test.sh

This file was deleted.

29 changes: 5 additions & 24 deletions src/control_methods/pearson/script.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
import os
import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
from tqdm import tqdm
from scipy.stats import spearmanr
from sklearn.preprocessing import StandardScaler
from utils.util import process_data, create_corr_net

## VIASH START
par = {
'multiomics_rna': 'resources/grn-benchmark/multiomics_rna_0.h5ad',
'tf_all': 'resources/prior/tf_all.csv',
'causal': False,
'cell_type_specific': True,
'max_n_links': 50000,
'prediction': 'resources/grn_models/donor_0_default/pearson.csv',
Expand All @@ -22,20 +12,11 @@
}
## VIASH END
print(par)
import sys
sys.path.append('./src/utils')
from util import create_corr_net

print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
process_data(multiomics_rna)

gene_names = multiomics_rna.var_names.to_numpy()
tf_all = np.loadtxt(par['tf_all'], dtype=str)
groups = multiomics_rna.obs.cell_type
tf_all = np.intersect1d(tf_all, gene_names)
n_tfs = len(tf_all)


print('Create corr net')
net = create_corr_net(multiomics_rna.X, multiomics_rna.var_names, groups, par, tf_all=None)

par['causal'] = False
net = create_corr_net(par)
print('Output GRN')
net.to_csv(par['prediction'])
5 changes: 2 additions & 3 deletions src/control_methods/pearson/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
viash run src/control_methods/baseline_corr/config.vsh.yaml -- \
viash run src/control_methods/pearson/config.vsh.yaml -- \
--prediction output/baseline_corr.csv \
--multiomics_rna resources/grn-benchmark/multiomics_rna.h5ad \
--tf_all resources/prior/tf_all.csv \
--causal true
--tf_all resources/prior/tf_all.csv
Loading

0 comments on commit 928aaaf

Please sign in to comment.