Skip to content

Commit

Permalink
test resources added
Browse files Browse the repository at this point in the history
  • Loading branch information
matin authored and matin committed Jul 22, 2024
1 parent 1d083dd commit 47efd3d
Show file tree
Hide file tree
Showing 3 changed files with 2,053 additions and 12 deletions.
167 changes: 159 additions & 8 deletions notebooks/create_resources.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import anndata as ad\n",
"import pandas as pd\n",
"\n",
"import numpy as np\n",
"data_dir = '../../perturb-multiomics-grn/output/'\n",
"\n",
Expand Down Expand Up @@ -161,16 +162,122 @@
"adata_bulk.write(f'{resource_dir}/perturbation_data.h5ad')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test resources"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"test_resource_dir = f'{resource_dir}/../../resources_test/grn-benchmark'\n",
"os.makedirs(test_resource_dir, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"adata_rna = ad.read_h5ad(f'{resource_dir}/multiomics_rna.h5ad')\n",
"adata_atac = ad.read_h5ad(f'{resource_dir}/multiomics_atac.h5ad')"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"\n",
"peaks = pd.read_csv(f'{resource_dir}/peak_gene_models/granie.csv').peak.to_numpy()\n",
"hvgs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['hvgs']\n",
"genes_multi = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['gene_names']\n",
"tfs = ad.read_h5ad(f'{resource_dir}/prior_data.h5ad').uns['tf_list']\n",
"genes = set(tfs) & set(genes_multi)\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# shorten rna \n",
"mask = adata_rna.obs.donor_id=='donor_0'\n",
"adata_rna_s = adata_rna[mask]\n",
"random_indices = np.random.choice(adata_rna_s.obs.index, 1000, replace=False)\n",
"adata_rna_s = adata_rna_s[random_indices, adata_rna_s.var_names.isin(genes)]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"View of AnnData object with n_obs × n_vars = 1000 × 4962\n",
" obs: 'cell_type', 'donor_id'"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# shorten atac\n",
"adata_atac_s = adata_atac[adata_atac.obs.index.isin(adata_rna_s.obs.index), adata_atac.var.index.isin(peaks)]\n",
"adata_atac_s"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"adata_rna_s.write(f'{test_resource_dir}/multiomics_rna.h5ad')\n",
"adata_atac_s.write(f'{test_resource_dir}/multiomics_atac.h5ad')\n"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# shorten perturbation\n",
"adata_bulk = ad.read_h5ad(f'{resource_dir}/perturbation_data.h5ad')\n",
"adata_bulk[:200, adata_bulk.var_names.isin(genes)].write(f'{test_resource_dir}/perturbation_data.h5ad')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prior"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## tf names\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -179,7 +286,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -188,13 +295,59 @@
"prior_adata.uns['tf_list'] = tf_list\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## gene names"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"bulk_adata = ad.read_h5ad(f'{resource_dir}/perturbation_data.h5ad')\n",
"prior_adata.uns['gene_names_pert'] = bulk_adata.var_names.to_numpy()\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"prior_adata.write(f'{resource_dir}/prior_data.h5ad')"
"bulk_adata = ad.read_h5ad(f'{resource_dir}/multiomics_rna.h5ad')\n",
"prior_adata.uns['gene_names'] = bulk_adata.var_names.to_numpy()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"bulk_adata = ad.read_h5ad(f'{resource_dir}/multiomics_atac.h5ad')\n",
"prior_adata.uns['peak'] = bulk_adata.var_names.to_numpy()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"prior_adata.uns['hvgs'] = np.loadtxt(f'{resource_dir}/hvgs.txt', dtype=str)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"prior_adata.write(f'{resource_dir}/prior_data.h5ad')\n"
]
},
{
Expand All @@ -209,9 +362,7 @@
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"bulk_adata = ad.read_h5ad(f'{work_dir}/preprocess/bulk_adata_integrated.h5ad')"
]
"source": []
},
{
"cell_type": "code",
Expand Down
Loading

0 comments on commit 47efd3d

Please sign in to comment.