Skip to content

Commit

Permalink
SCENIC+ Dockerfile needs an update
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 17, 2024
1 parent 3dd3948 commit e7a9781
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 39 deletions.
36 changes: 36 additions & 0 deletions runs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3799,6 +3799,42 @@
"!bash scripts/run_grn_evaluation.sh {tag} {reg_type}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import scanpy as sc \n",
"import anndata as ad \n",
"multiomics_rna = ad.read_h5ad('resources/grn-benchmark/multiomics_rna.h5ad')\n",
"multiomics_rna.layers['counts'] = multiomics_rna.X.copy()\n",
"sc.pp.normalize_total(multiomics_rna)\n",
"sc.pp.log1p(multiomics_rna)\n",
"sc.pp.scale(multiomics_rna)\n",
"multiomics_rna.layers['lognorm'] = multiomics_rna.X.copy()\n",
"multiomics_rna.X = multiomics_rna.layers['counts']\n",
"del multiomics_rna.layers['counts']\n",
"multiomics_rna.write('resources/grn-benchmark/multiomics_rna.h5ad')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"multiomics_rna = ad.read_h5ad('resources/grn-benchmark/multiomics_rna_0.h5ad')\n",
"multiomics_rna.layers['counts'] = multiomics_rna.X.copy()\n",
"sc.pp.normalize_total(multiomics_rna)\n",
"sc.pp.log1p(multiomics_rna)\n",
"sc.pp.scale(multiomics_rna)\n",
"multiomics_rna.layers['lognorm'] = multiomics_rna.X.copy()\n",
"multiomics_rna.X = multiomics_rna.layers['counts']\n",
"del multiomics_rna.layers['counts']\n",
"multiomics_rna.write('resources/grn-benchmark/multiomics_rna_0.h5ad')"
]
},
{
"cell_type": "code",
"execution_count": 4,
Expand Down
6 changes: 4 additions & 2 deletions scripts/sbatch/batch_scglue.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#SBATCH --error=logs/%j.err
#SBATCH --mail-type=END
#SBATCH [email protected]
#SBATCH --cpus-per-task=20
#SBATCH --mem=64G
#SBATCH --partition=gpu
#SBATCH --cpus-per-task=20
#SBATCH --gres=gpu:1

singularity exec ../../images/scglue python src/methods/multi_omics/scglue/script.py
singularity exec --nv ../../images/scglue python src/methods/multi_omics/scglue/script.py
6 changes: 4 additions & 2 deletions src/methods/multi_omics/scenicplus/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,13 @@
def process_atac(par):
print("---Run pre-process started ---", flush=True)
# Create one individual ATAC-seq file per donor
adata_atac = anndata.read_h5ad(par['multiomics_atac'])
fragments_dict = {}
for donor_id in unique_donor_ids:
filepath = os.path.join(atac_dir, f'{donor_id}.tsv')
if not os.path.exists(filepath):
print(f'Create tsv file {filepath}')
adata_atac = anndata.read_h5ad(par['multiomics_atac'])

adata_atac.obs.cell_type = [s.replace(' ', '_') for s in adata_atac.obs.cell_type]
adata_atac.obs.donor_id = [s.replace(' ', '_') for s in adata_atac.obs.donor_id]
adata_atac_one_donor = adata_atac[adata_atac.obs.donor_id == donor_id]
Expand All @@ -97,7 +98,7 @@ def process_atac(par):
counts = coo_matrix.data
row_names = adata_atac_one_donor.obs.index[rows]
coord_names = adata_atac_one_donor.var.index[cols]
del adata_atac, adata_atac_one_donor
del adata_atac_one_donor
gc.collect()
d = {
'chromosome': np.asarray([s.split(':')[0] for s in coord_names]),
Expand Down Expand Up @@ -147,6 +148,7 @@ def process_atac(par):
names=['Chromosome', 'End']
)
chromsizes.insert(1, 'Start', 0)


print(f'Start pseudobulking')
os.makedirs(os.path.join(par['temp_dir'], 'consensus_peak_calling'), exist_ok=True)
Expand Down
22 changes: 16 additions & 6 deletions src/methods/multi_omics/scglue/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def run_grn(par):
rna[:, np.union1d(genes, tfs)].write_loom(f"{par['temp_dir']}/rna.loom")
np.savetxt(f"{par['temp_dir']}/tfs.txt", tfs, fmt="%s")

# Construct the command
#Construct the command
command = ['pyscenic', 'grn', f"{par['temp_dir']}/rna.loom",
f"{par['temp_dir']}/tfs.txt", '-o', f"{par['temp_dir']}/draft_grn.csv",
'--seed', '0', '--num_workers', f"{par['num_workers']}",
Expand All @@ -171,7 +171,15 @@ def run_grn(par):
print("Command executed successfully")
else:
print("Command failed with return code", result.returncode)
def create_prior(par):
atac = ad.read_h5ad(f"{par['temp_dir']}/atac-emb.h5ad")
rna = ad.read_h5ad(f"{par['temp_dir']}/rna-emb.h5ad")
motif_bed = scglue.genomics.read_bed(par['motif_file'])
fs = pd.Index(motif_bed["name"]).intersection(rna.var_names)

atac.var["name"] = atac.var_names

peaks = atac.var.index

print("Generate TF cis-regulatory ranking bridged by ATAC peaks", flush=True)
peak_bed = scglue.genomics.Bed(atac.var.loc[peaks])
Expand All @@ -193,6 +201,7 @@ def run_grn(par):
)

### Prepare data for pruning
print("Prepare data for pruning ")

gene2tf_rank_glue.columns = gene2tf_rank_glue.columns + "_glue"
gene2tf_rank_supp.columns = gene2tf_rank_supp.columns + "_supp"
Expand Down Expand Up @@ -298,11 +307,12 @@ def main(par):
rna = ad.read_h5ad(par['multiomics_rna'])
atac = ad.read_h5ad(par['multiomics_atac'])

print('Preprocess data', flush=True)
preprocess(rna, atac, par)
print('Train a model', flush=True)
training(par)
run_grn(par)
# print('Preprocess data', flush=True)
# preprocess(rna, atac, par)
# print('Train a model', flush=True)
# training(par)
# run_grn(par)
create_prior(par)
prune_grn(par)
print('Curate predictions', flush=True)
pruned_grn = pd.read_csv(
Expand Down
42 changes: 13 additions & 29 deletions src/methods/single_omics/grnboost2/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from distributed import Client, LocalCluster
from tqdm import tqdm
import subprocess
import scanpy as sc




Expand All @@ -28,10 +26,7 @@ def process_links(net, par):
return net
# Load scRNA-seq data
adata_rna = anndata.read_h5ad(par['multiomics_rna'])
print('Noramlize data')
sc.pp.normalize_total(adata_rna)
sc.pp.log1p(adata_rna)
sc.pp.scale(adata_rna)
adata_rna.X = adata_rna.layers['lognorm'] #TODO: fix this
groups = adata_rna.obs.cell_type
gene_names = adata_rna.var.gene_ids.index.to_numpy()
X = adata_rna.X
Expand All @@ -40,22 +35,21 @@ def process_links(net, par):
tfs = np.loadtxt(par["tf_all"], dtype=str)
tf_names = [gene_name for gene_name in gene_names if (gene_name in tfs)]

# GRN inference
client = Client(processes=False)

def infer_grn(X, par):
print("Infer grn", flush=True)

network = grnboost2(X, client_or_address=client, gene_names=gene_names, tf_names=tf_names)
network.rename(columns={'TF': 'source', 'target': 'target', 'importance': 'weight'}, inplace=True)
network.reset_index(drop=True, inplace=True)
network = process_links(network, par)

return network


if par['cell_type_specific']:
# GRN inference
client = Client(processes=False)

def infer_grn(X, par):
print("Infer grn", flush=True)

network = grnboost2(X, client_or_address=client, gene_names=gene_names, tf_names=tf_names)
network.rename(columns={'TF': 'source', 'target': 'target', 'importance': 'weight'}, inplace=True)
network.reset_index(drop=True, inplace=True)
network = process_links(network, par)

return network
i = 0
for group in tqdm(np.unique(groups), desc="Processing groups"):
X_sub = X[groups == group, :]
Expand All @@ -67,17 +61,7 @@ def infer_grn(X, par):
grn = pd.concat([grn, net], axis=0).reset_index(drop=True)
i += 1
else:
local_cluster = LocalCluster(n_workers=10,
threads_per_worker=1)

custom_client = Client(local_cluster)
network = grnboost2(X, client_or_address=custom_client, gene_names=gene_names, tf_names=tf_names)
network.rename(columns={'TF': 'source', 'target': 'target', 'importance': 'weight'}, inplace=True)
network.reset_index(drop=True, inplace=True)
grn = process_links(network, par)
custom_client.close()
local_cluster.close()

grn = infer_grn(X, par)

# Save inferred GRN
grn.to_csv(par['prediction'], sep=',')
Expand Down

0 comments on commit e7a9781

Please sign in to comment.