From 688770beaec6d025b697ee4e3fae2531bfbf95a0 Mon Sep 17 00:00:00 2001 From: Xichen Wu Date: Mon, 13 Nov 2023 00:22:02 +0100 Subject: [PATCH] disable new hashing methods + add mudata --- bin/summary_gene.py | 49 +++++++++++++++++++++++++++++++++++++++++++++ nextflow.config | 6 +++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/bin/summary_gene.py b/bin/summary_gene.py index 7e50812..0389d7b 100755 --- a/bin/summary_gene.py +++ b/bin/summary_gene.py @@ -43,6 +43,15 @@ def demuxlet_summary(demuxlet_res, raw_adata, raw_mudata): adata.write("genetic_summary/adata/adata_with_"+os.path.basename(x)+".h5ad") assign.append(demuxlet_assign) + if raw_mudata is not None: + mudata = raw_mudata.copy() + mudata['rna'].obs = mudata['rna'].obs.merge(demuxlet_assign, left_index=True, right_on='Barcode', how='left').set_index('Barcode') + mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True) + mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative") + mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str) + mudata.update() + mudata.write("genetic_summary/mudata/mudata_with_"+ os.path.basename(x)+".h5mu") + params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) params_res = pd.read_csv(params_dir, keep_default_na=False, index_col=0) params_res.columns = [os.path.basename(x)] @@ -80,6 +89,16 @@ def freemuxlet_summary(freemuxlet_res, raw_adata, raw_mudata): adata.obs.donor = adata.obs.donor.fillna("negative") adata.obs.donor = adata.obs.donor.astype(str) adata.write("genetic_summary/adata/adata_with_"+ os.path.basename(x)+".h5ad") + + if raw_mudata is not None: + mudata = raw_mudata.copy() + mudata['rna'].obs = mudata['rna'].obs.merge(freemuxlet_assign, left_index=True, right_on='Barcode', how='left').set_index('Barcode') + mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True) + mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative") + mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str) + mudata.update() + mudata.write("genetic_summary/mudata/mudata_with_"+ os.path.basename(x)+".h5mu") + assign.append(freemuxlet_assign) params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) @@ -120,6 +139,16 @@ def souporcell_summary(souporcell_res, raw_adata, raw_mudata): adata.obs.donor = adata.obs.donor.fillna("negative") adata.obs.donor = adata.obs.donor.astype(str) adata.write("genetic_summary/adata/adata_with_"+ os.path.basename(x)+".h5ad") + + if raw_mudata is not None: + mudata = raw_mudata.copy() + mudata['rna'].obs = mudata['rna'].obs.merge(obs_res, left_index=True, right_on='Barcode', how='left').set_index('Barcode') + mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True) + mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative") + mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str) + mudata.update() + mudata.write("genetic_summary/mudata/mudata_with_"+ os.path.basename(x)+".h5mu") + assign.append(obs_res) params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) @@ -160,6 +189,16 @@ def vireo_summary(vireo_res, raw_adata, raw_mudata): adata.obs.donor = adata.obs.donor.fillna("negative") adata.obs.donor = adata.obs.donor.astype(str) adata.write("genetic_summary/adata/adata_with_"+ os.path.basename(x)+".h5ad") + + if raw_mudata is not None: + mudata = raw_mudata.copy() + mudata['rna'].obs = mudata['rna'].obs.merge(obs_res, left_index=True, right_on='Barcode', how='left').set_index('Barcode') + mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True) + mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative") + mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str) + mudata.update() + mudata.write("genetic_summary/mudata/mudata_with_"+ os.path.basename(x)+".h5mu") + assign.append(obs_res) params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) @@ -198,6 +237,16 @@ def scsplit_summary(scsplit_res, raw_adata, raw_mudata): adata.obs.donor = adata.obs.donor.fillna("negative") adata.obs.donor = adata.obs.donor.astype(str) adata.write("genetic_summary/adata/adata_with_"+ os.path.basename(x)+".h5ad") + + if raw_mudata is not None: + mudata = raw_mudata.copy() + mudata['rna'].obs = mudata['rna'].obs.merge(obs_res, left_index=True, right_on='Barcode', how='left').set_index('Barcode') + mudata['rna'].obs.rename(columns={mudata['rna'].obs.columns[0]: 'donor'}, inplace=True) + mudata['rna'].obs.donor = mudata['rna'].obs.donor.fillna("negative") + mudata['rna'].obs.donor = mudata['rna'].obs.donor.astype(str) + mudata.update() + mudata.write("genetic_summary/mudata/mudata_with_"+ os.path.basename(x)+".h5mu") + assign.append(obs_res) params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) diff --git a/nextflow.config b/nextflow.config index 5aebddd..60866af 100644 --- a/nextflow.config +++ b/nextflow.config @@ -124,7 +124,7 @@ params { filter_demuxem = "True" // gmm-demux - gmmDemux = "True" + gmmDemux = "False" hto_matrix_gmm_demux = "filtered" assignmentOutGmmDemux = "gmm_demux" hto_name_gmm = "None" @@ -136,7 +136,7 @@ params { ambiguous = 0.05 // demuxmix - demuxmix = "True" + demuxmix = "False" rna_matrix_demuxmix = "raw" hto_matrix_demuxmix = "raw" assignmentOutDemuxmix = "demuxmix" @@ -152,7 +152,7 @@ params { k_rna = 1.5 // bff - bff = "True" + bff = "False" rna_matrix_bff = "raw" hto_matrix_bff = "raw" assignmentOutBff = "bff"