From f3cd8c98802243ad2d63d6fe6dd350383008cd81 Mon Sep 17 00:00:00 2001
From: yinuo <yj2589@columbia.edu>
Date: Wed, 3 Jan 2024 13:05:34 -0500
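Subject: [PATCH] Clean up formatting; change alpha_mul default and rb gene mask

- gener_img.py: reformat function signatures and drop trailing blank lines.
- starfysh.py: change the alpha_mul default from 20 to 50 (behavior change)
  and update the docstrings to state the new default.
- utils.py: remove unused locals and commented-out anchor-detection code in
  _compute_anchors; flag ribosomal genes by the 'RP-'/'rp-' prefixes instead
  of 'RPS'/'RPL'/'rp-' (behavior change).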

---
 starfysh/gener_img.py | 25 ++++++++++---------------
 starfysh/starfysh.py  |  6 +++---
 starfysh/utils.py     | 34 ++--------------------------------
 3 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/starfysh/gener_img.py b/starfysh/gener_img.py
index e97c876..6f967a5 100644
--- a/starfysh/gener_img.py
+++ b/starfysh/gener_img.py
@@ -30,14 +30,15 @@
  
 class dataset(torch.utils.data.Dataset):
 
-    def __init__(self, 
-                 spot, 
-                 exp_spot, 
-                 barcode_spot, 
-                 #img_size,
-                 #histo_img,
-                 transform=None
-                ):
+    def __init__(
+        self, 
+        spot, 
+        exp_spot, 
+        barcode_spot, 
+        #img_size,
+        #histo_img,
+        transform=None
+    ):
 
         super(dataset, self).__init__()
         self.spot = spot
@@ -115,9 +116,7 @@ def prep_dataset(adata):
     return train_set,test_set,all_set
 
 
-def generate_img(dat_path,
-                 train_flag=True
-                ):
+def generate_img(dat_path, train_flag=True):
     """
     input:
     dat_path: the path for csv file
@@ -331,8 +330,4 @@ def generate_img(dat_path,
 
     
     return recon
-    
-    
 
-    
-    
diff --git a/starfysh/starfysh.py b/starfysh/starfysh.py
index 3bee843..d573ec7 100644
--- a/starfysh/starfysh.py
+++ b/starfysh/starfysh.py
@@ -57,7 +57,7 @@ def __init__(
         win_loglib : float
             Log-library size smoothed with neighboring spots
 
-        alpha_mul : float (default=1e3)
+        alpha_mul : float (default=50)
             Multiplier of Dirichlet concentration parameter to control
             signature prior's confidence
         """
@@ -283,7 +283,7 @@ def __init__(
         gene_sig,
         patch_r,
         win_loglib,
-        alpha_mul=20,
+        alpha_mul=50,
         n_img_chan=1,
         seed=0,
         device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -306,7 +306,7 @@ def __init__(
         win_loglib : float
             Log-library size smoothed with neighboring spots
 
-        alpha_mul : float (default=1e3)
+        alpha_mul : float (default=50)
             Multiplier of Dirichlet concentration parameter to control
             signature prior's confidence
 
diff --git a/starfysh/utils.py b/starfysh/utils.py
index f4094f4..6d7552b 100644
--- a/starfysh/utils.py
+++ b/starfysh/utils.py
@@ -211,38 +211,9 @@ def _compute_anchors(self):
         Calculate top `anchor_spots` significantly enriched for given cell type(s)
         determined by gene set scores from signatures
         """
-        score_type = self.params['sig_version']
         score_df = self.sig_mean_norm
-        signif_level = self.params['signif_level']
         n_anchor = self.params['n_anchors']
-        n_cell_types = self.sig_mean_norm.shape[1]
-
-        # DEBUG: retry only subset by # anchors
-        # if score_type == 'gene_score':
-        #     pure_spots = []
-        #     for i, cell_type in enumerate(score_df.columns):
-        #         # find anchors by outlier detection
-        #         score = score_df.values[:, i]
-        #
-        #         # modified z-score
-        #         med = np.median(score)
-        #         mad = median_abs_deviation(score)
-        #         modified_zscore = 0.6745 * (score-med)/mad
-        #         top_score = score_df.iloc[:, i][modified_zscore > signif_level]
-        #
-        #         # z-score
-        #         sd = score.std()
-        #         top_score = score_df.iloc[:, i][score > signif_level*sd]
-        #         top_score = top_score[top_score.index]
-        #
-        #         if len(top_score) <= n_anchor:
-        #             pure_spots.append(top_score.index)
-        #         else:
-        #             pure_spots.append(top_score.index[(-top_score.values).argsort()[:n_anchor]])
-        #
-        # else:
-        #     top_expr_spots = (-score_df.values).argsort(axis=0)[:n_anchor, :]
-        #     pure_spots = np.transpose(score_df.index[top_expr_spots])
+
         top_expr_spots = (-score_df.values).argsort(axis=0)[:n_anchor, :]
         pure_spots = np.transpose(score_df.index[top_expr_spots])
 
@@ -560,8 +531,7 @@ def preprocess(
         adata.var_names.str.startswith('mt-')
     )
     adata.var['rb'] = (
-        adata.var_names.str.startswith('RPS') |
-        adata.var_names.str.startswith('RPL') |
+        adata.var_names.str.startswith('RP-') |
         adata.var_names.str.startswith('rp-')
     )