correct3

theislab · Jan 3, 2025 · 0c2ceeb · 0c2ceeb
1 parent 340af85
commit 0c2ceeb
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 94 deletions.
diff --git a/src/troutpy/pl/plotting.py b/src/troutpy/pl/plotting.py
@@ -410,7 +410,7 @@ def proportion_above_threshold(
     output_path:str='',format='pdf'
 ):
     """ Plot top and bottom percentile of features
- 
+
     Plots the top and bottom percentiles of features with the highest and lowest proportions above a threshold, or visualizes a specific list of transcripts.
 
     Parameters:
@@ -903,7 +903,6 @@ def interactions_with_arrows(
     """Visualizes interactions between source and target cells using arrows, along with transcript locations.
 
     The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot. 
-
     Parameters:
     ----------
     - sdata (AnnData): The AnnData object containing the spatial omics data.

diff --git a/src/troutpy/tl/NMF.py b/src/troutpy/tl/NMF.py
@@ -48,7 +48,7 @@ def nmf(
 
     Parameters:
     ----------
-    - sdata : spatial data object 
+    - sdata : spatial data object
         Input spatial data containing transcript and bin data.
     - layer : str, optional
         Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched').
@@ -103,14 +103,13 @@ def apply_exrna_factors_to_cells(sdata, layer_factors='nmf_data'):
     Parameters:
     sdata (AnnData): The AnnData object containing both extracellular and cellular data.
     layer_factors (str, optional): The key in `sdata` that contains the extracellular RNA data with NMF factors. Default is 'nmf_data'.
-  
+
     Returns:
     AnnData: The updated `sdata` object with annotated cellular data that includes the applied exRNA factors as new columns.
 
     Notes:
     The function assumes that the extracellular RNA data is stored in `sdata[layer_factors]` and that the NMF factor loadings are stored in the `uns` attribute of the extracellular dataset as 'H_nmf'. The factor scores are added to the `obs` attribute of the cellular data.
-    """
-
+    """ 
     # Extract extracellular data and cellular annotations
     adata_extracellular_with_nmf = sdata[layer_factors]
     adata_annotated_cellular = sdata['table']

diff --git a/src/troutpy/tl/interactions.py b/src/troutpy/tl/interactions.py
@@ -5,6 +5,8 @@
 import anndata as ad
 import seaborn as sns
 import matplotlib.pyplot as plt
+import os
+
 
 # function to compute the number of exchanged genes between any two cell types
 
@@ -17,9 +19,8 @@ def get_number_of_communication_genes(
     """Compute the number of exchanged genes between any two cell types
 
     Parameters:
-        - source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with 
-        - proportion of cells per cell type expressing corresponding gene 
-        - target_proportions : A data frame 
+        - source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with proportion of cells per cell type expressing corresponding gene 
+        - target_proportions : A data frame
         - (Gene name x Cell Type) with proportion of cells per cell type being the physically closest cell to transcripts of corresponding gene. Defaults to 0.2.
         - source_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant source of a gene. Defaults to 0.2.
         - target_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant target of a gene. Defaults to 0.2.
@@ -73,8 +74,7 @@ def get_gene_interaction_strength(
         A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type.
 
     - target_proportions : pd.DataFrame
-        A DataFrame where rows represent genes and columns represent target cell types. Each value indicates 
-        the proportion of the gene in the respective target cell type.
+        A DataFrame where rows represent genes and columns represent target cell types. Each value indicates the proportion of the gene in the respective target cell type.
 
     - gene_symbol : str, optional
         The gene symbol for which the interaction strength is to be computed and visualized (default: '').
@@ -119,7 +119,8 @@ def get_gene_interaction_strength(
     colors = [cmap(i) for i in range(interactions.shape[0])]
 
     # Plot the interaction strength using a chord diagram
-    chord_diagram(interactions, source_proportions.columns.tolist(), directed=True, fontsize=8, colors=colors)
+    #### work on this function
+    #chord_diagram(interactions, source_proportions.columns.tolist(), directed=True, fontsize=8, colors=colors)
     plt.title(f"exotranscriptomic {gene_symbol} exchange", fontweight="bold")
 
     # Save the plot if the 'save' option is enabled

diff --git a/src/troutpy/tl/quantify_xrna.py b/src/troutpy/tl/quantify_xrna.py
@@ -14,34 +14,33 @@
 
 def spatial_variability(
     sdata, 
-    coords_keys=['x', 'y'], 
+    coords_keys=None, 
     gene_id_key='feature_name', 
     n_neighbors=10, 
     resolution=1000, 
     binsize=20, 
     n_threads=1, 
     spatial_autocorr_mode="moran",copy=False
 ):
-    """
-    Computes spatial variability of extracellular RNA using Moran's I.
+    """Computes spatial variability of extracellular RNA using Moran's I.
 
     Parameters:
     -----------
-    sdata : SpatialData
+    - sdata : SpatialData
         The spatial transcriptomics dataset in SpatialData format.
-    coords_keys : list of str, optional
+    - coords_keys : list of str, optional
         The keys for spatial coordinates in the dataset (default: ['x', 'y']).
-    gene_id_key : str, optional
+    - gene_id_key : str, optional
         The key for gene identifiers in the dataset (default: 'feature_name').
-    n_neighbors : int, optional
+    - n_neighbors : int, optional
         Number of neighbors to use for computing spatial neighbors (default: 10).
-    resolution : int, optional
+    - resolution : int, optional
         The resolution for kernel density estimation (default: 1000).
-    binsize : int, optional
+    - binsize : int, optional
         The binsize for kernel density estimation (default: 20).
-    n_threads : int, optional
+    - n_threads : int, optional
         The number of threads for LazyKDE processing (default: 1).
-    spatial_autocorr_mode : str, optional
+    - spatial_autocorr_mode : str, optional
         The mode for spatial autocorrelation computation (default: "moran").
 
     Returns:
@@ -51,7 +50,7 @@ def spatial_variability(
     """
     # Step 1: Extract and preprocess data
     data = sdata.points['transcripts'][coords_keys + ['extracellular', gene_id_key]].compute()
-    data = data[data['extracellular'] == True]
+    data = data[data['extracellular']]
     data[gene_id_key] = data[gene_id_key].astype(str)
 
     # Rename columns for clarity
@@ -107,51 +106,32 @@ def create_xrna_metadata(
     gene_key: str = 'feature_name',
     copy: bool = False
 ) -> SpatialData | None:
-    """
-    Creates a new table within the SpatialData object that contains a 'gene' column 
-    with the unique gene names extracted from the specified points layer.
+    """Creates a new table within the SpatialData object that contains a 'gene' column with the unique gene names extracted from the specified points layer.
 
     Parameters:
     ----------
-    sdata : SpatialData
+    - sdata : SpatialData
         The SpatialData object to modify.
-    
-    points_layer : str, optional
-        The name of the layer in `sdata.points` from which to extract gene names.
-        Default is 'transcripts'.
-    
-    gene_key : str, optional
-        The key in the `points_layer` dataframe that contains the gene names.
-        Default is 'feature_name'.
-    
-    copy : bool, optional
-        If `True`, returns a copy of the `SpatialData` object with the new table added.
-        If `False`, modifies the original `SpatialData` object in place. Default is `False`.
+    - points_layer : str, optional
+        The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
+    - gene_key : str, optional
+        The key in the `points_layer` dataframe that contains the gene names. Default is 'feature_name'.
+    - copy : bool, optional
+        - If `True`, returns a copy of the `SpatialData` object with the new table added.
+        - If `False`, modifies the original `SpatialData` object in place. Default is `False`.
 
     Returns:
     -------
-    SpatialData | None
-        If `copy` is `True`, returns a copy of the modified `SpatialData` object.
-        Otherwise, returns `None`.
+    - SpatialData | None
+        If `copy` is `True`, returns a copy of the modified `SpatialData` object. Otherwise, returns `None`.
 
     Raises:
     ------
     ValueError
-        If the specified points layer does not exist in `sdata.points`.
-        If the `gene_key` column is not present in the specified points layer.
-
-    Examples:
-    --------
-    Add a metadata table for genes in the 'transcripts' layer:
-    >>> create_xrna_metadata(sdata, points_layer='transcripts', gene_key='feature_name')
-
-    Modify a custom SpatialData layer and return a copy:
-    >>> updated_sdata = create_xrna_metadata(sdata, points_layer='custom_layer', gene_key='gene_id', copy=True)
+        - If the specified points layer does not exist in `sdata.points`.
+        - If the `gene_key` column is not present in the specified points layer.
 
-    Notes:
-    -----
-    - The function uses `scanpy` to create an AnnData object and integrates it into the SpatialData table model.
-    - The unique gene names are extracted from the specified points layer and stored in the `.var` of the AnnData object.
+    
     """
     # Check if the specified points layer exists
     if points_layer not in sdata.points:
@@ -191,16 +171,16 @@ def quantify_overexpression(
     """Compare counts per gene with counts per non-gene feature. We define a threshold as the 'percentile_threshold' counts of non-gene counts (e.g. 'percentile_threshold = 100' corresponds to the maximum number of counts observed in any non-gene feature). Any gene whose counts are above the threshold are considered overexpressed.
 
     Args:
-        sdata (pd.DataFrame): The spatial data object holding points and transcript data.
-        codeword_column (str): Column name that holds codeword category.
-        control_codewords (Union[List[str], str]): Name(s) of codewords that correspond to controls based on which noise threshold will be defined.
-        gene_id_column (str): Column that holds name of gene (/ or feature) that is being detected.
-        percentile_threshold (float, optional): Percentile used to define overexpression threshold. Defaults to 100.
-        save (bool, optional): Whether to save outputs to file. Defaults to True.
-        saving_path (str, optional): Path to directory that files should be saved in. Defaults to "".
+    - sdata (pd.DataFrame): The spatial data object holding points and transcript data.
+    - codeword_column (str): Column name that holds codeword category.
+    - control_codewords (Union[List[str], str]): Name(s) of codewords that correspond to controls based on which noise threshold will be defined.
+    - gene_id_column (str): Column that holds name of gene (/ or feature) that is being detected.
+    - percentile_threshold (float, optional): Percentile used to define overexpression threshold. Defaults to 100.
+    - save (bool, optional): Whether to save outputs to file. Defaults to True.
+    - saving_path (str, optional): Path to directory that files should be saved in. Defaults to "".
 
     Returns:
-        Tuple[pd.DataFrame, pd.DataFrame, float]: A tuple containing the updated sdata, scores per gene DataFrame, and the calculated threshold.
+    - Tuple[pd.DataFrame, pd.DataFrame, float]: A tuple containing the updated sdata, scores per gene DataFrame, and the calculated threshold.
     """
 
     # Compute the data from the Dask DataFrame
@@ -239,40 +219,28 @@ def quantify_overexpression(
     return sdata if copy else None
 
 def extracellular_enrichment(sdata, gene_id_column: str = 'feature_name', copy: bool = False):
-    """
-    Calculate the proportion of extracellular and intracellular transcripts for each gene and integrate results into the AnnData object.
+    """Calculate the proportion of extracellular and intracellular transcripts for each gene and integrate results into the AnnData object.
 
     This function computes the proportion of transcripts classified as extracellular or intracellular for each gene and calculates additional metrics, including log fold change of extracellular to intracellular proportions. The results are integrated into the `sdata` object under the 'xrna_metadata' layer.
 
     Parameters:
     -----------
-    sdata : AnnData
-        An AnnData object containing spatial transcriptomics data. The `points` attribute should include a 
-        'transcripts' DataFrame with columns for gene IDs (specified by `gene_id_column`) and a boolean 
-        'extracellular' column indicating whether each transcript is classified as extracellular.
-    gene_id_column : str, optional
+    - sdata : AnnData
+        An AnnData object containing spatial transcriptomics data. The `points` attribute should include a 'transcripts' DataFrame with columns for gene IDs (specified by `gene_id_column`) and a boolean 'extracellular' column indicating whether each transcript is classified as extracellular.
+    - gene_id_column : str, optional
         The name of the column in the 'transcripts' DataFrame containing gene identifiers. Defaults to 'feature_name'.
-    copy : bool, optional
-        Whether to return a modified copy of the input `sdata` object. If `False`, the input object is modified 
-        in place. Defaults to `False`.
+    - copy : bool, optional
+        Whether to return a modified copy of the input `sdata` object. If `False`, the input object is modified in place. Defaults to `False`.
 
     Returns:
     --------
-    AnnData or None
-        If `copy=True`, returns a modified copy of the input `sdata` object with updated metadata. Otherwise, 
-        modifies `sdata` in place and returns `None`.
+    - AnnData or None
+        If `copy=True`, returns a modified copy of the input `sdata` object with updated metadata. Otherwise, modifies `sdata` in place and returns `None`.
 
     Notes:
     ------
     - The function assumes that the `sdata` object has a 'points' layer containing a 'transcripts' DataFrame.
-    - If the 'xrna_metadata' attribute does not exist in `sdata`, it will be created using the `create_xrna_metadata` 
-      function.
-
-    Example:
-    --------
-    >>> updated_sdata = extracellular_enrichment(sdata, gene_id_column='gene_symbol', copy=True)
-    >>> print(updated_sdata['xrna_metadata'].var)
-
+    - If the 'xrna_metadata' attribute does not exist in `sdata`, it will be created using the `create_xrna_metadata` function.
     """
     # Extract and compute the required data
     data = sdata.points['transcripts'][[gene_id_column, 'extracellular']].compute()

diff --git a/src/troutpy/tl/segmentation_free.py b/src/troutpy/tl/segmentation_free.py
@@ -19,27 +19,27 @@ def segmentation_free_clustering(
     This function clusters transcriptomic data without relying on pre-defined cell or tissue segmentations. It supports multiple clustering methods, with Points2Regions being the default.
 
     Parameters:
-        sdata : SpatialData
+        - sdata : SpatialData
             A spatial data object containing transcriptomic information.
-        params : dict, optional (default: {})
+        - params : dict, optional (default: {})
             A dictionary of parameters for the selected clustering method.
             - For `points2regions`:
                 - 'num_clusters' (int): Number of clusters (default: 300).
                 - 'pixel_width' (float): Pixel width parameter (default: 0.4).
                 - 'pixel_smoothing' (float): Pixel smoothing parameter (default: 3.5).
-        x : str, optional (default: 'x')
+        - x : str, optional (default: 'x')
             Column name for the x-coordinates of transcripts.
-        y : str, optional (default: 'y')
+        - y : str, optional (default: 'y')
             Column name for the y-coordinates of transcripts.
-        feature_name : str, optional (default: 'feature_name')
+        - feature_name : str, optional (default: 'feature_name')
             Column name for the feature names.
-        method : str, optional (default: 'points2regions')
+        - method : str, optional (default: 'points2regions')
             Clustering method to use. Options:
             - 'points2regions': Uses the Points2Regions algorithm for clustering.
             - 'sainsc': Placeholder for another clustering method.
-        transcript_id : str, optional (default: 'transcript_id')
+        - transcript_id : str, optional (default: 'transcript_id')
             Column name for the transcript IDs.
-        copy : bool, optional (default: False)
+        - copy : bool, optional (default: False)
             If True, returns a copy of the clustering results. If False, updates `sdata` in-place.
 
     Returns: