Skip to content

Commit

Permalink
correct3
Browse files Browse the repository at this point in the history
  • Loading branch information
sergiomarco25 committed Jan 3, 2025
1 parent 340af85 commit 0c2ceeb
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 94 deletions.
3 changes: 1 addition & 2 deletions src/troutpy/pl/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def proportion_above_threshold(
output_path:str='',format='pdf'
):
""" Plot top and bottom percentile of features
Plots the top and bottom percentiles of features with the highest and lowest proportions above a threshold, or visualizes a specific list of transcripts.
Parameters:
Expand Down Expand Up @@ -903,7 +903,6 @@ def interactions_with_arrows(
"""Visualizes interactions between source and target cells using arrows, along with transcript locations.
The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot.
Parameters:
----------
- sdata (AnnData): The AnnData object containing the spatial omics data.
Expand Down
7 changes: 3 additions & 4 deletions src/troutpy/tl/NMF.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def nmf(
Parameters:
----------
- sdata : spatial data object
- sdata : spatial data object
Input spatial data containing transcript and bin data.
- layer : str, optional
Layer name of the data that contains extracellular transcripts (default: 'extracellular_transcripts_enriched').
Expand Down Expand Up @@ -103,14 +103,13 @@ def apply_exrna_factors_to_cells(sdata, layer_factors='nmf_data'):
Parameters:
sdata (AnnData): The AnnData object containing both extracellular and cellular data.
layer_factors (str, optional): The key in `sdata` that contains the extracellular RNA data with NMF factors. Default is 'nmf_data'.
Returns:
AnnData: The updated `sdata` object with annotated cellular data that includes the applied exRNA factors as new columns.
Notes:
The function assumes that the extracellular RNA data is stored in `sdata[layer_factors]` and that the NMF factor loadings are stored in the `uns` attribute of the extracellular dataset as 'H_nmf'. The factor scores are added to the `obs` attribute of the cellular data.
"""

"""
# Extract extracellular data and cellular annotations
adata_extracellular_with_nmf = sdata[layer_factors]
adata_annotated_cellular = sdata['table']
Expand Down
13 changes: 7 additions & 6 deletions src/troutpy/tl/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import anndata as ad
import seaborn as sns
import matplotlib.pyplot as plt
import os


# function to compute the number of exchanged genes between any two cell types

Expand All @@ -17,9 +19,8 @@ def get_number_of_communication_genes(
"""Compute the number of exchanged genes between any two cell types
Parameters:
- source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with
- proportion of cells per cell type expressing corresponding gene
- target_proportions : A data frame
- source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with proportion of cells per cell type expressing corresponding gene
- target_proportions : A data frame
- (Gene name x Cell Type) with proportion of cells per cell type being the physically closest cell to transcripts of corresponding gene. Defaults to 0.2.
- source_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant source of a gene. Defaults to 0.2.
- target_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant target of a gene. Defaults to 0.2.
Expand Down Expand Up @@ -73,8 +74,7 @@ def get_gene_interaction_strength(
A DataFrame where rows represent genes and columns represent source cell types. Each value indicates the proportion of the gene in the respective source cell type.
- target_proportions : pd.DataFrame
A DataFrame where rows represent genes and columns represent target cell types. Each value indicates
the proportion of the gene in the respective target cell type.
A DataFrame where rows represent genes and columns represent target cell types. Each value indicates the proportion of the gene in the respective target cell type.
- gene_symbol : str, optional
The gene symbol for which the interaction strength is to be computed and visualized (default: '').
Expand Down Expand Up @@ -119,7 +119,8 @@ def get_gene_interaction_strength(
colors = [cmap(i) for i in range(interactions.shape[0])]

# Plot the interaction strength using a chord diagram
chord_diagram(interactions, source_proportions.columns.tolist(), directed=True, fontsize=8, colors=colors)
#### work on this function
#chord_diagram(interactions, source_proportions.columns.tolist(), directed=True, fontsize=8, colors=colors)
plt.title(f"exotranscriptomic {gene_symbol} exchange", fontweight="bold")

# Save the plot if the 'save' option is enabled
Expand Down
116 changes: 42 additions & 74 deletions src/troutpy/tl/quantify_xrna.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,33 @@

def spatial_variability(
sdata,
coords_keys=['x', 'y'],
coords_keys=None,
gene_id_key='feature_name',
n_neighbors=10,
resolution=1000,
binsize=20,
n_threads=1,
spatial_autocorr_mode="moran",copy=False
):
"""
Computes spatial variability of extracellular RNA using Moran's I.
"""Computes spatial variability of extracellular RNA using Moran's I.
Parameters:
-----------
sdata : SpatialData
- sdata : SpatialData
The spatial transcriptomics dataset in SpatialData format.
coords_keys : list of str, optional
- coords_keys : list of str, optional
The keys for spatial coordinates in the dataset (default: ['x', 'y']).
gene_id_key : str, optional
- gene_id_key : str, optional
The key for gene identifiers in the dataset (default: 'feature_name').
n_neighbors : int, optional
- n_neighbors : int, optional
Number of neighbors to use for computing spatial neighbors (default: 10).
resolution : int, optional
- resolution : int, optional
The resolution for kernel density estimation (default: 1000).
binsize : int, optional
- binsize : int, optional
The binsize for kernel density estimation (default: 20).
n_threads : int, optional
- n_threads : int, optional
The number of threads for LazyKDE processing (default: 1).
spatial_autocorr_mode : str, optional
- spatial_autocorr_mode : str, optional
The mode for spatial autocorrelation computation (default: "moran").
Returns:
Expand All @@ -51,7 +50,7 @@ def spatial_variability(
"""
# Step 1: Extract and preprocess data
data = sdata.points['transcripts'][coords_keys + ['extracellular', gene_id_key]].compute()
data = data[data['extracellular'] == True]
data = data[data['extracellular']]
data[gene_id_key] = data[gene_id_key].astype(str)

# Rename columns for clarity
Expand Down Expand Up @@ -107,51 +106,32 @@ def create_xrna_metadata(
gene_key: str = 'feature_name',
copy: bool = False
) -> SpatialData | None:
"""
Creates a new table within the SpatialData object that contains a 'gene' column
with the unique gene names extracted from the specified points layer.
"""Creates a new table within the SpatialData object that contains a 'gene' column with the unique gene names extracted from the specified points layer.
Parameters:
----------
sdata : SpatialData
- sdata : SpatialData
The SpatialData object to modify.
points_layer : str, optional
The name of the layer in `sdata.points` from which to extract gene names.
Default is 'transcripts'.
gene_key : str, optional
The key in the `points_layer` dataframe that contains the gene names.
Default is 'feature_name'.
copy : bool, optional
If `True`, returns a copy of the `SpatialData` object with the new table added.
If `False`, modifies the original `SpatialData` object in place. Default is `False`.
- points_layer : str, optional
The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
- gene_key : str, optional
The key in the `points_layer` dataframe that contains the gene names. Default is 'feature_name'.
- copy : bool, optional
- If `True`, returns a copy of the `SpatialData` object with the new table added.
- If `False`, modifies the original `SpatialData` object in place. Default is `False`.
Returns:
-------
SpatialData | None
If `copy` is `True`, returns a copy of the modified `SpatialData` object.
Otherwise, returns `None`.
- SpatialData | None
If `copy` is `True`, returns a copy of the modified `SpatialData` object. Otherwise, returns `None`.
Raises:
------
ValueError
If the specified points layer does not exist in `sdata.points`.
If the `gene_key` column is not present in the specified points layer.
Examples:
--------
Add a metadata table for genes in the 'transcripts' layer:
>>> create_xrna_metadata(sdata, points_layer='transcripts', gene_key='feature_name')
Modify a custom SpatialData layer and return a copy:
>>> updated_sdata = create_xrna_metadata(sdata, points_layer='custom_layer', gene_key='gene_id', copy=True)
- If the specified points layer does not exist in `sdata.points`.
- If the `gene_key` column is not present in the specified points layer.
Notes:
-----
- The function uses `scanpy` to create an AnnData object and integrates it into the SpatialData table model.
- The unique gene names are extracted from the specified points layer and stored in the `.var` of the AnnData object.
"""
# Check if the specified points layer exists
if points_layer not in sdata.points:
Expand Down Expand Up @@ -191,16 +171,16 @@ def quantify_overexpression(
"""Compare counts per gene with counts per non-gene feature. We define a threshold as the 'percentile_threshold' counts of non-gene counts (e.g. 'percentile_threshold = 100' corresponds to the maximum number of counts observed in any non-gene feature). Any gene whose counts are above the threshold are considered overexpressed.
Args:
sdata (pd.DataFrame): The spatial data object holding points and transcript data.
codeword_column (str): Column name that holds codeword category.
control_codewords (Union[List[str], str]): Name(s) of codewords that correspond to controls based on which noise threshold will be defined.
gene_id_column (str): Column that holds name of gene (/ or feature) that is being detected.
percentile_threshold (float, optional): Percentile used to define overexpression threshold. Defaults to 100.
save (bool, optional): Whether to save outputs to file. Defaults to True.
saving_path (str, optional): Path to directory that files should be saved in. Defaults to "".
- sdata (pd.DataFrame): The spatial data object holding points and transcript data.
- codeword_column (str): Column name that holds codeword category.
- control_codewords (Union[List[str], str]): Name(s) of codewords that correspond to controls based on which noise threshold will be defined.
- gene_id_column (str): Column that holds name of gene (/ or feature) that is being detected.
- percentile_threshold (float, optional): Percentile used to define overexpression threshold. Defaults to 100.
- save (bool, optional): Whether to save outputs to file. Defaults to True.
- saving_path (str, optional): Path to directory that files should be saved in. Defaults to "".
Returns:
Tuple[pd.DataFrame, pd.DataFrame, float]: A tuple containing the updated sdata, scores per gene DataFrame, and the calculated threshold.
- Tuple[pd.DataFrame, pd.DataFrame, float]: A tuple containing the updated sdata, scores per gene DataFrame, and the calculated threshold.
"""

# Compute the data from the Dask DataFrame
Expand Down Expand Up @@ -239,40 +219,28 @@ def quantify_overexpression(
return sdata if copy else None

def extracellular_enrichment(sdata, gene_id_column: str = 'feature_name', copy: bool = False):
"""
Calculate the proportion of extracellular and intracellular transcripts for each gene and integrate results into the AnnData object.
"""Calculate the proportion of extracellular and intracellular transcripts for each gene and integrate results into the AnnData object.
This function computes the proportion of transcripts classified as extracellular or intracellular for each gene and calculates additional metrics, including log fold change of extracellular to intracellular proportions. The results are integrated into the `sdata` object under the 'xrna_metadata' layer.
Parameters:
-----------
sdata : AnnData
An AnnData object containing spatial transcriptomics data. The `points` attribute should include a
'transcripts' DataFrame with columns for gene IDs (specified by `gene_id_column`) and a boolean
'extracellular' column indicating whether each transcript is classified as extracellular.
gene_id_column : str, optional
- sdata : AnnData
An AnnData object containing spatial transcriptomics data. The `points` attribute should include a 'transcripts' DataFrame with columns for gene IDs (specified by `gene_id_column`) and a boolean 'extracellular' column indicating whether each transcript is classified as extracellular.
- gene_id_column : str, optional
The name of the column in the 'transcripts' DataFrame containing gene identifiers. Defaults to 'feature_name'.
copy : bool, optional
Whether to return a modified copy of the input `sdata` object. If `False`, the input object is modified
in place. Defaults to `False`.
- copy : bool, optional
Whether to return a modified copy of the input `sdata` object. If `False`, the input object is modified in place. Defaults to `False`.
Returns:
--------
AnnData or None
If `copy=True`, returns a modified copy of the input `sdata` object with updated metadata. Otherwise,
modifies `sdata` in place and returns `None`.
- AnnData or None
If `copy=True`, returns a modified copy of the input `sdata` object with updated metadata. Otherwise, modifies `sdata` in place and returns `None`.
Notes:
------
- The function assumes that the `sdata` object has a 'points' layer containing a 'transcripts' DataFrame.
- If the 'xrna_metadata' attribute does not exist in `sdata`, it will be created using the `create_xrna_metadata`
function.
Example:
--------
>>> updated_sdata = extracellular_enrichment(sdata, gene_id_column='gene_symbol', copy=True)
>>> print(updated_sdata['xrna_metadata'].var)
- If the 'xrna_metadata' attribute does not exist in `sdata`, it will be created using the `create_xrna_metadata` function.
"""
# Extract and compute the required data
data = sdata.points['transcripts'][[gene_id_column, 'extracellular']].compute()
Expand Down
16 changes: 8 additions & 8 deletions src/troutpy/tl/segmentation_free.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,27 @@ def segmentation_free_clustering(
This function clusters transcriptomic data without relying on pre-defined cell or tissue segmentations. It supports multiple clustering methods, with Points2Regions being the default.
Parameters:
sdata : SpatialData
- sdata : SpatialData
A spatial data object containing transcriptomic information.
params : dict, optional (default: {})
- params : dict, optional (default: {})
A dictionary of parameters for the selected clustering method.
- For `points2regions`:
- 'num_clusters' (int): Number of clusters (default: 300).
- 'pixel_width' (float): Pixel width parameter (default: 0.4).
- 'pixel_smoothing' (float): Pixel smoothing parameter (default: 3.5).
x : str, optional (default: 'x')
- x : str, optional (default: 'x')
Column name for the x-coordinates of transcripts.
y : str, optional (default: 'y')
- y : str, optional (default: 'y')
Column name for the y-coordinates of transcripts.
feature_name : str, optional (default: 'feature_name')
- feature_name : str, optional (default: 'feature_name')
Column name for the feature names.
method : str, optional (default: 'points2regions')
- method : str, optional (default: 'points2regions')
Clustering method to use. Options:
- 'points2regions': Uses the Points2Regions algorithm for clustering.
- 'sainsc': Placeholder for another clustering method.
transcript_id : str, optional (default: 'transcript_id')
- transcript_id : str, optional (default: 'transcript_id')
Column name for the transcript IDs.
copy : bool, optional (default: False)
- copy : bool, optional (default: False)
If True, returns a copy of the clustering results. If False, updates `sdata` in-place.
Returns:
Expand Down

0 comments on commit 0c2ceeb

Please sign in to comment.