Skip to content

Commit

Permalink
correct4
Browse files Browse the repository at this point in the history
  • Loading branch information
sergiomarco25 committed Jan 3, 2025
1 parent 0c2ceeb commit b166b11
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 75 deletions.
2 changes: 1 addition & 1 deletion src/troutpy/pl/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,7 @@ def interactions_with_arrows(
):
"""Visualizes interactions between source and target cells using arrows, along with transcript locations.
The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot.
The function plots arrows from source to target cells based on transcript proximity, color-coding source and target cells, and transcript locations. An optional image layer can be overlaid behind the plot.
Parameters:
----------
- sdata (AnnData): The AnnData object containing the spatial omics data.
Expand Down
8 changes: 3 additions & 5 deletions src/troutpy/tl/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import matplotlib.pyplot as plt
import os


# function to compute the number of exchanged genes between any two cell types

def get_number_of_communication_genes(
Expand All @@ -19,7 +18,7 @@ def get_number_of_communication_genes(
"""Compute the number of exchanged genes between any two cell types
Parameters:
- source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with proportion of cells per cell type expressing corresponding gene
- source_proportions (pd.DataFrame): A data frame (Gene name x Cell Type) with proportion of cells per cell type expressing corresponding gene
- target_proportions : A data frame
- (Gene name x Cell Type) with proportion of cells per cell type being the physically closest cell to transcripts of corresponding gene. Defaults to 0.2.
- source_proportion_threshold (float, optional): The threshold to consider a cell type to be a significant source of a gene. Defaults to 0.2.
Expand Down Expand Up @@ -116,10 +115,9 @@ def get_gene_interaction_strength(

# Define the colormap and create color mappings for each cell type
cmap = plt.get_cmap("tab20")
colors = [cmap(i) for i in range(interactions.shape[0])]

# Plot the interaction strength using a chord diagram
#### work on this function
#### work on this function ######
#colors = [cmap(i) for i in range(interactions.shape[0])]
#chord_diagram(interactions, source_proportions.columns.tolist(), directed=True, fontsize=8, colors=colors)
plt.title(f"exotranscriptomic {gene_symbol} exchange", fontweight="bold")

Expand Down
9 changes: 4 additions & 5 deletions src/troutpy/tl/quantify_xrna.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def create_xrna_metadata(
- points_layer : str, optional
The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
- gene_key : str, optional
The key in the `points_layer` dataframe that contains the gene names.Default is 'feature_name'.
The key in the `points_layer` dataframe that contains the gene names. Default is 'feature_name'.
- copy : bool, optional
- If `True`, returns a copy of the `SpatialData` object with the new table added.
- If `False`, modifies the original `SpatialData` object in place. Default is `False`.
Expand All @@ -131,7 +131,6 @@ def create_xrna_metadata(
- If the specified points layer does not exist in `sdata.points`.
- If the `gene_key` column is not present in the specified points layer.
"""
# Check if the specified points layer exists
if points_layer not in sdata.points:
Expand Down Expand Up @@ -185,7 +184,7 @@ def quantify_overexpression(

# Compute the data from the Dask DataFrame
data = sdata.points[layer][['extracellular',codeword_column,gene_id_column]].compute()
data=data[data['extracellular']==True]
data=data[data['extracellular']]

# Ensure control_codewords is a list
if isinstance(control_codewords, str):
Expand Down Expand Up @@ -269,7 +268,7 @@ def extracellular_enrichment(sdata, gene_id_column: str = 'feature_name', copy:

def spatial_colocalization(
sdata,
coords_keys=['x', 'y'],
coords_keys=None,
gene_id_key='feature_name',

resolution=1000,
Expand Down Expand Up @@ -305,7 +304,7 @@ def spatial_colocalization(
"""
# Step 1: Extract and preprocess data
data = sdata.points['transcripts'][coords_keys + ['extracellular', gene_id_key]].compute()
data = data[data['extracellular'] == True]
data = data[data['extracellular']]
data[gene_id_key] = data[gene_id_key].astype(str)

# Rename columns for clarity
Expand Down
2 changes: 1 addition & 1 deletion src/troutpy/tl/segmentation_free.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def segmentation_free_clustering(
sdata,
params: dict = {},
params: None,
x: str = 'x',
y: str = 'y',
feature_name: str = 'feature_name',
Expand Down
115 changes: 52 additions & 63 deletions src/troutpy/tl/source_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,46 +15,33 @@ def create_xrna_metadata(
gene_key: str = 'feature_name',
copy: bool = False
) -> SpatialData | None:
"""
Creates a new table within the SpatialData object that contains a 'gene' column
with the unique gene names extracted from the specified points layer.
"""Creates a new table within the SpatialData object that contains a 'gene' column with the unique gene names extracted from the specified points layer.
Parameters:
----------
sdata : SpatialData
- sdata : SpatialData
The SpatialData object to modify.
points_layer : str, optional
The name of the layer in `sdata.points` from which to extract gene names.
Default is 'transcripts'.
- points_layer : str, optional
The name of the layer in `sdata.points` from which to extract gene names. Default is 'transcripts'.
gene_key : str, optional
The key in the `points_layer` dataframe that contains the gene names.
Default is 'feature_name'.
- gene_key : str, optional
The key in the `points_layer` dataframe that contains the gene names. Default is 'feature_name'.
copy : bool, optional
If `True`, returns a copy of the `SpatialData` object with the new table added.
If `False`, modifies the original `SpatialData` object in place. Default is `False`.
- copy : bool, optional
- If `True`, returns a copy of the `SpatialData` object with the new table added.
- If `False`, modifies the original `SpatialData` object in place. Default is `False`.
Returns:
-------
SpatialData | None
If `copy` is `True`, returns a copy of the modified `SpatialData` object.
Otherwise, returns `None`.
If `copy` is `True`, returns a copy of the modified `SpatialData` object. Otherwise, returns `None`.
Raises:
------
ValueError
If the specified points layer does not exist in `sdata.points`.
If the `gene_key` column is not present in the specified points layer.
Examples:
--------
Add a metadata table for genes in the 'transcripts' layer:
>>> create_xrna_metadata(sdata, points_layer='transcripts', gene_key='feature_name')
Modify a custom SpatialData layer and return a copy:
>>> updated_sdata = create_xrna_metadata(sdata, points_layer='custom_layer', gene_key='gene_id', copy=True)
- If the specified points layer does not exist in `sdata.points`.
- If the `gene_key` column is not present in the specified points layer.
Notes:
-----
Expand Down Expand Up @@ -94,27 +81,25 @@ def compute_source_cells(
layer='transcripts',
copy=False
):
"""
Compute the source of extracellular RNA by linking detected extracellular transcripts to specific cell types in the spatial data.
"""Compute the source of extracellular RNA by linking detected extracellular transcripts to specific cell types in the spatial data.
Parameters:
Parameters:
----------
sdata : SpatialData object
- sdata : SpatialData object
The input spatial data object containing spatial transcriptomics data.
expression_threshold : float, optional, default=1
- expression_threshold : float, optional, default=1
Threshold for filtering transcripts based on expression levels.
gene_id_column : str, optional, default='feature_name'
- gene_id_column : str, optional, default='feature_name'
Column name for gene identifiers in the transcripts data.
layer : str, optional, default='transcripts'
- layer : str, optional, default='transcripts'
Layer in `sdata.points` containing the transcript information.
copy : bool, optional, default=False
- copy : bool, optional, default=False
If True, returns a modified copy of the spatial data object. Otherwise, modifies in place.
Returns:
-------
sdata : SpatialData object or None
The modified spatial data object with added `source` metadata if `copy=True`.
Otherwise, modifies the input object in place and returns None.
- sdata : SpatialData object or None
- The modified spatial data object with added `source` metadata if `copy=True`. Otherwise, modifies the input object in place and returns None.
"""

# Create a copy of the table containing spatial transcriptomics data
Expand Down Expand Up @@ -157,24 +142,20 @@ def distance_to_source_cell(
This function computes the distance from each extracellular RNA transcript to the nearest source cell based on their spatial coordinates. The function uses a KDTree to efficiently find the closest cell to each transcript, storing the results in the `sdata` object.
Parameters:
sdata (AnnData): The AnnData object containing both transcript and cellular data.
layer (str, optional): The layer in `sdata` containing the transcript data. Default is 'transcripts'.
xcoord (str, optional): The column name in the transcript data for the x-coordinate. Default is 'x'.
ycoord (str, optional): The column name in the transcript data for the y-coordinate. Default is 'y'.
xcellcoord (str, optional): The column name in the cellular data for the x-coordinate of cell centroids. Default is 'x_centroid'.
ycellcoord (str, optional): The column name in the cellular data for the y-coordinate of cell centroids. Default is 'y_centroid'.
gene_id_column (str, optional): The column name for the gene identifier. Default is 'feature_name'.
copy (bool, optional): Whether to return a copy of the `sdata` object with updated distances, or modify in place. Default is False.
- sdata (AnnData): The AnnData object containing both transcript and cellular data.
- layer (str, optional): The layer in `sdata` containing the transcript data. Default is 'transcripts'.
- xcoord (str, optional): The column name in the transcript data for the x-coordinate. Default is 'x'.
- ycoord (str, optional): The column name in the transcript data for the y-coordinate. Default is 'y'.
- xcellcoord (str, optional): The column name in the cellular data for the x-coordinate of cell centroids. Default is 'x_centroid'.
- ycellcoord (str, optional): The column name in the cellular data for the y-coordinate of cell centroids. Default is 'y_centroid'.
- gene_id_column (str, optional): The column name for the gene identifier. Default is 'feature_name'.
- copy (bool, optional): Whether to return a copy of the `sdata` object with updated distances, or modify in place. Default is False.
Returns:
AnnData or None: If `copy` is True, returns the updated `sdata` object. Otherwise, modifies `sdata` in place and returns None.
- AnnData or None: If `copy` is True, returns the updated `sdata` object. Otherwise, modifies `sdata` in place and returns None.
Notes:
The function assumes that the transcript data contains a column `transcript_id` and that the cellular data contains
cell centroids for spatial coordinates. The KDTree algorithm is used to compute the closest cell for each transcript.
The resulting distances are stored in the `distance_to_source_cell` column of the `sdata` object's transcript layer,
and the closest source cell is stored in the `closest_source_cell` column.
The median distance for each gene is also added to the `xrna_metadata` in the `var` attribute of `sdata`.
- The function assumes that the transcript data contains a column `transcript_id` and that the cellular data contains cell centroids for spatial coordinates. The KDTree algorithm is used to compute the closest cell for each transcript. The resulting distances are stored in the `distance_to_source_cell` column of the `sdata` object's transcript layer, and the closest source cell is stored in the `closest_source_cell` column. The median distance for each gene is also added to the `xrna_metadata` in the `var` attribute of `sdata`.
"""

# Extract transcript and cellular data
Expand Down Expand Up @@ -229,35 +210,29 @@ def distance_to_source_cell(
return sdata.copy() if copy else None

def compute_distant_cells_prop(sdata, layer='transcripts', gene_id_column='feature_name', threshold=30,copy=False):
"""
Compute the proportion of transcripts for each gene that are located beyond a specified distance from their closest source cell, and add the result to the metadata of the SpatialData object.
"""Compute the proportion of transcripts for each gene that are located beyond a specified distance from their closest source cell, and add the result to the metadata of the SpatialData object.
Parameters
----------
sdata : SpatialData
- sdata : SpatialData
A SpatialData object containing the spatial omics data.
layer : str, optional
- layer : str, optional
The layer in `sdata.points` that contains the transcript data. Default is 'transcripts'.
gene_id_column : str, optional
- gene_id_column : str, optional
Column name in the transcript data representing gene identifiers. Default is 'feature_name'.
threshold : float, optional
- threshold : float, optional
The distance threshold (in micrometers) to calculate the proportion of transcripts farther away from their closest source cell. Default is 30.
Returns
-------
None
The function modifies the `sdata` object in place, adding the computed proportions as a new column in `sdata['xrna_metadata'].var`.
- The function modifies the `sdata` object in place, adding the computed proportions as a new column in `sdata['xrna_metadata'].var`.
Notes
-----
- This function assumes that `sdata.points[layer]` contains a column `distance_to_source_cell` with distances between transcripts and their closest source cells.
- The resulting column is named `frac_beyond_<threshold>_from_source`.
Example
-------
```
compute_source_cells_beyond_distance(sdata, layer='transcripts', threshold=30)
```
"""

# Extract transcript data
Expand All @@ -279,7 +254,21 @@ def compute_distant_cells_prop(sdata, layer='transcripts', gene_id_column='featu

return sdata.copy() if copy else None

def get_proportion_expressed_per_cell_type(adata,cell_type_key='cell type'):
def get_proportion_expressed_per_cell_type(adata, cell_type_key='cell type'):
"""Calculate the proportion of expression for each feature (gene) per cell type.
Parameters
----------
- adata : AnnData
An AnnData object containing the single-cell or spatial transcriptomics dataset. The `obs` attribute should contain cell type annotations.
- cell_type_key : str, optional
The key in `adata.obs` corresponding to cell type annotations, by default 'cell type'.
Returns
-------
- pd.DataFrame
A DataFrame where rows correspond to features (genes) and columns correspond to cell types. Each entry represents the mean expression of the feature in the specified cell type.
"""
cell_types = adata.obs[cell_type_key].unique().dropna()
proportions = pd.DataFrame(index=adata.var_names, columns=cell_types)
for cell_type in cell_types:
Expand Down

0 comments on commit b166b11

Please sign in to comment.