Description of feature
Dear all,
I would like to ask for a feature in the Milo submodule: Grouping Neighborhoods, as implemented in MiloR (https://github.com/MarioniLab/miloR/blob/master/R/groupNhoods.R).
@emdann suggested that I request the feature for the pertpy implementation (see emdann/milopy#49).
As I am already using the feature to analyse my data, I have code suggestions, should you be interested.
I adjusted the code and example to work with pertpy-Milo using the tutorial data, compared the neighborhood grouping results to the MiloR implementation, and included a suggestion for neighborhood group markers based on the pertpy differential expression module.
I would be very thankful for any help to correct/verify my code.
Best regards,
max
Marker genes from find_nhood_group_markers for neighborhood groups 5 vs 4 in "Python Neighborhood groups":
MiloR neighborhood groups:
Neighborhood group functions:
def find_nhood_groups(
    mdata,
    SpatialFDR_threshold=0.1,
    merge_discord=False,
    max_lfc_delta=None,
    overlap=1,
    subset_nhoods: None | str = None,
):
    """Group neighbourhoods by Louvain clustering of the nhood adjacency graph.

    Adapted from miloR::groupNhoods; the graph construction reuses scanpy's
    louvain clustering utilities.
    """
    from scipy.sparse import issparse

    nhs = mdata["rna"].obsm["nhoods"].copy()
    nhood_adj = mdata["milo"].varp["nhood_connectivities"].copy()
    da_res = mdata["milo"].var.copy()
    is_da = np.asarray(da_res.SpatialFDR <= SpatialFDR_threshold)
    if subset_nhoods is not None:
        # restrict to neighbourhoods matching the query string
        mask_da_res = da_res.query(subset_nhoods).copy()
        mask = np.asarray([x in mask_da_res.index for x in da_res.index])
        da_res = da_res.loc[mask, :].copy()
        nhs = nhs[:, mask].copy()
        nhood_adj = nhood_adj[:, mask][mask, :].copy()
        is_da = is_da[mask].copy()
    # work on a dense adjacency so submatrix assignment behaves as expected
    nhood_adj = nhood_adj.toarray() if issparse(nhood_adj) else np.asarray(nhood_adj)
    if not merge_discord:
        # cut edges between DA neighbourhoods with discordant logFC signs
        lfc_da = da_res.loc[is_da, "logFC"].values
        discord_sign = np.sign(np.outer(lfc_da, lfc_da)) < 0
        da_idx = np.where(is_da)[0]
        sub_adj = nhood_adj[np.ix_(da_idx, da_idx)]
        sub_adj[discord_sign] = 0
        nhood_adj[np.ix_(da_idx, da_idx)] = sub_adj
    if overlap > 1:
        # require a minimum number of shared cells between neighbourhoods
        nhood_adj[nhood_adj < overlap] = 0
    if max_lfc_delta is not None:
        # cut edges between neighbourhoods with too dissimilar logFC
        lfc_diff = np.abs(da_res["logFC"].values[:, None] - da_res["logFC"].values[None, :])
        nhood_adj[lfc_diff > max_lfc_delta] = 0
    # binarise and cluster the resulting graph with Louvain
    nhood_adj = (nhood_adj > 0).astype(int)
    g = sc._utils.get_igraph_from_adjacency(nhood_adj, directed=False)
    part = g.community_multilevel(weights=None)
    groups = np.array(part.membership).astype(int).astype(str)
    if subset_nhoods is not None:
        mdata["milo"].var["nhood_groups"] = pd.Series(
            np.nan, index=mdata["milo"].var_names, dtype="object"
        )
        mdata["milo"].var.loc[mask, "nhood_groups"] = groups
        return None
    mdata["milo"].var["nhood_groups"] = groups
Nhood group markers:
from typing import Sequence
from lamin_utils import logger
class Limma(pt.tl._differential_gene_expression._base.LinearModelBase):
    """Thin limma wrapper, following the structure of pertpy's edgeR wrapper."""

    def _check_counts(self):
        # limma works on normalised data, so no integer-count check is enforced here
        pass

    def fit(self, voom=False, **kwargs):
        """Fit a limma linear model on normalised data.

        Note: this stores the R fit object on ``self.fit``, so call it only once per instance.

        Args:
            voom: If True, fit on count data with limma-voom instead (see ``voom_fit``).
            **kwargs: Keyword arguments passed on to ``limma::lmFit()`` (or ``voom_fit``).
        """
        if voom:
            self.voom_fit(**kwargs)
            return None
        from scipy.sparse import issparse

        try:
            from rpy2 import robjects as ro
            from rpy2.robjects import numpy2ri, pandas2ri
            from rpy2.robjects.conversion import localconverter
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError("limma requires rpy2 to be installed.") from None
        try:
            edger = importr("edgeR")
            limma = importr("limma")
        except ImportError as e:
            raise ImportError(
                "limma requires a valid R installation with the following packages:\n"
                "limma, edgeR, BiocParallel, RhpcBLASctl"
            ) from e
        # Convert the expression matrix (genes x samples) and the design matrix to R
        expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
        expr = expr.T.toarray() if issparse(expr) else expr.T
        with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
            expr_r = ro.conversion.py2rpy(
                pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names)
            )
            r_design = ro.conversion.py2rpy(self.design)
        self.fit = limma.lmFit(expr_r, r_design, **kwargs)

    ### for the sake of completeness
    def voom_fit(self, voom_kwargs=None, **lmFit_kwargs):
        """Fit a limma-voom model on count data.

        Args:
            voom_kwargs: Keyword arguments passed on to ``limma::voom()``.
            **lmFit_kwargs: Keyword arguments passed on to ``limma::lmFit()``.
        """
        print("Fitting limma with voom")
        from scipy.sparse import issparse

        voom_kwargs = {} if voom_kwargs is None else voom_kwargs
        try:
            from rpy2 import robjects as ro
            from rpy2.robjects import numpy2ri, pandas2ri
            from rpy2.robjects.conversion import localconverter
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError("limma requires rpy2 to be installed.") from None
        try:
            edger = importr("edgeR")
            limma = importr("limma")
        except ImportError as e:
            raise ImportError(
                "limma requires a valid R installation with the following packages:\n"
                "limma, edgeR, BiocParallel, RhpcBLASctl"
            ) from e
        # Convert the expression matrix (genes x samples) and the design matrix to R
        expr = self.adata.X if self.layer is None else self.adata.layers[self.layer]
        expr = expr.T.toarray() if issparse(expr) else expr.T
        with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
            expr_r = ro.conversion.py2rpy(
                pd.DataFrame(expr, index=self.adata.var_names, columns=self.adata.obs_names)
            )
            r_design = ro.conversion.py2rpy(self.design)
        v = limma.voom(expr_r, r_design, **voom_kwargs)
        self.fit = limma.lmFit(v, r_design, **lmFit_kwargs)

    def _test_single_contrast(self, contrast, trend=False, robust=True, **kwargs):
        from rpy2 import robjects as ro
        from rpy2.robjects import numpy2ri, pandas2ri
        from rpy2.robjects.conversion import localconverter
        from rpy2.robjects.packages import STAP

        with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
            r_design = ro.conversion.py2rpy(self.design)
            r_contrast = ro.conversion.py2rpy(np.asarray(contrast, dtype=float))
        r_string = """
        FUN <- function(fit, contrast, design, trend, robust) {
            fit2 <- limma::contrasts.fit(fit, as.numeric(contrast))
            efit <- limma::eBayes(fit2, trend = trend, robust = robust)
            limma::topTable(efit, coef = 1, n = Inf)
        }
        """
        r_pkg = STAP(r_string, "r_pkg")
        res = r_pkg.FUN(self.fit, r_contrast, r_design, trend, robust)
        with localconverter(ro.default_converter + pandas2ri.converter):
            de_res = pandas2ri.rpy2py(res)
        de_res.index.name = "variable"
        de_res = de_res.reset_index()
        de_res = de_res.rename(
            columns={"P.Value": "p_value", "logFC": "log_fc", "adj.P.Val": "adj_p_value"}
        )
        return de_res
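For reference, the wrapper can also be used directly on an existing pseudobulk AnnData, mirroring the flow inside find_nhood_group_markers below. This is only a sketch: pb is a hypothetical pseudobulk AnnData with a "nhood_groups" column in .obs, and I assume pertpy's contrast() keeps its documented baseline/group_to_compare semantics.
# Hypothetical standalone use of the Limma wrapper on a pseudobulk AnnData `pb`
# (pb.obs must contain a "nhood_groups" column; group labels "4"/"5" are placeholders).
mod = Limma(pb, design="~0+nhood_groups")
mod.fit(voom=True)
res = mod._test_single_contrast(
    mod.contrast("nhood_groups", baseline="4", group_to_compare="5")
)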
def get_cells_in_nhoods(mdata, nhood_ids):
    """Get cells belonging to the neighbourhoods of interest."""
    in_nhoods = np.array(mdata["rna"].obsm["nhoods"][:, nhood_ids.astype("int")].sum(1))
    ad = mdata["rna"][in_nhoods.astype(bool).ravel(), :].copy()
    return ad
def _get_groups(
    mdata,
    sample_col,
    nhood_group_obs="nhood_groups",
    confounders_obs=None,
    pseudobulk=True,
    func="sum",
    layer="counts",
):
    """Collect cells per neighbourhood group and (optionally) pseudobulk them per sample."""
    layer = None if layer == "X" else layer
    confounders_obs = [] if confounders_obs is None else confounders_obs
    ad_list = []
    groups = mdata["milo"].var[nhood_group_obs].dropna().unique()
    mdata["milo"].var["_my_groups"] = mdata["milo"].var[nhood_group_obs]
    for g in groups:
        nhood_ids = np.asarray(
            mdata["milo"].var.query("_my_groups == @g").index.to_list()
        )
        cur_ad = get_cells_in_nhoods(mdata, nhood_ids)
        cur_ad.obs[nhood_group_obs] = g
        if pseudobulk:
            cur_ad = sc.get.aggregate(
                cur_ad,
                by=[sample_col, nhood_group_obs] + confounders_obs,
                func=func,
                layer=layer,
            )
        ad_list.append(cur_ad)
    ad = sc.concat(ad_list)
    return ad
def find_nhood_group_markers(
    mdata,
    sample_col="patient_id",
    nhood_group_obs="nhood_groups",
    design="~0+nhood_groups",
    baseline="non_enriched",
    group_to_compare="enriched",
    confounders_obs=None,
    pseudobulk=True,
    func="mean",
    layer="X",
    pb_model=Limma,
    return_model=False,
    **kwargs,
):
    """Test for marker genes between two neighbourhood groups on (pseudobulked) expression."""
    ad = _get_groups(
        mdata,
        sample_col=sample_col,
        nhood_group_obs=nhood_group_obs,
        confounders_obs=confounders_obs,
        pseudobulk=pseudobulk,
        func=func,
        layer=layer,
    )
    if pseudobulk:
        # sc.get.aggregate stores the aggregated values in a layer named after `func`
        ad.X = ad.layers[func]
    mod = pb_model(ad, design=design)
    mod.fit(**kwargs)
    res_df = mod._test_single_contrast(
        mod.contrast(nhood_group_obs, baseline=baseline, group_to_compare=group_to_compare)
    )
    if return_model:
        return res_df, mod
    return res_df
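The defaults (baseline="non_enriched", group_to_compare="enriched") also allow labelling neighbourhoods by hand instead of clustering them first, e.g. (a sketch, the thresholds are arbitrary):
# Sketch: hand-label neighbourhoods instead of clustering them (arbitrary thresholds),
# then reuse the same marker test with the default baseline/group_to_compare labels.
var = mdata["milo"].var
var["nhood_groups"] = np.where(
    (var["SpatialFDR"] < 0.1) & (var["logFC"] > 0), "enriched", "non_enriched"
)
markers = find_nhood_group_markers(mdata, sample_col="patient_id")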
Code to reproduce the example plots:
import os

# r_env_path should point to your local R installation (not shown here)
os.environ["R_HOME"] = r_env_path

import logging

import rpy2.rinterface_lib.callbacks
import rpy2.robjects as ro
from rpy2.robjects import default_converter, numpy2ri, pandas2ri, r
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.packages import STAP, importr

rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
%load_ext rpy2.ipython

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pertpy as pt
import scanpy as sc
import seaborn as sns
adata = pt.data.stephenson_2021_subsampled()
adata.obs["COVID_severity"] = adata.obs["Status_on_day_collection_summary"].copy() # short name
adata.obs[["patient_id", "COVID_severity"]].drop_duplicates().value_counts("COVID_severity")
sc.pl.umap(adata, color=["author_cell_type"], legend_loc="on data")
sc.pl.umap(adata, color=["Site", "Status"], wspace=0.3, size=8)
## Exclude LPS samples
adata = adata[adata.obs["Status"] != "LPS"].copy()
## Initialize object for Milo analysis
milo = pt.tl.Milo()
mdata = milo.load(adata)
from sklearn_ann.kneighbors.annoy import AnnoyTransformer
transformer = AnnoyTransformer(n_neighbors=150)
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", transformer = transformer)
milo.make_nhoods(mdata["rna"], prop=0.1)
nhood_size = np.array(mdata["rna"].obsm["nhoods"].sum(0)).ravel()
plt.hist(nhood_size, bins=100)
plt.xlabel("# cells in nhood")
plt.ylabel("# nhoods");
milo.count_nhoods(mdata, sample_col="patient_id")
# Reorder categories
# (by default, the last category is taken as the condition of interest)
mdata["rna"].obs["Status"] = mdata["rna"].obs["Status"].cat.reorder_categories(["Healthy", "Covid"])
milo.da_nhoods(mdata, design="~Status")
old_figsize = plt.rcParams["figure.figsize"]
plt.rcParams["figure.figsize"] = [10, 5]
plt.subplot(1, 2, 1)
plt.hist(mdata["milo"].var.PValue, bins=50)
plt.xlabel("P-Vals")
plt.subplot(1, 2, 2)
plt.plot(mdata["milo"].var.logFC, -np.log10(mdata["milo"].var.SpatialFDR), ".")
plt.xlabel("log-Fold Change")
plt.ylabel("- log10(Spatial FDR)")
plt.tight_layout()
plt.rcParams["figure.figsize"] = old_figsize
milo.build_nhood_graph(mdata)
plt.rcParams["figure.figsize"] = [7, 7]
milo.plot_nhood_graph(
mdata,
alpha=0.1,  ## SpatialFDR level (10%)
min_size=1, ## Size of smallest dot
)
milo.annotate_nhoods(mdata, anno_col="author_cell_type")
plt.hist(mdata["milo"].var["nhood_annotation_frac"], bins=30)
plt.xlabel("celltype fraction")
mdata["milo"].var["nhood_annotation"] = mdata["milo"].var["nhood_annotation"].astype(str)
mdata["milo"].var.loc[mdata["milo"].var["nhood_annotation_frac"] < 0.6, "nhood_annotation"] = "Mixed"
milo.plot_da_beeswarm(mdata, alpha=0.1)
find_nhood_groups(
    mdata,
    # subset_nhoods="nhood_annotation == 'CD83_CD14_mono'",
)
### Nhood groups beeswarm plot
milo.plot_da_beeswarm(
mdata,
anno_col = "nhood_groups",
)
### Python neighborhood groups plot
from legendkit import cat_legend, size_legend, vstack

plt_df = pd.DataFrame(
    mdata["milo"].varm["X_milo_graph"],
    columns=["UMAP_" + str(i + 1) for i in range(2)],
)
plt_df["nhood_groups"] = mdata["milo"].var["nhood_groups"].values
plt_df["nhood_groups"] = plt_df["nhood_groups"].astype(str)
plt_df["size"] = mdata["milo"].var["Nhood_size"].values
plt_df = plt_df.sort_values("nhood_groups")
# median position per group, used to place the group labels
label_df = plt_df.groupby("nhood_groups").median()

# unique group labels in plotting order, with one palette shared by plot and legend
groups = list(dict.fromkeys(plt_df["nhood_groups"]))
print(groups)
palette = sns.color_palette("Paired", len(groups))
scale_factor = 0.2
sns.scatterplot(
    x=plt_df["UMAP_1"],
    y=plt_df["UMAP_2"],
    size=plt_df["size"].values * scale_factor,
    hue=plt_df["nhood_groups"],
    legend=False,
    linewidth=0,
    palette=palette,
)
sns.despine()
slegend = size_legend(
    mdata["milo"].var["Nhood_size"].values * scale_factor,
    loc="out right center",
    title="Nhood size",
    func=lambda x: x * (1 / scale_factor),
)
ax = plt.gca()
ax.set_title("Python Neighborhood groups", fontsize=18)
clegend = cat_legend(colors=palette, labels=groups, title="Nhood group", handle="circle")
vstack([clegend, slegend], loc="out right center", spacing=10, frameon=False, ax=ax)
for _group, row in label_df.iterrows():
    plt.text(row["UMAP_1"], row["UMAP_2"], _group, fontsize=15)
plt.show()
###############
import anndata2ri

mdata = mdata.copy()
adata = pt.data.stephenson_2021_subsampled()
adata = adata[adata.obs["Status"] != "LPS"].copy()
da_res = mdata["milo"].var.copy()
da_res = da_res.drop("nhood_groups", axis=1)
with localconverter(anndata2ri.converter):
    r.assign("sce", adata)
    r.assign("nhoods", mdata["rna"].obsm["nhoods"])
    r.assign("nhoodCounts", mdata["milo"].X.T)
    r.assign("nhood_adjacency", mdata["milo"].varp["nhood_connectivities"])
    r.assign("da_results", da_res)
    r.assign("nhood_graph", mdata["milo"].varm["X_milo_graph"])
### MiloR nhood groups plot
mdata["milo"].var["nhood_groups_miloR"] = da_results["NhoodGroup"]
from legendkit import cat_legend, size_legend, vstack
plt_df = pd.DataFrame(mdata["milo"].varm["X_milo_graph"],
columns = ["UMAP_"+str(i+1) for i in range(2)]
)
plt_df["nhood_groups"] = mdata["milo"].var["nhood_groups_miloR"].values
plt_df.loc[plt_df.nhood_groups.isnull(), "nhood_groups"] = np.nan
plt_df["nhood_groups"] = plt_df["nhood_groups"].astype(str)
plt_df["size"] = mdata["milo"].var["Nhood_size"].values
plt_df = plt_df.sort_values("nhood_groups", na_position = "first")
label_df = label_df = (
plt_df
.groupby("nhood_groups")
.median()
)
from legendkit import size_legend, cat_legend, vstack
# groups = plt_df["nhood_groups"].unique()
groups = []
[groups.append(n) for n in plt_df["nhood_groups"] if n not in groups]
print(groups)
palette = [color for color in sns.color_palette("Paired", 12) for _ in range(len(groups))]
scale_factor = 0.2
sns.scatterplot(
x=plt_df["UMAP_1"],
y=plt_df["UMAP_2"],
size = plt_df["size"].values * scale_factor, #.apply(lambda x: x*0.2),
hue = plt_df["nhood_groups"],#.astype("category").cat.codes,
legend = False,
linewidth=0,
palette = sns.color_palette("Paired", 12)
)
sns.despine()
slegend = size_legend(mdata["milo"].var["Nhood_size"].values * scale_factor, loc = "out right center",
title = "Nhood size", func = lambda x: x*(1/scale_factor))
color = dict(
colors = sns.color_palette("Paired", 12),
labels = groups
)
ax = plt.gca()
ax.set_title("MiloR Neighborhood groups", fontsize = 18)
handles, labels = ax.get_legend_handles_labels()
clegend = cat_legend(**color, title = "Nhood group", handle = "circle")
vstack([clegend, slegend], # title="Stack Vertically",
loc="out right center", spacing=10, frameon=False, ax=ax)
for _group, row in label_df.iterrows():
plt.text(row["UMAP_1"], row["UMAP_2"], _group, fontsize = 15)
plt.show()
mdata["milo"].var.loc[~mdata["milo"].var["nhood_groups"].isin(["4", "5"]), "nhood_groups"] = np.nan
### Uses the above Limma class by default; it can be swapped for pt.tl.EdgeR when a counts layer exists (see the sketch below).
res_df, mod = find_nhood_group_markers(
    mdata,
    sample_col="patient_id",
    baseline="4",
    group_to_compare="5",
    return_model=True,
    voom=True,
)
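For the counts-based variant mentioned in the comment above, a sketch (assuming a raw "counts" layer exists in mdata["rna"]; pt.tl.EdgeR is pertpy's edgeR wrapper and expects summed counts):
# Sketch of the edgeR-based variant: pseudobulk by summing raw counts and hand the
# result to pertpy's EdgeR wrapper instead of the Limma class above.
res_df_edger = find_nhood_group_markers(
    mdata,
    sample_col="patient_id",
    baseline="4",
    group_to_compare="5",
    func="sum",        # sum raw counts per sample and neighbourhood group
    layer="counts",    # assumes a raw counts layer in mdata["rna"]
    pb_model=pt.tl.EdgeR,
)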
### Volcano Plot:
mod.plot_volcano(res_df, log2fc_thresh=0.1, legend_pos=(1.3, 1))