Skip to content

Commit

Permalink
Merge pull request #14 from cbib/str_var_update
Browse files Browse the repository at this point in the history
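Update pyproject.toml (version 0.1.1, description, authors). Corrections: metadata column 'short_comp' renamed to 'compartment'; default correction_method changed from "bonferroni" to "fdr_bh". flake8 OK. Added optional 'color' and 'style' config settings for the PCA plot.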
  • Loading branch information
johaGL authored Aug 24, 2023
2 parents 6c3884e + 79c80b9 commit 4b693de
Show file tree
Hide file tree
Showing 24 changed files with 76 additions and 113 deletions.
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
[tool.poetry]
name="DIMet"
version="0.1.0"
description="A tool for Differential Isotope-labeled targeted Metabolomics"
version="0.1.1"
description="A tool for Differential analysis of Isotope-labeled targeted Metabolomics data"
readme="README.md"
license = "MIT"
authors = [
"Johanna Galvis Rodriguez <[email protected]>",
"Joris Guyon <[email protected]>",
"Benjamin Dartigues <[email protected]>",
"Florian Specque <[email protected]>",
"Thomas Daubon <[email protected]>",
"Slim Karkar <[email protected]>",
"Helge Hecht <[email protected]>",
"Bjorn Gruening <[email protected]>",
"Hayssam Soueidan <[email protected]>",
"Macha Nikolski <[email protected]>"

]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ grouping :
- timepoint

qualityDistanceOverSpan : -0.3
correction_method : "bonferroni"
correction_method : "fdr_bh"

impute_values:
abundances: "min"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ grouping :
- timepoint

qualityDistanceOverSpan : -0.3
correction_method : "bonferroni"
correction_method : "fdr_bh"

impute_values:
abundances: "min"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ grouping :
- condition
- timepoint

correction_method : "bonferroni"
correction_method : "fdr_bh"

datatypes:
- abundances
Expand Down
3 changes: 3 additions & 0 deletions src/dimet/config/analysis/method/pca_plot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ _target_: dimet.method.PcaPlotConfig
label: pca-plot
name: Generate Principal Component Analysis plots

color: condition # color dots using this variable
style: timepoint # choose the dots' marker shapes using this variable

pca_split_further:
- timepoint
# - condition
Expand Down
2 changes: 1 addition & 1 deletion src/dimet/config/analysis/method/time_course_analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ grouping:
- timepoint

qualityDistanceOverSpan : -0.3
correction_method : "bonferroni"
correction_method : "fdr_bh"

impute_values:
abundances: "min"
Expand Down
10 changes: 5 additions & 5 deletions src/dimet/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,8 @@ def df_to_dict_by_compartment(df: pd.DataFrame,
splits df into a dictionary of dataframes, each for one compartment
"""
output_dict = dict()
for compartment in metadata["short_comp"].unique():
sample_names = metadata[metadata["short_comp"] == compartment][
for compartment in metadata['compartment'].unique():
sample_names = metadata[metadata['compartment'] == compartment][
"original_name"]
compartment_df = df[list(sample_names)]
output_dict[compartment] = compartment_df
Expand Down Expand Up @@ -435,7 +435,7 @@ def absolute_geommean_diff(b_values: np.array, a_values: np.array):
def drop_all_nan_metabolites_on_comp_frames(frames_dict: Dict,
metadata: pd.DataFrame) -> Dict:
""" metabolites must be in rows """
compartments = metadata["short_comp"].unique().tolist()
compartments = metadata['compartment'].unique().tolist()
for dataset in frames_dict.keys():
for compartment in compartments:
tmp = frames_dict[dataset][compartment]
Expand All @@ -454,9 +454,9 @@ def set_samples_names(frames_dict: Dict, metadata: pd.DataFrame) -> Dict:
"""
for dataset, compartments_dict in frames_dict.items():
for compartment, df in compartments_dict.items():
original_names = metadata[metadata["short_comp"] == compartment][
original_names = metadata[metadata['compartment'] == compartment][
"original_name"]
new_names = metadata[metadata["short_comp"] == compartment][
new_names = metadata[metadata['compartment'] == compartment][
"name_to_plot"]
renamed_columns = {old: new for old, new in
zip(original_names, new_names)
Expand Down
18 changes: 10 additions & 8 deletions src/dimet/method/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ def build(self) -> "PcaAnalysis":


class PcaPlotConfig(MethodConfig):
color: Union[str, None] = "condition"
style: Union[str, None] = "timepoint"
pca_split_further: Union[ListConfig, None] = ["timepoint"]
draw_ellipses: Union[str, None] = "condition"
run_iris_demo: bool = False
Expand Down Expand Up @@ -205,7 +207,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None:
"No selected metabolites provided, plotting for all")
with open_dict(cfg):
cfg.analysis["metabolites"] = {}
for c in set(dataset.metadata_df["short_comp"]):
for c in set(dataset.metadata_df['compartment']):
metabolites_compartment = \
dataset.compartmentalized_dfs[
'abundances'][c].index.to_list()
Expand All @@ -220,7 +222,7 @@ def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None:
# check that necessary information is provided in the analysis config
try:
if not set(cfg.analysis.metabolites.keys()).issubset(
dataset.metadata_df["short_comp"]):
dataset.metadata_df['compartment']):
raise ValueError(
"[Analysis > Metabolites > compartments] are missing "
"from [Metadata > Compartments]"
Expand Down Expand Up @@ -378,7 +380,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None:
logger.warning(
"No selected metabolites provided, plotting for all")
with open_dict(cfg):
compartments = list(set(dataset.metadata_df["short_comp"]))
compartments = list(set(dataset.metadata_df['compartment']))
cfg.analysis["metabolites"] = dict()
for c in compartments:
isotopologues_names = \
Expand All @@ -397,7 +399,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None:
def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None:
try:
if not set(cfg.analysis.metabolites.keys()).issubset(
dataset.metadata_df['short_comp']):
dataset.metadata_df['compartment']):
raise ValueError(
"[Analysis > Metabolites > compartments] "
"are missing from [Metadata > Compartments]"
Expand Down Expand Up @@ -440,7 +442,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None:
"No selected metabolites provided, plotting for all")
with open_dict(cfg):
cfg.analysis["metabolites"] = {}
for c in set(dataset.metadata_df["short_comp"]):
for c in set(dataset.metadata_df['compartment']):
cfg.analysis["metabolites"][c] = \
dataset.compartmentalized_dfs[
'mean_enrichment'][c].index.to_list()
Expand All @@ -453,7 +455,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None:
def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None:
try:
if not set(cfg.analysis.metabolites.keys()).issubset(
dataset.metadata_df['short_comp']):
dataset.metadata_df['compartment']):
raise ValueError(
"[Analysis > Metabolites > compartments] are missing "
"from [Metadata > Compartments]"
Expand Down Expand Up @@ -728,11 +730,11 @@ def check_expectations(self, cfg: DictConfig,
if not isinstance(cfg.analysis.compartment, str):
raise ValueError("compartment must be string in config file")
if cfg.analysis.compartment not in \
set(data_integration.metadata_df['short_comp']):
set(data_integration.metadata_df['compartment']):
raise ValueError(
f"the compartment '{cfg.analysis.compartment}' "
f"in the config file does not exist. Must be one of: "
f"{set(data_integration.metadata_df['short_comp'])}"
f"{set(data_integration.metadata_df['compartment'])}"
)
if not len(cfg.analysis.statistical_test.keys()) == 1:
raise ValueError(
Expand Down
4 changes: 2 additions & 2 deletions src/dimet/processing/differential_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def pairwise_comparison(
df_good, "distance/span", cfg.analysis.method.qualityDistanceOverSpan
)
df_good = compute_padj(df_good, 0.05,
cfg.analysis.method.correction_method)
cfg.analysis.method.correction_method)

# re-integrate the "bad" sub-dataframes to the full dataframe
result = concatenate_dataframes(df_good, df_bad, df_no_padj)
Expand Down Expand Up @@ -475,7 +475,7 @@ def multi_group_compairson(
sublist in conditions_list]
df4c = apply_multi_group_kruskal_wallis(df4c, this_comparison)
df4c = compute_padj(df4c, 0.05,
cfg.analysis.method.correction_method)
cfg.analysis.method.correction_method)
base_file_name = dataset.get_file_for_label(file_name)
base_file_name += f"--{compartment}--multigroup"
output_file_name = os.path.join(out_table_dir,
Expand Down
4 changes: 2 additions & 2 deletions src/dimet/processing/pca_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def pca_on_split_dataset(compartment_df: pd.DataFrame,
and computes PCA on each subset.
The results are added to the dictionary of results.
"""
assert len(metadata_co_df['short_comp'].unique()) == 1
assert len(metadata_co_df['compartment'].unique()) == 1
assert chosen_column in ["condition", "timepoint"]
unique_nominal_values = metadata_co_df[chosen_column].unique().tolist()
pca_tables_dict = {}
Expand Down Expand Up @@ -149,7 +149,7 @@ def run_pca_analysis(file_name: data_files_keys_type,
val_instead_zero = arg_repl_zero2value(impute_value, df)
df = df.replace(to_replace=0, value=val_instead_zero)

metadata_co_df = metadata_df[metadata_df['short_comp'] == compartment]
metadata_co_df = metadata_df[metadata_df['compartment'] == compartment]

pca_compartment_dict = pca_global_compartment_dataset(
df, metadata_co_df, description=[file_name, compartment]
Expand Down
4 changes: 2 additions & 2 deletions src/dimet/visualization/abundance_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,10 @@ def run_plot_abundance_bars(dataset: Dataset, out_plot_dir,
width_each_subfig = cfg.analysis.width_each_subfig
height_each_subfig = cfg.analysis.method.height_each_subfig

compartments = set(metadata_df["short_comp"])
compartments = set(metadata_df['compartment'])
for compartment in compartments:
metadata_compartment_df: pd.DataFrame = \
metadata_df.loc[metadata_df["short_comp"] == compartment, :]
metadata_df.loc[metadata_df['compartment'] == compartment, :]
compartment_df = dataset.compartmentalized_dfs[
"abundances"][compartment]
# metadata and abundances time of interest
Expand Down
2 changes: 1 addition & 1 deletion src/dimet/visualization/distr_fit_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def run_distr_fit_plot(
df = compartmentalized_df
df = df[(df.T != 0).any()]
val_instead_zero = arg_repl_zero2value(impute_value,
df)
df)
df = df.replace(to_replace=0, value=val_instead_zero)
if mode == "pairwise":
for comparison in cfg.analysis.comparisons:
Expand Down
6 changes: 3 additions & 3 deletions src/dimet/visualization/isotopologue_proportions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def isotopologue_proportions_2piled_df(
combined_isos_metadata_df, metada_df, on='name_to_plot')

combined_isos_metadata_df = combined_isos_metadata_df.drop(
columns=['short_comp', 'original_name', 'name_to_plot', 'timepoint'])
columns=['compartment', 'original_name', 'name_to_plot', 'timepoint'])
piled_df = pd.melt(combined_isos_metadata_df,
id_vars=['timenum', 'condition'],
var_name="isotopologue_name",
Expand Down Expand Up @@ -455,11 +455,11 @@ def run_isotopologue_proportions_plot(dataset: Dataset,
time_levels_list: List[str] = [
str(i) for i in sorted(metadata_df['timenum'].unique())]

compartments = list(metadata_df['short_comp'].unique())
compartments = list(metadata_df['compartment'].unique())

for compartment in compartments:
metadata_compartment_df: pd.DataFrame = \
metadata_df.loc[metadata_df["short_comp"] == compartment, :]
metadata_df.loc[metadata_df['compartment'] == compartment, :]
compartment_df = dataset.compartmentalized_dfs[
"isotopologue_proportions"][compartment]

Expand Down
6 changes: 3 additions & 3 deletions src/dimet/visualization/mean_enrichment_line_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def melt_data_metadata_2df(compartment_df: pd.DataFrame,
on='name_to_plot')
compartment_df = compartment_df.drop(columns=['name_to_plot',
'timepoint',
'short_comp',
'compartment',
'original_name'])
melted_df = pd.melt(compartment_df,
id_vars=['timenum', 'condition'],
Expand Down Expand Up @@ -346,7 +346,7 @@ def line_plot_by_compartment(dataset: Dataset,
cfg: DictConfig) -> None:
""" calls function to construct and save plot """
metadata_df = dataset.metadata_df
compartments = list(metadata_df['short_comp'].unique())
compartments = list(metadata_df['compartment'].unique())
width_subplot = cfg.analysis.width_subplot
height_subplot = cfg.analysis.method.height_subplot
xaxis_title = cfg.analysis.method.xaxis_title
Expand All @@ -355,7 +355,7 @@ def line_plot_by_compartment(dataset: Dataset,
alpha_conf = cfg.analysis.method.alpha

for co in compartments:
metadata_co_df = metadata_df.loc[metadata_df['short_comp'] == co, :]
metadata_co_df = metadata_df.loc[metadata_df['compartment'] == co, :]
compartment_df = dataset.compartmentalized_dfs["mean_enrichment"][co]

melted_co_df = melt_data_metadata_2df(compartment_df, metadata_co_df)
Expand Down
8 changes: 6 additions & 2 deletions src/dimet/visualization/pca_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,19 @@ def run_pca_plot(pca_results_dict: dict, cfg: DictConfig,
name_plot_var = f"{'--'.join(tup)}_var.pdf"
figure_var.savefig(os.path.join(out_plot_dir, name_plot_var))
plt.close()

color_dot = cfg.analysis.method.color
style_dot = cfg.analysis.method.style
options_labels = {'label-y': "name_to_plot",
'label-n': ""} # when empty string, no dot labels

# scatter: save both versions, labeled dots and unlabeled dots:
for choice in options_labels.keys():
labels_column = options_labels[choice]
name_elements = list(tup) + [choice]
scatter_fig: figure.Figure = pca_scatter_plot(
pc_df, var_explained_df, "condition",
"condition", labels_column,
pc_df, var_explained_df, color_dot,
style_dot, labels_column,
ellipses_column=cfg.analysis.method.draw_ellipses)
pca_scatter_2_pdf(scatter_fig, name_elements, out_plot_dir)
plt.close()
Expand Down
7 changes: 3 additions & 4 deletions tests/test_abundance_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_pile_up_abundance(self):
'name_to_plot': ['beta-1', 'beta-2', 'ctrl-1', 'ctrl-2'],
'condition': ['beta-glu', 'beta-glu', 'control', 'control'],
'timepoint': ['t0', 't0', 't0', 't0'],
'short_comp': ['ex', 'ex', 'ex', 'ex']
'compartment': ['ex', 'ex', 'ex', 'ex']
})
result = abundance_bars.pile_up_abundance(df, metadata_df)
self.assertTrue(result.shape == (12, 4))
Expand All @@ -46,7 +46,7 @@ def test_plot_one_metabolite(self):
'timepoint': ['t0', 't0'],
'condition': ['beta', 'alpha'],
'metabolite': ['m1', 'm1'],
'abundance' : [200, 700]
'abundance': [200, 700]
})
fig_this_metabolite, axs_k = plt.subplots(
nrows=1, ncols=1,
Expand All @@ -67,7 +67,7 @@ def test_plot_abundance_bars_no_grid(self):
'timepoint': ['t0', 't0'],
'condition': ['beta', 'alpha'],
'metabolite': ['m1', 'm1'],
'abundance' : [200, 700]
'abundance': [200, 700]
})
try:
os.makedirs("../__pycache__/")
Expand All @@ -84,4 +84,3 @@ def test_plot_abundance_bars_no_grid(self):
height_each_subfig=2.4,
cfg=cfg_m)
self.assertTrue(result is None)

6 changes: 1 addition & 5 deletions tests/test_differential_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def test_select_rows_with_sufficient_non_nan_values(self):
groups = [["c1", "c2", "c3"], ["c4", "c5", "c6"]]
df = countnan_samples(df, groups)
result_good, result_bad = differential_analysis. \
select_rows_with_sufficient_non_nan_values(
df, groups
)
select_rows_with_sufficient_non_nan_values(df, groups)
self.assertEqual(result_good.shape, (2, 8))
self.assertEqual(result_bad.shape, (2, 8))
self.assertTrue(np.any(np.array(result_good.loc[0, :]) ==
Expand Down Expand Up @@ -141,5 +139,3 @@ def test_time_course_auto_list_comparisons(self):
self.assertListEqual(result[1], [['cond1', '3h'], ['cond1', '2.7h']])
self.assertListEqual(result[2], [['cond2', '3h'], ['cond2', '2.7h']])
self.assertListEqual(result[3], [['cond1', '2.7h'], ['cond1', '1h']])


9 changes: 4 additions & 5 deletions tests/test_distr_fit_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
class TestDistrFitPlot(TestCase):
def test_make_pdf(self):
dist = getattr(stats, "gennorm")
params_dict = {'beta': 1.09, 'loc': 0.01, 'scale': 1.77}
params_dict = {'beta': 1.09, 'loc': 0.01, 'scale': 1.77}
params = list(params_dict.values())
result = distr_fit_plot.make_pdf(dist, params, size=10000)
self.assertAlmostEqual(result.index[0],-5.9147, 2 )
self.assertAlmostEqual(result.index[-3], 5.932, 2 )
self.assertAlmostEqual(result.to_list()[1], 0.007, 3 )
self.assertAlmostEqual(result.index[0], -5.9147, 2)
self.assertAlmostEqual(result.index[-3], 5.932, 2)
self.assertAlmostEqual(result.to_list()[1], 0.007, 3)
self.assertAlmostEqual(result.to_list()[-1], 0.00699, 3)

Loading

0 comments on commit 4b693de

Please sign in to comment.