From 9195361275556cafc8d0a6b94764f1d72e170020 Mon Sep 17 00:00:00 2001 From: johaGL Date: Thu, 24 Aug 2023 16:18:56 +0200 Subject: [PATCH 1/3] change strings: short_comp --> condition; bonferroni --> fdr_bh. & flake8 all code --- .../analysis/method/differential_analysis.yaml | 2 +- .../method/metabologram_integration.yaml | 2 +- .../analysis/method/multi_group_comparison.yaml | 2 +- .../analysis/method/time_course_analysis.yaml | 2 +- src/dimet/helpers.py | 10 +++++----- src/dimet/method/__init__.py | 16 ++++++++-------- src/dimet/processing/differential_analysis.py | 4 ++-- src/dimet/processing/pca_analysis.py | 4 ++-- src/dimet/visualization/abundance_bars.py | 4 ++-- src/dimet/visualization/distr_fit_plot.py | 2 +- .../visualization/isotopologue_proportions.py | 6 +++--- .../visualization/mean_enrichment_line_plot.py | 6 +++--- tests/test_abundance_bars.py | 7 +++---- tests/test_differential_analysis.py | 6 +----- tests/test_distr_fit_plot.py | 9 ++++----- tests/test_fit_statistical_distribution.py | 17 ++++------------- tests/test_helpers.py | 16 ++++++++++++---- tests/test_isotopologue_proportions.py | 11 ++++------- tests/test_pca_analysis.py | 4 ++-- tests/test_pca_plot.py | 4 ---- 20 files changed, 60 insertions(+), 74 deletions(-) diff --git a/src/dimet/config/analysis/method/differential_analysis.yaml b/src/dimet/config/analysis/method/differential_analysis.yaml index 387b233..40ad93a 100644 --- a/src/dimet/config/analysis/method/differential_analysis.yaml +++ b/src/dimet/config/analysis/method/differential_analysis.yaml @@ -8,7 +8,7 @@ grouping : - timepoint qualityDistanceOverSpan : -0.3 -correction_method : "bonferroni" +correction_method : "fdr_bh" impute_values: abundances: "min" diff --git a/src/dimet/config/analysis/method/metabologram_integration.yaml b/src/dimet/config/analysis/method/metabologram_integration.yaml index 8456580..8c3468d 100644 --- a/src/dimet/config/analysis/method/metabologram_integration.yaml +++ b/src/dimet/config/analysis/method/metabologram_integration.yaml @@ -10,7 +10,7 @@ grouping : - timepoint qualityDistanceOverSpan : -0.3 -correction_method : "bonferroni" +correction_method : "fdr_bh" impute_values: abundances: "min" diff --git a/src/dimet/config/analysis/method/multi_group_comparison.yaml b/src/dimet/config/analysis/method/multi_group_comparison.yaml index 969202c..442d054 100644 --- a/src/dimet/config/analysis/method/multi_group_comparison.yaml +++ b/src/dimet/config/analysis/method/multi_group_comparison.yaml @@ -7,7 +7,7 @@ grouping : - condition - timepoint -correction_method : "bonferroni" +correction_method : "fdr_bh" datatypes: - abundances diff --git a/src/dimet/config/analysis/method/time_course_analysis.yaml b/src/dimet/config/analysis/method/time_course_analysis.yaml index dd4b124..a61bfc3 100644 --- a/src/dimet/config/analysis/method/time_course_analysis.yaml +++ b/src/dimet/config/analysis/method/time_course_analysis.yaml @@ -8,7 +8,7 @@ grouping: - timepoint qualityDistanceOverSpan : -0.3 -correction_method : "bonferroni" +correction_method : "fdr_bh" impute_values: abundances: "min" diff --git a/src/dimet/helpers.py b/src/dimet/helpers.py index cd8796b..815c04f 100644 --- a/src/dimet/helpers.py +++ b/src/dimet/helpers.py @@ -244,8 +244,8 @@ def df_to_dict_by_compartment(df: pd.DataFrame, splits df into a dictionary of dataframes, each for one compartment """ output_dict = dict() - for compartment in metadata["short_comp"].unique(): - sample_names = metadata[metadata["short_comp"] == compartment][ + for compartment in metadata['compartment'].unique(): + sample_names = metadata[metadata['compartment'] == compartment][ "original_name"] compartment_df = df[list(sample_names)] output_dict[compartment] = compartment_df @@ -435,7 +435,7 @@ def absolute_geommean_diff(b_values: np.array, a_values: np.array): def drop_all_nan_metabolites_on_comp_frames(frames_dict: Dict, metadata: pd.DataFrame) -> Dict: """ metabolites must be in rows """ - compartments = metadata["short_comp"].unique().tolist() + compartments = metadata['compartment'].unique().tolist() for dataset in frames_dict.keys(): for compartment in compartments: tmp = frames_dict[dataset][compartment] @@ -454,9 +454,9 @@ def set_samples_names(frames_dict: Dict, metadata: pd.DataFrame) -> Dict: """ for dataset, compartments_dict in frames_dict.items(): for compartment, df in compartments_dict.items(): - original_names = metadata[metadata["short_comp"] == compartment][ + original_names = metadata[metadata['compartment'] == compartment][ "original_name"] - new_names = metadata[metadata["short_comp"] == compartment][ + new_names = metadata[metadata['compartment'] == compartment][ "name_to_plot"] renamed_columns = {old: new for old, new in zip(original_names, new_names) diff --git a/src/dimet/method/__init__.py b/src/dimet/method/__init__.py index 90eadfc..53cadc1 100644 --- a/src/dimet/method/__init__.py +++ b/src/dimet/method/__init__.py @@ -205,7 +205,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None: "No selected metabolites provided, plotting for all") with open_dict(cfg): cfg.analysis["metabolites"] = {} - for c in set(dataset.metadata_df["short_comp"]): + for c in set(dataset.metadata_df['compartment']): metabolites_compartment = \ dataset.compartmentalized_dfs[ 'abundances'][c].index.to_list() @@ -220,7 +220,7 @@ def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None: # check that necessary information is provided in the analysis config try: if not set(cfg.analysis.metabolites.keys()).issubset( - dataset.metadata_df["short_comp"]): + dataset.metadata_df['compartment']): raise ValueError( "[Analysis > Metabolites > compartments] are missing " "from [Metadata > Compartments]" @@ -378,7 +378,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None: logger.warning( "No selected metabolites provided, plotting for all") with open_dict(cfg): - compartments = list(set(dataset.metadata_df["short_comp"])) + compartments = list(set(dataset.metadata_df['compartment'])) cfg.analysis["metabolites"] = dict() for c in compartments: isotopologues_names = \ @@ -397,7 +397,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None: def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None: try: if not set(cfg.analysis.metabolites.keys()).issubset( - dataset.metadata_df['short_comp']): + dataset.metadata_df['compartment']): raise ValueError( "[Analysis > Metabolites > compartments] " "are missing from [Metadata > Compartments]" @@ -440,7 +440,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None: "No selected metabolites provided, plotting for all") with open_dict(cfg): cfg.analysis["metabolites"] = {} - for c in set(dataset.metadata_df["short_comp"]): + for c in set(dataset.metadata_df['compartment']): cfg.analysis["metabolites"][c] = \ dataset.compartmentalized_dfs[ 'mean_enrichment'][c].index.to_list() @@ -453,7 +453,7 @@ def run(self, cfg: DictConfig, dataset: Dataset) -> None: def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None: try: if not set(cfg.analysis.metabolites.keys()).issubset( - dataset.metadata_df['short_comp']): + dataset.metadata_df['compartment']): raise ValueError( "[Analysis > Metabolites > compartments] are missing " "from [Metadata > Compartments]" @@ -728,11 +728,11 @@ def check_expectations(self, cfg: DictConfig, if not isinstance(cfg.analysis.compartment, str): raise ValueError("compartment must be string in config file") if cfg.analysis.compartment not in \ - set(data_integration.metadata_df['short_comp']): + set(data_integration.metadata_df['compartment']): raise ValueError( f"the compartment '{cfg.analysis.compartment}' " f"in the config file does not exist. Must be one of: " - f"{set(data_integration.metadata_df['short_comp'])}" + f"{set(data_integration.metadata_df['compartment'])}" ) if not len(cfg.analysis.statistical_test.keys()) == 1: raise ValueError( diff --git a/src/dimet/processing/differential_analysis.py b/src/dimet/processing/differential_analysis.py index b66562e..6b2cd1e 100644 --- a/src/dimet/processing/differential_analysis.py +++ b/src/dimet/processing/differential_analysis.py @@ -392,7 +392,7 @@ def pairwise_comparison( df_good, "distance/span", cfg.analysis.method.qualityDistanceOverSpan ) df_good = compute_padj(df_good, 0.05, - cfg.analysis.method.correction_method) + cfg.analysis.method.correction_method) # re-integrate the "bad" sub-dataframes to the full dataframe result = concatenate_dataframes(df_good, df_bad, df_no_padj) @@ -475,7 +475,7 @@ def multi_group_compairson( sublist in conditions_list] df4c = apply_multi_group_kruskal_wallis(df4c, this_comparison) df4c = compute_padj(df4c, 0.05, - cfg.analysis.method.correction_method) + cfg.analysis.method.correction_method) base_file_name = dataset.get_file_for_label(file_name) base_file_name += f"--{compartment}--multigroup" output_file_name = os.path.join(out_table_dir, diff --git a/src/dimet/processing/pca_analysis.py b/src/dimet/processing/pca_analysis.py index 83b179a..168f3f3 100644 --- a/src/dimet/processing/pca_analysis.py +++ b/src/dimet/processing/pca_analysis.py @@ -73,7 +73,7 @@ def pca_on_split_dataset(compartment_df: pd.DataFrame, and computes PCA on each subset. The results are added to the dictionary of results. """ - assert len(metadata_co_df['short_comp'].unique()) == 1 + assert len(metadata_co_df['compartment'].unique()) == 1 assert chosen_column in ["condition", "timepoint"] unique_nominal_values = metadata_co_df[chosen_column].unique().tolist() pca_tables_dict = {} @@ -149,7 +149,7 @@ def run_pca_analysis(file_name: data_files_keys_type, val_instead_zero = arg_repl_zero2value(impute_value, df) df = df.replace(to_replace=0, value=val_instead_zero) - metadata_co_df = metadata_df[metadata_df['short_comp'] == compartment] + metadata_co_df = metadata_df[metadata_df['compartment'] == compartment] pca_compartment_dict = pca_global_compartment_dataset( df, metadata_co_df, description=[file_name, compartment] diff --git a/src/dimet/visualization/abundance_bars.py b/src/dimet/visualization/abundance_bars.py index 1cd7b57..e6b2b8f 100644 --- a/src/dimet/visualization/abundance_bars.py +++ b/src/dimet/visualization/abundance_bars.py @@ -214,10 +214,10 @@ def run_plot_abundance_bars(dataset: Dataset, out_plot_dir, width_each_subfig = cfg.analysis.width_each_subfig height_each_subfig = cfg.analysis.method.height_each_subfig - compartments = set(metadata_df["short_comp"]) + compartments = set(metadata_df['compartment']) for compartment in compartments: metadata_compartment_df: pd.DataFrame = \ - metadata_df.loc[metadata_df["short_comp"] == compartment, :] + metadata_df.loc[metadata_df['compartment'] == compartment, :] compartment_df = dataset.compartmentalized_dfs[ "abundances"][compartment] # metadata and abundances time of interest diff --git a/src/dimet/visualization/distr_fit_plot.py b/src/dimet/visualization/distr_fit_plot.py index bcf5866..e4afb69 100644 --- a/src/dimet/visualization/distr_fit_plot.py +++ b/src/dimet/visualization/distr_fit_plot.py @@ -163,7 +163,7 @@ def run_distr_fit_plot( df = compartmentalized_df df = df[(df.T != 0).any()] val_instead_zero = arg_repl_zero2value(impute_value, - df) + df) df = df.replace(to_replace=0, value=val_instead_zero) if mode == "pairwise": for comparison in cfg.analysis.comparisons: diff --git a/src/dimet/visualization/isotopologue_proportions.py b/src/dimet/visualization/isotopologue_proportions.py index 1ea4a03..12b9f75 100644 --- a/src/dimet/visualization/isotopologue_proportions.py +++ b/src/dimet/visualization/isotopologue_proportions.py @@ -45,7 +45,7 @@ def isotopologue_proportions_2piled_df( combined_isos_metadata_df, metada_df, on='name_to_plot') combined_isos_metadata_df = combined_isos_metadata_df.drop( - columns=['short_comp', 'original_name', 'name_to_plot', 'timepoint']) + columns=['compartment', 'original_name', 'name_to_plot', 'timepoint']) piled_df = pd.melt(combined_isos_metadata_df, id_vars=['timenum', 'condition'], var_name="isotopologue_name", @@ -455,11 +455,11 @@ def run_isotopologue_proportions_plot(dataset: Dataset, time_levels_list: List[str] = [ str(i) for i in sorted(metadata_df['timenum'].unique())] - compartments = list(metadata_df['short_comp'].unique()) + compartments = list(metadata_df['compartment'].unique()) for compartment in compartments: metadata_compartment_df: pd.DataFrame = \ - metadata_df.loc[metadata_df["short_comp"] == compartment, :] + metadata_df.loc[metadata_df['compartment'] == compartment, :] compartment_df = dataset.compartmentalized_dfs[ "isotopologue_proportions"][compartment] diff --git a/src/dimet/visualization/mean_enrichment_line_plot.py b/src/dimet/visualization/mean_enrichment_line_plot.py index c719f76..3f6095f 100644 --- a/src/dimet/visualization/mean_enrichment_line_plot.py +++ b/src/dimet/visualization/mean_enrichment_line_plot.py @@ -30,7 +30,7 @@ def melt_data_metadata_2df(compartment_df: pd.DataFrame, on='name_to_plot') compartment_df = compartment_df.drop(columns=['name_to_plot', 'timepoint', - 'short_comp', + 'compartment', 'original_name']) melted_df = pd.melt(compartment_df, id_vars=['timenum', 'condition'], @@ -346,7 +346,7 @@ def line_plot_by_compartment(dataset: Dataset, cfg: DictConfig) -> None: """ calls function to construct and save plot """ metadata_df = dataset.metadata_df - compartments = list(metadata_df['short_comp'].unique()) + compartments = list(metadata_df['compartment'].unique()) width_subplot = cfg.analysis.width_subplot height_subplot = cfg.analysis.method.height_subplot xaxis_title = cfg.analysis.method.xaxis_title @@ -355,7 +355,7 @@ def line_plot_by_compartment(dataset: Dataset, alpha_conf = cfg.analysis.method.alpha for co in compartments: - metadata_co_df = metadata_df.loc[metadata_df['short_comp'] == co, :] + metadata_co_df = metadata_df.loc[metadata_df['compartment'] == co, :] compartment_df = dataset.compartmentalized_dfs["mean_enrichment"][co] melted_co_df = melt_data_metadata_2df(compartment_df, metadata_co_df) diff --git a/tests/test_abundance_bars.py b/tests/test_abundance_bars.py index 17351f5..e8ce2e8 100644 --- a/tests/test_abundance_bars.py +++ b/tests/test_abundance_bars.py @@ -27,7 +27,7 @@ def test_pile_up_abundance(self): 'name_to_plot': ['beta-1', 'beta-2', 'ctrl-1', 'ctrl-2'], 'condition': ['beta-glu', 'beta-glu', 'control', 'control'], 'timepoint': ['t0', 't0', 't0', 't0'], - 'short_comp': ['ex', 'ex', 'ex', 'ex'] + 'compartment': ['ex', 'ex', 'ex', 'ex'] }) result = abundance_bars.pile_up_abundance(df, metadata_df) self.assertTrue(result.shape == (12, 4)) @@ -46,7 +46,7 @@ def test_plot_one_metabolite(self): 'timepoint': ['t0', 't0'], 'condition': ['beta', 'alpha'], 'metabolite': ['m1', 'm1'], - 'abundance' : [200, 700] + 'abundance': [200, 700] }) fig_this_metabolite, axs_k = plt.subplots( nrows=1, ncols=1, @@ -67,7 +67,7 @@ def test_plot_abundance_bars_no_grid(self): 'timepoint': ['t0', 't0'], 'condition': ['beta', 'alpha'], 'metabolite': ['m1', 'm1'], - 'abundance' : [200, 700] + 'abundance': [200, 700] }) try: os.makedirs("../__pycache__/") @@ -84,4 +84,3 @@ def test_plot_abundance_bars_no_grid(self): height_each_subfig=2.4, cfg=cfg_m) self.assertTrue(result is None) - diff --git a/tests/test_differential_analysis.py b/tests/test_differential_analysis.py index e044976..8275c1c 100644 --- a/tests/test_differential_analysis.py +++ b/tests/test_differential_analysis.py @@ -50,9 +50,7 @@ def test_select_rows_with_sufficient_non_nan_values(self): groups = [["c1", "c2", "c3"], ["c4", "c5", "c6"]] df = countnan_samples(df, groups) result_good, result_bad = differential_analysis. \ - select_rows_with_sufficient_non_nan_values( - df, groups - ) + select_rows_with_sufficient_non_nan_values(df, groups) self.assertEqual(result_good.shape, (2, 8)) self.assertEqual(result_bad.shape, (2, 8)) self.assertTrue(np.any(np.array(result_good.loc[0, :]) == @@ -141,5 +139,3 @@ def test_time_course_auto_list_comparisons(self): self.assertListEqual(result[1], [['cond1', '3h'], ['cond1', '2.7h']]) self.assertListEqual(result[2], [['cond2', '3h'], ['cond2', '2.7h']]) self.assertListEqual(result[3], [['cond1', '2.7h'], ['cond1', '1h']]) - - diff --git a/tests/test_distr_fit_plot.py b/tests/test_distr_fit_plot.py index 8afa01a..e09b6fc 100644 --- a/tests/test_distr_fit_plot.py +++ b/tests/test_distr_fit_plot.py @@ -14,11 +14,10 @@ class TestDistrFitPlot(TestCase): def test_make_pdf(self): dist = getattr(stats, "gennorm") - params_dict = {'beta': 1.09, 'loc': 0.01, 'scale': 1.77} + params_dict = {'beta': 1.09, 'loc': 0.01, 'scale': 1.77} params = list(params_dict.values()) result = distr_fit_plot.make_pdf(dist, params, size=10000) - self.assertAlmostEqual(result.index[0],-5.9147, 2 ) - self.assertAlmostEqual(result.index[-3], 5.932, 2 ) - self.assertAlmostEqual(result.to_list()[1], 0.007, 3 ) + self.assertAlmostEqual(result.index[0], -5.9147, 2) + self.assertAlmostEqual(result.index[-3], 5.932, 2) + self.assertAlmostEqual(result.to_list()[1], 0.007, 3) self.assertAlmostEqual(result.to_list()[-1], 0.00699, 3) - diff --git a/tests/test_fit_statistical_distribution.py b/tests/test_fit_statistical_distribution.py index f137095..46961d7 100644 --- a/tests/test_fit_statistical_distribution.py +++ b/tests/test_fit_statistical_distribution.py @@ -15,8 +15,8 @@ class TestFitStatisticalDistribution(TestCase): def test_compute_z_score(self): data = { - "gmean_1" : [15, 6, 3.8, 18.6, 16, 12], - "gmean_2" : [1, 20, 16, 12, 2, 1.6], + "gmean_1": [15, 6, 3.8, 18.6, 16, 12], + "gmean_2": [1, 20, 16, 12, 2, 1.6], "ratio": [15, 0.3, 0.23, 1.55, 8, 7.5], } df = pd.DataFrame(data) @@ -37,8 +37,8 @@ def test_compute_z_score(self): def test_find_best_distribution(self): data = {'zscore': np.random.laplace(loc=0.0, scale=1.6, size=500)} df = pd.DataFrame(data) - best_distribution, args_param = \ - fit_statistical_distribution.find_best_distribution(df) + best_distribution, args_param = (fit_statistical_distribution. + find_best_distribution(df)) # unexpected distribution: 'gennorm' or dgamma or loglaplace or ? # impossible to set assert : # self.assertTrue(best_distribution.name == "laplace" | @@ -57,12 +57,3 @@ def test_best_fit(self): fit_statistical_distribution.get_best_fit(dist) self.assertIsInstance(best_dist_name, str) self.assertIsInstance(best_fit_params, str) - - - - - - - - - diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 684ce7d..1baf733 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -6,7 +6,15 @@ from unittest import TestCase -from dimet.helpers import df_to_dict_by_compartment, first_column_for_column_values, split_rows_by_threshold, row_wise_nanstd_reduction, concatenate_dataframes, compute_gmean_nonan, apply_multi_group_kruskal_wallis, countnan_samples, compute_padj, verify_metadata_sample_not_duplicated +from dimet.helpers import (df_to_dict_by_compartment, + first_column_for_column_values, + split_rows_by_threshold, + row_wise_nanstd_reduction, + concatenate_dataframes, + compute_gmean_nonan, + apply_multi_group_kruskal_wallis, + countnan_samples, compute_padj, + verify_metadata_sample_not_duplicated) import numpy as np @@ -21,7 +29,7 @@ def test_df_to_dict_bycomp(self): "condition": ["Control", "Control", "control"], "timepoint": ["T0", "T1", "T2"], "timenum": [0, 1, 2], - "short_comp": ["cell", "medium", "cell"], + "compartment": ["cell", "medium", "cell"], "original_name": ["MCF001089_TD01", "MCF001089_TD02", "MCF001089_TD03"], } @@ -37,7 +45,7 @@ def test_df_to_dict_bycomp(self): abundances_df = pd.DataFrame(abundances_dict) abundances_df = abundances_df.set_index(['myindex']) d = df_to_dict_by_compartment(df=abundances_df, - metadata=metadata_df) + metadata=metadata_df) self.assertEqual(list(d.keys()), ["cell", "medium"]) self.assertEqual(d["cell"].shape, (2, 2)) self.assertAlmostEqual( @@ -173,7 +181,7 @@ def test_compute_padj(self): correction_alpha = 0.05 correction_method = 'fdr_bh' df = compute_padj(df, correction_alpha, - correction_method) + correction_method) self.assertAlmostEqual(df['padj'][1], 0.05, 3) self.assertTrue(np.isnan(df['padj'][2])) diff --git a/tests/test_isotopologue_proportions.py b/tests/test_isotopologue_proportions.py index 3daa987..968ae1c 100644 --- a/tests/test_isotopologue_proportions.py +++ b/tests/test_isotopologue_proportions.py @@ -22,7 +22,7 @@ def test_isotopologue_proportions_2piled_df(self): 'condition': ['b', 'b', 'ctl', 'ctl'], 'timepoint': ['t0', 't0', 't0', 't0'], 'timenum': [0, 0, 0, 0], - 'short_comp': ['ex', 'ex', 'ex', 'ex'], + 'compartment': ['ex', 'ex', 'ex', 'ex'], 'original_name': ['', '', '', ''] }) result = isotopologue_proportions.isotopologue_proportions_2piled_df( @@ -93,7 +93,7 @@ def test_add_combined_conditime(self): 'ctl', 'b', 'ctl'], 'isotopologue_name': ['cit_m+0', 'cit_m+0', 'cit_m+1', 'cit_m+1', - 'cit_m+2', 'cit_m+2'], + 'cit_m+2', 'cit_m+2'], 'Isotopologue Contribution (%)': [15.0, 60.0, 25.0, 45.0, 15.0, 40.0], 'metabolite': ['cit' for i in range(6)], @@ -122,12 +122,12 @@ def test_add_combined_conditime(self): def test_add_categorical_time(self): df = pd.DataFrame({ - 'timenum': ['2','3','1','3','2','1'], + 'timenum': ['2', '3', '1', '3', '2', '1'], 'condition': ['b', 'ctl', 'b', 'ctl', 'b', 'ctl'], 'isotopologue_name': ['cit_m+0', 'cit_m+0', 'cit_m+1', 'cit_m+1', - 'cit_m+2', 'cit_m+2'], + 'cit_m+2', 'cit_m+2'], 'Isotopologue Contribution (%)': [15.0, 60.0, 25.0, 45.0, 15.0, 40.0], 'metabolite': ['cit' for i in range(6)], @@ -158,6 +158,3 @@ def test_add_xemptyspace_tolabs(self): '2 : b', '2 : ctl', '2xemptyspace', '3 : b', '3 : ctl', '3xemptyspace', '4 : b', '4 : ctl', '4xemptyspace']) - - - diff --git a/tests/test_pca_analysis.py b/tests/test_pca_analysis.py index aad09fc..e0f8e97 100644 --- a/tests/test_pca_analysis.py +++ b/tests/test_pca_analysis.py @@ -99,7 +99,7 @@ def test_pca_on_split_dataset(self): 'name_to_plot': ['beta-1', 'beta-2', 'ctrl-1', 'ctrl-2'], 'condition': ['beta-glu', 'beta-glu', 'control', 'control'], 'timepoint': ['t0', 't0', 't0', 't0'], - 'short_comp': ['ex', 'ex', 'ex', 'ex'] + 'compartment': ['ex', 'ex', 'ex', 'ex'] }) description = ["my_file_name", "ex"] result_dict = pca_analysis.pca_on_split_dataset( @@ -142,7 +142,7 @@ def test_pca_global_compartment_dataset(self): 'name_to_plot': ['beta-1', 'beta-2', 'ctrl-1', 'ctrl-2'], 'condition': ['beta-glu', 'beta-glu', 'control', 'control'], 'timepoint': ['t0', 't0', 't0', 't0'], - 'short_comp': ['ex', 'ex', 'ex', 'ex'] + 'compartment': ['ex', 'ex', 'ex', 'ex'] }) description = ["my_file_name", "ex"] result_dict = pca_analysis.pca_global_compartment_dataset( diff --git a/tests/test_pca_plot.py b/tests/test_pca_plot.py index 51335de..0fb922e 100644 --- a/tests/test_pca_plot.py +++ b/tests/test_pca_plot.py @@ -19,7 +19,3 @@ def test_eigsorted(self): self.assertAlmostEqual(vecs[0][1], 0.099, 2) self.assertAlmostEqual(vecs[1][0], -0.099, 2) self.assertAlmostEqual(vecs[1][1], -0.995, 2) - - - - From ce48911d45cea9fcb76748011092216ff9477b1c Mon Sep 17 00:00:00 2001 From: johaGL Date: Thu, 24 Aug 2023 17:14:19 +0200 Subject: [PATCH 2/3] update(pca): evol style & color --- src/dimet/config/analysis/method/pca_plot.yaml | 3 +++ src/dimet/method/__init__.py | 2 ++ src/dimet/visualization/pca_plot.py | 8 ++++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/dimet/config/analysis/method/pca_plot.yaml b/src/dimet/config/analysis/method/pca_plot.yaml index bfab082..9460f1b 100644 --- a/src/dimet/config/analysis/method/pca_plot.yaml +++ b/src/dimet/config/analysis/method/pca_plot.yaml @@ -3,6 +3,9 @@ _target_: dimet.method.PcaPlotConfig label: pca-plot name: Generate Principal Component Analysis plots +color: condition # color dots using this variable +style: timepoint # style of the dots' shapes, using this variable + pca_split_further: - timepoint # - condition diff --git a/src/dimet/method/__init__.py b/src/dimet/method/__init__.py index 53cadc1..3d3213f 100644 --- a/src/dimet/method/__init__.py +++ b/src/dimet/method/__init__.py @@ -127,6 +127,8 @@ def build(self) -> "PcaAnalysis": class PcaPlotConfig(MethodConfig): + color: Union[str, None] = "condition" + style: Union[str, None] = "timepoint" pca_split_further: Union[ListConfig, None] = ["timepoint"] draw_ellipses: Union[str, None] = "condition" run_iris_demo: bool = False diff --git a/src/dimet/visualization/pca_plot.py b/src/dimet/visualization/pca_plot.py index 33f8ca3..5898661 100644 --- a/src/dimet/visualization/pca_plot.py +++ b/src/dimet/visualization/pca_plot.py @@ -145,15 +145,19 @@ def run_pca_plot(pca_results_dict: dict, cfg: DictConfig, name_plot_var = f"{'--'.join(tup)}_var.pdf" figure_var.savefig(os.path.join(out_plot_dir, name_plot_var)) plt.close() + + color_dot = cfg.analysis.method.color + style_dot = cfg.analysis.method.style options_labels = {'label-y': "name_to_plot", 'label-n': ""} # when empty string, no dot labels + # scatter: save both versions, labeled dots and unlabeled dots: for choice in options_labels.keys(): labels_column = options_labels[choice] name_elements = list(tup) + [choice] scatter_fig: figure.Figure = pca_scatter_plot( - pc_df, var_explained_df, "condition", - "condition", labels_column, + pc_df, var_explained_df, color_dot, + style_dot, labels_column, ellipses_column=cfg.analysis.method.draw_ellipses) pca_scatter_2_pdf(scatter_fig, name_elements, out_plot_dir) plt.close() From 79c80b908c08d990c7bcb5f92042533830df5883 Mon Sep 17 00:00:00 2001 From: johaGL Date: Thu, 24 Aug 2023 17:30:55 +0200 Subject: [PATCH 3/3] update(pyproject) & delete useless conda .yml file --- pyproject.toml | 9 +++++---- tools/DIMet.yml | 33 --------------------------------- 2 files changed, 5 insertions(+), 37 deletions(-) delete mode 100644 tools/DIMet.yml diff --git a/pyproject.toml b/pyproject.toml index b492a60..1fc7063 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,17 @@ [tool.poetry] name="DIMet" -version="0.1.0" -description="A tool for Differential Isotope-labeled targeted Metabolomics" +version="0.1.1" +description="A tool for Differential analysis of Isotope-labeled targeted Metabolomics data" readme="README.md" license = "MIT" authors = [ "Johanna Galvis Rodriguez ", - "Joris Guyon ", "Benjamin Dartigues ", "Florian Specque ", - "Thomas Daubon ", "Slim Karkar ", + "Helge Hecht ", + "Bjorn Gruening ", + "Hayssam Soueidan ", "Macha Nikolski " ] diff --git a/tools/DIMet.yml b/tools/DIMet.yml deleted file mode 100644 index 2e59b4b..0000000 --- a/tools/DIMet.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: DIMet -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - python=3.9.12 - - pip - - numpy~=1.23.1 - - pandas~=1.5.3 - - openpyxl - - poetry - - pyyaml~=6.0 - - scikit-learn~=1.1.1 - - scipy~=1.9.1 - - matplotlib~=3.7.1 - - seaborn~=0.11.2 - - statsmodels~=0.13.5 - - hydra-core~=1.3.2 - - hydra-colorlog~=1.2.0 - - conda-forge::sphinx - - conda-forge::coverage - - conda-forge::flake8=4.0.1 - - conda-forge::flake8-import-order=0.18.1 - - conda-forge::click - - conda-forge::pytest - - conda-forge::pytest-cov - - conda-forge::mypy - - conda-forge::pydantic - - bioconda::snakemake=7.24.0 - - conda-forge::python-dotenv>=0.5.1 - -