Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: minor fixes from working with Jeff #91

Merged
merged 2 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 20 additions & 16 deletions q2_fmt/_peds.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

def peds(ctx, table, metadata, peds_metric, time_column, reference_column,
subject_column, filter_missing_references=False,
drop_incomplete_subjects=False, drop_incomplete_timepoint=None,
drop_incomplete_subjects=False, drop_incomplete_timepoints=None,
level_delimiter=None):

peds_heatmap = ctx.get_action('fmt', 'peds_heatmap')
Expand All @@ -38,14 +38,14 @@ def peds(ctx, table, metadata, peds_metric, time_column, reference_column,
table=table, metadata=metadata, time_column=time_column,
subject_column=subject_column, reference_column=reference_column,
drop_incomplete_subjects=drop_incomplete_subjects,
drop_incomplete_timepoint=drop_incomplete_timepoint,
drop_incomplete_timepoints=drop_incomplete_timepoints,
filter_missing_references=filter_missing_references)

else:
if drop_incomplete_subjects or drop_incomplete_timepoint:
if drop_incomplete_subjects or drop_incomplete_timepoints:
warnings.warn('Feature PEDS was selected as the PEDS metric, which'
' does not accept `drop_incomplete_subjects` or'
' `drop_incomplete_timepoint` as parameters. One'
' `drop_incomplete_timepoints` as parameters. One'
' (or both) of these parameters were detected in'
' your input, and will be ignored.')

Expand All @@ -65,9 +65,11 @@ def peds_heatmap(output_dir: str, data: pd.DataFrame,
per_subject_stats: pd.DataFrame = None,
global_stats: pd.DataFrame = None):
_rename_features(data=data, level_delimiter=level_delimiter)
gstats = None
table1 = None
psstats = None
if global_stats is not None:
gstats = global_stats.to_html(index=False)
# table2, gstats = _make_stats(global_stats)
if per_subject_stats is not None:
table1, psstats = _make_stats(per_subject_stats)
J_ENV = jinja2.Environment(
Expand Down Expand Up @@ -117,12 +119,17 @@ def sample_peds(table: pd.DataFrame, metadata: qiime2.Metadata,
time_column: str, reference_column: str, subject_column: str,
filter_missing_references: bool = False,
drop_incomplete_subjects: bool = False,
drop_incomplete_timepoint: list = None) -> (pd.DataFrame):
drop_incomplete_timepoints: list = None) -> (pd.DataFrame):

ids_with_data = table.index
metadata = metadata.filter_ids(ids_to_keep=ids_with_data)
column_properties = metadata.columns
# TODO: Make incomplete samples possible move this to heatmap
metadata = metadata.to_dataframe()
if drop_incomplete_timepoints is not None:
metadata = _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoints)
table.filter(items=metadata.index)
# TODO: Make incomplete samples possible; move this to heatmap
num_timepoints = _check_for_time_column(metadata, time_column)
_check_column_type(column_properties, "time",
time_column, "numeric")
Expand All @@ -139,10 +146,7 @@ def sample_peds(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column, "categorical")
_check_duplicate_subject_timepoint(subject_series, metadata,
subject_column, time_column)
if drop_incomplete_timepoint is not None:
metadata = _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoint)
table.filter(items=metadata.index)

# return things that should be removed
metadata, used_references = \
_check_subjects_in_all_timepoints(subject_series, num_timepoints,
Expand Down Expand Up @@ -357,7 +361,6 @@ def _check_reference_column(metadata, reference_column):
def _filter_associated_reference(reference_series, metadata, time_column,
filter_missing_references, reference_column):
used_references = reference_series[~metadata[time_column].isna()]

if used_references.isna().any():
if filter_missing_references:
metadata = metadata.dropna(subset=[reference_column])
Expand Down Expand Up @@ -393,8 +396,8 @@ def _check_duplicate_subject_timepoint(subject_series, metadata,


def _drop_incomplete_timepoints(metadata, time_column,
drop_incomplete_timepoint):
for time in drop_incomplete_timepoint:
drop_incomplete_timepoints):
for time in drop_incomplete_timepoints:
try:
assert (float(time)
in metadata[time_column].unique())
Expand Down Expand Up @@ -492,7 +495,7 @@ def peds_simulation(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column: str,
filter_missing_references: bool = False,
drop_incomplete_subjects: bool = False,
drop_incomplete_timepoint: list = None,
drop_incomplete_timepoints: list = None,
num_iterations: int = 999) -> (pd.DataFrame, pd.DataFrame):

metadata_df = metadata.to_dataframe()
Expand Down Expand Up @@ -527,7 +530,8 @@ def peds_simulation(table: pd.DataFrame, metadata: qiime2.Metadata,
subject_column=subject_column,
filter_missing_references=filter_missing_references,
drop_incomplete_subjects=drop_incomplete_subjects,
drop_incomplete_timepoint=drop_incomplete_timepoint).set_index("id")
drop_incomplete_timepoints=drop_incomplete_timepoints
).set_index("id")
actual_peds = peds["measure"]

# Mismatch simulation:
Expand Down
22 changes: 11 additions & 11 deletions q2_fmt/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@
drop_incomplete_subjects = ('Filter out subjects that do not have a sample at'
' every timepoint. Default behavior is to raise an'
' error if any subject is missing a timepoint.')
drop_incomplete_timepoint = ('Filter out specified timepoints. This is useful'
' for removing frequently missing timepoints'
' which cause many subjects to be dropped.'
' Default behavior is to raise an error if any'
' subject is missing a timepoint.')
drop_incomplete_timepoints = ('Filter out multiple specified timepoints.'
' This is useful for removing frequently missing'
' timepoints which cause many subjects to be'
' dropped. Default behavior is to raise an error'
' if any subject is missing a timepoint.')
level_delimiter = 'delimiter to split taxonomic label on'
control_column = ('The column within `metadata` that contains any relevant'
' control group IDs. Actual treatment samples should not'
Expand Down Expand Up @@ -233,7 +233,7 @@
'subject_column': Str,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str],
'drop_incomplete_timepoints': List[Str],
'level_delimiter': Str},
outputs=[('peds_heatmap', Visualization)],
input_descriptions={'table': peds_table},
Expand All @@ -245,7 +245,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint,
'drop_incomplete_timepoints': drop_incomplete_timepoints,
'level_delimiter': level_delimiter},
output_descriptions={'peds_heatmap': 'PEDS heatmap visualization'},
name='PEDS pipeline to calculate feature or sample PEDS',
Expand Down Expand Up @@ -276,7 +276,7 @@
'reference_column': Str, 'subject_column': Str,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str]},
'drop_incomplete_timepoints': List[Str]},
outputs=[('peds_dists', Dist1D[Ordered, Matched] % Properties("peds"))],
input_descriptions={'table': peds_table},
parameter_descriptions={
Expand All @@ -286,7 +286,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint
'drop_incomplete_timepoints': drop_incomplete_timepoints
},
output_descriptions={
'peds_dists': peds_dists
Expand Down Expand Up @@ -337,7 +337,7 @@
'subject_column': T_subject,
'filter_missing_references': Bool,
'drop_incomplete_subjects': Bool,
'drop_incomplete_timepoint': List[Str],
'drop_incomplete_timepoints': List[Str],
'num_iterations': Int % Range(99, None)},
outputs=[('per_subject_stats', StatsTable[Pairwise]),
('global_stats', StatsTable[Pairwise])],
Expand All @@ -348,7 +348,7 @@
'subject_column': subject_column,
'filter_missing_references': filter_missing_references,
'drop_incomplete_subjects': drop_incomplete_subjects,
'drop_incomplete_timepoint': drop_incomplete_timepoint,
'drop_incomplete_timepoints': drop_incomplete_timepoints,
'num_iterations': 'The number of iterations to run the Monte Carlo'
' simulation on'
},
Expand Down
20 changes: 10 additions & 10 deletions q2_fmt/tests/test_engraftment.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,9 +782,9 @@ def test_incomplete_timepoints_with_flag(self):
'donor1', 'donor2'],
'Ref': ['donor1', 'donor1', 'donor1', 'donor2', np.nan,
np.nan],
'subject': ['sub1', 'sub1', 'sub1', 'sub2', np.nan,
'subject': ['sub1', 'sub1', 'sub2', 'sub2', np.nan,
np.nan],
'group': [1, 2, 3, 2, np.nan,
'group': [1, 2, 2, 3, np.nan,
np.nan]}).set_index('id')
metadata = Metadata(metadata_df)
table_df = pd.DataFrame({
Expand All @@ -797,16 +797,16 @@ def test_incomplete_timepoints_with_flag(self):
time_column="group",
reference_column="Ref",
subject_column="subject",
drop_incomplete_subjects=True)
drop_incomplete_timepoints=[1, 3])

exp_peds_df = pd.DataFrame({
'id': ['sample1', 'sample2', 'sample3'],
'measure': [0.666667, 0.333333, 1],
'transfered_donor_features': [2, 1, 3],
'total_donor_features': [3, 3, 3],
'donor': ["donor1", "donor1", "donor1"],
'subject': ["sub1", "sub1", "sub1"],
'group': [1.0, 2.0, 3.0]
'id': ['sample2', 'sample3'],
'measure': [0.333333, 1],
'transfered_donor_features': [1, 3],
'total_donor_features': [3, 3],
'donor': ["donor1", "donor1"],
'subject': ["sub1", "sub2"],
'group': [2.0, 2.0]
})
pd.testing.assert_frame_equal(sample_peds_df, exp_peds_df)

Expand Down
Loading