Skip to content

Commit

Permalink
MAINT: update split to ignore samples that are in the metadata but not the feature table
Browse files Browse the repository at this point in the history
  • Loading branch information
gregcaporaso authored Aug 21, 2024
1 parent d9e5678 commit a4d0e2e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
1 change: 1 addition & 0 deletions q2_feature_table/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
def split(table: biom.Table,
metadata: qiime2.CategoricalMetadataColumn,
filter_empty_features: bool = True) -> biom.Table:
metadata = metadata.filter_ids(table.ids(axis='sample'))
metadata_df = metadata.drop_missing_values().to_dataframe()

indices = metadata_df.reset_index(
Expand Down
15 changes: 15 additions & 0 deletions q2_feature_table/tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ def test_three_splits(self):
self.assertEqual(actual,
{'a': expected1, 'b': expected2, 'c': expected3})

def test_extra_metadata(self):
    """Samples present only in the metadata must not affect the split."""
    # S4 and S5 appear in the metadata column but not in the table, so
    # the only partition expected back is 'a' (S4's 'b' is ignored).
    sample_ids = pd.Index(['S1', 'S2', 'S3', 'S4', 'S5'], name='id')
    categories = pd.Series(['a', 'a', 'a', 'b', 'a'], name='foo',
                           index=sample_ids)
    md_column = qiime2.CategoricalMetadataColumn(categories)

    feature_ids = ['O1', 'O2']
    table_sample_ids = ['S1', 'S2', 'S3']
    table = Table(np.array([[5, 1, 3], [1, 1, 2]]),
                  feature_ids,
                  table_sample_ids)

    actual = split(table, metadata=md_column)

    # Built from a fresh array so the expectation is independent of the
    # input table object.
    expected_a = Table(np.array([[5, 1, 3], [1, 1, 2]]),
                       ['O1', 'O2'],
                       ['S1', 'S2', 'S3'])
    self.assertEqual(actual, {'a': expected_a})

def test_invalid_values(self):
table = Table(np.array([[5, 1, 3], [1, 1, 2]]),
['O1', 'O2'],
Expand Down

0 comments on commit a4d0e2e

Please sign in to comment.