From fe2fd5e0feaf51614e54752488fcee84220ac755 Mon Sep 17 00:00:00 2001 From: Greg Caporaso Date: Thu, 1 Aug 2024 15:09:37 -0700 Subject: [PATCH] fixes #314 --- q2_feature_table/_split.py | 1 + q2_feature_table/tests/test_split.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/q2_feature_table/_split.py b/q2_feature_table/_split.py index a7d846d..ab795d5 100644 --- a/q2_feature_table/_split.py +++ b/q2_feature_table/_split.py @@ -13,6 +13,7 @@ def split(table: biom.Table, metadata: qiime2.CategoricalMetadataColumn, filter_empty_features: bool = True) -> biom.Table: + metadata = metadata.filter_ids(table.ids(axis='sample')) metadata_df = metadata.drop_missing_values().to_dataframe() indices = metadata_df.reset_index( diff --git a/q2_feature_table/tests/test_split.py b/q2_feature_table/tests/test_split.py index a204fb5..33c3ad2 100644 --- a/q2_feature_table/tests/test_split.py +++ b/q2_feature_table/tests/test_split.py @@ -67,6 +67,21 @@ def test_three_splits(self): self.assertEqual(actual, {'a': expected1, 'b': expected2, 'c': expected3}) + def test_extra_metadata(self): + # S4 and S5 are in metadata but not the table - they should be ignored + md_column = qiime2.CategoricalMetadataColumn( + pd.Series(['a', 'a', 'a', 'b', 'a'], name='foo', + index=pd.Index(['S1', 'S2', 'S3', 'S4', 'S5'], + name='id'))) + table = Table(np.array([[5, 1, 3], [1, 1, 2]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + actual = split(table, metadata=md_column) + expected1 = Table(np.array([[5, 1, 3], [1, 1, 2]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + self.assertEqual(actual, {'a': expected1}) + def test_invalid_values(self): table = Table(np.array([[5, 1, 3], [1, 1, 2]]), ['O1', 'O2'],