From cd7b8e25ba64e3deb3211196083158bedfc2a9dc Mon Sep 17 00:00:00 2001
From: whitews
Date: Sat, 12 Oct 2024 12:04:15 -0400
Subject: [PATCH] add option 'col_multi_index' to Sample.as_dataframe() to
 control col index type

Sample.as_dataframe() gains a 'col_multi_index' keyword (default True).
When True the columns are a MultiIndex built from the PnN / PnS labels,
as before; when False only the PnN labels are used as a simple column
index.

The accompanying test checks that the simple index is *not* a
MultiIndex. Asserting isinstance(..., pd.Index) would be vacuous because
pd.MultiIndex is itself a subclass of pd.Index.
---
 src/flowkit/_models/sample.py | 14 +++++++++++---
 tests/sample_tests.py         | 11 +++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/flowkit/_models/sample.py b/src/flowkit/_models/sample.py
index e0ae88d3..d9ad0cf9 100644
--- a/src/flowkit/_models/sample.py
+++ b/src/flowkit/_models/sample.py
@@ -627,7 +627,8 @@ def as_dataframe(
             subsample=False,
             event_mask=None,
             col_order=None,
-            col_names=None
+            col_names=None,
+            col_multi_index=True
     ):
         """
         Returns a pandas DataFrame of event data.
@@ -643,12 +644,19 @@ def as_dataframe(
             in the output DataFrame. If None, the column order will match the FCS file.
         :param col_names: list of new column labels. If None (default), the DataFrame
             columns will be a MultiIndex of the PnN / PnS labels.
+        :param col_multi_index: Controls whether the column labels are multi-index. If
+            False, only the PnN labels will be used for a simple column index. Default
+            is True.
         :return: pandas DataFrame of event data
         """
         events = self.get_events(source=source, subsample=subsample, event_mask=event_mask)
 
-        multi_cols = pd.MultiIndex.from_arrays([self.pnn_labels, self.pns_labels], names=['pnn', 'pns'])
-        events_df = pd.DataFrame(data=events, columns=multi_cols)
+        if col_multi_index:
+            col_index = pd.MultiIndex.from_arrays([self.pnn_labels, self.pns_labels], names=['pnn', 'pns'])
+        else:
+            col_index = self.pnn_labels
+
+        events_df = pd.DataFrame(data=events, columns=col_index)
 
         if col_order is not None:
             events_df = events_df[col_order]
diff --git a/tests/sample_tests.py b/tests/sample_tests.py
index a4277f89..fe7cca62 100644
--- a/tests/sample_tests.py
+++ b/tests/sample_tests.py
@@ -340,6 +340,17 @@ def test_get_events_as_data_frame_orig(self):
         self.assertIsInstance(df, pd.DataFrame)
         np.testing.assert_equal(df.values, data1_sample_with_orig.get_events(source='orig'))
 
+    def test_get_events_as_data_frame_col_index(self):
+        # verifies 'col_multi_index' option works as expected
+        # by default the col index will be MultiIndex
+        df_multi = data1_sample.as_dataframe(source='raw')
+
+        # turn off multi-index for simple column index
+        df_simple = data1_sample.as_dataframe(source='raw', col_multi_index=False)
+
+        self.assertIsInstance(df_multi.columns, pd.MultiIndex)
+        self.assertNotIsInstance(df_simple.columns, pd.MultiIndex)
+
     def test_get_events_as_data_frame_column_order(self):
         orig_col_order = ['FSC-H', 'SSC-H', 'FL1-H', 'FL2-H', 'FL3-H', 'FL2-A', 'FL4-H', 'Time']
         new_col_order = ['FSC-H', 'SSC-H', 'FL1-H', 'FL2-H', 'FL2-A', 'FL3-H', 'FL4-H', 'Time']