From 374bcc38352c801a0d53a18585ee203beda54595 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 27 Feb 2024 16:43:58 -0600 Subject: [PATCH] Fix fetching schema ranges in wide tables with object columns (posit-dev/positron-python#392) --- .../positron_ipykernel/data_explorer.py | 2 +- .../tests/test_data_explorer.py | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/extensions/positron-python/pythonFiles/positron/positron_ipykernel/data_explorer.py b/extensions/positron-python/pythonFiles/positron/positron_ipykernel/data_explorer.py index 123933dce15..76ee2e00d3c 100644 --- a/extensions/positron-python/pythonFiles/positron/positron_ipykernel/data_explorer.py +++ b/extensions/positron-python/pythonFiles/positron/positron_ipykernel/data_explorer.py @@ -273,7 +273,7 @@ def _get_schema(self, column_start: int, num_columns: int) -> TableSchema: for i, (c, dtype) in enumerate(zip(columns_slice, dtypes_slice)): if dtype == object: column_index = i + column_start - if i not in self._inferred_dtypes: + if column_index not in self._inferred_dtypes: self._inferred_dtypes[column_index] = infer_dtype( self.table.iloc[:, column_index] ) diff --git a/extensions/positron-python/pythonFiles/positron/positron_ipykernel/tests/test_data_explorer.py b/extensions/positron-python/pythonFiles/positron/positron_ipykernel/tests/test_data_explorer.py index f2811b21a5a..453229da5c8 100644 --- a/extensions/positron-python/pythonFiles/positron/positron_ipykernel/tests/test_data_explorer.py +++ b/extensions/positron-python/pythonFiles/positron/positron_ipykernel/tests/test_data_explorer.py @@ -436,6 +436,27 @@ def test_pandas_get_schema(pandas_fixture: PandasFixture): assert result["columns"] == _wrap_json(ColumnSchema, bigger_schema[10:20]) +def test_pandas_wide_schemas(pandas_fixture: PandasFixture): + arr = np.arange(10).astype(object) + + ncols = 10000 + df = pd.DataFrame({f"col_{i}": arr for i in range(ncols)}) + + pandas_fixture.register_table("wide_df", df) + + chunk_size = 100 + for chunk_index in range(ncols // chunk_size): + start_index = chunk_index * chunk_size + pandas_fixture.register_table( + f"wide_df_{chunk_index}", + df.iloc[:, start_index : (chunk_index + 1) * chunk_size], + ) + + schema_slice = pandas_fixture.get_schema("wide_df", start_index, chunk_size) + expected = pandas_fixture.get_schema(f"wide_df_{chunk_index}", 0, chunk_size) + assert schema_slice["columns"] == expected["columns"] + + def _trim_whitespace(columns): return [[x.strip() for x in column] for column in columns]