Skip to content

Commit

Permalink
Python: Do not execute Ibis expressions when ibis.options.interactive…
Browse files Browse the repository at this point in the history
… is True (#5625)

Addresses #5499 by adding a custom inspector for Ibis expressions. This
inspector is very basic and, per #5573, should perhaps eventually live in
Ibis itself.

Ibis is a bit unusual in that its interactive mode causes computation to
be executed when running the `__repr__` method, for nice interactivity
in the console and in Jupyter notebooks. So here we avoid running the
`__repr__` method so we don't accidentally fire off a BigQuery,
Snowflake, or other query which might have unwanted costs or side
effects.

There is a unit test. Ibis with DuckDB is a minor dependency to pull in
relative to the rest of our test dependencies, so I do not think this is
too onerous.
  • Loading branch information
wesm authored Dec 5, 2024
1 parent fb6b305 commit 9b6f25f
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ geopandas==0.13.2; python_version < '3.9'
geopandas==1.0.1; python_version >= '3.9'
hvplot==0.10.0 ; python_version >= '3.9'
hvplot==0.8.0 ; python_version < '3.9'
ibis-framework[duckdb]==9.5.0; python_version >= '3.10'
ipykernel==6.29.5
ipywidgets==8.1.5
matplotlib==3.9.2; python_version >= '3.9'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@
except ImportError:
pass

try:
import ibis # python >= 3.10
except ImportError:
pass


# General display settings
TRUNCATE_AT: int = 1024
PRINT_WIDTH: int = 100
Expand Down Expand Up @@ -90,9 +96,35 @@
}


def _get_class_display(value):
def _remap_ibis_classnames(value):
    """Return the inspector lookup name for a value from the ``ibis`` package.

    Every Ibis expression, whatever its concrete subclass, is reported as
    ``"ibis.Expr"`` so that a single inspector can handle all of them. Any
    other ibis object keeps its regular qualified name.
    """
    # Imported here rather than relied on at module level: the module-level
    # ``import ibis`` is wrapped in ``try/except ImportError``, so the name
    # may not exist globally.
    import ibis

    # More nuanced handling of ibis.Expr and its many subclasses is left to
    # the inspector class itself.
    return "ibis.Expr" if isinstance(value, ibis.Expr) else get_qualname(value)


# Maps a top-level package name (the first dotted component of a value's
# qualified class name) to a function that receives the value and returns
# the name to use for it instead of the raw qualified name.
PACKAGE_REMAPPERS = {
"ibis": _remap_ibis_classnames,
}


def _get_simplified_qualname(value):
    """Return the simplified qualified class name for ``value``.

    Resolution order:

    1. If the raw qualified name has an entry in ``SIMPLER_NAMES``, return
       that entry.
    2. Otherwise, if the top-level package of the qualified name has a
       remapper registered in ``PACKAGE_REMAPPERS``, return whatever the
       remapper produces for ``value``.
    3. Otherwise return the raw qualified name unchanged.
    """
    display_value = get_qualname(value)

    if display_value in SIMPLER_NAMES:
        return SIMPLER_NAMES[display_value]

    # Package-level remapping lets a whole family of classes (for example,
    # every class under one package) share a single name without listing
    # each class individually in SIMPLER_NAMES.
    top_path = display_value.split(".")[0]
    remapper = PACKAGE_REMAPPERS.get(top_path)
    if remapper is not None:
        return remapper(value)

    return display_value


class PositronInspector(Generic[T]):
Expand Down Expand Up @@ -805,7 +837,7 @@ def get_display_value(
print_width: Optional[int] = PRINT_WIDTH,
truncate_at: int = TRUNCATE_AT,
) -> Tuple[str, bool]:
display_value = _get_class_display(self.value)
display_value = _get_simplified_qualname(self.value)
column_values = str(cast(Column, self.value[:100]).to_list())
display_value = f"{display_value} {column_values}"

Expand All @@ -821,9 +853,10 @@ def get_size(self) -> int:


class PandasSeriesInspector(BaseColumnInspector["pd.Series"]):
# Simplified names
CLASS_QNAME = [
"pandas.core.series.Series",
"geopandas.geoseries.GeoSeries",
"pandas.Series",
"geopandas.GeoSeries",
]

def get_display_name(self, key: int) -> str:
Expand Down Expand Up @@ -900,9 +933,9 @@ def to_plaintext(self) -> str:


class PolarsSeriesInspector(BaseColumnInspector["pl.Series"]):
# Simplified class names
CLASS_QNAME = [
"polars.series.series.Series",
"polars.internals.series.series.Series",
"polars.Series",
]

def equals(self, value: pl.Series) -> bool:
Expand Down Expand Up @@ -956,7 +989,7 @@ def get_display_value(
print_width: Optional[int] = PRINT_WIDTH,
truncate_at: int = TRUNCATE_AT,
) -> Tuple[str, bool]:
display_value = _get_class_display(self.value)
display_value = _get_simplified_qualname(self.value)
if hasattr(self.value, "shape"):
shape = self.value.shape
display_value = f"[{shape[0]} rows x {shape[1]} columns] {display_value}"
Expand All @@ -970,9 +1003,10 @@ def get_display_value(


class PandasDataFrameInspector(BaseTableInspector["pd.DataFrame", "pd.Series"]):
# Simplified names
CLASS_QNAME = [
"pandas.core.frame.DataFrame",
"geopandas.geodataframe.GeoDataFrame",
"pandas.DataFrame",
"geopandas.GeoDataFrame",
]

def get_display_name(self, key: int) -> str:
Expand Down Expand Up @@ -1000,9 +1034,9 @@ def to_plaintext(self) -> str:


class PolarsDataFrameInspector(BaseTableInspector["pl.DataFrame", "pl.Series"]):
# Simplified class name
CLASS_QNAME = [
"polars.dataframe.frame.DataFrame",
"polars.internals.dataframe.frame.DataFrame",
"polars.DataFrame",
]

def get_children(self):
Expand All @@ -1013,7 +1047,7 @@ def get_display_value(
print_width: Optional[int] = PRINT_WIDTH,
truncate_at: int = TRUNCATE_AT,
) -> Tuple[str, bool]:
qualname = _get_class_display(self.value)
qualname = _get_simplified_qualname(self.value)
shape = self.value.shape
display_value = f"[{shape[0]} rows x {shape[1]} columns] {qualname}"
return (display_value, True)
Expand Down Expand Up @@ -1079,6 +1113,35 @@ def _is_active(self, value) -> bool:
return True


class IbisExprInspector(PositronInspector["ibis.Expr"]):
    """Inspector for Ibis expressions that never evaluates the expression.

    When ``ibis.options.interactive`` is True, calling ``__repr__`` on an
    Ibis expression executes it, which could fire off a query against a
    remote backend. Every method here therefore avoids ``repr()``/``str()``
    on the wrapped value and reports only static information.
    """

    def has_children(self) -> bool:
        """Expressions are shown as leaf values with nothing to expand."""
        return False

    def get_length(self) -> int:
        """Always 0; no length is computed for an expression."""
        return 0

    def is_mutable(self) -> bool:
        """Expressions are reported as immutable."""
        return False

    def get_display_value(
        self,
        print_width: Optional[int] = PRINT_WIDTH,
        truncate_at: int = TRUNCATE_AT,
    ) -> Tuple[str, bool]:
        """Return the expression's qualified class name as its display value.

        ``print_width`` and ``truncate_at`` are accepted for interface
        compatibility but are not used. The second element of the returned
        tuple (the truncation flag) is always True.
        """
        # Deliberately not repr(self.value): that may execute the expression.
        # The qualified class name is already a string, so return it as-is.
        return (get_qualname(self.value), True)

    def get_display_type(self) -> str:
        """All expression subclasses share the display type ``"ibis.Expr"``."""
        return "ibis.Expr"

    def to_html(self) -> str:
        """Return the same text as the display value."""
        return self.get_display_value()[0]

    def to_plaintext(self) -> str:
        """Return the same text as the display value."""
        return self.get_display_value()[0]


INSPECTOR_CLASSES: Dict[str, Type[PositronInspector]] = {
**dict.fromkeys(PandasDataFrameInspector.CLASS_QNAME, PandasDataFrameInspector),
**dict.fromkeys(PandasSeriesInspector.CLASS_QNAME, PandasSeriesInspector),
Expand All @@ -1092,6 +1155,7 @@ def _is_active(self, value) -> bool:
DatetimeInspector.CLASS_QNAME: DatetimeInspector,
**dict.fromkeys(SQLiteConnectionInspector.CLASS_QNAME, SQLiteConnectionInspector),
**dict.fromkeys(SQLAlchemyEngineInspector.CLASS_QNAME, SQLAlchemyEngineInspector),
"ibis.Expr": IbisExprInspector,
"boolean": BooleanInspector,
"bytes": BytesInspector,
"class": ClassInspector,
Expand All @@ -1117,7 +1181,7 @@ def get_inspector(value: T) -> PositronInspector[T]:
elif isinstance(value, property):
qualname = "property"
else:
qualname = get_qualname(value)
qualname = _get_simplified_qualname(value)
inspector_cls = INSPECTOR_CLASSES.get(qualname, None)

if inspector_cls is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pprint
import random
import string
import sys
import types
from typing import Any, Callable, Iterable, Optional, Tuple

Expand Down Expand Up @@ -914,6 +915,46 @@ def test_get_child(value: Any, key: Any, expected: Any) -> None:
assert get_inspector(child).equals(expected)


@pytest.mark.skipif(sys.version_info < (3, 10), reason="requires Python 3.10 or higher")
def test_inspect_ibis_exprs() -> None:
    """Inspecting Ibis expressions must not return an executed repr.

    Interactive mode is switched on for the duration of the test, because
    that is the mode in which ``__repr__`` executes the expression. The
    previous setting is restored afterwards so the global option does not
    leak into other tests.
    """
    import ibis

    df = pd.DataFrame({"a": [1, 2, 1, 1, 2], "b": ["foo", "bar", "baz", "qux", None]})

    previous_interactive = ibis.options.interactive
    # Make sure we don't return an executed repr
    ibis.options.interactive = True
    try:
        t = ibis.memtable(df, name="df")
        a_sum = t["a"].sum()  # type: ignore

        # (expression, expected display value) -- the display value is the
        # qualified class name of the expression, not its evaluated result.
        cases = [
            (t, "ibis.expr.types.relations.Table"),
            (a_sum, "ibis.expr.types.numeric.IntegerScalar"),
        ]

        for expr, expected_display in cases:
            verify_inspector(
                value=expr,
                display_value=expected_display,
                kind=VariableKind.Other,
                display_type="ibis.Expr",
                type_info=get_type_as_str(expr),
                has_children=False,
                is_truncated=True,
                length=0,
                mutable=False,
            )
    finally:
        ibis.options.interactive = previous_interactive


# TODO(wesm): these size values are only currently used for computing
# comparison costs. We should align on # of cells vs. # of bytes for
# these comparisons (possibly based on more experiments)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ bokeh
fastcore
geopandas
holoviews
ibis-framework[duckdb]; python_version >= '3.10'
ipykernel
ipywidgets
matplotlib
Expand Down

0 comments on commit 9b6f25f

Please sign in to comment.