From df1cd3821e83a39074afaefcd6eb65668e9a77d8 Mon Sep 17 00:00:00 2001 From: Pierre Thary Date: Wed, 13 Nov 2024 23:15:24 +0100 Subject: [PATCH] Add `ignore_index` keyword arg in `dropna` and `drop_duplicates` (Part of GH624) (#1030) * add ignore_index keyword parameter to Series and DF dropna and drop_duplicates * use assert_type instead * reverse overloads order, remove ellipsis when inplace=True --- pandas-stubs/core/frame.pyi | 24 +++++++++++++++++++++++- pandas-stubs/core/series.pyi | 25 ++++++++++++++++++++----- tests/test_frame.py | 26 +++++++++++++++++++++++--- tests/test_series.py | 10 ++++++++++ 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 9ba351f6..295aad8b 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -880,6 +880,7 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: Literal[True], + ignore_index: _bool = ..., ) -> None: ... @overload def dropna( @@ -890,6 +891,7 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: Literal[False] = ..., + ignore_index: _bool = ..., ) -> DataFrame: ... @overload def dropna( @@ -900,15 +902,35 @@ class DataFrame(NDFrame, OpsMixin): thresh: int | None = ..., subset: ListLikeU | Scalar | None = ..., inplace: _bool | None = ..., + ignore_index: _bool = ..., ) -> DataFrame | None: ... + @overload def drop_duplicates( self, subset: Hashable | Iterable[Hashable] | None = ..., *, keep: NaPosition | _bool = ..., - inplace: _bool = ..., + inplace: Literal[True], + ignore_index: _bool = ..., + ) -> None: ... + @overload + def drop_duplicates( + self, + subset: Hashable | Iterable[Hashable] | None = ..., + *, + keep: NaPosition | _bool = ..., + inplace: Literal[False] = ..., ignore_index: _bool = ..., ) -> DataFrame: ... + @overload + def drop_duplicates( + self, + subset: Hashable | Iterable[Hashable] | None = ..., + *, + keep: NaPosition | _bool = ..., + inplace: _bool = ..., + ignore_index: _bool = ..., + ) -> DataFrame | None: ... def duplicated( self, subset: Hashable | Iterable[Hashable] | None = ..., diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 7dabbf76..457db8de 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -726,15 +726,27 @@ class Series(IndexOpsMixin[S1], NDFrame): def unique(self) -> np.ndarray: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[False] = ... - ) -> Series[S1]: ... + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: Literal[True], + ignore_index: _bool = ..., + ) -> None: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[True] - ) -> None: ... + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: Literal[False] = ..., + ignore_index: _bool = ..., + ) -> Series[S1]: ... @overload def drop_duplicates( - self, *, keep: NaPosition | Literal[False] = ..., inplace: bool = ... + self, + *, + keep: NaPosition | Literal[False] = ..., + inplace: bool = ..., + ignore_index: _bool = ..., ) -> Series[S1] | None: ... def duplicated(self, keep: NaPosition | Literal[False] = ...) -> Series[_bool]: ... def idxmax( @@ -1148,6 +1160,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: Literal[True], how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> None: ... @overload def dropna( @@ -1156,6 +1169,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: Literal[False] = ..., how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> Series[S1]: ... @overload def dropna( @@ -1164,6 +1178,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., inplace: _bool = ..., how: Literal["any", "all"] | None = ..., + ignore_index: _bool = ..., ) -> Series[S1] | None: ... def to_timestamp( self, diff --git a/tests/test_frame.py b/tests/test_frame.py index 33d09e11..f67294d5 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -373,9 +373,22 @@ def test_arguments_drop() -> None: def test_types_dropna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) - res: pd.DataFrame = df.dropna() - res2: pd.DataFrame = df.dropna(axis=1, thresh=1) - res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) + check(assert_type(df.dropna(), pd.DataFrame), pd.DataFrame) + check(assert_type(df.dropna(ignore_index=True), pd.DataFrame), pd.DataFrame) + check(assert_type(df.dropna(axis=1, thresh=1), pd.DataFrame), pd.DataFrame) + assert ( + assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None) + is None + ) + assert ( + assert_type( + df.dropna( + axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False + ), + None, + ) + is None + ) def test_types_drop_duplicates() -> None: @@ -392,6 +405,13 @@ def test_types_drop_duplicates() -> None: check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame) check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame) + assert assert_type(df.drop_duplicates("AAA", inplace=True), None) is None + check( + assert_type( + df.drop_duplicates("AAA", inplace=False, ignore_index=True), pd.DataFrame + ), + pd.DataFrame, + ) if not PD_LTE_22: check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame) diff --git a/tests/test_series.py b/tests/test_series.py index 1ea1b717..8e9b60bd 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -308,10 +308,20 @@ def test_types_drop_multilevel() -> None: res: pd.Series = s.drop(labels="first", level=1) +def test_types_drop_duplicates() -> None: + s = pd.Series([1.0, 2.0, 2.0]) + check(assert_type(s.drop_duplicates(), "pd.Series[float]"), pd.Series, float) + assert assert_type(s.drop_duplicates(inplace=True), None) is None + assert ( + assert_type(s.drop_duplicates(inplace=True, ignore_index=False), None) is None + ) + + def test_types_dropna() -> None: s = pd.Series([1.0, np.nan, np.nan]) check(assert_type(s.dropna(), "pd.Series[float]"), pd.Series, float) assert assert_type(s.dropna(axis=0, inplace=True), None) is None + assert assert_type(s.dropna(axis=0, inplace=True, ignore_index=True), None) is None def test_pop() -> None: