From 708968a71aa9e8caee907e05e9591d2bc54def45 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 27 Jan 2021 11:50:35 +0900 Subject: [PATCH 1/9] Series.eq supports list-like python objects --- databricks/koalas/series.py | 8 +++++ .../koalas/tests/test_ops_on_diff_frames.py | 36 +++++++++++++++++++ databricks/koalas/tests/test_series.py | 25 +++++++++++++ 3 files changed, 69 insertions(+) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 3d1bda0eed..befd24e5dc 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -658,6 +658,14 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ + if isinstance(other, (list, tuple)): + if len(self) == len(other): + other = ks.Series(other) + else: + raise ValueError("Lengths must be equal") + # pandas always returns False for all items with dict and set. + elif isinstance(other, (dict, set)): + return self != self return self == other equals = eq diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 61e8018737..564a71d213 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1370,6 +1370,28 @@ def test_index_ops(self): else: self.assert_eq(kidx1 * 10 + kidx3, (pidx1 * 10 + pidx3).rename(None)) + def test_series_eq(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + pandas_other = pd.Series([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) + + # other = Index + pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) + + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + + # 
other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils): @classmethod @@ -1511,3 +1533,17 @@ def test_mask(self): with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): kdf1.mask(kdf2 > -250) + + def test_series_eq(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + others = ( + ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), + ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), + [np.nan, 1, 3, 4, np.nan, 6], + (np.nan, 1, 3, 4, np.nan, 6), + ) + for other in others: + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + kser.eq(other) diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py index b3b62552f0..5755a5dd8e 100644 --- a/databricks/koalas/tests/test_series.py +++ b/databricks/koalas/tests/test_series.py @@ -2639,3 +2639,28 @@ def test_backfill(self): # Test `inplace=True` kser.backfill(inplace=True) self.assert_eq(expected, kser) + + def test_eq(self): + pser = pd.Series([1, 2, 3, 4, 5, 6], name="x") + kser = ks.from_pandas(pser) + + # other = Series + self.assert_eq(pser.eq(pser), kser.eq(kser)) + + # other = dict + other = {1: None, 2: None, 3: None, 4: None, np.nan: None, 6: None} + self.assert_eq(pser.eq(other), kser.eq(other)) + + # other = set + other = {1, 2, 3, 4, np.nan, 6} + self.assert_eq(pser.eq(other), kser.eq(other)) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): + self.assert_eq(pser.eq(other), kser.eq(other)) + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): + self.assert_eq(pser.eq(other), kser.eq(other)) From d2b23b35d7bb15ae82e8c36dc61a009eb4aca227 Mon Sep 17 00:00:00 2001 
From: itholic Date: Wed, 27 Jan 2021 12:17:56 +0900 Subject: [PATCH 2/9] Override the __eq__ --- databricks/koalas/series.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index befd24e5dc..94026a9ff0 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -658,6 +658,11 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ + return self == other + + equals = eq + + def __eq__(self, other): if isinstance(other, (list, tuple)): if len(self) == len(other): other = ks.Series(other) @@ -666,9 +671,7 @@ def eq(self, other) -> bool: # pandas always returns False for all items with dict and set. elif isinstance(other, (dict, set)): return self != self - return self == other - - equals = eq + return IndexOpsMixin.__eq__(self, other) def gt(self, other) -> "Series": """ From 06438c8c1ffaf36b931f23a25bdae4b3ee4f8e83 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 27 Jan 2021 12:32:50 +0900 Subject: [PATCH 3/9] Addressed name of Series & added more tests --- databricks/koalas/series.py | 9 ++++++++- databricks/koalas/tests/test_ops_on_diff_frames.py | 6 ++++++ databricks/koalas/tests/test_series.py | 11 +++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 94026a9ff0..71dec3c024 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -658,6 +658,13 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ + # pandas won't keep the name with `eq` when other is list of tuple, + # whereas `__eq__` always keeps the name. 
+ if isinstance(other, (list, tuple)): + if len(self) == len(other): + other = ks.Series(other) + else: + raise ValueError("Lengths must be equal") return self == other equals = eq @@ -665,7 +672,7 @@ def eq(self, other) -> bool: def __eq__(self, other): if isinstance(other, (list, tuple)): if len(self) == len(other): - other = ks.Series(other) + other = ks.Series(other, name=self.name) else: raise ValueError("Lengths must be equal") # pandas always returns False for all items with dict and set. diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 564a71d213..c1e2bbf4e5 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1378,19 +1378,23 @@ def test_series_eq(self): pandas_other = pd.Series([np.nan, 1, 3, 4, np.nan, 6], name="x") koalas_other = ks.from_pandas(pandas_other) self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) + self.assert_eq(pser == pandas_other, (kser == koalas_other).sort_index()) # other = Index pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") koalas_other = ks.from_pandas(pandas_other) self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) + self.assert_eq(pser == pandas_other, (kser == koalas_other).sort_index()) # other = list other = [np.nan, 1, 3, 4, np.nan, 6] self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + self.assert_eq(pser == other, (kser == other).sort_index()) # other = tuple other = (np.nan, 1, 3, 4, np.nan, 6) self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + self.assert_eq(pser == other, (kser == other).sort_index()) class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils): @@ -1547,3 +1551,5 @@ def test_series_eq(self): for other in others: with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): kser.eq(other) + with self.assertRaisesRegex(ValueError, "Cannot combine the 
series or dataframe"): + kser == other diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py index 5755a5dd8e..10a2665e51 100644 --- a/databricks/koalas/tests/test_series.py +++ b/databricks/koalas/tests/test_series.py @@ -2646,21 +2646,28 @@ def test_eq(self): # other = Series self.assert_eq(pser.eq(pser), kser.eq(kser)) + self.assert_eq(pser == pser, kser == kser) # other = dict other = {1: None, 2: None, 3: None, 4: None, np.nan: None, 6: None} self.assert_eq(pser.eq(other), kser.eq(other)) + self.assert_eq(pser == other, kser == other) # other = set other = {1, 2, 3, 4, np.nan, 6} self.assert_eq(pser.eq(other), kser.eq(other)) + self.assert_eq(pser == other, kser == other) # other = list with the different length other = [np.nan, 1, 3, 4, np.nan] with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - self.assert_eq(pser.eq(other), kser.eq(other)) + kser.eq(other) + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): + kser == other # other = tuple with the different length other = (np.nan, 1, 3, 4, np.nan) with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - self.assert_eq(pser.eq(other), kser.eq(other)) + kser.eq(other) + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): + kser == other From 9a5d2492d47cebbfa2a07cf0b05580a0f506d85a Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 27 Jan 2021 12:37:44 +0900 Subject: [PATCH 4/9] Addressed the comments --- databricks/koalas/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 71dec3c024..efab6c645b 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -658,7 +658,7 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ - # pandas won't keep the name with `eq` when other is list of tuple, + # pandas won't keep the name with `eq` when `other` is list or tuple, # whereas `__eq__` always keeps the 
name. if isinstance(other, (list, tuple)): if len(self) == len(other): From e484e6d078317819287d907f203676f4b2a754ae Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 8 Feb 2021 14:21:02 +0900 Subject: [PATCH 5/9] Fix bug --- databricks/koalas/indexes/base.py | 6 ++- databricks/koalas/series.py | 21 ++++---- .../koalas/tests/test_ops_on_diff_frames.py | 49 +++++++++++++++++-- databricks/koalas/tests/test_series.py | 14 ------ 4 files changed, 59 insertions(+), 31 deletions(-) diff --git a/databricks/koalas/indexes/base.py b/databricks/koalas/indexes/base.py index 03911b7487..603ba5d5fe 100644 --- a/databricks/koalas/indexes/base.py +++ b/databricks/koalas/indexes/base.py @@ -353,8 +353,10 @@ def equals(self, other) -> bool: # some exceptions when 'compute.ops_on_diff_frames' is enabled. # Working around for now via using frame. return ( - self.to_series("self").reset_index(drop=True) - == other.to_series("other").reset_index(drop=True) + IndexOpsMixin.__eq__( + self.to_series("self").reset_index(drop=True), + other.to_series("other").reset_index(drop=True), + ) ).all() else: raise ValueError(ERROR_MESSAGE_CANNOT_COMBINE) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index bf9602fb0b..3a75023cff 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -657,26 +657,25 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ - # pandas won't keep the name with `eq` when `other` is list or tuple, - # whereas `__eq__` always keeps the name. if isinstance(other, (list, tuple)): - if len(self) == len(other): - other = ks.Series(other) - else: - raise ValueError("Lengths must be equal") - return self == other + other = ks.Index(other, name=self.name) + # pandas always returns False for all items with dict and set. 
+ elif isinstance(other, (dict, set)): + return self != self + return IndexOpsMixin.__eq__(self, other) equals = eq def __eq__(self, other): if isinstance(other, (list, tuple)): - if len(self) == len(other): - other = ks.Series(other, name=self.name) - else: - raise ValueError("Lengths must be equal") + other = ks.Index(other, name=self.name) # pandas always returns False for all items with dict and set. elif isinstance(other, (dict, set)): return self != self + # pandas doesn't support `==` for different Index, but support for `eq` function. + elif isinstance(other, ks.Series): + if not self.index.equals(other.index): + raise ValueError("Can only compare identically-labeled Series objects") return IndexOpsMixin.__eq__(self, other) def gt(self, other) -> "Series": diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 4cf8ea71df..a04d7d414b 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1428,6 +1428,17 @@ def test_series_eq(self): self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) self.assert_eq(pser == pandas_other, (kser == koalas_other).sort_index()) + # other = Series with different Index + pandas_other = pd.Series( + [np.nan, 1, 3, 4, np.nan, 6], index=[10, 20, 30, 40, 50, 60], name="x" + ) + koalas_other = ks.from_pandas(pandas_other) + self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) + with self.assertRaisesRegex( + ValueError, "Can only compare identically-labeled Series objects" + ): + kser == koalas_other + # other = Index pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") koalas_other = ks.from_pandas(pandas_other) @@ -1436,13 +1447,43 @@ def test_series_eq(self): # other = list other = [np.nan, 1, 3, 4, np.nan, 6] - self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) - self.assert_eq(pser == other, (kser == other).sort_index()) + if 
LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + self.assert_eq(pser == other, (kser == other).sort_index()) + else: + self.assert_eq(pser.eq(other).rename("x"), kser.eq(other).sort_index()) + self.assert_eq((pser == other).rename("x"), (kser == other).sort_index()) # other = tuple other = (np.nan, 1, 3, 4, np.nan, 6) - self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) - self.assert_eq(pser == other, (kser == other).sort_index()) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.eq(other), kser.eq(other).sort_index()) + self.assert_eq(pser == other, (kser == other).sort_index()) + else: + self.assert_eq(pser.eq(other).rename("x"), kser.eq(other).sort_index()) + self.assert_eq((pser == other).rename("x"), (kser == other).sort_index()) + + # other = list with the different length + other = [np.nan, 1, 3, 4, np.nan] + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.eq(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser == other + + # other = tuple with the different length + other = (np.nan, 1, 3, 4, np.nan) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser.eq(other) + with self.assertRaisesRegex( + ValueError, "operands could not be broadcast together with shapes" + ): + kser == other def test_align(self): pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=[10, 20, 30]) diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py index 07a527d27b..ac7410f093 100644 --- a/databricks/koalas/tests/test_series.py +++ b/databricks/koalas/tests/test_series.py @@ -2668,20 +2668,6 @@ def test_eq(self): self.assert_eq(pser.eq(other), kser.eq(other)) self.assert_eq(pser == other, kser == other) - # other = list with the different length 
- other = [np.nan, 1, 3, 4, np.nan] - with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - kser.eq(other) - with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - kser == other - - # other = tuple with the different length - other = (np.nan, 1, 3, 4, np.nan) - with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - kser.eq(other) - with self.assertRaisesRegex(ValueError, "Lengths must be equal"): - kser == other - def test_align(self): pdf = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) kdf = ks.from_pandas(pdf) From 44e34f669309f284404e239d4206eae7f19efafa Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 8 Feb 2021 14:24:14 +0900 Subject: [PATCH 6/9] Add tuple type for Index initializer --- databricks/koalas/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks/koalas/indexes/base.py b/databricks/koalas/indexes/base.py index 603ba5d5fe..8fbaa27aaa 100644 --- a/databricks/koalas/indexes/base.py +++ b/databricks/koalas/indexes/base.py @@ -102,7 +102,7 @@ class Index(IndexOpsMixin): Index(['a', 'b', 'c'], dtype='object') """ - def __new__(cls, data: Union[DataFrame, list], dtype=None, name=None, names=None): + def __new__(cls, data: Union[DataFrame, list, tuple], dtype=None, name=None, names=None): from databricks.koalas.indexes.datetimes import DatetimeIndex from databricks.koalas.indexes.multi import MultiIndex from databricks.koalas.indexes.numeric import Float64Index, Int64Index From 54dcc6b2e8574009af9863f76b9e4026f0994d7d Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 14:37:18 +0900 Subject: [PATCH 7/9] Use internal pandas --- databricks/koalas/base.py | 14 ++++++++++++- databricks/koalas/indexes/base.py | 8 +++---- databricks/koalas/series.py | 21 ++----------------- .../koalas/tests/test_ops_on_diff_frames.py | 20 ++++-------------- 4 files changed, 22 insertions(+), 41 deletions(-) diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index 
f735c80daf..d8409c669c 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -572,7 +572,19 @@ def rmod(left, right): __abs__ = column_op(F.abs) # comparison operators - __eq__ = column_op(Column.__eq__) + def __eq__(self, other) -> Union["Series", "Index"]: # type: ignore[override] + if isinstance(other, (list, tuple)): + with ks.option_context("compute.ordered_head", True): + pindex_ops = self.head(len(other) + 1)._to_internal_pandas() # type: ignore + if len(pindex_ops) != len(other): + raise ValueError("Lengths must be equal") + return ks.from_pandas(pindex_ops == other) # type: ignore + # pandas always returns False for all items with dict and set. + elif isinstance(other, (dict, set)): + return self != self + else: + return column_op(Column.__eq__)(self, other) + __ne__ = column_op(Column.__ne__) __lt__ = column_op(Column.__lt__) __le__ = column_op(Column.__le__) diff --git a/databricks/koalas/indexes/base.py b/databricks/koalas/indexes/base.py index 8fbaa27aaa..03911b7487 100644 --- a/databricks/koalas/indexes/base.py +++ b/databricks/koalas/indexes/base.py @@ -102,7 +102,7 @@ class Index(IndexOpsMixin): Index(['a', 'b', 'c'], dtype='object') """ - def __new__(cls, data: Union[DataFrame, list, tuple], dtype=None, name=None, names=None): + def __new__(cls, data: Union[DataFrame, list], dtype=None, name=None, names=None): from databricks.koalas.indexes.datetimes import DatetimeIndex from databricks.koalas.indexes.multi import MultiIndex from databricks.koalas.indexes.numeric import Float64Index, Int64Index @@ -353,10 +353,8 @@ def equals(self, other) -> bool: # some exceptions when 'compute.ops_on_diff_frames' is enabled. # Working around for now via using frame. 
return ( - IndexOpsMixin.__eq__( - self.to_series("self").reset_index(drop=True), - other.to_series("other").reset_index(drop=True), - ) + self.to_series("self").reset_index(drop=True) + == other.to_series("other").reset_index(drop=True) ).all() else: raise ValueError(ERROR_MESSAGE_CANNOT_COMBINE) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index 3a75023cff..b0bbfbc176 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -635,7 +635,7 @@ def rfloordiv(self, other) -> "Series": koalas = CachedAccessor("koalas", KoalasSeriesMethods) # Comparison Operators - def eq(self, other) -> bool: + def eq(self, other) -> "Series": """ Compare if the current value is equal to the other. @@ -657,27 +657,10 @@ def eq(self, other) -> bool: d False Name: b, dtype: bool """ - if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) - # pandas always returns False for all items with dict and set. - elif isinstance(other, (dict, set)): - return self != self - return IndexOpsMixin.__eq__(self, other) + return self == other equals = eq - def __eq__(self, other): - if isinstance(other, (list, tuple)): - other = ks.Index(other, name=self.name) - # pandas always returns False for all items with dict and set. - elif isinstance(other, (dict, set)): - return self != self - # pandas doesn't support `==` for different Index, but support for `eq` function. - elif isinstance(other, ks.Series): - if not self.index.equals(other.index): - raise ValueError("Can only compare identically-labeled Series objects") - return IndexOpsMixin.__eq__(self, other) - def gt(self, other) -> "Series": """ Compare if the current value is greater than the other. 
diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index a04d7d414b..9c510cba6f 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1434,10 +1434,6 @@ def test_series_eq(self): ) koalas_other = ks.from_pandas(pandas_other) self.assert_eq(pser.eq(pandas_other), kser.eq(koalas_other).sort_index()) - with self.assertRaisesRegex( - ValueError, "Can only compare identically-labeled Series objects" - ): - kser == koalas_other # other = Index pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x") @@ -1465,24 +1461,16 @@ def test_series_eq(self): # other = list with the different length other = [np.nan, 1, 3, 4, np.nan] - with self.assertRaisesRegex( - ValueError, "operands could not be broadcast together with shapes" - ): + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): kser.eq(other) - with self.assertRaisesRegex( - ValueError, "operands could not be broadcast together with shapes" - ): + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): kser == other # other = tuple with the different length other = (np.nan, 1, 3, 4, np.nan) - with self.assertRaisesRegex( - ValueError, "operands could not be broadcast together with shapes" - ): + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): kser.eq(other) - with self.assertRaisesRegex( - ValueError, "operands could not be broadcast together with shapes" - ): + with self.assertRaisesRegex(ValueError, "Lengths must be equal"): kser == other def test_align(self): From 4915afd2e34a33162928a0a0255379b28b29130c Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 18 Feb 2021 15:50:19 +0900 Subject: [PATCH 8/9] Fix tests --- .../koalas/tests/test_ops_on_diff_frames.py | 2 -- databricks/koalas/tests/test_series.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git 
a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 9c510cba6f..db27b02e14 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1688,8 +1688,6 @@ def test_series_eq(self): others = ( ks.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"), ks.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"), - [np.nan, 1, 3, 4, np.nan, 6], - (np.nan, 1, 3, 4, np.nan, 6), ) for other in others: with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): diff --git a/databricks/koalas/tests/test_series.py b/databricks/koalas/tests/test_series.py index ac7410f093..b60b2ab08a 100644 --- a/databricks/koalas/tests/test_series.py +++ b/databricks/koalas/tests/test_series.py @@ -2658,6 +2658,24 @@ def test_eq(self): self.assert_eq(pser.eq(pser), kser.eq(kser)) self.assert_eq(pser == pser, kser == kser) + # other = list + other = [np.nan, 1, 3, 4, np.nan, 6] + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.eq(other), kser.eq(other)) + self.assert_eq(pser == other, kser == other) + else: + self.assert_eq(pser.eq(other).rename("x"), kser.eq(other)) + self.assert_eq((pser == other).rename("x"), kser == other) + + # other = tuple + other = (np.nan, 1, 3, 4, np.nan, 6) + if LooseVersion(pd.__version__) >= LooseVersion("1.2"): + self.assert_eq(pser.eq(other), kser.eq(other)) + self.assert_eq(pser == other, kser == other) + else: + self.assert_eq(pser.eq(other).rename("x"), kser.eq(other)) + self.assert_eq((pser == other).rename("x"), kser == other) + # other = dict other = {1: None, 2: None, 3: None, 4: None, np.nan: None, 6: None} self.assert_eq(pser.eq(other), kser.eq(other)) From 31e76479ce0262d95e4a81306990d9165389cf05 Mon Sep 17 00:00:00 2001 From: itholic Date: Fri, 5 Mar 2021 10:53:01 +0900 Subject: [PATCH 9/9] Add docstring list-like --- databricks/koalas/series.py | 16 ++++++++++++++++ 1 file changed, 16
insertions(+) diff --git a/databricks/koalas/series.py b/databricks/koalas/series.py index c1ebd402e8..a6a8782d34 100644 --- a/databricks/koalas/series.py +++ b/databricks/koalas/series.py @@ -657,6 +657,22 @@ def eq(self, other) -> "Series": c True d False Name: b, dtype: bool + + Supports list-like Python objects of the same length + + >>> df.a == [1, 3, 2, 4] + a True + b False + c False + d True + Name: a, dtype: bool + + >>> df.a.eq([1, 3, 2, 4]) + a True + b False + c False + d True + Name: a, dtype: bool """ return self == other