apache · itholic · Sep 27, 2021 · Sep 29, 2021 · Sep 30, 2021 · Oct 1, 2021
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
@@ -394,7 +394,17 @@ def __abs__(self: IndexOpsLike) -> IndexOpsLike:
 
     # comparison operators
     def __eq__(self, other: Any) -> SeriesOrIndex:  # type: ignore[override]
-        return self._dtype_op.eq(self, other)
+        if isinstance(other, (list, tuple)):
+            if len(self) != len(other):
+                raise ValueError("Lengths must be equal")
+            name = self._internal.spark_column_name_for(self.spark.column)
+            other = ps.Series(other, name=name)
+            return self == other
+        # pandas always returns False for all items with dict and set.
+        elif isinstance(other, (dict, set)):
+            return self != self
+        else:
+            return self._dtype_op.eq(self, other)
 
     def __ne__(self, other: Any) -> SeriesOrIndex:  # type: ignore[override]
         return self._dtype_op.ne(self, other)

diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
@@ -684,7 +684,7 @@ def rfloordiv(self, other: Any) -> "Series":
     koalas = CachedAccessor("koalas", PandasOnSparkSeriesMethods)
 
     # Comparison Operators
-    def eq(self, other: Any) -> bool:
+    def eq(self, other: Any) -> "Series":
         """
         Compare if the current value is equal to the other.
 

diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@@ -259,7 +259,7 @@ def test_eq(self):
         )
         self.assertRaisesRegex(
             TypeError,
-            "The operation can not be applied to list",
+            "Cannot compare a Categorical with the given type",
             lambda: ordered_psser == [1, 2, 3],
         )
 

diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -1830,6 +1830,47 @@ def _test_cov(self, pser1, pser2):
         pscov = psser1.cov(psser2, min_periods=3)
         self.assert_eq(pcov, pscov, almost=True)
 
+    def test_series_eq(self):
+        pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
+        psser = ps.from_pandas(pser)
+
+        # other = Series
+        pandas_other = pd.Series([np.nan, 1, 3, 4, np.nan, 6], name="x")
+        pandas_on_spark_other = ps.from_pandas(pandas_other)
+        self.assert_eq(pser.eq(pandas_other), psser.eq(pandas_on_spark_other).sort_index())
+        self.assert_eq(pser == pandas_other, (psser == pandas_on_spark_other).sort_index())
+
+        # other = Series with different Index
+        pandas_other = pd.Series(
+            [np.nan, 1, 3, 4, np.nan, 6], index=[10, 20, 30, 40, 50, 60], name="x"
+        )
+        pandas_on_spark_other = ps.from_pandas(pandas_other)
+        self.assert_eq(pser.eq(pandas_other), psser.eq(pandas_on_spark_other).sort_index())
+
+        # other = Index
+        pandas_other = pd.Index([np.nan, 1, 3, 4, np.nan, 6], name="x")
+        pandas_on_spark_other = ps.from_pandas(pandas_other)
+        self.assert_eq(pser.eq(pandas_other), psser.eq(pandas_on_spark_other).sort_index())
+        self.assert_eq(pser == pandas_other, (psser == pandas_on_spark_other).sort_index())
+
+        # other = list
+        other = [np.nan, 1, 3, 4, np.nan, 6]
+        if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
+            self.assert_eq(pser.eq(other), psser.eq(other).sort_index())
+            self.assert_eq(pser == other, (psser == other).sort_index())
+        else:
+            self.assert_eq(pser.eq(other).rename("x"), psser.eq(other).sort_index())
+            self.assert_eq((pser == other).rename("x"), (psser == other).sort_index())
+
+        # other = tuple
+        other = (np.nan, 1, 3, 4, np.nan, 6)
+        if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
+            self.assert_eq(pser.eq(other), psser.eq(other).sort_index())
+            self.assert_eq(pser == other, (psser == other).sort_index())
+        else:
+            self.assert_eq(pser.eq(other).rename("x"), psser.eq(other).sort_index())
+            self.assert_eq((pser == other).rename("x"), (psser == other).sort_index())
+
 
 class OpsOnDiffFramesDisabledTest(PandasOnSparkTestCase, SQLTestUtils):
     @classmethod
@@ -2017,6 +2058,20 @@ def test_combine_first(self):
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
             psdf1.combine_first(psdf2)
 
+    def test_series_eq(self):
+        pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
+        psser = ps.from_pandas(pser)
+
+        others = (
+            ps.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"),
+            ps.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"),
+        )
+        for other in others:
+            with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
+                psser.eq(other)
+            with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
+                psser == other
+
 
 if __name__ == "__main__":
     from pyspark.pandas.tests.test_ops_on_diff_frames import *  # noqa: F401

diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py
@@ -2950,6 +2950,38 @@ def _test_cov(self, pdf):
         pscov = psdf["s1"].cov(psdf["s2"], min_periods=4)
         self.assert_eq(pcov, pscov, almost=True)
 
+    def test_eq(self):
+        pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
+        psser = ps.from_pandas(pser)
+
+        # other = Series
+        self.assert_eq(pser.eq(pser), psser.eq(psser))
+        self.assert_eq(pser == pser, psser == psser)
+
+        # other = dict
+        other = {1: None, 2: None, 3: None, 4: None, np.nan: None, 6: None}
+        self.assert_eq(pser.eq(other), psser.eq(other))
+        self.assert_eq(pser == other, psser == other)
+
+        # other = set
+        other = {1, 2, 3, 4, np.nan, 6}
+        self.assert_eq(pser.eq(other), psser.eq(other))
+        self.assert_eq(pser == other, psser == other)
+
+        # other = list with the different length
+        other = [np.nan, 1, 3, 4, np.nan]
+        with self.assertRaisesRegex(ValueError, "Lengths must be equal"):
+            psser.eq(other)
+        with self.assertRaisesRegex(ValueError, "Lengths must be equal"):
+            psser == other
+
+        # other = tuple with the different length
+        other = (np.nan, 1, 3, 4, np.nan)
+        with self.assertRaisesRegex(ValueError, "Lengths must be equal"):
+            psser.eq(other)
+        with self.assertRaisesRegex(ValueError, "Lengths must be equal"):
+            psser == other
+
 
 if __name__ == "__main__":
     from pyspark.pandas.tests.test_series import *  # noqa: F401