From c443ceea8c85cd747cdce6f3332ec860aa4d4cc7 Mon Sep 17 00:00:00 2001
From: Pierre Kestener <pierre.kestener@cea.fr>
Date: Fri, 1 Dec 2023 10:00:42 +0100
Subject: [PATCH 1/2] Update reducer types allowed in parallel reduction.

---
 examples/kokkos/random_sum.py         |  2 +-
 pykokkos/core/translators/bindings.py | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/examples/kokkos/random_sum.py b/examples/kokkos/random_sum.py
index 3da72b07..201cb52a 100644
--- a/examples/kokkos/random_sum.py
+++ b/examples/kokkos/random_sum.py
@@ -7,7 +7,7 @@
 class RandomSum:
     def __init__(self, n):
         self.N: int = n
-        self.total: int = 0
+        self.total: pk.int32 = 0
         self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32)
 
         for i in range(self.N):
diff --git a/pykokkos/core/translators/bindings.py b/pykokkos/core/translators/bindings.py
index cf9b21ca..da5c4c0f 100644
--- a/pykokkos/core/translators/bindings.py
+++ b/pykokkos/core/translators/bindings.py
@@ -232,7 +232,7 @@ def get_return_type(operation: str, workunit: cppast.MethodDecl) -> str:
     :param operation: the type of the operation (for, reduce, scan, or workload)
     :param workunit: the workunit for which the binding is being generated
     :returns: the return type as a string
-    """ 
+    """
 
     acc_decl: Optional[cppast.ParmVarDecl] = None
     if operation == "reduce":
@@ -575,6 +575,22 @@ def bind_main_single(
         if "pk.Acc" in element:
             if "pk.int64" in element:
                 acc_type = "int64_t"
+            elif "pk.int32" in element:
+                acc_type = "int32_t"
+            elif "pk.int16" in element:
+                acc_type = "int16_t"
+            elif "pk.int8" in element:
+                acc_type = "int8_t"
+            elif "pk.uint64" in element:
+                acc_type = "uint64_t"
+            elif "pk.uint32" in element:
+                acc_type = "uint32_t"
+            elif "pk.uint16" in element:
+                acc_type = "uint16_t"
+            elif "pk.uint8" in element:
+                acc_type = "uint8_t"
+            elif "pk.float" in element:
+                acc_type = "float"
             elif "pk.double" in element:
                 acc_type = "double"
 

From ab1fafa138cf4cd0ed9dbfa14fba22c6119f59c2 Mon Sep 17 00:00:00 2001
From: Pierre Kestener <pierre.kestener@cea.fr>
Date: Sun, 3 Dec 2023 10:32:09 +0100
Subject: [PATCH 2/2] Update unit test for parallel_reduce.

---
 tests/test_parallelreduce.py | 69 ++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/tests/test_parallelreduce.py b/tests/test_parallelreduce.py
index 384274b8..d470f328 100644
--- a/tests/test_parallelreduce.py
+++ b/tests/test_parallelreduce.py
@@ -45,7 +45,7 @@ def test_add_squares(self):
 
 
 @pk.workload
-class SquareSumFloat:
+class SquareSumDouble:
     def __init__(self, n):
         self.N: int = n
         self.total: pk.double = 0
@@ -55,12 +55,12 @@ def run(self):
         self.total = pk.parallel_reduce(self.N, self.squaresum)
 
     @pk.workunit
-    def squaresum(self, i: float, acc: pk.Acc[pk.double]):
+    def squaresum(self, i: pk.int64, acc: pk.Acc[pk.double]):
         acc += i * i
 
 
 @pk.workload
-class SquareSumInt:
+class SquareSumInt64:
     def __init__(self, n):
         self.N: int = n
         self.total: pk.int64 = 0
@@ -73,17 +73,74 @@ def run(self):
     def squaresum(self, i: pk.int64, acc: pk.Acc[pk.int64]):
         acc += i * i
 
+@pk.workload
+class SquareSumUInt32:
+    def __init__(self, n):
+        self.N: int = n
+        self.total: pk.uint32 = 0
+
+    @pk.main
+    def run(self):
+        self.total = pk.parallel_reduce(self.N, self.squaresum)
+
+    @pk.workunit
+    def squaresum(self, i: pk.int32, acc: pk.Acc[pk.uint32]):
+        acc += i * i
+
+@pk.workload
+class SquareSumInt16:
+    def __init__(self, n):
+        self.N: int = n
+        self.total: pk.int16 = 0
+
+    @pk.main
+    def run(self):
+        self.total = pk.parallel_reduce(self.N, self.squaresum)
+
+    @pk.workunit
+    def squaresum(self, i: pk.int16, acc: pk.Acc[pk.int16]):
+        acc += i * i
+
+@pk.workload
+class SquareSumUInt8:
+    def __init__(self, n):
+        self.N: int = n
+        self.total: pk.int32 = 0  # same type as the pk.Acc[pk.int32] accumulator below
+
+    @pk.main
+    def run(self):
+        self.total = pk.parallel_reduce(self.N, self.squaresum)
+
+    @pk.workunit
+    def squaresum(self, i: pk.uint8, acc: pk.Acc[pk.int32]):
+        acc += i * i
 
 @pytest.mark.parametrize("series_max", [10, 5000, 90000])
-@pytest.mark.parametrize("dtype", [np.float64, np.int64])
+@pytest.mark.parametrize("dtype", [np.float64, np.int64, np.uint32])
 def test_squaresum_types(series_max, dtype):
     # check for the ability to match NumPy in
     # sum of squares reductions with various types
     expected = np.sum(np.arange(series_max, dtype=dtype) ** 2)
     if dtype == np.float64:
-        ss_instance = SquareSumFloat(series_max)
+        ss_instance = SquareSumDouble(series_max)
     elif dtype == np.int64:
-        ss_instance = SquareSumInt(series_max)
+        ss_instance = SquareSumInt64(series_max)
+    elif dtype == np.uint32:
+        ss_instance = SquareSumUInt32(series_max)
+    pk.execute(pk.ExecutionSpace.OpenMP, ss_instance)
+    actual = ss_instance.total
+    assert_allclose(actual, expected)
+
+@pytest.mark.parametrize("series_max", [10, 500])
+@pytest.mark.parametrize("dtype", [np.int16, np.uint8])
+def test_squaresum_narrow_types(series_max, dtype):
+    # narrow-integer variant; needs its own name, otherwise this def would
+    # rebind test_squaresum_types and pytest would drop the cases above
+    expected = np.sum(np.arange(series_max, dtype=dtype) ** 2)
+    if dtype == np.int16:
+        ss_instance = SquareSumInt16(series_max)
+    elif dtype == np.uint8:
+        ss_instance = SquareSumUInt8(series_max)
     pk.execute(pk.ExecutionSpace.OpenMP, ss_instance)
     actual = ss_instance.total
     assert_allclose(actual, expected)