Merge pull request #29 from lincc-frameworks/reduce_doc_dict

update reduce docstring
lincc-frameworks · Jun 10, 2024 · c2a0c93 · c2a0c93
2 parents 66d9dca + 081ec8c
commit c2a0c93
Showing 1 changed file with 13 additions and 5 deletions.
diff --git a/src/nested_dask/core.py b/src/nested_dask/core.py
@@ -309,7 +309,8 @@ def reduce(self, func, *args, meta=None, **kwargs) -> NestedFrame:
         ----------
         func : callable
             Function to apply to each nested dataframe. The first arguments to `func` should be which
-            columns to apply the function to.
+            columns to apply the function to. See the Notes for recommendations
+            on writing func outputs.
         args : positional arguments
             Positional arguments to pass to the function, the first *args should be the names of the
             columns to apply the function to.
@@ -325,10 +326,17 @@ def reduce(self, func, *args, meta=None, **kwargs) -> NestedFrame:
 
         Notes
         -----
-        The recommend return value of func should be a `pd.Series` where the indices are the names of the
-        output columns in the dataframe returned by `reduce`. Note however that in cases where func
-        returns a single value there may be a performance benefit to returning the scalar value
-        rather than a `pd.Series`.
+        By default, `reduce` will produce a `NestedFrame` with enumerated
+        column names for each returned value of the function. For more useful
+        naming, it's recommended to have `func` return a dictionary where each
+        key is the name of an output column in the dataframe returned by `reduce`.
+
+        Example User Function:
+
+        >>> def my_sum(col1, col2):
+        ...     '''reduce will return a NestedFrame with two columns'''
+        ...     return {"sum_col1": sum(col1), "sum_col2": sum(col2)}
+
         """
 
         # apply nested_pandas reduce via map_partitions