From ba9ad7369ba86cde3df90274ff16e59f15bf1fb0 Mon Sep 17 00:00:00 2001
From: shudson
Date: Sun, 3 Sep 2023 15:25:52 -0500
Subject: [PATCH 01/27] Update forces_simple to use persistent gen

---
 .../forces/forces_simple/readme.md          | 16 +++++---
 .../forces/forces_simple/run_libe_forces.py | 38 +++++++++++++++----
 2 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/readme.md b/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
index 72ee53683..da4dcebd5 100644
--- a/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
+++ b/libensemble/tests/scaling_tests/forces/forces_simple/readme.md
@@ -6,16 +6,19 @@ https://libensemble.readthedocs.io/en/develop/tutorials/executor_forces_tutorial
 
 ## QuickStart
 
-Build executable and run example. Go to `forces_app` directory and build `forces.x`:
+Build forces application and run the ensemble. Go to `forces_app` directory and build `forces.x`:
 
     cd ../forces_app
     ./build_forces.sh
 
 Then return here and run:
 
-    python run_libe_forces.py --comms local --nworkers 4
+    python run_libe_forces.py --comms local --nworkers 5
 
-## Running test run_libe_forces.py
+This will run with five workers. One worker will run the persistent generator.
+The other four will run the forces simulations.
+
+## Detailed instructions
 
 Naive Electrostatics Code Test
 
@@ -30,10 +33,11 @@ See `forces_app` directory for details.
 
 ### Running with libEnsemble.
 
-A random sample of seeds is taken and used as input to the sim func (forces miniapp).
+A random sample of seeds is taken and used as input to the simulation function
+(forces miniapp).
 
-In forces_app directory, modify build_forces.sh for target platform and run to
-build forces.x:
+In the `forces_app` directory, modify `build_forces.sh` for the target platform
+and run to build `forces.x`:
 
     ./build_forces.sh
 
diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
index 5da33ea49..6adb22f95 100644
--- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
+++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py
@@ -5,14 +5,18 @@
 import numpy as np
 from forces_simf import run_forces  # Sim func from current dir
 
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
 from libensemble.executors import MPIExecutor
-from libensemble.gen_funcs.sampling import uniform_random_sample
+
+from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
 from libensemble.libE import libE
 from libensemble.tools import add_unique_random_streams, parse_args
 
 if __name__ == "__main__":
     # Parse number of workers, comms type, etc. from arguments
     nworkers, is_manager, libE_specs, _ = parse_args()
+    nsim_workers = nworkers - 1  # One worker is for persistent generator
+    libE_specs["num_resource_sets"] = nsim_workers  # Persistent gen does not need resources
 
     # Initialize MPI Executor instance
     exctr = MPIExecutor()
@@ -34,13 +38,22 @@
     # State the gen_f, inputs, outputs, additional parameters
     gen_specs = {
-        "gen_f": uniform_random_sample,  # Generator function
+        "gen_f": gen_f,  # Generator function
         "in": [],  # Generator input
+        "persis_in": ["sim_id"],  # Just send something back to gen to get number of new points.
"out": [("x", float, (1,))], # Name, type and size of data from gen_f "user": { - "lb": np.array([1000]), # User parameters for the gen_f - "ub": np.array([3000]), - "gen_batch_size": 8, + "lb": np.array([1000]), # min particles + "ub": np.array([3000]), # max particles + "initial_batch_size": nsim_workers, + }, + } + + # Starts one persistent generator. Simulated values are returned in batch. + alloc_specs = { + "alloc_f": alloc_f, + "user": { + "async_return": False, # False causes batch returns }, } @@ -48,10 +61,21 @@ libE_specs["sim_dirs_make"] = True # Instruct libEnsemble to exit after this many simulations - exit_criteria = {"sim_max": 8} + exit_criteria = {"sim_max": 8} # Hint: Use nsim_workers*2 to vary with worker count # Seed random streams for each worker, particularly for gen_f persis_info = add_unique_random_streams({}, nworkers + 1) # Launch libEnsemble - H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) + H, persis_info, flag = libE( + sim_specs, + gen_specs, + exit_criteria, + persis_info=persis_info, + alloc_specs=alloc_specs, + libE_specs=libE_specs, + ) + +if is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + print(f'Final energy checksum: {np.sum(H["energy"])}') From cccffb8eebd43daa4b29dae7ec66af9d91a6eb38 Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 4 Sep 2023 00:17:46 -0500 Subject: [PATCH 02/27] Convert forces_simple to OO interface --- .../forces/forces_simple/run_libe_forces.py | 83 +++++++++---------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py index 6adb22f95..fdfc8cfe6 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py @@ -5,23 +5,16 @@ import numpy as np from forces_simf import run_forces # Sim func from current dir +from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor - from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs if __name__ == "__main__": - # Parse number of workers, comms type, etc. from arguments - nworkers, is_manager, libE_specs, _ = parse_args() - nsim_workers = nworkers - 1 # One worker is for persistent generator - libE_specs["num_resource_sets"] = nsim_workers # Persistent gen does not need resources - # Initialize MPI Executor instance + # Initialize MPI Executor exctr = MPIExecutor() - - # Register simulation executable with executor sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") if not os.path.isfile(sim_app): @@ -29,53 +22,51 @@ exctr.register_app(full_path=sim_app, app_name="forces") - # State the sim_f, inputs, outputs - sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f - } + # Parse number of workers, comms type, etc. 
from arguments + ensemble = Ensemble(parse_args=True) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + out=[("energy", float)], + ) - # State the gen_f, inputs, outputs, additional parameters - gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "persis_in": ["sim_id"], # Just send something back to gen to get number of new points. - "out": [("x", float, (1,))], # Name, type and size of data from gen_f - "user": { + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + out=[("x", float, (1,))], + user={ + "initial_batch_size": nsim_workers, "lb": np.array([1000]), # min particles "ub": np.array([3000]), # max particles - "initial_batch_size": nsim_workers, }, - } + ) # Starts one persistent generator. Simulated values are returned in batch. - alloc_specs = { - "alloc_f": alloc_f, - "user": { + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ "async_return": False, # False causes batch returns }, - } - - # Create and work inside separate per-simulation directories - libE_specs["sim_dirs_make"] = True + ) # Instruct libEnsemble to exit after this many simulations - exit_criteria = {"sim_max": 8} # Hint: Use nsim_workers*2 to vary with worker count + ensemble.exit_criteria = ExitCriteria(sim_max=8) # Seed random streams for each worker, particularly for gen_f - persis_info = add_unique_random_streams({}, nworkers + 1) + ensemble.add_random_streams() - # Launch libEnsemble - H, persis_info, flag = libE( - sim_specs, - gen_specs, - exit_criteria, - persis_info=persis_info, - alloc_specs=alloc_specs, - libE_specs=libE_specs, - ) + # Run ensemble + ensemble.run() -if is_manager: - # Note, this will change if change sim_max, nworkers, lb/ub etc... - print(f'Final energy checksum: {np.sum(H["energy"])}') + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + print(f'Final energy checksum: {np.sum(ensemble.H["energy"])}') From 9cee243fcf72a8e3eda5b52c76b651b05f0254ca Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 4 Sep 2023 00:43:39 -0500 Subject: [PATCH 03/27] Convert forces_gpu_persis_gen to OO interface --- .../forces_gpu_persis_gen/run_libe_forces.py | 160 +++++++++--------- 1 file changed, 78 insertions(+), 82 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py index 7d40432a6..db16b7367 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py @@ -6,12 +6,16 @@ live-checking GPU usage. The forces.c application should be built by setting the GPU preprocessor condition -in addition to openMP GPU flags for the given system. See examples in -../forces_app/build_forces.sh. We recommend running forces.x standalone first +(usually -DGPU) in addition to openMP GPU flags for the given system. See examples +in ../forces_app/build_forces.sh. We recommend running forces.x standalone first and confirm it is running on the GPU (this is given clearly in the output). 
An alternative variable resource generator is available (search 'var resources' in this script and uncomment relevant lines). + +To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You +will compile forces without -DGPU option. It is recommended that the lb/ub for +particle counts are reduced for CPU performance. """ import os @@ -20,92 +24,84 @@ import numpy as np from forces_simf import run_forces # Sim func from current dir +from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor +from libensemble.tools import add_unique_random_streams, parse_args +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs # Fixed resources (one resource set per worker) - persistent gen from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args # Uncomment for var resources (checksum will change due to rng differences) # from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample as gen_f - -# Parse number of workers, comms type, etc. from arguments -nworkers, is_manager, libE_specs, _ = parse_args() - -nsim_workers = nworkers - 1 -libE_specs["num_resource_sets"] = nsim_workers # Persistent gen does not need resources - -# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. -# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - -if not os.path.isfile(sim_app): - sys.exit("forces.x not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=sim_app, app_name="forces") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "persis_in": ["sim_id"], # Just send something back to gen to get number of new points. 
- "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - # ("resource_sets", int) # Uncomment for var resources - ], - "user": { - "lb": np.array([50000]), # fewest particles (changing will change checksum) - "ub": np.array([100000]), # max particles (changing will change checksum) - "initial_batch_size": nsim_workers, - # "max_resource_sets": nsim_workers # Uncomment for var resources - }, -} - -alloc_specs = { - "alloc_f": alloc_f, - "user": { - "give_all_with_same_priority": False, - "async_return": False, # False causes batch returns - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt - useful with var resources -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": 8} # changing will change checksum - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE( - sim_specs, gen_specs, exit_criteria, persis_info=persis_info, alloc_specs=alloc_specs, libE_specs=libE_specs -) - -# This is for configuration of this test (inc. lb/ub and sim_max values) -if is_manager: - if exit_criteria["sim_max"] == 8: - chksum = np.sum(H["energy"]) - assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" - print("Checksum passed") - else: - print("Run complete. A checksum has not been provided for the given sim_max") +if __name__ == "__main__": + + # Initialize MPI Executor + exctr = MPIExecutor() + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") + + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + # resource_info = {"gpus_on_node": 4} # for mocking GPUs + # stats_fmt = {"show_resource_sets": True} # see resource sets in libE_stats.txt + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + out=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + out=[("x", float, (1,)), + # ("resource_sets", int) # Uncomment for var resources + ], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([50000]), # min particles + "ub": np.array([100000]), # max particles + # "max_resource_sets": nsim_workers # Uncomment for var resources + }, + ) + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) + + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... 
+        if ensemble.exit_criteria.sim_max == 8:
+            chksum = np.sum(ensemble.H["energy"])
+            assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}"
+            print("Checksum passed")
+        else:
+            print("Run complete. A checksum has not been provided for the given sim_max")

From 48cae3eb0aea91e6221a26cb0a064c073ea8b53d Mon Sep 17 00:00:00 2001
From: shudson
Date: Tue, 5 Sep 2023 23:09:23 -0500
Subject: [PATCH 04/27] Add generator uniform_sample_with_var_gpus

A generator that sets num_gpus based on the x value
---
 .../persistent_sampling_var_resources.py | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/libensemble/gen_funcs/persistent_sampling_var_resources.py b/libensemble/gen_funcs/persistent_sampling_var_resources.py
index bc9dc638f..60f0a199d 100644
--- a/libensemble/gen_funcs/persistent_sampling_var_resources.py
+++ b/libensemble/gen_funcs/persistent_sampling_var_resources.py
@@ -2,6 +2,10 @@
 Each function generates points uniformly over the domain defined by
 ``gen_specs["user"]["ub"]`` and ``gen_specs["user"]["lb"]``.
+
+Most functions use a random request of resources over a range, setting num_procs, num_gpus or
+resource sets. The function ``uniform_sample_with_var_gpus`` uses the ``x`` value to determine
+the number of GPUs requested.
 """
 
 import numpy as np
@@ -56,6 +60,47 @@ def uniform_sample(_, persis_info, gen_specs, libE_info):
 
     return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG
 
 
+def uniform_sample_with_var_gpus(_, persis_info, gen_specs, libE_info):
+    """
+    Requests a number of GPUs based on the ``x`` value to be used in the evaluation
+    of the generated points. By default, simulations will assign one MPI processor
+    per GPU.
+
+    Note that the ``num_gpus`` gen_specs["out"] option (similar to ``num_procs``) does
+    not need to be passed as a sim_specs["in"]. It will automatically be passed to
+    simulation functions and used by any MPI Executor unless overridden in the
+    ``executor.submit`` function.
+
+    .. seealso::
+        `test_GPU_variable_resources.py `_
+    """  # noqa
+
+    b, n, lb, ub = _get_user_params(gen_specs["user"])
+    rng = persis_info["rand_stream"]
+    ps = PersistentSupport(libE_info, EVAL_GEN_TAG)
+    tag = None
+    max_gpus = gen_specs["user"]["max_gpus"]
+
+    while tag not in [STOP_TAG, PERSIS_STOP]:
+        x = rng.uniform(lb, ub, (b, n))
+        bucket_size = (ub[0] - lb[0]) / max_gpus
+
+        # Determine number of GPUs based on linear split over x range (first dimension).
+ ngpus = [int((num - lb[0]) / bucket_size) + 1 for num in x[:, 0]] + + H_o = np.zeros(b, dtype=gen_specs["out"]) + H_o["x"] = x + H_o["num_gpus"] = ngpus + + print(f"GEN created {b} sims requiring {ngpus} GPUs", flush=True) + + tag, Work, calc_in = ps.send_recv(H_o) + if hasattr(calc_in, "__len__"): + b = len(calc_in) + + return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG + + def uniform_sample_with_procs_gpus(_, persis_info, gen_specs, libE_info): """ Randomly requests a different number of processors and gpus to be used in the From 6fb64e5f0485e94af5417af05cbaf92acb21f710 Mon Sep 17 00:00:00 2001 From: shudson Date: Tue, 5 Sep 2023 23:11:27 -0500 Subject: [PATCH 05/27] Update test_GPU_variable_resources.py to use uniform_sample_with_var_gpus gen --- .../test_GPU_variable_resources.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/libensemble/tests/regression_tests/test_GPU_variable_resources.py b/libensemble/tests/regression_tests/test_GPU_variable_resources.py index 3e8dcd79a..cea62eb65 100644 --- a/libensemble/tests/regression_tests/test_GPU_variable_resources.py +++ b/libensemble/tests/regression_tests/test_GPU_variable_resources.py @@ -30,7 +30,8 @@ from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors.mpi_executor import MPIExecutor -from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_procs_gpus as gen_f +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_procs_gpus as gen_f1 +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f2 # Import libEnsemble items for this test from libensemble.sim_funcs import six_hump_camel @@ -55,6 +56,7 @@ resource_info={"cores_on_node": (8, 16), "gpus_on_node": 4}, sim_dirs_make=True, ensemble_dir_path="./ensemble_GPU_variable_w" + str(gpu_test.nworkers), + reuse_output_dir=True, ) gpu_test.sim_specs = SimSpecs( @@ -64,12 +66,12 @@ user={"dry_run": False}, ) gpu_test.gen_specs = GenSpecs( - gen_f=gen_f, + gen_f=gen_f1, persis_in=["f", "x", "sim_id"], out=[("num_procs", int), ("num_gpus", int), ("x", float, 2)], user={ "initial_batch_size": gpu_test.nworkers - 1, - "max_procs": (gpu_test.nworkers - 1) // 2, # Any sim created can req. 1 worker up to max + "max_procs": gpu_test.nworkers - 1, # Any sim created can req. 
1 worker up to max "lb": np.array([-3, -2]), "ub": np.array([3, 2]), }, @@ -83,10 +85,21 @@ }, ) + # Run with random num_procs/num_gpus for each simulation + gpu_test.add_random_streams() + gpu_test.exit_criteria = ExitCriteria(sim_max=20) + + gpu_test.run() + if gpu_test.is_manager: + assert gpu_test.flag == 0 + + # Run with num_gpus based on x[0] for each simulation + gpu_test.gen_specs.gen_f = gen_f2 + gpu_test.gen_specs.user["max_gpus"] = gpu_test.nworkers - 1 gpu_test.add_random_streams() - gpu_test.exit_criteria = ExitCriteria(sim_max=40) gpu_test.run() if gpu_test.is_manager: assert gpu_test.flag == 0 + gpu_test.save_output(__file__) From 9e1332c82d741b0a977d749705b655f6d281b9bb Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 13:50:10 -0500 Subject: [PATCH 06/27] Remove old forces_gpu --- .../forces/forces_gpu/cleanup.sh | 1 - .../forces/forces_gpu/forces_simf.py | 61 ------------ .../scaling_tests/forces/forces_gpu/readme.md | 59 ------------ .../forces/forces_gpu/run_libe_forces.py | 95 ------------------- .../forces/forces_gpu/submit_perlmutter.sh | 12 --- 5 files changed, 228 deletions(-) delete mode 100755 libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh delete mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py delete mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu/readme.md delete mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py delete mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh deleted file mode 100755 index eaaa23635..000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh +++ /dev/null @@ -1 +0,0 @@ -rm -r ensemble *.npy *.pickle ensemble.log lib*.txt diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py deleted file mode 100644 index c609c26af..000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np - -# To retrieve our MPI Executor -from libensemble.executors.executor import Executor - -# Optional status codes to display in libE_stats.txt for each gen or sim -from libensemble.message_numbers import TASK_FAILED, WORKER_DONE - -# Optional - to print GPU settings -from libensemble.tools.test_support import check_gpu_setting - - -def run_forces(H, persis_info, sim_specs, libE_info): - """Launches the forces MPI app and auto-assigns ranks and GPU resources. - - Assigns one MPI rank to each GPU assigned to the worker. - """ - - calc_status = 0 - - # Parse out num particles, from generator function - particles = str(int(H["x"][0][0])) - - # app arguments: num particles, timesteps, also using num particles as seed - args = particles + " " + str(10) + " " + particles - - # Retrieve our MPI Executor - exctr = Executor.executor - - # Submit our forces app for execution. 
- task = exctr.submit( - app_name="forces", - app_args=args, - auto_assign_gpus=True, - match_procs_to_gpus=True, - ) - - # Block until the task finishes - task.wait() - - # Optional - prints GPU assignment (method and numbers) - check_gpu_setting(task, assert_setting=False, print_setting=True) - - # Stat file to check for bad runs - statfile = "forces.stat" - - # Try loading final energy reading, set the sim's status - try: - data = np.loadtxt(statfile) - final_energy = data[-1] - calc_status = WORKER_DONE - except Exception: - final_energy = np.nan - calc_status = TASK_FAILED - - # Define our output array, populate with energy reading - output = np.zeros(1, dtype=sim_specs["out"]) - output["energy"] = final_energy - - # Return final information to worker, for reporting to manager - return output, persis_info, calc_status diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md deleted file mode 100644 index 040b65350..000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md +++ /dev/null @@ -1,59 +0,0 @@ -## Tutorial - -This example is explained in the tutorial **Executor - Assign GPUs**. - -https://libensemble.readthedocs.io/en/develop/tutorials/forces_gpu_tutorial.html - -## QuickStart - -Go to `forces_app` directory: - - cd ../forces_app - -Compile **forces.x** using one of the GPU build lines in `build_forces.sh` or similar -for your platform (these will include -DGPU) - -Then return here and run: - - python run_libe_forces.py --comms local --nworkers 4 - -By default, each run of forces will use one CPU and one GPU. The `forces.c` code can also -be MPI parallel and will use one GPU for each CPU rank, assuming an even split of ranks -across nodes. - -## Running test run_libe_forces.py - -Naive Electrostatics Code Test - -This is a synthetic, highly configurable simulation function. This test aims -to show libEnsemble's capability to set assign GPU resources as needed by each -worker and to launch application instances via the `MPIExecutor`. - -### Forces Mini-App - -A system of charged particles is initialized and simulated over a number of time-steps. - -See `forces_app` directory for details. - -### Running with libEnsemble. - -A random sample of seeds is taken and used as input to the sim func (forces miniapp). - -In forces_app directory, modify build_forces.sh for target platform and run to -build forces.x: - - ./build_forces.sh - -Then to run with local comms (multiprocessing) with one manager and `N` workers: - - python run_libe_forces.py --comms local --nworkers N - -To run with MPI comms using one manager and `N-1` workers: - - mpirun -np N python run_libe_forces.py - -Application parameters can be adjusted in the file `run_libe_forces.py`. - -To remove output before the next run: - - ./cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py deleted file mode 100644 index aceed59e1..000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python - -""" -This example is based on the simple forces test. The default number of -particles is increased considerably to give perceptible time on the GPUs when -live-checking GPU usage. - -The forces.c application should be built by setting the GPU preprocessor condition -in addition to openMP GPU flags for the given system. 
See examples in -../forces_app/build_forces.sh. We recommend running forces.x standalone first -and confirm it is running on the GPU (this is given clearly in the output). - -An alternative variable resource generator is available (search 'var resources' -in this script and uncomment relevant lines). -""" - -import os -import sys - -import numpy as np -from forces_simf import run_forces # Sim func from current dir - -from libensemble.executors import MPIExecutor - -# Fixed resources (one resource set per worker) -from libensemble.gen_funcs.sampling import uniform_random_sample as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args - -# Uncomment for var resources (checksum will change due to rng differences) -# from libensemble.gen_funcs.sampling import uniform_random_sample_with_variable_resources as gen_f - -# Parse number of workers, comms type, etc. from arguments -nworkers, is_manager, libE_specs, _ = parse_args() - -# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. -# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - -if not os.path.isfile(sim_app): - sys.exit("forces.x not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=sim_app, app_name="forces") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - # ("resource_sets", int) # Uncomment for var resources - ], - "user": { - "lb": np.array([50000]), # fewest particles (changing will change checksum) - "ub": np.array([100000]), # max particles (changing will change checksum) - "gen_batch_size": 8, - # "max_resource_sets": nworkers # Uncomment for var resources - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt - useful with var resources -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": 8} # changing will change checksum - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) - -# This is for configuration of this test (inc. lb/ub and sim_max values) -if is_manager: - if exit_criteria["sim_max"] == 8: - chksum = np.sum(H["energy"]) - assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" - print("Checksum passed") - else: - print("Run complete. 
A checksum has not been provided for the given sim_max") diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh deleted file mode 100644 index 78bc7cc90..000000000 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -#SBATCH -J libE_small_test -#SBATCH -A -#SBATCH -C gpu -#SBATCH --time 10 -#SBATCH --nodes 2 - -export MPICH_GPU_SUPPORT_ENABLED=1 -export SLURM_EXACT=1 -export SLURM_MEM_PER_NODE=0 - -python run_libe_forces.py --comms local --nworkers 8 From 32ac45fbec965d79bf14df0a2bfc477ba362093f Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:03:11 -0500 Subject: [PATCH 07/27] Rename forces_gpu_persis_gen to forces_gpu --- .../forces/{forces_gpu_persis_gen => forces_gpu}/cleanup.sh | 0 .../forces/{forces_gpu_persis_gen => forces_gpu}/forces_simf.py | 0 .../forces/{forces_gpu_persis_gen => forces_gpu}/readme.md | 0 .../{forces_gpu_persis_gen => forces_gpu}/run_libe_forces.py | 0 .../{forces_gpu_persis_gen => forces_gpu}/submit_perlmutter.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename libensemble/tests/scaling_tests/forces/{forces_gpu_persis_gen => forces_gpu}/cleanup.sh (100%) rename libensemble/tests/scaling_tests/forces/{forces_gpu_persis_gen => forces_gpu}/forces_simf.py (100%) rename libensemble/tests/scaling_tests/forces/{forces_gpu_persis_gen => forces_gpu}/readme.md (100%) rename libensemble/tests/scaling_tests/forces/{forces_gpu_persis_gen => forces_gpu}/run_libe_forces.py (100%) rename libensemble/tests/scaling_tests/forces/{forces_gpu_persis_gen => forces_gpu}/submit_perlmutter.sh (100%) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/cleanup.sh rename to libensemble/tests/scaling_tests/forces/forces_gpu/cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/forces_simf.py rename to libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/readme.md rename to libensemble/tests/scaling_tests/forces/forces_gpu/readme.md diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/run_libe_forces.py rename to libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_gpu_persis_gen/submit_perlmutter.sh rename to libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh From 1b4849eaa71828a201b11b1c0fdd8470260b5ea9 Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:14:04 
-0500 Subject: [PATCH 08/27] Improve dir exists warning --- libensemble/manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libensemble/manager.py b/libensemble/manager.py index e06160ca8..4c362f7bf 100644 --- a/libensemble/manager.py +++ b/libensemble/manager.py @@ -226,6 +226,7 @@ def __init__( raise ManagerException( "Manager errored on initialization", "Ensemble directory already existed and wasn't empty.", + "To reuse ensemble dir, set libE_specs['reuse_output_dir'] = True", e, ) From 3f959ae9d735b158db200c36b78ef36813a7c733 Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:19:14 -0500 Subject: [PATCH 09/27] Remove var resources lines in forces_gpu and clean scripts --- .../scaling_tests/forces/forces_gpu/forces_simf.py | 12 +++++------- .../forces/forces_gpu/run_libe_forces.py | 13 ++----------- .../forces/forces_simple/forces_simf.py | 9 ++++----- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py index cfdb126e0..c609c26af 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py @@ -1,6 +1,6 @@ import numpy as np -# To retrieve our MPI Executor and resources instances +# To retrieve our MPI Executor from libensemble.executors.executor import Executor # Optional status codes to display in libE_stats.txt for each gen or sim @@ -24,14 +24,13 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance and resources + # Retrieve our MPI Executor exctr = Executor.executor - # Submit our forces app for execution. Block until the task starts. + # Submit our forces app for execution. 
task = exctr.submit( app_name="forces", app_args=args, - # num_procs = 1, auto_assign_gpus=True, match_procs_to_gpus=True, ) @@ -54,9 +53,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): final_energy = np.nan calc_status = TASK_FAILED - # Define our output array, populate with energy reading - outspecs = sim_specs["out"] - output = np.zeros(1, dtype=outspecs) + # Define our output array, populate with energy reading + output = np.zeros(1, dtype=sim_specs["out"]) output["energy"] = final_energy # Return final information to worker, for reporting to manager diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py index db16b7367..bf1c0e919 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py @@ -27,14 +27,9 @@ from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor -from libensemble.tools import add_unique_random_streams, parse_args -from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs - -# Fixed resources (one resource set per worker) - persistent gen from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs -# Uncomment for var resources (checksum will change due to rng differences) -# from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample as gen_f if __name__ == "__main__": @@ -56,7 +51,6 @@ num_resource_sets=nsim_workers, sim_dirs_make=True, # resource_info = {"gpus_on_node": 4} # for mocking GPUs - # stats_fmt = {"show_resource_sets": True} # see resource sets in libE_stats.txt ) ensemble.sim_specs = SimSpecs( @@ -69,14 +63,11 @@ gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[("x", float, (1,)), - # ("resource_sets", int) # Uncomment for var resources - ], + out=[("x", float, (1,))], user={ "initial_batch_size": nsim_workers, "lb": np.array([50000]), # min particles "ub": np.array([100000]), # max particles - # "max_resource_sets": nsim_workers # Uncomment for var resources }, ) diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py index 3c75a356b..66e8a4eb6 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py @@ -1,6 +1,6 @@ import numpy as np -# To retrieve our MPI Executor instance +# To retrieve our MPI Executor from libensemble.executors.executor import Executor # Optional status codes to display in libE_stats.txt for each gen or sim @@ -16,10 +16,10 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance + # Retrieve our MPI Executor exctr = Executor.executor - # Submit our forces app for execution. Block until the task starts. + # Submit our forces app for execution. 
task = exctr.submit(app_name="forces", app_args=args) # Block until the task finishes @@ -38,8 +38,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): calc_status = TASK_FAILED # Define our output array, populate with energy reading - outspecs = sim_specs["out"] - output = np.zeros(1, dtype=outspecs) + output = np.zeros(1, dtype=sim_specs["out"]) output["energy"] = final_energy # Return final information to worker, for reporting to manager From b50343ce9a8c5e3c44046004990d73b193b61a1d Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:30:04 -0500 Subject: [PATCH 10/27] Update forces_gpu readme and submission script --- .../tests/scaling_tests/forces/forces_gpu/readme.md | 11 +++++++---- .../forces/forces_gpu/submit_perlmutter.sh | 7 +++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md index 96ba90428..7099ad971 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/readme.md @@ -1,7 +1,6 @@ ## Tutorial -This example is similar to that explained in the tutorial **Executor - Assign GPUs**, -but using a persistent generator. +This example is explained in the tutorial **Executor - Assign GPUs**. https://libensemble.readthedocs.io/en/develop/tutorials/forces_gpu_tutorial.html @@ -18,11 +17,15 @@ Then return here and run: python run_libe_forces.py --comms local --nworkers 5 +This will run libEnsemble with five workers; one for the persistent generator, and +four for forces simulations (so four GPUs are required). + By default, each run of forces will use one CPU and one GPU. The `forces.c` code can also be MPI parallel and will use one GPU for each CPU rank, assuming an even split of ranks -across nodes. The extra worker is used for the persistent generator. +across nodes. There must be enough GPUs per simulation worker (for a more dynamic example, +see `forces_gpu_var_resources`). 
-## Running test run_libe_forces.py +## Detailed instructions Naive Electrostatics Code Test diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh index 73b1fc39e..f7b74739b 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/submit_perlmutter.sh @@ -1,12 +1,11 @@ #!/bin/bash #SBATCH -J libE_small_test -#SBATCH -A m4272_g +#SBATCH -A #SBATCH -C gpu #SBATCH --time 10 #SBATCH --nodes 1 -# export MPICH_GPU_SUPPORT_ENABLED=1 -# export SLURM_EXACT=1 -# export SLURM_MEM_PER_NODE=0 +export MPICH_GPU_SUPPORT_ENABLED=1 +export SLURM_EXACT=1 python run_libe_forces.py --comms local --nworkers 5 From 24cd717c5940f718fbd6379a378f4548ceae8f72 Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:49:51 -0500 Subject: [PATCH 11/27] Add forces_gpu_var_resources test --- .../forces_gpu_var_resources/cleanup.sh | 1 + .../forces_gpu_var_resources/forces_simf.py | 53 +++++++++ .../forces/forces_gpu_var_resources/readme.md | 29 +++++ .../run_libe_forces.py | 103 ++++++++++++++++++ 4 files changed, 186 insertions(+) create mode 100755 libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh create mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py create mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md create mode 100644 libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh new file mode 100755 index 000000000..eaaa23635 --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/cleanup.sh @@ -0,0 +1 @@ +rm -r ensemble *.npy *.pickle ensemble.log lib*.txt diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py new file mode 100644 index 000000000..433977297 --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py @@ -0,0 +1,53 @@ +import numpy as np + +# To retrieve our MPI Executor +from libensemble.executors.executor import Executor + +# Optional status codes to display in libE_stats.txt for each gen or sim +from libensemble.message_numbers import TASK_FAILED, WORKER_DONE + +# Optional - to print GPU settings +from libensemble.tools.test_support import check_gpu_setting + + +def run_forces(H, persis_info, sim_specs, libE_info): + """Launches the forces MPI app and auto-assigns ranks and GPU resources + (based on generator output). + """ + + calc_status = 0 + + # Parse out num particles, from generator function + particles = str(int(H["x"][0][0])) + + # app arguments: num particles, timesteps, also using num particles as seed + args = particles + " " + str(10) + " " + particles + + # Retrieve our MPI Executor + exctr = Executor.executor + + # Submit our forces app for execution. 
+    task = exctr.submit(app_name="forces", app_args=args)
+
+    # Block until the task finishes
+    task.wait()
+
+    # Optional - prints GPU assignment (method and numbers)
+    check_gpu_setting(task, assert_setting=False, print_setting=True)
+
+    # Try loading final energy reading, set the sim's status
+    statfile = "forces.stat"
+    try:
+        data = np.loadtxt(statfile)
+        final_energy = data[-1]
+        calc_status = WORKER_DONE
+    except Exception:
+        final_energy = np.nan
+        calc_status = TASK_FAILED
+
+    # Define our output array, populate with energy reading
+    output = np.zeros(1, dtype=sim_specs["out"])
+    output["energy"] = final_energy
+
+    # Return final information to worker, for reporting to manager
+    return output, persis_info, calc_status
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md
new file mode 100644
index 000000000..86d7141ef
--- /dev/null
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/readme.md
@@ -0,0 +1,29 @@
+## Tutorial
+
+This example is referred to in the tutorial **Executor - Assign GPUs**.
+
+When the generator creates parameters for each simulation, it sets a number
+of GPUs required for the simulation. Resources are dynamically assigned to
+the simulation workers.
+
+https://libensemble.readthedocs.io/en/develop/tutorials/forces_gpu_tutorial.html
+
+## QuickStart
+
+Go to `forces_app` directory:
+
+    cd ../forces_app
+
+Compile **forces.x** using one of the GPU build lines in `build_forces.sh` or similar
+for your platform (these will include -DGPU)
+
+Then return here and run:
+
+    python run_libe_forces.py --comms local --nworkers 5
+
+This will run libEnsemble with five workers; one for the persistent generator, and
+four for forces simulations (so four GPUs are required).
+
+## Detailed instructions
+
+See ../forces_gpu.
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py
new file mode 100644
index 000000000..96f8ab1c7
--- /dev/null
+++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+"""
+This example is similar to the forces_gpu test.
+
+The forces.c application should be built by setting the GPU preprocessor condition
+(usually -DGPU) in addition to openMP GPU flags for the given system. See examples
+in ../forces_app/build_forces.sh. We recommend running forces.x standalone first
+and confirm it is running on the GPU (this is given clearly in the output).
+
+A number of GPUs is requested based on the number of particles (randomly chosen
+from the range for each simulation). For simplicity, the number of GPUs requested
+is based on a linear split of the range (lb to ub), rather than absolute particle
+count.
+
+To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You
+will compile forces without -DGPU option. It is recommended that the lb/ub for
+particle counts are reduced for CPU performance.
+""" + +import os +import sys + +import numpy as np +from forces_simf import run_forces # Sim func from current dir + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.executors import MPIExecutor +from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_with_var_gpus as gen_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +if __name__ == "__main__": + + # Initialize MPI Executor + exctr = MPIExecutor() + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") + + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + stats_fmt={"show_resource_sets": True}, # see resource sets in libE_stats.txt + # resource_info = {"gpus_on_node": 4}, # for mocking GPUs + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + out=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + out=[ + ("x", float, (1,)), + ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor. + ], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([50000]), # min particles + "ub": np.array([100000]), # max particles + "max_gpus": nsim_workers, + }, + ) + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) + + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + if ensemble.exit_criteria.sim_max == 8: + chksum = np.sum(ensemble.H["energy"]) + assert np.isclose(chksum, 96288744.35136001), f"energy check sum is {chksum}" + print("Checksum passed") + else: + print("Run complete. 
A checksum has not been provided for the given sim_max") From a5ea2e20fad9e9f91e494a3b49bf33b4dd50d257 Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 6 Sep 2023 14:50:40 -0500 Subject: [PATCH 12/27] Make forces scripts consistent --- .../tests/scaling_tests/forces/forces_gpu/forces_simf.py | 4 +--- .../scaling_tests/forces/forces_gpu/run_libe_forces.py | 3 --- .../tests/scaling_tests/forces/forces_simple/forces_simf.py | 6 ++---- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py index c609c26af..be84a5acb 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py @@ -41,10 +41,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): # Optional - prints GPU assignment (method and numbers) check_gpu_setting(task, assert_setting=False, print_setting=True) - # Stat file to check for bad runs - statfile = "forces.stat" - # Try loading final energy reading, set the sim's status + statfile = "forces.stat" try: data = np.loadtxt(statfile) final_energy = data[-1] diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py index bf1c0e919..f02ab15be 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py @@ -10,9 +10,6 @@ in ../forces_app/build_forces.sh. We recommend running forces.x standalone first and confirm it is running on the GPU (this is given clearly in the output). -An alternative variable resource generator is available (search 'var resources' -in this script and uncomment relevant lines). - To mock on a non-GPU system, uncomment the resource_info line in libE_specs. You will compile forces without -DGPU option. It is recommended that the lb/ub for particle counts are reduced for CPU performance. 
diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py index 66e8a4eb6..ff3231587 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py @@ -25,10 +25,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): # Block until the task finishes task.wait() - # Stat file to check for bad runs - statfile = "forces.stat" - # Try loading final energy reading, set the sim's status + statfile = "forces.stat" try: data = np.loadtxt(statfile) final_energy = data[-1] @@ -37,7 +35,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): final_energy = np.nan calc_status = TASK_FAILED - # Define our output array, populate with energy reading + # Define our output array, populate with energy reading output = np.zeros(1, dtype=sim_specs["out"]) output["energy"] = final_energy From 99ead25b9df36a26df8f6230cba8aa49c105d24d Mon Sep 17 00:00:00 2001 From: shudson Date: Thu, 7 Sep 2023 20:20:41 -0500 Subject: [PATCH 13/27] Fix up forces multi-task --- .../persistent_sampling_var_resources.py | 5 ++- .../forces/forces_multi_task/forces_simf.py | 8 ++-- .../forces_multi_task/run_libe_forces.py | 44 +++++++++---------- .../forces_multi_task/submit_perlmutter.sh | 7 ++- libensemble/tools/test_support.py | 7 ++- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/libensemble/gen_funcs/persistent_sampling_var_resources.py b/libensemble/gen_funcs/persistent_sampling_var_resources.py index 60f0a199d..f9d236bc0 100644 --- a/libensemble/gen_funcs/persistent_sampling_var_resources.py +++ b/libensemble/gen_funcs/persistent_sampling_var_resources.py @@ -182,11 +182,14 @@ def uniform_sample_diff_simulations(_, persis_info, gen_specs, libE_info): while tag not in [STOP_TAG, PERSIS_STOP]: H_o = np.zeros(b, dtype=gen_specs["out"]) H_o["x"] = rng.uniform(lb, ub, (b, n)) + nprocs = rng.integers(1, gen_specs["user"]["max_procs"] + 1, b) use_gpus = rng.choice([True, False], b) H_o["num_procs"] = nprocs H_o["num_gpus"] = np.where(use_gpus, nprocs, 0) - print(f"GEN created {b} sims requiring {nprocs} procs. Use GPUs {use_gpus}", flush=True) + H_o["app_type"] = np.where(use_gpus, "gpu_app", "cpu_app") + + print(f"\nGEN created {b} sims requiring {nprocs} procs. Use GPUs {use_gpus}", flush=True) tag, Work, calc_in = ps.send_recv(H_o) if hasattr(calc_in, "__len__"): diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py index 8f8b10851..a6a146aca 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py @@ -27,9 +27,11 @@ def run_forces(H, persis_info, sim_specs, libE_info): # Retrieve our MPI Executor instance and resources exctr = Executor.executor - # Submit our forces app for execution. Block until the task starts. + app_type = H["app_type"][0].decode() + + # Submit our forces app for execution. 
task = exctr.submit( - app_name="forces_gpu", + app_name=app_type, app_args=args, ) @@ -37,7 +39,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): task.wait() # Optional - prints GPU assignment (method and numbers) - check_gpu_setting(task, assert_setting=False, print_setting=True) + check_gpu_setting(task, assert_setting=False, print_setting=True, desc=app_type) # Stat file to check for bad runs statfile = "forces.stat" diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py index ba20944b9..7e92c1409 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py @@ -1,17 +1,16 @@ #!/usr/bin/env python """ -This example is based on the simple forces test. The default number of -particles is increased considerably to give perceptible time on the GPUs when -live-checking GPU usage. +This example runs two difference applications, one that uses only CPUs and one +that uses GPUs. Both uses a variable number of processors. The GPU application +uses one GPU per processor. As the generator creates simulations, it randomly +assigns between one and max_proc processors to each simulation, and also randomly +assigns which application is to be run. -The forces.c application should be built by setting the GPU preprocessor condition -in addition to openMP GPU flags for the given system. See examples in -../forces_app/build_forces.sh. We recommend running forces.x standalone first -and confirm it is running on the GPU (this is given clearly in the output). +The forces.c application should be compiled for the CPU to `forces_cpu.x`, and +for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`. -An alternative variable resource generator is available (search 'var resources' -in this script and uncomment relevant lines). +For compile lines, see examples in ../forces_app/build_forces.sh. """ import os @@ -26,9 +25,6 @@ from libensemble.libE import libE from libensemble.tools import add_unique_random_streams, parse_args -# Fixed resources (one resource set per worker) -# from libensemble.gen_funcs.sampling import uniform_random_sample as gen_f - # Parse number of workers, comms type, etc. 
from arguments nworkers, is_manager, libE_specs, _ = parse_args() @@ -43,21 +39,21 @@ exctr = MPIExecutor() # Register simulation executable with executor -sim_app1 = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x") -sim_app2 = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x") +cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x") +gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x") -if not os.path.isfile(sim_app1): - sys.exit(f"{sim_app1} not found - please build first in ../forces_app dir") -if not os.path.isfile(sim_app2): - sys.exit(f"{sim_app2} not found - please build first in ../forces_app dir") +if not os.path.isfile(cpu_app): + sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir") +if not os.path.isfile(gpu_app): + sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir") -exctr.register_app(full_path=sim_app1, app_name="forces_cpu") -exctr.register_app(full_path=sim_app2, app_name="forces_gpu") +exctr.register_app(full_path=cpu_app, app_name="cpu_app") +exctr.register_app(full_path=gpu_app, app_name="gpu_app") # State the sim_f, inputs, outputs sim_specs = { "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f + "in": ["x", "app_type"], # Name of input for sim_f "out": [("energy", float)], # Name, type of output from sim_f } @@ -70,6 +66,7 @@ ("x", float, (1,)), # Name, type and size of data from gen_f ("num_procs", int), ("num_gpus", int), + ("app_type", 'S10'), ], "user": { "lb": np.array([5000]), # fewest particles (changing will change checksum) @@ -77,7 +74,6 @@ "initial_batch_size": nsim_workers, "max_procs": (nsim_workers) // 2, # Any sim created can req. 1 worker up to max "multi_task": True, - # "max_resource_sets": nworkers # Uncomment for var resources }, } @@ -92,11 +88,11 @@ # Create and work inside separate per-simulation directories libE_specs["sim_dirs_make"] = True -# Uncomment to see resource sets in libE_stats.txt - useful with var resources +# Uncomment to see resource sets in libE_stats.txt # libE_specs["stats_fmt"] = {"show_resource_sets": True} # Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": nsim_workers * 2} # changing will change checksum +exit_criteria = {"sim_max": nsim_workers * 2} # Seed random streams for each worker, particularly for gen_f persis_info = add_unique_random_streams({}, nworkers + 1) diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh index 73b1fc39e..f7b74739b 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh +++ b/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh @@ -1,12 +1,11 @@ #!/bin/bash #SBATCH -J libE_small_test -#SBATCH -A m4272_g +#SBATCH -A #SBATCH -C gpu #SBATCH --time 10 #SBATCH --nodes 1 -# export MPICH_GPU_SUPPORT_ENABLED=1 -# export SLURM_EXACT=1 -# export SLURM_MEM_PER_NODE=0 +export MPICH_GPU_SUPPORT_ENABLED=1 +export SLURM_EXACT=1 python run_libe_forces.py --comms local --nworkers 5 diff --git a/libensemble/tools/test_support.py b/libensemble/tools/test_support.py index f883e791b..db0451a4c 100644 --- a/libensemble/tools/test_support.py +++ b/libensemble/tools/test_support.py @@ -84,7 +84,7 @@ def check_mpi_runner(task, exp, print_setting=False): assert ppn_option == runner_info["ppn"] -def check_gpu_setting(task, assert_setting=True, print_setting=False, resources=None): +def 
check_gpu_setting(task, assert_setting=True, print_setting=False, resources=None, desc=""): """Checks GPU run lines Note that this will check based platform_info or defaults for the MPI runner @@ -214,8 +214,11 @@ def check_gpu_setting(task, assert_setting=True, print_setting=False, resources= else: addon = f"(procs {num_procs}, per node {ppn})" + if desc: + desc += " " + if print_setting: - print(f"Worker {task.workerID}: GPU setting ({stype}): {gpu_setting} {addon}") + print(f"Worker {task.workerID}: {desc}GPU setting ({stype}): {gpu_setting} {addon}") if assert_setting: assert ( From ec9dcb30dea3669fe6d4e8d670d56ff17d71d5c2 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 12:13:05 -0500 Subject: [PATCH 14/27] Convert multi-task to OO interface --- .../forces_multi_task/run_libe_forces.py | 162 +++++++++--------- 1 file changed, 83 insertions(+), 79 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py index 7e92c1409..5523318b6 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py @@ -19,85 +19,89 @@ import numpy as np from forces_simf import run_forces # Sim func from current dir +from libensemble import Ensemble from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor from libensemble.gen_funcs.persistent_sampling_var_resources import uniform_sample_diff_simulations as gen_f -from libensemble.libE import libE -from libensemble.tools import add_unique_random_streams, parse_args - - -# Parse number of workers, comms type, etc. from arguments -nworkers, is_manager, libE_specs, _ = parse_args() - -nsim_workers = nworkers - 1 -libE_specs["num_resource_sets"] = nsim_workers # Persistent gen does not need resources - -# To test on system without GPUs - compile forces without -DGPU and mock GPUs with this line. -# libE_specs["resource_info"] = {"gpus_on_node": 4} - -# Initialize MPI Executor instance -exctr = MPIExecutor() - -# Register simulation executable with executor -cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x") -gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x") - -if not os.path.isfile(cpu_app): - sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir") -if not os.path.isfile(gpu_app): - sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir") - -exctr.register_app(full_path=cpu_app, app_name="cpu_app") -exctr.register_app(full_path=gpu_app, app_name="gpu_app") - -# State the sim_f, inputs, outputs -sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x", "app_type"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f -} - -# State the gen_f, inputs, outputs, additional parameters -gen_specs = { - "gen_f": gen_f, # Generator function - "in": [], # Generator input - "persis_in": ["sim_id"], # Just send something back to gen to get number of new points. 
- "out": [ - ("x", float, (1,)), # Name, type and size of data from gen_f - ("num_procs", int), - ("num_gpus", int), - ("app_type", 'S10'), - ], - "user": { - "lb": np.array([5000]), # fewest particles (changing will change checksum) - "ub": np.array([10000]), # max particles (changing will change checksum) - "initial_batch_size": nsim_workers, - "max_procs": (nsim_workers) // 2, # Any sim created can req. 1 worker up to max - "multi_task": True, - }, -} - -alloc_specs = { - "alloc_f": alloc_f, - "user": { - "give_all_with_same_priority": False, - "async_return": False, # False causes batch returns - }, -} - -# Create and work inside separate per-simulation directories -libE_specs["sim_dirs_make"] = True - -# Uncomment to see resource sets in libE_stats.txt -# libE_specs["stats_fmt"] = {"show_resource_sets": True} - -# Instruct libEnsemble to exit after this many simulations -exit_criteria = {"sim_max": nsim_workers * 2} - -# Seed random streams for each worker, particularly for gen_f -persis_info = add_unique_random_streams({}, nworkers + 1) - -# Launch libEnsemble -H, persis_info, flag = libE( - sim_specs, gen_specs, exit_criteria, persis_info=persis_info, alloc_specs=alloc_specs, libE_specs=libE_specs -) +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +if __name__ == "__main__": + + # Initialize MPI Executor instance + exctr = MPIExecutor() + + # Register simulation executable with executor + cpu_app = os.path.join(os.getcwd(), "../forces_app/forces_cpu.x") + gpu_app = os.path.join(os.getcwd(), "../forces_app/forces_gpu.x") + + if not os.path.isfile(cpu_app): + sys.exit(f"{cpu_app} not found - please build first in ../forces_app dir") + if not os.path.isfile(gpu_app): + sys.exit(f"{gpu_app} not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=cpu_app, app_name="cpu_app") + exctr.register_app(full_path=gpu_app, app_name="gpu_app") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True) + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + stats_fmt={"show_resource_sets": True}, # see resource sets in libE_stats.txt + # resource_info = {"gpus_on_node": 4}, # for mocking GPUs + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x", "app_type"], + out=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + out=[ + ("x", float, (1,)), + ("num_procs", int), # num_procs auto given to sim when use MPIExecutor + ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor + ("app_type", 'S10'), # select app type (cpu_app or gpu_app) + ], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([5000]), # min particles + "ub": np.array([10000]), # max particles + "max_procs": (nsim_workers) // 2, # Any sim created can req. 1 worker up to max + }, + ) + + # Starts one persistent generator. Simulated values are returned in batch. 
+ ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=nsim_workers * 2) + + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if change sim_max, nworkers, lb/ub etc... + if ensemble.exit_criteria.sim_max == 16: + chksum = np.sum(ensemble.H["energy"]) + assert np.isclose(chksum, -21935405.696289998), f"energy check sum is {chksum}" + print("Checksum passed") + else: + print("Run complete. A checksum has not been provided for the given sim_max") + From f758e8c0196cc3b4bd8b14f6ecd10c035bbacc92 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 12:14:43 -0500 Subject: [PATCH 15/27] Make app_type optional in uniform_sample_diff_simulations --- libensemble/gen_funcs/persistent_sampling_var_resources.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libensemble/gen_funcs/persistent_sampling_var_resources.py b/libensemble/gen_funcs/persistent_sampling_var_resources.py index f9d236bc0..af72512e6 100644 --- a/libensemble/gen_funcs/persistent_sampling_var_resources.py +++ b/libensemble/gen_funcs/persistent_sampling_var_resources.py @@ -187,7 +187,8 @@ def uniform_sample_diff_simulations(_, persis_info, gen_specs, libE_info): use_gpus = rng.choice([True, False], b) H_o["num_procs"] = nprocs H_o["num_gpus"] = np.where(use_gpus, nprocs, 0) - H_o["app_type"] = np.where(use_gpus, "gpu_app", "cpu_app") + if "app_type" in H_o.dtype.names: + H_o["app_type"] = np.where(use_gpus, "gpu_app", "cpu_app") print(f"\nGEN created {b} sims requiring {nprocs} procs. Use GPUs {use_gpus}", flush=True) From 681dec04dbaed82af6336cb229735698eaf05da6 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 12:44:50 -0500 Subject: [PATCH 16/27] Make multi-task tests more flexible --- .../test_GPU_variable_resources_multi_task.py | 2 +- .../forces_multi_task/run_libe_forces.py | 25 ++++++++++++++----- libensemble/tools/test_support.py | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py b/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py index 961be3a6f..59568c553 100644 --- a/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py +++ b/libensemble/tests/regression_tests/test_GPU_variable_resources_multi_task.py @@ -26,7 +26,7 @@ This test must be run with 9 or more workers (8 sim workers), in order to resource all works units. More generally: -((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_resource_sets] + ((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_procs] """ diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py index 5523318b6..ecefe4d8f 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py @@ -11,6 +11,15 @@ for the GPU (setting the GPU preprocessor condition) to `forces_gpu.x`. For compile lines, see examples in ../forces_app/build_forces.sh. 
+ +It is recommended to run this test such that: + ((nworkers - 1) - gpus_on_node) >= gen_specs["user"][max_procs] + +E.g, if running on one node with four GPUs, then use: + python run_libE_forces.py --comms local --nworkers 9 + +E.g, if running on one node with eight GPUs, then use: + python run_libE_forces.py --comms local --nworkers 17 """ import os @@ -69,7 +78,7 @@ ("x", float, (1,)), ("num_procs", int), # num_procs auto given to sim when use MPIExecutor ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor - ("app_type", 'S10'), # select app type (cpu_app or gpu_app) + ("app_type", "S10"), # select app type (cpu_app or gpu_app) ], user={ "initial_batch_size": nsim_workers, @@ -98,10 +107,14 @@ if ensemble.is_manager: # Note, this will change if change sim_max, nworkers, lb/ub etc... - if ensemble.exit_criteria.sim_max == 16: - chksum = np.sum(ensemble.H["energy"]) - assert np.isclose(chksum, -21935405.696289998), f"energy check sum is {chksum}" + chksum = np.sum(ensemble.H["energy"]) + print(f"Final energy checksum: {chksum}") + + exp_chksums = {16: -21935405.696289998, 32: -26563930.6356} + exp_chksum = exp_chksums.get(ensemble.exit_criteria.sim_max) + + if exp_chksum is not None: + assert np.isclose(chksum, exp_chksum), f"energy check sum is {chksum}" print("Checksum passed") else: - print("Run complete. A checksum has not been provided for the given sim_max") - + print("Run complete. An expected checksum has not been provided for the given sim_max") diff --git a/libensemble/tools/test_support.py b/libensemble/tools/test_support.py index db0451a4c..c455fcc7b 100644 --- a/libensemble/tools/test_support.py +++ b/libensemble/tools/test_support.py @@ -218,7 +218,7 @@ def check_gpu_setting(task, assert_setting=True, print_setting=False, resources= desc += " " if print_setting: - print(f"Worker {task.workerID}: {desc}GPU setting ({stype}): {gpu_setting} {addon}") + print(f"Worker {task.workerID}: {desc}GPU setting ({stype}): {gpu_setting} {addon}", flush=True) if assert_setting: assert ( From 2003646056b3867a87a52aee25228840d822cf31 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 14:25:46 -0500 Subject: [PATCH 17/27] Rename forces_multi_task to forces_multi_app --- docs/tutorials/executor_forces_tutorial.rst | 22 +++++++++++-------- .../cleanup.sh | 0 .../forces_simf.py | 0 .../run_libe_forces.py | 0 .../submit_perlmutter.sh | 0 5 files changed, 13 insertions(+), 9 deletions(-) rename libensemble/tests/scaling_tests/forces/{forces_multi_task => forces_multi_app}/cleanup.sh (100%) rename libensemble/tests/scaling_tests/forces/{forces_multi_task => forces_multi_app}/forces_simf.py (100%) rename libensemble/tests/scaling_tests/forces/{forces_multi_task => forces_multi_app}/run_libe_forces.py (100%) rename libensemble/tests/scaling_tests/forces/{forces_multi_task => forces_multi_app}/submit_perlmutter.sh (100%) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index f19b12525..01b4b40db 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -14,6 +14,9 @@ if the run was successful. This tutorial uses libEnsemble's :doc:`MPI Executor<../executor/mpi_executor>`, which automatically detects available MPI runners and resources. +This example also uses a persistent generator. This generator runs on a +worker throughout the ensemble, producing new simulation parameters as requested. 
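In outline, a persistent generator of this kind is a user function that sits in a
send/receive loop with the manager until told to stop. The following is a rough
sketch only, modeled on the ``persistent_sampling`` generators shown in the diffs
above (the name ``sketch_persistent_gen`` is illustrative and not part of the
library):

.. code-block:: python

    import numpy as np

    from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG
    from libensemble.tools.persistent_support import PersistentSupport


    def sketch_persistent_gen(_, persis_info, gen_specs, libE_info):
        """Keep producing random particle counts until the manager says stop."""
        ps = PersistentSupport(libE_info, EVAL_GEN_TAG)
        rng = persis_info["rand_stream"]
        user = gen_specs["user"]
        batch = user["initial_batch_size"]

        tag = None
        while tag not in [STOP_TAG, PERSIS_STOP]:
            H_o = np.zeros(batch, dtype=gen_specs["out"])
            H_o["x"] = rng.uniform(user["lb"], user["ub"], (batch, 1))
            tag, Work, calc_in = ps.send_recv(H_o)  # send points, wait for results
            if hasattr(calc_in, "__len__"):
                batch = len(calc_in)  # size next batch by how many sims returned

        return H_o, persis_info, FINISHED_PERSISTENT_GEN_TAG

The loop exits when the manager sends ``STOP_TAG`` or ``PERSIS_STOP`` (for example,
once ``sim_max`` is reached), which is what allows one worker to act as the
generator for the whole run.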
+ Getting Started --------------- @@ -35,9 +38,9 @@ Calling Script Complete scripts for this example can be found in the forces_simple_ directory. -Let's begin by writing our calling script to parameterize our simulation and -generation functions and call libEnsemble. Create a Python file called `run_libe_forces.py` -containing: +Let's begin by writing our calling script to specify our simulation and +generation functions and call libEnsemble. Create a Python file called +`run_libe_forces.py` containing: .. code-block:: python :linenos: @@ -70,10 +73,12 @@ it a memorable name. This Executor will later be used within our simulation function to launch the registered app. Next define the :ref:`sim_specs` and -:ref:`gen_specs` data structures. Recall that these -are used to specify to libEnsemble what user functions and input/output fields to +:ref:`gen_specs`. Recall that these are used to specify +to libEnsemble what user functions and input/output fields to expect, and also to parameterize function instances without hard-coding: +****************************update to add alloc - import function / parameterize and explain breifly as possible what it means. + .. code-block:: python :linenos: @@ -100,10 +105,9 @@ Our generation function will generate random numbers of particles (between the ``"lb"`` and ``"ub"`` bounds) for our simulation function to evaluate via our registered application. -The following additional instructs libEnsemble's workers -to each create and work within a separate directory each time they call a simulation -function. This helps organize output and also helps prevents workers from overwriting -previous results: +The following line instructs libEnsemble's workers to each create and work within +a separate directory each time they call a simulation function. This helps +organize output and also helps prevents workers from overwriting previous results: .. 
code-block:: python :linenos: diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_multi_app/cleanup.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/cleanup.sh rename to libensemble/tests/scaling_tests/forces/forces_multi_app/cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/forces_simf.py rename to libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/run_libe_forces.py rename to libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh b/libensemble/tests/scaling_tests/forces/forces_multi_app/submit_perlmutter.sh similarity index 100% rename from libensemble/tests/scaling_tests/forces/forces_multi_task/submit_perlmutter.sh rename to libensemble/tests/scaling_tests/forces/forces_multi_app/submit_perlmutter.sh From d06743179869b5f06f7609e709d789943bb4c111 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 16:27:52 -0500 Subject: [PATCH 18/27] Update forces tests to use new executor transfer --- .../tests/scaling_tests/forces/forces_adv/forces_simf.py | 5 ++--- .../tests/scaling_tests/forces/forces_gpu/forces_simf.py | 5 +---- .../scaling_tests/forces/forces_gpu/run_libe_forces.py | 2 +- .../forces/forces_gpu_var_resources/forces_simf.py | 5 +---- .../forces/forces_gpu_var_resources/run_libe_forces.py | 2 +- .../scaling_tests/forces/forces_multi_app/forces_simf.py | 7 ++----- .../forces/forces_multi_app/run_libe_forces.py | 2 +- .../scaling_tests/forces/forces_simple/forces_simf.py | 5 +---- .../scaling_tests/forces/forces_simple/run_libe_forces.py | 2 +- 9 files changed, 11 insertions(+), 24 deletions(-) diff --git a/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py index 39baa24ef..2348aac50 100644 --- a/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_adv/forces_simf.py @@ -2,8 +2,6 @@ import time import numpy as np - -from libensemble.executors.executor import Executor from libensemble.message_numbers import TASK_FAILED, WORKER_DONE, WORKER_KILL MAX_SEED = 32767 @@ -70,7 +68,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): sim_particles = perturb(sim_particles, seed, particle_variance) print(f"seed: {seed} particles: {sim_particles}") - exctr = Executor.executor # Get Executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] args = str(int(sim_particles)) + " " + str(sim_timesteps) + " " + str(seed) + " " + str(kill_rate) # task = exctr.submit( app_name="forces", num_procs=cores, app_args=args, stdout="out.txt", stderr="err.txt") diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py index be84a5acb..c65cc8c5a 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py +++ 
b/libensemble/tests/scaling_tests/forces/forces_gpu/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -25,7 +22,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): args = particles + " " + str(10) + " " + particles # Retrieve our MPI Executor - exctr = Executor.executor + exctr = libE_info["executor"] # Submit our forces app for execution. task = exctr.submit( diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py index f02ab15be..2cd7bdebc 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py @@ -40,7 +40,7 @@ exctr.register_app(full_path=sim_app, app_name="forces") # Parse number of workers, comms type, etc. from arguments - ensemble = Ensemble(parse_args=True) + ensemble = Ensemble(parse_args=True, executor=exctr) nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator # Persistent gen does not need resources diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py index 433977297..d79af981c 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -24,7 +21,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): args = particles + " " + str(10) + " " + particles # Retrieve our MPI Executor - exctr = Executor.executor + exctr = libE_info["executor"] # Submit our forces app for execution. task = exctr.submit(app_name="forces", app_args=args) diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py index 96f8ab1c7..777d06d91 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py @@ -43,7 +43,7 @@ exctr.register_app(full_path=sim_app, app_name="forces") # Parse number of workers, comms type, etc. 
from arguments - ensemble = Ensemble(parse_args=True) + ensemble = Ensemble(parse_args=True, executor=exctr) nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator # Persistent gen does not need resources diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py index a6a146aca..bebc74817 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor and resources instances -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -24,8 +21,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance and resources - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] app_type = H["app_type"][0].decode() diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py index ecefe4d8f..2da287986 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py @@ -53,7 +53,7 @@ exctr.register_app(full_path=gpu_app, app_name="gpu_app") # Parse number of workers, comms type, etc. from arguments - ensemble = Ensemble(parse_args=True) + ensemble = Ensemble(parse_args=True, executor=exctr) nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator # Persistent gen does not need resources diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py index ff3231587..440f99ff6 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py @@ -1,8 +1,5 @@ import numpy as np -# To retrieve our MPI Executor -from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import TASK_FAILED, WORKER_DONE @@ -17,7 +14,7 @@ def run_forces(H, persis_info, sim_specs, libE_info): args = particles + " " + str(10) + " " + particles # Retrieve our MPI Executor - exctr = Executor.executor + exctr = libE_info["executor"] # Submit our forces app for execution. task = exctr.submit(app_name="forces", app_args=args) diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py index fdfc8cfe6..10fe0545a 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py @@ -23,7 +23,7 @@ exctr.register_app(full_path=sim_app, app_name="forces") # Parse number of workers, comms type, etc. 
from arguments - ensemble = Ensemble(parse_args=True) + ensemble = Ensemble(parse_args=True, executor=exctr) nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator # Persistent gen does not need resources From 40fca1651700c219ad36120610e440eb101706d2 Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 18:31:20 -0500 Subject: [PATCH 19/27] Update forces tutorial for OO and persistent gen --- docs/tutorials/executor_forces_tutorial.rst | 207 ++++++++++++-------- 1 file changed, 126 insertions(+), 81 deletions(-) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index 01b4b40db..2104ad0af 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -9,7 +9,7 @@ functions using the :doc:`executor<../executor/overview>`. This tutorial's calling script registers a compiled executable that simulates electrostatic forces between a collection of particles. The simulator function launches instances of this executable and reads output files to determine -if the run was successful. +the result. This tutorial uses libEnsemble's :doc:`MPI Executor<../executor/mpi_executor>`, which automatically detects available MPI runners and resources. @@ -44,27 +44,36 @@ generation functions and call libEnsemble. Create a Python file called .. code-block:: python :linenos: - :emphasize-lines: 15,19 + :emphasize-lines: 17,25, 28 #!/usr/bin/env python import os + import sys + import numpy as np from forces_simf import run_forces # Sim func from current dir - from libensemble.libE import libE - from libensemble.gen_funcs.sampling import uniform_random_sample - from libensemble.tools import parse_args, add_unique_random_streams + from libensemble import Ensemble + from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f from libensemble.executors import MPIExecutor + from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f + from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + if __name__ == "__main__": + + # Initialize MPI Executor + exctr = MPIExecutor() - # Parse number of workers, comms type, etc. from arguments - nworkers, is_manager, libE_specs, _ = parse_args() + # Register simulation executable with executor + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - # Initialize MPI Executor instance - exctr = MPIExecutor() + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") - # Register simulation executable with executor - sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") - exctr.register_app(full_path=sim_app, app_name="forces") + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True, executor=exctr) On line 15, we instantiate our :doc:`MPI Executor<../executor/mpi_executor>`. @@ -72,64 +81,92 @@ Registering an application is as easy as providing the full file-path and giving it a memorable name. This Executor will later be used within our simulation function to launch the registered app. -Next define the :ref:`sim_specs` and -:ref:`gen_specs`. Recall that these are used to specify -to libEnsemble what user functions and input/output fields to -expect, and also to parameterize function instances without hard-coding: +On line 22, we initialize the ensemble. 
The :meth:`parse_args()` +is used to read `comms` and `nworkers` from the command line. This sets +the respective `libE_specs` options. -****************************update to add alloc - import function / parameterize and explain breifly as possible what it means. +Next we add basic configuration for the ensemble. As one worker will run a persistent +generator that will not need additinal computing resources, we calculate the number +of workers that need resources to run simulations. We also set `sim_dirs_make` +so that a directory is created for each simulation. This helps organize output and +also helps prevents workers from overwriting previous results. .. code-block:: python - :linenos: + :linenos: + :lineno-start: 30 - # State the sim_f, inputs, outputs - sim_specs = { - "sim_f": run_forces, # sim_f, imported above - "in": ["x"], # Name of input for sim_f - "out": [("energy", float)], # Name, type of output from sim_f - } - - # State the gen_f, inputs, outputs, additional parameters - gen_specs = { - "gen_f": uniform_random_sample, # Generator function - "in": [], # Generator input - "out": [("x", float, (1,))], # Name, type, and size of data from gen_f - "user": { - "lb": np.array([1000]), # User parameters for the gen_f - "ub": np.array([3000]), - "gen_batch_size": 8, - }, - } + nsim_workers = ensemble.nworkers - 1 # One worker is for persistent generator -Our generation function will generate random numbers of particles (between -the ``"lb"`` and ``"ub"`` bounds) for our simulation function to evaluate via our -registered application. + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + num_resource_sets=nsim_workers, + sim_dirs_make=True, + ) -The following line instructs libEnsemble's workers to each create and work within -a separate directory each time they call a simulation function. This helps -organize output and also helps prevents workers from overwriting previous results: +Next we define the :ref:`sim_specs` and +:ref:`gen_specs`. Recall that these are used to specify +to libEnsemble what user functions and input/output fields to +expect, and also to parameterize user functions: .. code-block:: python - :linenos: + :linenos: + :lineno-start: 38 + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + inputs=["x"], + out=[("energy", float)], + ) + + ensemble.gen_specs = GenSpecs( + gen_f=gen_f, + inputs=[], # No input when start persistent generator + persis_in=["sim_id"], # Return sim_ids of evaluated points to generator + out=[("x", float, (1,))], + user={ + "initial_batch_size": nsim_workers, + "lb": np.array([1000]), # min particles + "ub": np.array([3000]), # max particles + }, + ) + +We also configure an allocation function, which starts the one persistent +generator and farms out the simulations. We also tell it to wait for all +simulations to return their results, before generating more parameters. + +.. code-block:: python + :linenos: + :lineno-start: 56 + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) - # Create and work inside separate per-simulation directories - libE_specs["sim_dirs_make"] = True +Now we set :ref:`exit_criteria` to +exit after running eight simulations. 
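Other stopping conditions can be combined in the same way; for instance (an
illustrative variant only, not part of this tutorial's run, assuming the
``ExitCriteria`` fields available in ``libensemble.specs``):

.. code-block:: python

    # Stop after 8 simulations or 300 seconds of wallclock time, whichever comes first
    ensemble.exit_criteria = ExitCriteria(sim_max=8, wallclock_max=300)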
-After configuring :ref:`persis_info` and -:ref:`exit_criteria`, we initialize libEnsemble -by calling the primary :doc:`libE()<../libe_module>` routine: +We also give each worker a seeded random stream, via the +:ref:`persis_info` option. +These can be used for random number generation if required. + +Finally we :doc:`run<../libe_module>` the ensemble. .. code-block:: python :linenos: + :lineno-start: 64 - # Instruct libEnsemble to exit after this many simulations - exit_criteria = {"sim_max": 8} + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) - # Seed random streams for each worker, particularly for gen_f - persis_info = add_unique_random_streams({}, nworkers + 1) + # Seed random streams for each worker, particularly for gen_f + ensemble.add_random_streams() - # Launch libEnsemble - H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) + # Run ensemble + ensemble.run() Exercise ^^^^^^^^ @@ -138,48 +175,56 @@ This may take some additional browsing of the docs to complete. Write an alternative Calling Script similar to above, but with the following differences: - 1. Add an additional worker directory so workers operate in ``/scratch/ensemble`` instead of the default current working directory. + 1. Set :ref:`libEnsemble's logger` to print debug messages. 2. Override the MPIExecutor's detected MPI runner with ``"openmpi"``. - 3. Set :ref:`libEnsemble's logger` to print debug messages. - 4. Use the :meth:`save_libE_output()` function to save the History array and ``persis_info`` to files after libEnsemble completes. + 3. Tell the allocation function to return results to the generator asychronously. + 4. Use the ensemble function :meth:`save_output()` to save the History array and ``persis_info`` to files after libEnsemble completes. -.. dropdown:: **Click Here for Solution** +.. dropdown:: **Click Here for Solutions** + + **Soln 1.** Debug logging gives lots of information. .. code-block:: python :linenos: + :lineno-start: 13 - #!/usr/bin/env python - import os - import numpy as np - from forces_simf import run_forces # Sim func from current dir - - from libensemble import logger - from libensemble.libE import libE - from libensemble.gen_funcs.sampling import uniform_random_sample - from libensemble.tools import parse_args, add_unique_random_streams, save_libE_output - from libensemble.executors import MPIExecutor + from libensemble import logger + logger.set_level("DEBUG") - # Parse number of workers, comms type, etc. from arguments - nworkers, is_manager, libE_specs, _ = parse_args() + **Soln 2.** This can also be specified via :attr:`platform_specs` option (see + libE_specs options, under Resources). - # Adjust logger level - logger.set_level("DEBUG") + .. code-block:: python + :linenos: + :lineno-start: 16 - # Initialize MPI Executor instance + # Initialize MPI Executor exctr = MPIExecutor(custom_info={"mpi_runner": "openmpi"}) - ... + **Soln 3.** Set ``async_return`` to *True*. - # Instruct workers to operate somewhere else on the filesystem - libE_specs["ensemble_dir_path"] = "/scratch/ensemble" + .. code-block:: python + :linenos: + :lineno-start: 56 + :emphasize-lines: 60 + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": True, + }, + ) - ... + **Soln 4.** This will save the output based on the name of the calling script. 
You + can give any string in place of ``__file__``. + + .. code-block:: python + :linenos: + :lineno-start: 72 - # Launch libEnsemble - H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, persis_info=persis_info, libE_specs=libE_specs) + ensemble.save_output(__file__) - if is_manager: - save_libE_output(H, persis_info, __file__, nworkers) Simulation Function ------------------- From 04dbc9cfd8c4a4914af9969c57ed500078838d5d Mon Sep 17 00:00:00 2001 From: shudson Date: Fri, 8 Sep 2023 18:31:40 -0500 Subject: [PATCH 20/27] Minor docs fixes --- .../scaling_tests/forces/forces_simple/run_libe_forces.py | 2 ++ libensemble/tools/parse_args.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py index 10fe0545a..ccfe7dff7 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py @@ -15,6 +15,8 @@ # Initialize MPI Executor exctr = MPIExecutor() + + # Register simulation executable with executor sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") if not os.path.isfile(sim_app): diff --git a/libensemble/tools/parse_args.py b/libensemble/tools/parse_args.py index 4c10cd67f..d5598bffa 100644 --- a/libensemble/tools/parse_args.py +++ b/libensemble/tools/parse_args.py @@ -158,6 +158,14 @@ def parse_args(): nworkers, is_manager, libE_specs, misc_args = parse_args() + Or for object interface, when creating the ensemble object. + + .. code-block:: python + + from libensemble import Ensemble + + ensemble = Ensemble(parse_args=True) + From the shell:: $ python calling_script --comms local --nworkers 4 From f0a31a09e775fc2e1aa21949d668ed21a246e55e Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 11 Sep 2023 11:33:56 -0500 Subject: [PATCH 21/27] Update forces tutorial sim func and output --- docs/tutorials/executor_forces_tutorial.rst | 58 ++++++++++++--------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index 2104ad0af..97d3b1af3 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -130,7 +130,7 @@ expect, and also to parameterize user functions: }, ) -We also configure an allocation function, which starts the one persistent +Next, configure an allocation function, which starts the one persistent generator and farms out the simulations. We also tell it to wait for all simulations to return their results, before generating more parameters. 
@@ -242,37 +242,34 @@ for starters: import numpy as np - # To retrieve our MPI Executor instance - from libensemble.executors.executor import Executor - # Optional status codes to display in libE_stats.txt for each gen or sim from libensemble.message_numbers import WORKER_DONE, TASK_FAILED - def run_forces(H, _, sim_specs): + def run_forces(H, persis_info, sim_specs, libE_info): calc_status = 0 # Parse out num particles, from generator function particles = str(int(H["x"][0][0])) - # num particles, timesteps, also using num particles as seed + # app arguments: num particles, timesteps, also using num particles as seed args = particles + " " + str(10) + " " + particles - # Retrieve our MPI Executor instance - exctr = Executor.executor + # Retrieve our MPI Executor + exctr = libE_info["executor"] - # Submit our forces app for execution + # Submit our forces app for execution. task = exctr.submit(app_name="forces", app_args=args) # Block until the task finishes task.wait() + We retrieve the generated number of particles from ``H`` and construct an argument string for our launched application. The particle count doubles up as a random number seed here. -We then retrieve our previously instantiated Executor from the class definition, -where it was automatically stored as an attribute. +We then retrieve our previously instantiated Executor. After submitting the "forces" app for execution, a :ref:`Task` object is returned that correlates with the launched app. @@ -292,11 +289,11 @@ to ``WORKER_DONE``. Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: .. code-block:: python :linenos: + :lineno-start: 25 - # Stat file to check for bad runs - statfile = "forces.stat" # Try loading final energy reading, set the sim's status + statfile = "forces.stat" try: data = np.loadtxt(statfile) final_energy = data[-1] @@ -305,13 +302,12 @@ to ``WORKER_DONE``. Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: final_energy = np.nan calc_status = TASK_FAILED - # Define our output array, populate with energy reading - outspecs = sim_specs["out"] - output = np.zeros(1, dtype=outspecs) - output["energy"][0] = final_energy + # Define our output array, populate with energy reading + output = np.zeros(1, dtype=sim_specs["out"]) + output["energy"] = final_energy # Return final information to worker, for reporting to manager - return output, calc_status + return output, persis_info, calc_status ``calc_status`` will be displayed in the ``libE_stats.txt`` log file. @@ -319,14 +315,19 @@ That's it! As can be seen, with libEnsemble, it's relatively easy to get started with launching applications. Behind the scenes, libEnsemble evaluates default MPI runners and available resources and divides them among the workers. +Running the example +------------------- + This completes our calling script and simulation function. Run libEnsemble with: .. code-block:: bash $ python run_libe_forces.py --comms local --nworkers [nworkers] -Output files---including ``forces.stat`` and files containing ``stdout`` and -``stderr`` content for each task---should appear in the current working +where ``nworkers`` is one more than the number of concurrent simulations. + +Output files (including ``forces.stat`` and files containing ``stdout`` and +``stderr`` content for each task) should appear in the current working directory. Overall workflow information should appear in ``libE_stats.txt`` and ``ensemble.log`` as usual. @@ -342,7 +343,7 @@ For example, my ``libE_stats.txt`` resembled:: Worker 1: sim_id 6: sim Time: 0.225 Start: ... 
End: ... Status: Completed Worker 2: sim_id 7: sim Time: 0.626 Start: ... End: ... Status: Completed -Where ``status`` is set based on the simulation function's returned ``calc_status``. +where ``status`` is set based on the simulation function's returned ``calc_status``. My ``ensemble.log`` (on a ten-core laptop) resembled:: @@ -374,13 +375,18 @@ My ``ensemble.log`` (on a ten-core laptop) resembled:: Note again that the ten cores were divided equally among two workers. -That concludes this tutorial. -Each of these example files can be found in the repository in `examples/tutorials/forces_with_executor`_. +That concludes this tutorial. Each of these example files can be found in the +repository in `examples/tutorials/forces_with_executor`_. For further experimentation, we recommend trying out this libEnsemble tutorial workflow on a cluster or multi-node system, since libEnsemble can also manage those resources and is developed to coordinate computations at huge scales. -See ref:`HPC platform guides` for more information. +See :ref:`HPC platform guides` for more information. + +See the :doc:`forces_gpu tutorial` for a similar workflow +including GPUs. + +.. and another,which shows how to dynamically assign resources to each simulation. Please feel free to contact us or open an issue on GitHub_ if this tutorial workflow doesn't work properly on your cluster or other compute resource. @@ -399,8 +405,10 @@ These may require additional browsing of the documentation to complete. .. dropdown:: **Click Here for Solution** + + Showing updated sections only (``---`` refers to snips where code is unchanged). + .. code-block:: python - :linenos: import time From 5952547a05c68a6cf279fd403fd4b412a5b3a013 Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 11 Sep 2023 12:43:30 -0500 Subject: [PATCH 22/27] Minor fixes for forces_simple tutorial --- docs/tutorials/executor_forces_tutorial.rst | 7 ++++--- .../scaling_tests/forces/forces_simple/forces_simf.py | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index 97d3b1af3..63f1ef0a5 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -243,10 +243,12 @@ for starters: import numpy as np # Optional status codes to display in libE_stats.txt for each gen or sim - from libensemble.message_numbers import WORKER_DONE, TASK_FAILED + from libensemble.message_numbers import TASK_FAILED, WORKER_DONE def run_forces(H, persis_info, sim_specs, libE_info): + """Runs the forces MPI application""" + calc_status = 0 # Parse out num particles, from generator function @@ -289,8 +291,7 @@ to ``WORKER_DONE``. Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: .. 
code-block:: python :linenos: - :lineno-start: 25 - + :lineno-start: 27 # Try loading final energy reading, set the sim's status statfile = "forces.stat" diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py index 440f99ff6..ecc2ad4b5 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/forces_simf.py @@ -5,6 +5,8 @@ def run_forces(H, persis_info, sim_specs, libE_info): + """Runs the forces MPI application""" + calc_status = 0 # Parse out num particles, from generator function From a1e6d1bf31728c72b11d07af9b391c9901274318 Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 11 Sep 2023 13:14:46 -0500 Subject: [PATCH 23/27] Update forces_gpu tutorial --- docs/tutorials/forces_gpu_tutorial.rst | 88 +++++++++++++------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/docs/tutorials/forces_gpu_tutorial.rst b/docs/tutorials/forces_gpu_tutorial.rst index 63792aacf..1d4a269ed 100644 --- a/docs/tutorials/forces_gpu_tutorial.rst +++ b/docs/tutorials/forces_gpu_tutorial.rst @@ -13,11 +13,12 @@ number of particles (allows live GPU usage to be viewed). In the first example, each worker will be using one GPU. The code will assign the GPUs available to each worker, using the appropriate method. This works on systems -using nVidia, AMD and intel GPUs. +using **Nvidia**, **AMD** and **Intel** GPUs without modifiying the scripts. Videos demonstrate running this example on Perlmutter_, Spock_, and Polaris_. *The first two videos are from an earlier release - you no longer need to change -particle count or modify the `forces.c` file).* +particle count or modify the `forces.c` file).*. Also, on Polaris, it is no +longer necessary to change the MPI runner. Simulation function ------------------- @@ -27,12 +28,12 @@ to the forces simple example are highlighted: .. code-block:: python :linenos: - :emphasize-lines: 29-30, 37 + :emphasize-lines: 31-32, 39 import numpy as np - # To retrieve our MPI Executor - from libensemble.executors.executor import Executor + # Optional status codes to display in libE_stats.txt for each gen or sim + from libensemble.message_numbers import TASK_FAILED, WORKER_DONE # Optional - to print GPU settings from libensemble.tools.test_support import check_gpu_setting @@ -44,6 +45,8 @@ to the forces simple example are highlighted: Assigns one MPI rank to each GPU assigned to the worker. """ + calc_status = 0 + # Parse out num particles, from generator function particles = str(int(H["x"][0][0])) @@ -51,7 +54,7 @@ to the forces simple example are highlighted: args = particles + " " + str(10) + " " + particles # Retrieve our MPI Executor - exctr = Executor.executor + exctr = libE_info["executor"] # Submit our forces app for execution. 
task = exctr.submit( @@ -67,29 +70,36 @@ to the forces simple example are highlighted: # Optional - prints GPU assignment (method and numbers) check_gpu_setting(task, assert_setting=False, print_setting=True) - # Stat file to check for bad runs + # Try loading final energy reading, set the sim's status statfile = "forces.stat" + try: + data = np.loadtxt(statfile) + final_energy = data[-1] + calc_status = WORKER_DONE + except Exception: + final_energy = np.nan + calc_status = TASK_FAILED - # Read final energy - data = np.loadtxt(statfile) - final_energy = data[-1] - - # Define our output array, populate with energy reading + # Define our output array, populate with energy reading output = np.zeros(1, dtype=sim_specs["out"]) - output["energy"][0] = final_energy + output["energy"] = final_energy + + # Return final information to worker, for reporting to manager + return output, persis_info, calc_status + +Lines 31-32 tell the executor to use the GPUs assigned to this worker, and +to match processors (MPI ranks) to GPUs. - return output +The user can also set ``num_procs`` and ``num_gpus`` in the generator as in +the `forces_gpu_var_resources`_ example, and skip lines 31-32. -Line 37 simply prints out how the GPUs were assigned. If this is not as desired, +Line 37 simply prints out how the GPUs were assigned. If this is not as expected, a :attr:`platform_specs` *libE_specs* option can be provided in the calling script. Alternatively, for known systems, the LIBE_PLATFORM environment variable can be set. -The user can also set ``num_procs`` and ``num_gpus`` in the generator as in -the `test_GPU_variable_resources.py`_ example. - -While this is sufficient for many users, note that it is possible to query +While this is sufficient for many/most users, note that it is possible to query the resources assigned to *this* worker (nodes and partitions of nodes), and use this information however you want. @@ -206,15 +216,11 @@ Running the example ------------------- As an example, if you have been allocated two nodes, each with four GPUs, then assign -eight workers. For example:: +nine workers (the extra worker runs the persistent generator). - python run_libe_forces.py --comms local --nworkers 8 +For example:: -Note that if you are running one persistent generator that does not require -resources, then assign nine workers and fix the number of *resource_sets* in -your calling script:: - - libE_specs["num_resource_sets"] = 8 + python run_libe_forces.py --comms local --nworkers 9 See :ref:`zero resource workers` for more ways to express this. @@ -228,20 +234,13 @@ forces run. Varying resources ----------------- -The same code can be used when varying worker resources. In this case, you may -add an integer field called ``resource_sets`` as a ``gen_specs["out"]`` in your -calling script. - -In the generator function, assign the ``resource_sets`` field of -:ref:`H` for each point generated. For example -if a larger simulation requires two MPI tasks (and two GPUs), set the ``resource_sets`` -field to *2* for that sim_id in the generator function. - -The calling script run_libe_forces.py_ contains alternative commented-out lines for -a variable resource example. Search for "Uncomment for var resources" +A variant of this example where you may specify any number of processors +and GPUs for each simulation is given in the `forces_gpu_var_resources`_ example. -In this case, the simulator function will work unmodified, assigning one CPU processor -and one GPU to each MPI rank. 
+In this example, when simulations are parameterized in the generator function, +the ``gen_specs["out"]`` field ``num_gpus`` is set for each simulation (based +on the number of particles). These values will automatically be used for each +simulation (they do not need to be passed as a ``sim_specs["in"]``). Further guidance on varying the resources assigned to workers can be found under the :doc:`resource manager<../resource_manager/resources_index>` section. @@ -250,7 +249,8 @@ Checking GPU usage ------------------ The output of `forces.x` will say if it has run on the host or device. When running -libEnsemble, this can be found under the ``ensemble`` directory. +libEnsemble, this can be found in the simulation directories (under the ``ensemble`` +directory). You can check you are running forces on the GPUs as expected by using profiling tools and/or by using a monitoring utility. For NVIDIA GPUs, for example, the **Nsight** profiler is @@ -295,12 +295,10 @@ that runs 8 workers on 2 nodes: export MPICH_GPU_SUPPORT_ENABLED=1 export SLURM_EXACT=1 - export SLURM_MEM_PER_NODE=0 - python run_libe_forces.py --comms local --nworkers 8 + python run_libe_forces.py --comms local --nworkers 9 -where ``SLURM_EXACT`` and ``SLURM_MEM_PER_NODE`` are set to prevent -resource conflicts on each node. +where ``SLURM_EXACT`` is set to help prevent resource conflicts on each node. .. _forces_gpu: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu .. _forces.c: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_app/forces.c @@ -309,4 +307,4 @@ resource conflicts on each node. .. _Spock: https://www.youtube.com/watch?v=XHXcslDORjU .. _Polaris: https://youtu.be/Ff0dYYLQzoU .. _run_libe_forces.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py -.. _test_GPU_variable_resources.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/regression_tests/test_GPU_variable_resources.py +.. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py From 6bb2667b3f32dc9ba9d0db2fe0140ffd1cf9cc58 Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 11 Sep 2023 13:51:46 -0500 Subject: [PATCH 24/27] Ref. multi-app example in forces_gpu tutorial --- docs/tutorials/forces_gpu_tutorial.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/tutorials/forces_gpu_tutorial.rst b/docs/tutorials/forces_gpu_tutorial.rst index 1d4a269ed..27066ad58 100644 --- a/docs/tutorials/forces_gpu_tutorial.rst +++ b/docs/tutorials/forces_gpu_tutorial.rst @@ -245,6 +245,14 @@ simulation (they do not need to be passed as a ``sim_specs["in"]``). Further guidance on varying the resources assigned to workers can be found under the :doc:`resource manager<../resource_manager/resources_index>` section. +Multiple Applications +--------------------- + +Another variant of this example, forces_multi_app_, has two applications, one that +uses GPUs, and another that only uses CPUs. The dynamic resource management can +manage both types of resources and assign these to the same nodes concurrently, for +maximum efficiency. + Checking GPU usage ------------------ @@ -308,3 +316,5 @@ where ``SLURM_EXACT`` is set to help prevent resource conflicts on each node. .. _Polaris: https://youtu.be/Ff0dYYLQzoU .. 
_run_libe_forces.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py .. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py +.. _forces_multi_app: //github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +.. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py From d9f8f11e7a6a567bd8d28f86be17aa3265b9dfa6 Mon Sep 17 00:00:00 2001 From: shudson Date: Mon, 11 Sep 2023 14:00:12 -0500 Subject: [PATCH 25/27] Fix multiapp link --- docs/tutorials/forces_gpu_tutorial.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/tutorials/forces_gpu_tutorial.rst b/docs/tutorials/forces_gpu_tutorial.rst index 27066ad58..91429044b 100644 --- a/docs/tutorials/forces_gpu_tutorial.rst +++ b/docs/tutorials/forces_gpu_tutorial.rst @@ -13,7 +13,7 @@ number of particles (allows live GPU usage to be viewed). In the first example, each worker will be using one GPU. The code will assign the GPUs available to each worker, using the appropriate method. This works on systems -using **Nvidia**, **AMD** and **Intel** GPUs without modifiying the scripts. +using **Nvidia**, **AMD**, and **Intel** GPUs without modifying the scripts. Videos demonstrate running this example on Perlmutter_, Spock_, and Polaris_. *The first two videos are from an earlier release - you no longer need to change @@ -24,7 +24,7 @@ Simulation function ------------------- The ``sim_f`` (``forces_simf.py``) is as follows. The lines that are different -to the forces simple example are highlighted: +to the simple forces example are highlighted: .. code-block:: python :linenos: @@ -163,7 +163,7 @@ and use this information however you want. return output - The above code will assign a GPU to each worker on CUDA capable systems, + The above code will assign a GPU to each worker on CUDA-capable systems, so long as the number of workers is chosen to fit the resources. If you want to have one rank with multiple GPUs, then change source lines 30/31 @@ -316,5 +316,4 @@ where ``SLURM_EXACT`` is set to help prevent resource conflicts on each node. .. _Polaris: https://youtu.be/Ff0dYYLQzoU .. _run_libe_forces.py: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py .. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py -.. _forces_multi_app: //github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py -.. _forces_gpu_var_resources: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py +.. 
_forces_multi_app: https://github.com/Libensemble/libensemble/blob/develop/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py From 2adf63226aa9b4033b532c6c79d40b07e248b569 Mon Sep 17 00:00:00 2001 From: shudson Date: Tue, 12 Sep 2023 18:30:14 -0500 Subject: [PATCH 26/27] Forces tut: Update example output --- docs/tutorials/executor_forces_tutorial.rst | 103 +++++++++++--------- 1 file changed, 58 insertions(+), 45 deletions(-) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index 63f1ef0a5..2ff30d7e5 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -206,7 +206,7 @@ Write an alternative Calling Script similar to above, but with the following dif .. code-block:: python :linenos: :lineno-start: 56 - :emphasize-lines: 60 + :emphasize-lines: 5 # Starts one persistent generator. Simulated values are returned in batch. ensemble.alloc_specs = AllocSpecs( @@ -332,49 +332,62 @@ Output files (including ``forces.stat`` and files containing ``stdout`` and directory. Overall workflow information should appear in ``libE_stats.txt`` and ``ensemble.log`` as usual. -For example, my ``libE_stats.txt`` resembled:: - - Worker 1: Gen no 1: gen Time: 0.001 Start: ... End: ... Status: Not set - Worker 1: sim_id 0: sim Time: 0.227 Start: ... End: ... Status: Completed - Worker 2: sim_id 1: sim Time: 0.426 Start: ... End: ... Status: Completed - Worker 1: sim_id 2: sim Time: 0.627 Start: ... End: ... Status: Completed - Worker 2: sim_id 3: sim Time: 0.225 Start: ... End: ... Status: Completed - Worker 1: sim_id 4: sim Time: 0.224 Start: ... End: ... Status: Completed - Worker 2: sim_id 5: sim Time: 0.625 Start: ... End: ... Status: Completed - Worker 1: sim_id 6: sim Time: 0.225 Start: ... End: ... Status: Completed - Worker 2: sim_id 7: sim Time: 0.626 Start: ... End: ... Status: Completed - -where ``status`` is set based on the simulation function's returned ``calc_status``. - -My ``ensemble.log`` (on a ten-core laptop) resembled:: - - [0] ... libensemble.libE (INFO): Logger initializing: [workerID] precedes each line. [0] = Manager - [0] ... libensemble.libE (INFO): libE version v0.9.0 - [0] ... libensemble.manager (INFO): Manager initiated on node my_laptop - [0] ... libensemble.manager (INFO): Manager exit_criteria: {"sim_max": 8} - [1] ... libensemble.worker (INFO): Worker 1 initiated on node my_laptop - [2] ... libensemble.worker (INFO): Worker 2 initiated on node my_laptop - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_0: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2023 10 2023 - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_0: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2900 10 2900 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_0 finished with errcode 0 (FINISHED) - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_1: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1288 10 1288 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_0 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_1: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2897 10 2897 - [1] ... 
libensemble.executors.executor (INFO): Task libe_task_forces_worker1_1 finished with errcode 0 (FINISHED) - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_2: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1623 10 1623 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_1 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_2: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1846 10 1846 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_2 finished with errcode 0 (FINISHED) - [1] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker1_3: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 2655 10 2655 - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_2 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_3: mpirun -hosts my_laptop -np 5 --ppn 5 /Users/.../forces.x 1818 10 1818 - [1] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker1_3 finished with errcode 0 (FINISHED) - [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_3 finished with errcode 0 (FINISHED) - [0] ... libensemble.manager (INFO): Term test tripped: sim_max - [0] ... libensemble.manager (INFO): Term test tripped: sim_max - [0] ... libensemble.libE (INFO): Manager total time: 3.939 - -Note again that the ten cores were divided equally among two workers. +.. dropdown:: **Example run / output** + + + For example, after running: + + .. code-block:: bash + + $ python run_libe_forces.py --comms local --nworkers 3 + + my ``libE_stats.txt`` resembled:: + + Manager : Starting ensemble at: 2023-09-12 18:12:08.517 + Worker 2: sim_id 0: sim Time: 0.205 Start: ... End: ... Status: Completed + Worker 3: sim_id 1: sim Time: 0.284 Start: ... End: ... Status: Completed + Worker 2: sim_id 2: sim Time: 0.117 Start: ... End: ... Status: Completed + Worker 3: sim_id 3: sim Time: 0.294 Start: ... End: ... Status: Completed + Worker 2: sim_id 4: sim Time: 0.124 Start: ... End: ... Status: Completed + Worker 3: sim_id 5: sim Time: 0.174 Start: ... End: ... Status: Completed + Worker 3: sim_id 7: sim Time: 0.135 Start: ... End: ... Status: Completed + Worker 2: sim_id 6: sim Time: 0.275 Start: ... End: ... Status: Completed + Worker 1: Gen no 1: gen Time: 1.038 Start: ... End: ... Status: Persis gen finished + Manager : Exiting ensemble at: 2023-09-12 18:12:09.565 Time Taken: 1.048 + + + where ``status`` is set based on the simulation function's returned ``calc_status``. + + My ``ensemble.log`` (on a four-core laptop) resembled:: + + [0] ... libensemble.libE (INFO): Logger initializing: [workerID] precedes each line. [0] = Manager + [0] ... libensemble.libE (INFO): libE version v0.10.2+dev + [0] ... libensemble.manager (INFO): Manager initiated on node shuds + [0] ... libensemble.manager (INFO): Manager exit_criteria: {'sim_max': 8} + [2] ... libensemble.worker (INFO): Worker 2 initiated on node shuds + [3] ... libensemble.worker (INFO): Worker 3 initiated on node shuds + [1] ... libensemble.worker (INFO): Worker 1 initiated on node shuds + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_0: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2023 10 2023 + [3] ... 
libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_0: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2900 10 2900 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_0 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_0 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_1: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1288 10 1288 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_1: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2897 10 2897 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_1 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_1 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_2: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1623 10 1623 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_2: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1846 10 1846 + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_2 finished with errcode 0 (FINISHED) + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_2 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker2_3: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 2655 10 2655 + [3] ... libensemble.executors.mpi_executor (INFO): Launching task libe_task_forces_worker3_3: mpirun -hosts shuds -np 2 --ppn 2 /home/.../forces_app/forces.x 1818 10 1818 + [3] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker3_3 finished with errcode 0 (FINISHED) + [2] ... libensemble.executors.executor (INFO): Task libe_task_forces_worker2_3 finished with errcode 0 (FINISHED) + [0] ... libensemble.manager (INFO): Term test tripped: sim_max + [0] ... libensemble.manager (INFO): Term test tripped: sim_max + [0] ... libensemble.libE (INFO): Manager total time: 1.043 + + Note again that the four cores were divided equally among two workers that run simulations. That concludes this tutorial. Each of these example files can be found in the repository in `examples/tutorials/forces_with_executor`_. @@ -397,7 +410,7 @@ Exercises These may require additional browsing of the documentation to complete. - 1. Adjust :meth:`submit()` to launch with four processes. + 1. Adjust :meth:`submit()` to launch with four processes. 2. Adjust ``submit()`` again so the app's ``stdout`` and ``stderr`` are written to ``stdout.txt`` and ``stderr.txt`` respectively. 3. Add a fourth argument to the args line to make 20% of simulations go bad. 4. 
Construct a ``while not task.finished:`` loop that periodically sleeps for a tenth of a second, calls :meth:`task.poll()`, From ca41e61864be2c7e6d6300075b7b83a7a88dad26 Mon Sep 17 00:00:00 2001 From: shudson Date: Wed, 13 Sep 2023 13:25:45 -0500 Subject: [PATCH 27/27] out to outputs & other minor fixes --- docs/tutorials/executor_forces_tutorial.rst | 39 +++++++++---------- .../forces/forces_gpu/run_libe_forces.py | 4 +- .../run_libe_forces.py | 4 +- .../forces_multi_app/run_libe_forces.py | 4 +- .../forces/forces_simple/run_libe_forces.py | 4 +- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/docs/tutorials/executor_forces_tutorial.rst b/docs/tutorials/executor_forces_tutorial.rst index 2ff30d7e5..a68da6ff0 100644 --- a/docs/tutorials/executor_forces_tutorial.rst +++ b/docs/tutorials/executor_forces_tutorial.rst @@ -81,15 +81,14 @@ Registering an application is as easy as providing the full file-path and giving it a memorable name. This Executor will later be used within our simulation function to launch the registered app. -On line 22, we initialize the ensemble. The :meth:`parse_args()` -is used to read `comms` and `nworkers` from the command line. This sets +On line 22, we initialize the ensemble. The :meth:`parse_args` +parameter is used to read `comms` and `nworkers` from the command line. This sets the respective `libE_specs` options. -Next we add basic configuration for the ensemble. As one worker will run a persistent -generator that will not need additinal computing resources, we calculate the number -of workers that need resources to run simulations. We also set `sim_dirs_make` -so that a directory is created for each simulation. This helps organize output and -also helps prevents workers from overwriting previous results. +Next, we add basic configuration for the ensemble. As one worker will run a persistent +generator, we calculate the number of workers that need resources to run simulations. +We also set `sim_dirs_make` so that a directory is created for each simulation. This +helps organize output and also helps prevent workers from overwriting previous results. .. code-block:: python :linenos: @@ -115,14 +114,14 @@ expect, and also to parameterize user functions: ensemble.sim_specs = SimSpecs( sim_f=run_forces, inputs=["x"], - out=[("energy", float)], + outputs=[("energy", float)], ) ensemble.gen_specs = GenSpecs( gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[("x", float, (1,))], + outputs=[("x", float, (1,))], user={ "initial_batch_size": nsim_workers, "lb": np.array([1000]), # min particles @@ -177,8 +176,8 @@ Write an alternative Calling Script similar to above, but with the following dif 1. Set :ref:`libEnsemble's logger` to print debug messages. 2. Override the MPIExecutor's detected MPI runner with ``"openmpi"``. - 3. Tell the allocation function to return results to the generator asychronously. - 4. Use the ensemble function :meth:`save_output()` to save the History array and ``persis_info`` to files after libEnsemble completes. + 3. Tell the allocation function to return results to the generator asynchronously. + 4. Use the ensemble function :meth:`save_output()` to save the History array and ``persis_info`` to files after libEnsemble completes. .. 
dropdown:: **Click Here for Solutions** @@ -191,8 +190,7 @@ Write an alternative Calling Script similar to above, but with the following dif from libensemble import logger logger.set_level("DEBUG") - **Soln 2.** This can also be specified via :attr:`platform_specs` option (see - libE_specs options, under Resources). + **Soln 2.** This can also be specified via :attr:`platform_specs` option. .. code-block:: python :linenos: @@ -271,7 +269,10 @@ We retrieve the generated number of particles from ``H`` and construct an argument string for our launched application. The particle count doubles up as a random number seed here. -We then retrieve our previously instantiated Executor. +We then retrieve our previously instantiated Executor. libEnsemble will use +the MPI runner detected (or provided by platform options). +As `num_procs` (or similar) is not specified, libEnsemble will assign the processors +available to this worker. After submitting the "forces" app for execution, a :ref:`Task` object is returned that correlates with the launched app. @@ -281,7 +282,7 @@ for the task to complete via ``task.wait()``. We can assume that afterward, any results are now available to parse. Our application produces a ``forces.stat`` file that contains either energy -computations for every time-step or a "kill" message if particles were lost, which +computations for every timestep or a "kill" message if particles were lost, which indicates a bad run - this can be ignored for now. To complete our simulation function, parse the last energy value from the output file into @@ -313,8 +314,7 @@ to ``WORKER_DONE``. Otherwise, send back ``NAN`` and a ``TASK_FAILED`` status: ``calc_status`` will be displayed in the ``libE_stats.txt`` log file. That's it! As can be seen, with libEnsemble, it's relatively easy to get started -with launching applications. Behind the scenes, libEnsemble evaluates default -MPI runners and available resources and divides them among the workers. +with launching applications. Running the example ------------------- @@ -398,9 +398,8 @@ those resources and is developed to coordinate computations at huge scales. See :ref:`HPC platform guides` for more information. See the :doc:`forces_gpu tutorial` for a similar workflow -including GPUs. - -.. and another,which shows how to dynamically assign resources to each simulation. +including GPUs. That tutorial also shows how to dynamically assign resources to +each simulation. Please feel free to contact us or open an issue on GitHub_ if this tutorial workflow doesn't work properly on your cluster or other compute resource. 
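The script updates that follow apply the same keyword rename (``out`` to ``outputs``) to each
forces calling script. As a rough sketch only (not taken from the patches themselves, with names
such as ``run_forces``, ``persistent_uniform``, and the particle bounds borrowed from the
tutorial's calling script), the updated specs pattern looks like this:

.. code-block:: python

    # Illustrative sketch, not part of this patch series. Executor registration,
    # alloc_specs, and libE_specs setup from the tutorial are omitted for brevity.
    import numpy as np

    from forces_simf import run_forces  # Sim func from current dir
    from libensemble import Ensemble
    from libensemble.gen_funcs.persistent_sampling import persistent_uniform as gen_f
    from libensemble.specs import ExitCriteria, GenSpecs, SimSpecs

    ensemble = Ensemble(parse_args=True)  # reads comms/nworkers from the command line
    nsim_workers = ensemble.nworkers - 1  # one worker runs the persistent generator

    # "outputs" (previously "out") names the fields each user function returns
    ensemble.sim_specs = SimSpecs(
        sim_f=run_forces,
        inputs=["x"],
        outputs=[("energy", float)],
    )

    ensemble.gen_specs = GenSpecs(
        gen_f=gen_f,
        inputs=[],                  # No input when starting persistent generator
        persis_in=["sim_id"],       # Return sim_ids of evaluated points to generator
        outputs=[("x", float, (1,))],
        user={
            "initial_batch_size": nsim_workers,
            "lb": np.array([1000]),  # min particles
            "ub": np.array([3000]),  # max particles
        },
    )

    ensemble.exit_criteria = ExitCriteria(sim_max=8)
    ensemble.run()

Only the keyword name changes; the (name, dtype, optional shape) field definitions are
exactly as before.
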
diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py index 2cd7bdebc..909963327 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu/run_libe_forces.py @@ -53,14 +53,14 @@ ensemble.sim_specs = SimSpecs( sim_f=run_forces, inputs=["x"], - out=[("energy", float)], + outputs=[("energy", float)], ) ensemble.gen_specs = GenSpecs( gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[("x", float, (1,))], + outputs=[("x", float, (1,))], user={ "initial_batch_size": nsim_workers, "lb": np.array([50000]), # min particles diff --git a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py index 777d06d91..a74f90146 100644 --- a/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_gpu_var_resources/run_libe_forces.py @@ -57,14 +57,14 @@ ensemble.sim_specs = SimSpecs( sim_f=run_forces, inputs=["x"], - out=[("energy", float)], + outputs=[("energy", float)], ) ensemble.gen_specs = GenSpecs( gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[ + outputs=[ ("x", float, (1,)), ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor. ], diff --git a/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py index 2da287986..176c3ad88 100644 --- a/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_multi_app/run_libe_forces.py @@ -67,14 +67,14 @@ ensemble.sim_specs = SimSpecs( sim_f=run_forces, inputs=["x", "app_type"], - out=[("energy", float)], + outputs=[("energy", float)], ) ensemble.gen_specs = GenSpecs( gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[ + outputs=[ ("x", float, (1,)), ("num_procs", int), # num_procs auto given to sim when use MPIExecutor ("num_gpus", int), # num_gpus auto given to sim when use MPIExecutor diff --git a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py index ccfe7dff7..066c5e050 100644 --- a/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py +++ b/libensemble/tests/scaling_tests/forces/forces_simple/run_libe_forces.py @@ -37,14 +37,14 @@ ensemble.sim_specs = SimSpecs( sim_f=run_forces, inputs=["x"], - out=[("energy", float)], + outputs=[("energy", float)], ) ensemble.gen_specs = GenSpecs( gen_f=gen_f, inputs=[], # No input when start persistent generator persis_in=["sim_id"], # Return sim_ids of evaluated points to generator - out=[("x", float, (1,))], + outputs=[("x", float, (1,))], user={ "initial_batch_size": nsim_workers, "lb": np.array([1000]), # min particles