ASK/TELL DEVELOP #1307
base: develop
Changes from 250 commits
@@ -0,0 +1,21 @@
Ask/Tell Generators
===================

**BETA - SUBJECT TO CHANGE**

These generators, implementations, methods, and subclasses are in BETA, and
may change in future releases.

The Generator interface is expected to roughly correspond with CAMPA's standard:
https://github.com/campa-consortium/generator_standard

libEnsemble is in the process of supporting generator objects that implement the following interface:

.. automodule:: generators
   :members: Generator LibensembleGenerator
   :undoc-members:

.. autoclass:: Generator
   :member-order: bysource
   :members:
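
For orientation, here is a rough sketch of the kind of user-defined generator this interface is aimed at. It is illustrative only: the class name and the exact ask/tell signatures are assumptions based on the description above, not confirmed API.

# Hedged sketch: a minimal ask/tell generator in the spirit of the interface
# described above. Names and signatures here are illustrative assumptions.
import numpy as np

class RandomSampleGen:
    """Toy generator: asks for uniform random points, ignores tells."""

    def __init__(self, lb, ub, seed=0):
        self.lb = np.asarray(lb)
        self.ub = np.asarray(ub)
        self.rng = np.random.default_rng(seed)

    def ask(self, num_points):
        # Return one dict per requested point, each carrying an "x" sample
        x = self.rng.uniform(self.lb, self.ub, (num_points, len(self.lb)))
        return [{"x": row} for row in x]

    def tell(self, results):
        # A model-based generator would update its internal state here
        pass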
@@ -12,3 +12,4 @@
from libensemble import logger

from .ensemble import Ensemble
from .generators import Generator
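
With this export, a user script can presumably pull the interface straight from the top-level package. A minimal sketch, assuming Generator is the abstract ask/tell base class documented above (the subclass name is hypothetical):

# Assumption: Generator is the abstract ask/tell base exported above
from libensemble import Generator

class MyGenerator(Generator):  # hypothetical user subclass
    def ask(self, num_points):
        ...

    def tell(self, results):
        ...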
@@ -0,0 +1,3 @@
from .aposmm import APOSMM  # noqa: F401
from .sampling import UniformSample, UniformSampleDicts  # noqa: F401
from .surmise import Surmise  # noqa: F401
@@ -0,0 +1,123 @@
import copy
from typing import List

import numpy as np
from numpy import typing as npt

from libensemble.generators import LibensembleGenThreadInterfacer
from libensemble.message_numbers import EVAL_GEN_TAG, PERSIS_STOP
from libensemble.tools import add_unique_random_streams


class APOSMM(LibensembleGenThreadInterfacer):
    """
    Standalone object-oriented APOSMM generator
    """

    def __init__(
        self, History: npt.NDArray = [], persis_info: dict = {}, gen_specs: dict = {}, libE_info: dict = {}, **kwargs
    ) -> None:
        from libensemble.gen_funcs.persistent_aposmm import aposmm

        gen_specs["gen_f"] = aposmm
        if not gen_specs.get("out"):  # gen_specs never especially changes for aposmm even as the problem varies
            n = len(kwargs["lb"]) or len(kwargs["ub"])
            gen_specs["out"] = [
                ("x", float, n),
                ("x_on_cube", float, n),
                ("sim_id", int),
                ("local_min", bool),
                ("local_pt", bool),
            ]
            gen_specs["persis_in"] = ["x", "f", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"]
        if not persis_info:
            persis_info = add_unique_random_streams({}, 2, seed=4321)[1]
        super().__init__(History, persis_info, gen_specs, libE_info, **kwargs)
        if not self.persis_info.get("nworkers"):
            self.persis_info["nworkers"] = gen_specs["user"]["max_active_runs"]  # ??????????
        self.all_local_minima = []
        self._ask_idx = 0
        self._last_ask = None
        self._tell_buf = None
        self._n_buffd_results = 0
        self._n_total_results = 0
        self._told_initial_sample = False

    def _slot_in_data(self, results):
        """Slot in libE_calc_in and trial data into corresponding array fields. *Initial sample only!!*"""

Review comment: this is what you want:

self._tell_buf["f"][self._n_buffd_results] = results["f"] | ||
self._tell_buf["x"][self._n_buffd_results] = results["x"] | ||
self._tell_buf["sim_id"][self._n_buffd_results] = results["sim_id"] | ||
self._tell_buf["x_on_cube"][self._n_buffd_results] = results["x_on_cube"] | ||
self._tell_buf["local_pt"][self._n_buffd_results] = results["local_pt"] | ||
|
||
@property | ||
def _array_size(self): | ||
"""Output array size must match either initial sample or N points to evaluate in parallel.""" | ||
user = self.gen_specs["user"] | ||
return user["initial_sample_size"] if not self._told_initial_sample else user["max_active_runs"] | ||
|
||
@property | ||
def _enough_initial_sample(self): | ||
"""We're typically happy with at least 90% of the initial sample, or we've already told the initial sample""" | ||
return ( | ||
self._n_buffd_results >= self.gen_specs["user"]["initial_sample_size"] - 10 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line looks very dubious. Why is 10 a hardcoded value? |
||
) or self._told_initial_sample | ||
|
||
    def ask_numpy(self, num_points: int = 0) -> npt.NDArray:
        """Request the next set of points to evaluate, as a NumPy array."""
        if (self._last_ask is None) or (
            self._ask_idx >= len(self._last_ask)
        ):  # haven't been asked yet, or all previously enqueued points have been "asked"
            self._ask_idx = 0
            self._last_ask = super().ask_numpy(num_points)
            if self._last_ask[
                "local_min"
            ].any():  # filter out local minima rows, but they're cached in self.all_local_minima
                min_idxs = self._last_ask["local_min"]
                self.all_local_minima.append(self._last_ask[min_idxs])
                self._last_ask = self._last_ask[~min_idxs]
        if num_points > 0:  # we've been asked for a selection of the last ask
            results = np.copy(
                self._last_ask[self._ask_idx : self._ask_idx + num_points]
            )  # if resetting _last_ask later, results may point to "None"
            self._ask_idx += num_points
            return results
        results = np.copy(self._last_ask)
        self.results = results
        self._last_ask = None
        return results

Review comment (on the slice above): When we are in a situation (perhaps in error) where APOSMM does not give us enough points (e.g. the current ask is for 6, but APOSMM has only supplied one), then this gets filled up with empty points. This should be handled somehow. If it should never arise, then as an error condition.

    def tell_numpy(self, results: npt.NDArray, tag: int = EVAL_GEN_TAG) -> None:
        if (results is None and tag == PERSIS_STOP) or len(
            results
        ) == self._array_size:  # told to stop, by final_tell or libE
            self._told_initial_sample = True  # we definitely got an initial sample already if one matches
            super().tell_numpy(results, tag)
            return

        if (
            self._n_buffd_results == 0  # ONLY NEED TO BUFFER RESULTS FOR INITIAL SAMPLE????
        ):  # Optimas prefers to give back chunks of initial_sample. So we buffer them
            self._tell_buf = np.zeros(self._array_size, dtype=self.gen_specs["out"] + [("f", float)])

        if not self._enough_initial_sample:
            self._slot_in_data(np.copy(results))
            self._n_buffd_results += len(results)
            self._n_total_results += len(results)

        if not self._told_initial_sample and self._enough_initial_sample:
            self._tell_buf = self._tell_buf[self._tell_buf["sim_id"] != 0]

Review comment (on the sim_id filter above): Why getting rid of sim_id 0?

            super().tell_numpy(self._tell_buf, tag)
            self._told_initial_sample = True
            self._n_buffd_results = 0

        elif self._told_initial_sample:  # probably libE: given back smaller selection. but from alloc, so its ok?
            super().tell_numpy(results, tag)
            self._n_buffd_results = 0  # dont want to send the same point more than once. slotted in earlier

    def ask_updates(self) -> List[npt.NDArray]:
        """Request a list of NumPy arrays containing entries that have been identified as minima."""
        minima = copy.deepcopy(self.all_local_minima)
        self.all_local_minima = []
        return minima
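
For context (not part of the diff), here is a rough sketch of how a script might drive this class directly through ask_numpy/tell_numpy. The import path, the gen_specs["user"] options shown (a real run would need the full set of APOSMM options, e.g. a local optimization method), and whether the threaded interfacer needs an explicit setup/finalize step are assumptions, not confirmed API.

# Hedged sketch only: exercising APOSMM's ask/tell interface standalone.
# Import path and option names are assumptions based on the code above.
import numpy as np
from libensemble.gen_classes import APOSMM  # path assumed

n = 2
gen = APOSMM(
    gen_specs={"user": {"initial_sample_size": 100, "max_active_runs": 4}},
    lb=-2 * np.ones(n),
    ub=2 * np.ones(n),
)

for _ in range(5):
    points = gen.ask_numpy(4)  # structured array with "x", "x_on_cube", "sim_id", ...
    results = np.zeros(len(points), dtype=gen.gen_specs["out"] + [("f", float)])
    for field in ("x", "x_on_cube", "sim_id", "local_pt"):
        results[field] = points[field]
    results["f"] = np.sum(points["x"] ** 2, axis=1)  # toy objective
    gen.tell_numpy(results)

print(gen.ask_updates())  # local minima identified so far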
@@ -0,0 +1,155 @@
"""Generator class exposing gpCAM functionality"""

import time
from typing import List

import numpy as np
from gpcam import GPOptimizer as GP
from numpy import typing as npt

# While there are class / func duplicates - re-use functions.
from libensemble.gen_funcs.persistent_gpCAM import (
    _calculate_grid_distances,
    _eval_var,
    _find_eligible_points,
    _generate_mesh,
    _read_testpoints,
)
from libensemble.generators import LibensembleGenerator

__all__ = [
    "GP_CAM",
    "GP_CAM_Covar",
]


# Note - batch size is set in wrapper currently - and passed to ask as n_trials.
# To support empty ask(), add batch_size back in here.


# Equivalent to function persistent_gpCAM_ask_tell
class GP_CAM(LibensembleGenerator):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub). On subsequent iterations, it calls an optimization method to
    produce the next batch of points. This optimization might be too slow
    (relative to the simulation evaluation time) for some use cases.
    """

    def _initialize_gpCAM(self, user_specs):
        """Extract user params"""
        # self.b = user_specs["batch_size"]
        self.lb = np.array(user_specs["lb"])
        self.ub = np.array(user_specs["ub"])
        self.n = len(self.lb)  # dimension
        assert isinstance(self.n, int), "Dimension must be an integer"
        assert isinstance(self.lb, np.ndarray), "lb must be a numpy array"
        assert isinstance(self.ub, np.ndarray), "ub must be a numpy array"
        self.all_x = np.empty((0, self.n))
        self.all_y = np.empty((0, 1))
        np.random.seed(0)

Review comment: We need to decide

Review reply: Fair enough. My opinion/intuition is that a user is more likely to prefer either "classical" gens (e.g. Jeff) or ask/tell gens (e.g. other CAMPA folks). With these gens' interfaces and users being so different, I don't think an arguably simpler rearrangement of the input parameters is too confusing. Similarly, some people prefer numpy and others pandas; they do similar things, but their interfaces being different isn't a point of contention. I'd also lean towards: if someone were to initialize some object, like a gen, themselves, they'd prefer their specifications be provided as early and clearly as possible:

vs.
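
The two snippets the reviewer compared are not captured in this view; purely as a hypothetical illustration, the orderings under discussion might look roughly like the following (the keyword-argument form is what the reviewer advocates, not the current constructor):

# Hypothetical illustration only - not the reviewer's actual snippets
gen = GP_CAM(lb=[0, 0], ub=[1, 1], ask_max_iter=10)  # user options passed up front

# vs.

gen = GP_CAM(H, persis_info, gen_specs, libE_info)  # classic libEnsemble ordering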

    def __init__(self, H, persis_info, gen_specs, libE_info=None):

Review comment: I will put above _initialize_gpcAM

Review reply: and make _initialize_gpcAM _initialize_gpCAM

        self.H = H  # Currently not used - could be used for an H0
        self.persis_info = persis_info
        self.gen_specs = gen_specs
        self.libE_info = libE_info

        self.U = self.gen_specs["user"]
        self._initialize_gpCAM(self.U)
        self.rng = self.persis_info["rand_stream"]

        self.my_gp = None
        self.noise = 1e-8  # 1e-12
        self.ask_max_iter = self.gen_specs["user"].get("ask_max_iter") or 10

    def ask_numpy(self, n_trials: int) -> npt.NDArray:
        if self.all_x.shape[0] == 0:
            self.x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            start = time.time()
            self.x_new = self.my_gp.ask(
                input_set=np.column_stack((self.lb, self.ub)),
                n=n_trials,
                pop_size=n_trials,
                acquisition_function="total correlation",
                max_iter=self.ask_max_iter,  # Larger takes longer. gpCAM default is 20.
            )["x"]
            print(f"Ask time:{time.time() - start}")
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_numpy(self, calc_in: npt.NDArray) -> None:
        if calc_in is not None:
            self.y_new = np.atleast_2d(calc_in["f"]).T
            nan_indices = [i for i, fval in enumerate(self.y_new) if np.isnan(fval[0])]
            self.x_new = np.delete(self.x_new, nan_indices, axis=0)
            self.y_new = np.delete(self.y_new, nan_indices, axis=0)

            self.all_x = np.vstack((self.all_x, self.x_new))
            self.all_y = np.vstack((self.all_y, self.y_new))

            noise_var = self.noise * np.ones(len(self.all_y))
            if self.my_gp is None:
                self.my_gp = GP(self.all_x, self.all_y.flatten(), noise_variances=noise_var)
            else:
                self.my_gp.tell(self.all_x, self.all_y.flatten(), noise_variances=noise_var)
            self.my_gp.train()


class GP_CAM_Covar(GP_CAM):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub) and on following iterations samples the GP posterior covariance
    function to find sample points.
    """

    def __init__(self, H, persis_info, gen_specs, libE_info=None):
        super().__init__(H, persis_info, gen_specs, libE_info)
        self.test_points = _read_testpoints(self.U)
        self.x_for_var = None
        self.var_vals = None
        if self.U.get("use_grid"):
            self.num_points = 10
            self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points)
            self.r_low_init, self.r_high_init = _calculate_grid_distances(self.lb, self.ub, self.num_points)

    def ask_numpy(self, n_trials: int) -> List[dict]:
        if self.all_x.shape[0] == 0:
            x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            if not self.U.get("use_grid"):
                x_new = self.x_for_var[np.argsort(self.var_vals)[-n_trials:]]
            else:
                r_high = self.r_high_init
                r_low = self.r_low_init
                x_new = []
                r_cand = r_high  # Let's start with a large radius and stop when we have batchsize points

                sorted_indices = np.argsort(-self.var_vals)
                while len(x_new) < n_trials:
                    x_new = _find_eligible_points(self.x_for_var, sorted_indices, r_cand, n_trials)
                    if len(x_new) < n_trials:
                        r_high = r_cand
                        r_cand = (r_high + r_low) / 2.0

        self.x_new = x_new
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_numpy(self, calc_in: npt.NDArray):
        if calc_in is not None:
            super().tell_numpy(calc_in)
            if not self.U.get("use_grid"):
                n_trials = len(self.y_new)
                self.x_for_var = self.rng.uniform(self.lb, self.ub, (10 * n_trials, self.n))

            self.var_vals = _eval_var(
                self.my_gp, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info
            )
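
As with APOSMM above, here is a rough, hedged sketch of driving GP_CAM through ask_numpy/tell_numpy outside of a libEnsemble run. The option names and the shape of gen_specs["out"] are taken from the code above; the surrounding loop, the toy objective, and the use of a NumPy Generator as rand_stream are illustrative assumptions.

# Hedged sketch only: exercising GP_CAM's ask/tell interface standalone.
import numpy as np
from numpy.random import default_rng

gen_specs = {
    "user": {"lb": [0.0, 0.0], "ub": [1.0, 1.0], "ask_max_iter": 10},
    "out": [("x", float, 2)],
}
persis_info = {"rand_stream": default_rng(0)}

gen = GP_CAM(None, persis_info, gen_specs)  # H unused; libE_info optional

for _ in range(3):
    batch = gen.ask_numpy(4)  # structured array with an "x" field
    results = np.zeros(len(batch), dtype=[("x", float, 2), ("f", float)])
    results["x"] = batch["x"]
    results["f"] = np.sum(batch["x"] ** 2, axis=1)  # toy objective
    gen.tell_numpy(results)  # updates and retrains the GP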

Review comment: This is not nworkers. It could be less than nworkers.

Review reply: Right. I'll find a better way to specify nworkers to aposmm.

Review comment: If it's not used until the main loop, can it be captured, or at least checked later?

Review comment: We also need to review what is happening with gen_specs and esp. gen_specs["out"], which is not SSoT (a single source of truth).