ASK/TELL DEVELOP #1307
base: develop
Changes from 250 commits
@@ -0,0 +1,21 @@
Ask/Tell Generators
===================

**BETA - SUBJECT TO CHANGE**

These generators, implementations, methods, and subclasses are in BETA, and
may change in future releases.

The Generator interface is expected to roughly correspond with CAMPA's standard:
https://github.com/campa-consortium/generator_standard

libEnsemble is in the process of supporting generator objects that implement the following interface:

.. automodule:: generators
   :members: Generator LibensembleGenerator
   :undoc-members:

.. autoclass:: Generator
   :member-order: bysource
   :members:
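
For orientation, here is a rough sketch of the kind of user-defined generator this interface is aimed at. It is illustrative only: the class name and the exact ask/tell signatures are assumptions based on the description above, not confirmed API.

# Hedged sketch: a minimal ask/tell generator in the spirit of the interface
# described above. Names and signatures here are illustrative assumptions.
import numpy as np

class RandomSampleGen:
    """Toy generator: asks for uniform random points, ignores tells."""

    def __init__(self, lb, ub, seed=0):
        self.lb = np.asarray(lb)
        self.ub = np.asarray(ub)
        self.rng = np.random.default_rng(seed)

    def ask(self, num_points):
        # Return one dict per requested point, each carrying an "x" sample
        x = self.rng.uniform(self.lb, self.ub, (num_points, len(self.lb)))
        return [{"x": row} for row in x]

    def tell(self, results):
        # A model-based generator would update its internal state here
        pass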
@@ -12,3 +12,4 @@
from libensemble import logger

from .ensemble import Ensemble
from .generators import Generator
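
With this export, a user script can presumably pull the interface straight from the top-level package. A minimal sketch, assuming Generator is the abstract ask/tell base class documented above (the subclass name is hypothetical):

# Assumption: Generator is the abstract ask/tell base exported above
from libensemble import Generator

class MyGenerator(Generator):  # hypothetical user subclass
    def ask(self, num_points):
        ...

    def tell(self, results):
        ...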
@@ -0,0 +1,3 @@
from .aposmm import APOSMM  # noqa: F401
from .sampling import UniformSample, UniformSampleDicts  # noqa: F401
from .surmise import Surmise  # noqa: F401
@@ -0,0 +1,123 @@
import copy
from typing import List

import numpy as np
from numpy import typing as npt

from libensemble.generators import LibensembleGenThreadInterfacer
from libensemble.message_numbers import EVAL_GEN_TAG, PERSIS_STOP
from libensemble.tools import add_unique_random_streams


class APOSMM(LibensembleGenThreadInterfacer):
    """
    Standalone object-oriented APOSMM generator
    """

    def __init__(
        self, History: npt.NDArray = [], persis_info: dict = {}, gen_specs: dict = {}, libE_info: dict = {}, **kwargs
    ) -> None:
        from libensemble.gen_funcs.persistent_aposmm import aposmm

        gen_specs["gen_f"] = aposmm
        if not gen_specs.get("out"):  # gen_specs never especially changes for aposmm even as the problem varies
            n = len(kwargs["lb"]) or len(kwargs["ub"])
            gen_specs["out"] = [
                ("x", float, n),
                ("x_on_cube", float, n),
                ("sim_id", int),
                ("local_min", bool),
                ("local_pt", bool),
            ]
            gen_specs["persis_in"] = ["x", "f", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"]
        if not persis_info:
            persis_info = add_unique_random_streams({}, 2, seed=4321)[1]
        super().__init__(History, persis_info, gen_specs, libE_info, **kwargs)
        if not self.persis_info.get("nworkers"):
            self.persis_info["nworkers"] = gen_specs["user"]["max_active_runs"]  # ??????????
        self.all_local_minima = []
        self._ask_idx = 0
        self._last_ask = None
        self._tell_buf = None
        self._n_buffd_results = 0
        self._n_total_results = 0
        self._told_initial_sample = False

    def _slot_in_data(self, results):
        """Slot in libE_calc_in and trial data into corresponding array fields. *Initial sample only!!*"""

Review comment: this is what you want:

self._tell_buf["f"][self._n_buffd_results] = results["f"] | ||
self._tell_buf["x"][self._n_buffd_results] = results["x"] | ||
self._tell_buf["sim_id"][self._n_buffd_results] = results["sim_id"] | ||
self._tell_buf["x_on_cube"][self._n_buffd_results] = results["x_on_cube"] | ||
self._tell_buf["local_pt"][self._n_buffd_results] = results["local_pt"] | ||
|
||
@property | ||
def _array_size(self): | ||
"""Output array size must match either initial sample or N points to evaluate in parallel.""" | ||
user = self.gen_specs["user"] | ||
return user["initial_sample_size"] if not self._told_initial_sample else user["max_active_runs"] | ||
|
||
@property | ||
def _enough_initial_sample(self): | ||
"""We're typically happy with at least 90% of the initial sample, or we've already told the initial sample""" | ||
return ( | ||
self._n_buffd_results >= self.gen_specs["user"]["initial_sample_size"] - 10 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line looks very dubious. Why is 10 a hardcoded value? |
||
) or self._told_initial_sample | ||
|
||
    def ask_numpy(self, num_points: int = 0) -> npt.NDArray:
        """Request the next set of points to evaluate, as a NumPy array."""
        if (self._last_ask is None) or (
            self._ask_idx >= len(self._last_ask)
        ):  # haven't been asked yet, or all previously enqueued points have been "asked"
            self._ask_idx = 0
            self._last_ask = super().ask_numpy(num_points)
            if self._last_ask[
                "local_min"
            ].any():  # filter out local minima rows, but they're cached in self.all_local_minima
                min_idxs = self._last_ask["local_min"]
                self.all_local_minima.append(self._last_ask[min_idxs])
                self._last_ask = self._last_ask[~min_idxs]
        if num_points > 0:  # we've been asked for a selection of the last ask
            results = np.copy(
                self._last_ask[self._ask_idx : self._ask_idx + num_points]
            )  # if resetting _last_ask later, results may point to "None"
            self._ask_idx += num_points
            return results
        results = np.copy(self._last_ask)
        self.results = results
        self._last_ask = None
        return results

Review comment (on the slice above): When we are in a situation (perhaps in error) where APOSMM does not give us enough points (e.g. the current ask is for 6, but APOSMM has only supplied one), then this gets filled up with empty points. This should be handled somehow. If it should never arise, then as an error condition.

    def tell_numpy(self, results: npt.NDArray, tag: int = EVAL_GEN_TAG) -> None:
        if (results is None and tag == PERSIS_STOP) or len(
            results
        ) == self._array_size:  # told to stop, by final_tell or libE
            self._told_initial_sample = True  # we definitely got an initial sample already if one matches
            super().tell_numpy(results, tag)
            return

        if (
            self._n_buffd_results == 0  # ONLY NEED TO BUFFER RESULTS FOR INITIAL SAMPLE????
        ):  # Optimas prefers to give back chunks of initial_sample. So we buffer them
            self._tell_buf = np.zeros(self._array_size, dtype=self.gen_specs["out"] + [("f", float)])

        if not self._enough_initial_sample:
            self._slot_in_data(np.copy(results))
            self._n_buffd_results += len(results)
            self._n_total_results += len(results)

        if not self._told_initial_sample and self._enough_initial_sample:
            self._tell_buf = self._tell_buf[self._tell_buf["sim_id"] != 0]

Review comment (on the sim_id filter above): Why getting rid of sim_id 0?

            super().tell_numpy(self._tell_buf, tag)
            self._told_initial_sample = True
            self._n_buffd_results = 0

        elif self._told_initial_sample:  # probably libE: given back smaller selection. but from alloc, so its ok?
            super().tell_numpy(results, tag)
            self._n_buffd_results = 0  # dont want to send the same point more than once. slotted in earlier

    def ask_updates(self) -> List[npt.NDArray]:
        """Request a list of NumPy arrays containing entries that have been identified as minima."""
        minima = copy.deepcopy(self.all_local_minima)
        self.all_local_minima = []
        return minima
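
For context (not part of the diff), here is a rough sketch of how a script might drive this class directly through ask_numpy/tell_numpy. The import path, the gen_specs["user"] options shown (a real run would need the full set of APOSMM options, e.g. a local optimization method), and whether the threaded interfacer needs an explicit setup/finalize step are assumptions, not confirmed API.

# Hedged sketch only: exercising APOSMM's ask/tell interface standalone.
# Import path and option names are assumptions based on the code above.
import numpy as np
from libensemble.gen_classes import APOSMM  # path assumed

n = 2
gen = APOSMM(
    gen_specs={"user": {"initial_sample_size": 100, "max_active_runs": 4}},
    lb=-2 * np.ones(n),
    ub=2 * np.ones(n),
)

for _ in range(5):
    points = gen.ask_numpy(4)  # structured array with "x", "x_on_cube", "sim_id", ...
    results = np.zeros(len(points), dtype=gen.gen_specs["out"] + [("f", float)])
    for field in ("x", "x_on_cube", "sim_id", "local_pt"):
        results[field] = points[field]
    results["f"] = np.sum(points["x"] ** 2, axis=1)  # toy objective
    gen.tell_numpy(results)

print(gen.ask_updates())  # local minima identified so far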
@@ -0,0 +1,155 @@
"""Generator class exposing gpCAM functionality"""

import time
from typing import List

import numpy as np
from gpcam import GPOptimizer as GP
from numpy import typing as npt

# While there are class / func duplicates - re-use functions.
from libensemble.gen_funcs.persistent_gpCAM import (
    _calculate_grid_distances,
    _eval_var,
    _find_eligible_points,
    _generate_mesh,
    _read_testpoints,
)
from libensemble.generators import LibensembleGenerator

__all__ = [
    "GP_CAM",
    "GP_CAM_Covar",
]


# Note - batch size is set in wrapper currently - and passed to ask as n_trials.
# To support empty ask(), add batch_size back in here.


# Equivalent to function persistent_gpCAM_ask_tell
class GP_CAM(LibensembleGenerator):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub). On subsequent iterations, it calls an optimization method to
    produce the next batch of points. This optimization might be too slow
    (relative to the simulation evaluation time) for some use cases.
    """

    def _initialize_gpCAM(self, user_specs):
        """Extract user params"""
        # self.b = user_specs["batch_size"]
        self.lb = np.array(user_specs["lb"])
        self.ub = np.array(user_specs["ub"])
        self.n = len(self.lb)  # dimension
        assert isinstance(self.n, int), "Dimension must be an integer"
        assert isinstance(self.lb, np.ndarray), "lb must be a numpy array"
        assert isinstance(self.ub, np.ndarray), "ub must be a numpy array"
        self.all_x = np.empty((0, self.n))
        self.all_y = np.empty((0, 1))
        np.random.seed(0)

Review comment: We need to decide

Review reply: Fair enough. My opinion/intuition is that a user is more likely to prefer either "classical" gens (e.g. Jeff) or ask/tell gens (e.g. other CAMPA folks). With these gens' interfaces and users being so different, I don't think an arguably simpler rearrangement of the input parameters is too confusing. Similarly, some people prefer numpy and others pandas; they do similar things, but their interfaces being different isn't a point of contention. I'd also lean towards: if someone were to initialize some object, like a gen, themselves, they'd prefer their specifications be provided as early and clearly as possible:

vs.
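
The two snippets the reviewer compared are not captured in this view; purely as a hypothetical illustration, the orderings under discussion might look roughly like the following (the keyword-argument form is what the reviewer advocates, not the current constructor):

# Hypothetical illustration only - not the reviewer's actual snippets
gen = GP_CAM(lb=[0, 0], ub=[1, 1], ask_max_iter=10)  # user options passed up front

# vs.

gen = GP_CAM(H, persis_info, gen_specs, libE_info)  # classic libEnsemble ordering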

    def __init__(self, H, persis_info, gen_specs, libE_info=None):

Review comment: I will put above _initialize_gpcAM

Review reply: and make _initialize_gpcAM _initialize_gpCAM

        self.H = H  # Currently not used - could be used for an H0
        self.persis_info = persis_info
        self.gen_specs = gen_specs
        self.libE_info = libE_info

        self.U = self.gen_specs["user"]
        self._initialize_gpCAM(self.U)
        self.rng = self.persis_info["rand_stream"]

        self.my_gp = None
        self.noise = 1e-8  # 1e-12
        self.ask_max_iter = self.gen_specs["user"].get("ask_max_iter") or 10

    def ask_numpy(self, n_trials: int) -> npt.NDArray:
        if self.all_x.shape[0] == 0:
            self.x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            start = time.time()
            self.x_new = self.my_gp.ask(
                input_set=np.column_stack((self.lb, self.ub)),
                n=n_trials,
                pop_size=n_trials,
                acquisition_function="total correlation",
                max_iter=self.ask_max_iter,  # Larger takes longer. gpCAM default is 20.
            )["x"]
            print(f"Ask time:{time.time() - start}")
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_numpy(self, calc_in: npt.NDArray) -> None:
        if calc_in is not None:
            self.y_new = np.atleast_2d(calc_in["f"]).T
            nan_indices = [i for i, fval in enumerate(self.y_new) if np.isnan(fval[0])]
            self.x_new = np.delete(self.x_new, nan_indices, axis=0)
            self.y_new = np.delete(self.y_new, nan_indices, axis=0)

            self.all_x = np.vstack((self.all_x, self.x_new))
            self.all_y = np.vstack((self.all_y, self.y_new))

            noise_var = self.noise * np.ones(len(self.all_y))
            if self.my_gp is None:
                self.my_gp = GP(self.all_x, self.all_y.flatten(), noise_variances=noise_var)
            else:
                self.my_gp.tell(self.all_x, self.all_y.flatten(), noise_variances=noise_var)
            self.my_gp.train()


class GP_CAM_Covar(GP_CAM):
    """
    This generation function constructs a global surrogate of `f` values.

    It is a batched method that produces a first batch uniformly random from
    (lb, ub) and on following iterations samples the GP posterior covariance
    function to find sample points.
    """

    def __init__(self, H, persis_info, gen_specs, libE_info=None):
        super().__init__(H, persis_info, gen_specs, libE_info)
        self.test_points = _read_testpoints(self.U)
        self.x_for_var = None
        self.var_vals = None
        if self.U.get("use_grid"):
            self.num_points = 10
            self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points)
            self.r_low_init, self.r_high_init = _calculate_grid_distances(self.lb, self.ub, self.num_points)

    def ask_numpy(self, n_trials: int) -> List[dict]:
        if self.all_x.shape[0] == 0:
            x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n))
        else:
            if not self.U.get("use_grid"):
                x_new = self.x_for_var[np.argsort(self.var_vals)[-n_trials:]]
            else:
                r_high = self.r_high_init
                r_low = self.r_low_init
                x_new = []
                r_cand = r_high  # Let's start with a large radius and stop when we have batchsize points

                sorted_indices = np.argsort(-self.var_vals)
                while len(x_new) < n_trials:
                    x_new = _find_eligible_points(self.x_for_var, sorted_indices, r_cand, n_trials)
                    if len(x_new) < n_trials:
                        r_high = r_cand
                        r_cand = (r_high + r_low) / 2.0

        self.x_new = x_new
        H_o = np.zeros(n_trials, dtype=self.gen_specs["out"])
        H_o["x"] = self.x_new
        return H_o

    def tell_numpy(self, calc_in: npt.NDArray):
        if calc_in is not None:
            super().tell_numpy(calc_in)
            if not self.U.get("use_grid"):
                n_trials = len(self.y_new)
                self.x_for_var = self.rng.uniform(self.lb, self.ub, (10 * n_trials, self.n))

            self.var_vals = _eval_var(
                self.my_gp, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info
            )
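
As with APOSMM above, here is a rough, hedged sketch of driving GP_CAM through ask_numpy/tell_numpy outside of a libEnsemble run. The option names and the shape of gen_specs["out"] are taken from the code above; the surrounding loop, the toy objective, and the use of a NumPy Generator as rand_stream are illustrative assumptions.

# Hedged sketch only: exercising GP_CAM's ask/tell interface standalone.
import numpy as np
from numpy.random import default_rng

gen_specs = {
    "user": {"lb": [0.0, 0.0], "ub": [1.0, 1.0], "ask_max_iter": 10},
    "out": [("x", float, 2)],
}
persis_info = {"rand_stream": default_rng(0)}

gen = GP_CAM(None, persis_info, gen_specs)  # H unused; libE_info optional

for _ in range(3):
    batch = gen.ask_numpy(4)  # structured array with an "x" field
    results = np.zeros(len(batch), dtype=[("x", float, 2), ("f", float)])
    results["x"] = batch["x"]
    results["f"] = np.sum(batch["x"] ** 2, axis=1)  # toy objective
    gen.tell_numpy(results)  # updates and retrains the GP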

Review comment: This is not nworkers. It could be less than nworkers.

Review reply: Right. I'll find a better way to specify nworkers to aposmm.

Review comment: If it's not used until the main loop, can it be captured, or at least checked later?

Review comment: We also need to review what is happening with gen_specs and esp. gen_specs["out"], which is not SSoT (a single source of truth).