From 0b6e11d1b339887940710d2374a7d356b635ddee Mon Sep 17 00:00:00 2001 From: Robert King Date: Thu, 19 Oct 2023 13:47:11 -0700 Subject: [PATCH] v0.0.20 update emulator --- pyproject.toml | 2 +- src/history_matching/emulator/__init__.py | 1 + src/history_matching/emulator/emulator.py | 76 ++++++++++++++++++++ src/history_matching/emulator/gp_emulator.py | 3 +- 4 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 src/history_matching/emulator/emulator.py diff --git a/pyproject.toml b/pyproject.toml index 574d0e6..18f2bb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0","netcdf4>=1.6.2","xarray>=2023.2.0","joblib>=1.2. build-backend = "setuptools.build_meta" [project] name = "history_matching" -version = "0.0.15" +version = "0.0.20" authors = [ { name="Robert King", email="robcking@stanford.edu" }, ] diff --git a/src/history_matching/emulator/__init__.py b/src/history_matching/emulator/__init__.py index 572d378..c48b26a 100644 --- a/src/history_matching/emulator/__init__.py +++ b/src/history_matching/emulator/__init__.py @@ -1,3 +1,4 @@ +from .emulator import Emulator from .gp_emulator import GPEmulator from .implausibility import ( chisquaredtest, diff --git a/src/history_matching/emulator/emulator.py b/src/history_matching/emulator/emulator.py new file mode 100644 index 0000000..83da3bc --- /dev/null +++ b/src/history_matching/emulator/emulator.py @@ -0,0 +1,76 @@ +import numpy as np +import xarray as xr +from sklearn.base import BaseEstimator +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel +from sklearn.preprocessing import MinMaxScaler + +from ..samples import SampleSpace + + +class Emulator(BaseEstimator): + def __init__( + self, n_features=2, random_state=None, kernel=None, n_restarts_optimizer=0 + ) -> None: + self.random_state = random_state + + self.n_features = n_features + if kernel is None: + self.kernel = ( + ConstantKernel() * RBF(length_scale=np.ones((self.n_features,))) + + WhiteKernel() + ) + else: + self.kernel = kernel + self.scaler_x = MinMaxScaler() + self.n_restarts_optimizer = n_restarts_optimizer + self.__gps = [ + GaussianProcessRegressor( + normalize_y=True, + random_state=self.random_state, + kernel=self.kernel, + n_restarts_optimizer=self.n_restarts_optimizer, + ) + for _ in range(self.n_features) + ] + super().__init__() + + def fit(self, X, y): + X = self.scaler_x.fit_transform(X) + for i, gp in enumerate(self.__gps): + gp.fit(X, y[:, i]) + + def predict(self, X, return_std=False): + X = self.scaler_x.transform(X) + values = np.array([gp.predict(X, return_std=return_std) for gp in self.__gps]) + if return_std: + return values[:, 0], values[:, 1] + return values + + def predict_over_space(self, space: SampleSpace, return_std=False, resolution=None): + space_xr = space.to_xarray(resolution=resolution) + valid_points = np.array(np.nonzero(space_xr.values)) + X = np.zeros(valid_points.shape) + for i, dim in enumerate(space_xr.dims): + X[i, :] = space_xr[dim][valid_points[i, :]] + X = np.transpose(X) + dims = ("n_features", *space_xr.dims) + coords = {"n_features": np.arange(self.n_features), **space_xr.coords} + predictions = np.empty((self.n_features, *space_xr.shape)) * np.nan + if return_std: + predictions_std = np.empty((self.n_features, *space_xr.shape)) * np.nan + preds_flt, preds_std_flt = self.predict(X, return_std=True) + for pred, pred_std, valid_point in zip( + preds_flt, preds_std_flt, valid_points + ): + predictions[(slice(None), *valid_point)] = pred + predictions_std[(slice(None), *valid_point)] = pred_std + + return xr.DataArray(predictions, dims=dims, coords=coords), xr.DataArray( + predictions_std, dims=dims, coords=coords + ) + else: + preds_flt = self.predict(X, return_std=False) + for pred, valid_point in zip(preds_flt, valid_points): + predictions[(slice(None), *valid_point)] = pred + return xr.DataArray(predictions, dims=dims, coords=coords) diff --git a/src/history_matching/emulator/gp_emulator.py b/src/history_matching/emulator/gp_emulator.py index 0037004..d9cd95f 100644 --- a/src/history_matching/emulator/gp_emulator.py +++ b/src/history_matching/emulator/gp_emulator.py @@ -76,7 +76,8 @@ def predict_over_space(self, space: SampleSpace, return_std=False, resolution=No X[i, :] = space_up[dim][valid_points[i, :]] X = np.transpose(X) - # TODO: This can almost certainly be implemented better with less code duplication + # TODO: This ca + # n almost certainly be implemented better with less code duplication if return_std: pred, pred_std = self.predict(X, return_std=True) predictions = np.empty((self.n_features, *space_up.shape)) * np.nan