From 0b6e11d1b339887940710d2374a7d356b635ddee Mon Sep 17 00:00:00 2001
From: Robert King <robcking@stanford.edu>
Date: Thu, 19 Oct 2023 13:47:11 -0700
Subject: [PATCH] v0.0.20 update emulator

---
 pyproject.toml                               |  2 +-
 src/history_matching/emulator/__init__.py    |  1 +
 src/history_matching/emulator/emulator.py    | 76 ++++++++++++++++++++
 src/history_matching/emulator/gp_emulator.py |  3 +-
 4 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 src/history_matching/emulator/emulator.py

diff --git a/pyproject.toml b/pyproject.toml
index 574d0e6..18f2bb8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools>=61.0","netcdf4>=1.6.2","xarray>=2023.2.0","joblib>=1.2.
 build-backend = "setuptools.build_meta"
 [project]
 name = "history_matching"
-version = "0.0.15"
+version = "0.0.20"
 authors = [
   { name="Robert King", email="robcking@stanford.edu" },
 ]
diff --git a/src/history_matching/emulator/__init__.py b/src/history_matching/emulator/__init__.py
index 572d378..c48b26a 100644
--- a/src/history_matching/emulator/__init__.py
+++ b/src/history_matching/emulator/__init__.py
@@ -1,3 +1,4 @@
+from .emulator import Emulator
 from .gp_emulator import GPEmulator
 from .implausibility import (
     chisquaredtest,
diff --git a/src/history_matching/emulator/emulator.py b/src/history_matching/emulator/emulator.py
new file mode 100644
index 0000000..83da3bc
--- /dev/null
+++ b/src/history_matching/emulator/emulator.py
@@ -0,0 +1,76 @@
+import numpy as np
+import xarray as xr
+from sklearn.base import BaseEstimator
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
+from sklearn.preprocessing import MinMaxScaler
+
+from ..samples import SampleSpace
+
+
+class Emulator(BaseEstimator):
+    def __init__(
+        self, n_features=2, random_state=None, kernel=None, n_restarts_optimizer=0
+    ) -> None:
+        self.random_state = random_state
+
+        self.n_features = n_features
+        if kernel is None:
+            self.kernel = (
+                ConstantKernel() * RBF(length_scale=np.ones((self.n_features,)))
+                + WhiteKernel()
+            )
+        else:
+            self.kernel = kernel
+        self.scaler_x = MinMaxScaler()
+        self.n_restarts_optimizer = n_restarts_optimizer
+        self.__gps = [
+            GaussianProcessRegressor(
+                normalize_y=True,
+                random_state=self.random_state,
+                kernel=self.kernel,
+                n_restarts_optimizer=self.n_restarts_optimizer,
+            )
+            for _ in range(self.n_features)
+        ]
+        super().__init__()
+
+    def fit(self, X, y):
+        X = self.scaler_x.fit_transform(X)
+        for i, gp in enumerate(self.__gps):
+            gp.fit(X, y[:, i])
+
+    def predict(self, X, return_std=False):
+        X = self.scaler_x.transform(X)
+        values = np.array([gp.predict(X, return_std=return_std) for gp in self.__gps])
+        if return_std:
+            return values[:, 0], values[:, 1]
+        return values
+
+    def predict_over_space(self, space: SampleSpace, return_std=False, resolution=None):
+        space_xr = space.to_xarray(resolution=resolution)
+        valid_points = np.array(np.nonzero(space_xr.values))
+        X = np.zeros(valid_points.shape)
+        for i, dim in enumerate(space_xr.dims):
+            X[i, :] = space_xr[dim][valid_points[i, :]]
+        X = np.transpose(X)
+        dims = ("n_features", *space_xr.dims)
+        coords = {"n_features": np.arange(self.n_features), **space_xr.coords}
+        predictions = np.empty((self.n_features, *space_xr.shape)) * np.nan
+        if return_std:
+            predictions_std = np.empty((self.n_features, *space_xr.shape)) * np.nan
+            preds_flt, preds_std_flt = self.predict(X, return_std=True)
+            for pred, pred_std, valid_point in zip(
+                preds_flt, preds_std_flt, valid_points
+            ):
+                predictions[(slice(None), *valid_point)] = pred
+                predictions_std[(slice(None), *valid_point)] = pred_std
+
+            return xr.DataArray(predictions, dims=dims, coords=coords), xr.DataArray(
+                predictions_std, dims=dims, coords=coords
+            )
+        else:
+            preds_flt = self.predict(X, return_std=False)
+            for pred, valid_point in zip(preds_flt, valid_points):
+                predictions[(slice(None), *valid_point)] = pred
+            return xr.DataArray(predictions, dims=dims, coords=coords)
diff --git a/src/history_matching/emulator/gp_emulator.py b/src/history_matching/emulator/gp_emulator.py
index 0037004..d9cd95f 100644
--- a/src/history_matching/emulator/gp_emulator.py
+++ b/src/history_matching/emulator/gp_emulator.py
@@ -76,7 +76,8 @@ def predict_over_space(self, space: SampleSpace, return_std=False, resolution=No
             X[i, :] = space_up[dim][valid_points[i, :]]
         X = np.transpose(X)
 
-        # TODO: This can almost certainly be implemented better with less code duplication
+        # TODO: This ca
+        # n almost certainly be implemented better with less code duplication
         if return_std:
             pred, pred_std = self.predict(X, return_std=True)
             predictions = np.empty((self.n_features, *space_up.shape)) * np.nan