diff --git a/docs/_static/figures/Geir-IrisTalk-2017-34.png b/docs/_static/figures/Geir-IrisTalk-2017-34.png new file mode 100644 index 0000000..386b737 Binary files /dev/null and b/docs/_static/figures/Geir-IrisTalk-2017-34.png differ diff --git a/docs/_static/figures/Geir-IrisTalk-2017-38.png b/docs/_static/figures/Geir-IrisTalk-2017-38.png new file mode 100644 index 0000000..0c930f6 Binary files /dev/null and b/docs/_static/figures/Geir-IrisTalk-2017-38.png differ diff --git a/docs/index.rst b/docs/index.rst index 90d4398..10ce6fc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,5 +20,8 @@ resmda Documentation **TODOs** - Add tests -- Use this ``resmda`` to reproduce Geirs example +- Finish docstrings & code documentation - Compare with existing esmda's +- docs: + + - only deploy for versions diff --git a/docs/manual/about.rst b/docs/manual/about.rst index 4c3a9e6..eccf751 100644 --- a/docs/manual/about.rst +++ b/docs/manual/about.rst @@ -2,5 +2,81 @@ About ===== A simple 2D reservoir simulator and a straight-forward implementation of the -basic *Ensemble smoother with multiple data assimilation* (ESMDA) algorithm as -presented by Emerick and Reynolds, 2013. +basic *Ensemble Smoother with Multiple Data Assimilation* (ES-MDA) algorithm. + +ES-MDA +------ + +In the following an introduction to the ES-MDA (Ensemble Smoother with Multiple +Data Assimilation) algorithm following [EmRe13]_: + +In history-matching problems, it is common to only consider the +parameter-estimation problem (neglecting model uncertainties). In that case, +the analyzed vector of model parameters :math:`m^a` is given by + +.. math:: + m_j^a = m_j^f + C_\text{MD}^f \left(C_\text{DD}^f + \alpha C_\text{D} + \right)^{-1}\left(d_{\text{uc},j} - d_j^f \right) \qquad \text{(1)} + +for ensembles :math:`j=1, 2, \dots, N_e`. 
Here, + +- :math:`^a`: analysis; +- :math:`^f`: forecast; +- :math:`m^f`: prior vector of model parameters; +- :math:`d^f`: vector of predicted data; +- :math:`C_\text{MD}^f`: cross-covariance matrix between :math:`m^f` and + :math:`d^f`; +- :math:`C_\text{DD}^f`: :math:`N_d \times N_d` auto-covariance matrix of + predicted data; +- :math:`d_\text{obs}`: :math:`N_d`-dimensional vector of observed data; +- :math:`d_\text{uc} = d_\text{obs} + \sqrt{\alpha}C_\text{D}^{1/2} z_d, \ z_d + \sim \mathcal{N}(0, I_{N_d})`; +- :math:`C_\text{D}`: :math:`N_d \times N_d` covariance matrix of observed data + measurement errors; +- :math:`\alpha`: ES-MDA coefficient. + +The prior vector of model parameters, :math:`m^f_j`, can in reality be +:math:`j` possible models :math:`m^f` given from an analyst (e.g., the +geologist). In theoretical tests, these are usually created by perturbing the +prior :math:`m^f` by, e.g., adding random Gaussian noise. + +1. Choose the number of data assimilations, :math:`N_a`, and the coefficients + :math:`\alpha_i` for :math:`i = 1, \dots, N_a`. +2. For :math:`i = 1` to :math:`N_a`: + + 1. Run the ensemble from time zero. + 2. For each ensemble member, perturb the observation vector using + :math:`d_\text{uc} = d_\text{obs} + \sqrt{\alpha_i} C_\text{D}^{1/2} + z_d`, where :math:`z_d \sim \mathcal{N}(0,I_{N_d})`. + 3. Update the ensemble using Eq. (1) with :math:`\alpha_i`. + +The difficulty is the inversion of the large (:math:`N_d \times N_d`) matrix +:math:`C=C_\text{DD}^f + \alpha C_\text{D}`, which is often poorly conditioned +and poorly scaled. How to compute this inverse is one of the main differences +between different ES-MDA implementations. + +Also note that in the ES-MDA algorithm, every time we repeat the data +assimilation, we re-sample the vector of perturbed observations, i.e., we +recompute :math:`d_\text{uc} \sim \mathcal{N}(d_\text{obs}, \alpha_i +C_\text{D})`. 
This procedure tends to reduce sampling problems caused by +matching outliers that may be generated when perturbing the observations. + +One potential difficulty with the proposed MDA procedure is that :math:`N_a` +and the coefficients :math:`\alpha_i`'s need to be selected prior to the data +assimilation. The simplest choice for :math:`\alpha` is :math:`\alpha_i = N_a` +for all :math:`i`. However, intuitively we expect that choosing +:math:`\alpha_i` in a decreasing order can improve the performance of the +method. In this case, we start assimilating data with a large value of +:math:`\alpha`, which reduces the magnitude of the initial updates; then, we +gradually decrease :math:`\alpha`. + +For ES-MDA, we only consider the parameter-estimation problem. Thus, unlike EnKF, the parameters and states are always consistent (Thulin et al., 2007). This fact helps to explain the better data matches obtained by ES-MDA compared to EnKF. + + +Reservoir Model +--------------- + +The implemented small 2D Reservoir Simulator was created by following the +course **AESM304A - Flow and Simulation of Subsurface processes** at Delft +University of Technology (TUD); this particular part was taught by Dr. D.V. +Voskov, https://orcid.org/0000-0002-5399-1755. diff --git a/docs/manual/index.rst b/docs/manual/index.rst index 44b0c3b..b0cd901 100644 --- a/docs/manual/index.rst +++ b/docs/manual/index.rst @@ -15,4 +15,5 @@ User Guide about installation + references api diff --git a/docs/manual/references.rst b/docs/manual/references.rst new file mode 100644 index 0000000..0db7e73 --- /dev/null +++ b/docs/manual/references.rst @@ -0,0 +1,9 @@ +References +########## + +.. _references: + +.. [EmRe13] Emerick, A. A. and A. C. Reynolds, 2013, Ensemble smoother with + multiple data assimilation: Computers & Geosciences, 55, 3--15; DOI: + `10.1016/j.cageo.2012.03.011 + `_. 
diff --git a/examples/README.rst b/examples/README.rst index 71f2ff1..a1496fe 100644 --- a/examples/README.rst +++ b/examples/README.rst @@ -2,5 +2,3 @@ Gallery ======= - -TODO diff --git a/examples/basicESMDA.py b/examples/basicESMDA.py new file mode 100644 index 0000000..7550539 --- /dev/null +++ b/examples/basicESMDA.py @@ -0,0 +1,207 @@ +r""" +Linear and non-linear ES-MDA examples +===================================== + +A basic example of ES-MDA using a simple 1D equation. + +Geir Evensen gave a talk on *Properties of Iterative Ensemble Smoothers and +Strategies for Conditioning on Production Data* at the IPAM in May 2017. + +Here we reproduce the examples he showed on pages 34 and 38. The material can +be found at: + +- PDF: http://helper.ipam.ucla.edu/publications/oilws3/oilws3_14079.pdf +- Video can be found here: + https://www.ipam.ucla.edu/programs/workshops/workshop-iii-data-assimilation-uncertainty-reduction-and-optimization-for-subsurface-flow/?tab=schedule + +Geir gives the ES-MDA equations as + +.. math:: + x_{j,i+1} &= x_{j,i} + (C^e_{xy})_i \left((C^e_{yy})_i + + \alpha_iC^e_{dd}\right)^{-1} \left(d + \sqrt{\alpha_i} + \varepsilon_j - g(x_{j,i})\right) \\ + y_{j,i+1} &= g(x_{j,i+1}) + +The model used for this example is + +.. math:: + y = x(1+\beta x^2) \ , + +which is a linear model if :math:`\beta=0`. 
+""" + +import numpy as np +import matplotlib.pyplot as plt + +import resmda + +# sphinx_gallery_thumbnail_number = 3 + +############################################################################### +# Forward model +# ------------- + + +def forward(x, beta): + """Simple model: y = x (1 + β x²) (linear if beta=0).""" + return np.atleast_1d(x * (1 + beta * x**2)) + + +fig, axs = plt.subplots( + 1, 2, figsize=(8, 3), sharex=True, constrained_layout=True) +fig.suptitle("Forward Model: y = x (1 + β x²)") +px = np.linspace(-5, 5, 301) +for i, b in enumerate([0.0, 0.2]): + axs[i].set_title( + f"{['Linear model', 'Nonlinear model'][i]}: $\\beta$ = {b}") + axs[i].plot(px, forward(px, b)) + axs[i].set_xlabel('x') + axs[i].set_ylabel('y') + + +############################################################################### +# Plotting functions +# ------------------ + +def pseudopdf(data, bins=200, density=True, **kwargs): + """Return the pdf from a simple bin count. + + If the data contains a lot of samples, this should be "smooth" enough - and + much faster than estimating the pdf using, e.g., + `scipy.stats.gaussian_kde`. 
+ """ + x, y = np.histogram(data, bins=bins, density=density, **kwargs) + return (y[:-1]+y[1:])/2, x + + +def plot_result(mpost, dpost, dobs, title, ylim): + """Wrapper to use the same plotting for the linear and non-linear case.""" + + fig, (ax1, ax2) = plt.subplots( + 1, 2, figsize=(10, 4), sharey=True, constrained_layout=True) + fig.suptitle(title) + + # Plot Likelihood + ax2.plot( + *pseudopdf(resmda.utils.rng().normal(dobs, size=(ne, dobs.size))), + 'C2', lw=2, label='Datum' + ) + + # Plot steps + na = mpost.shape[0]-1 + for i in range(na+1): + params = { + 'color': 'C0' if i == na else 'C3', # Last blue, rest red + 'lw': 2 if i in [0, na] else 1, # First/last thick + 'alpha': 1 if i in [0, na] else i/na, # start faint + 'label': ['Initial', *((na-2)*('',)), 'MDA steps', 'MDA'][i], + } + ax1.plot(*pseudopdf(mpost[i, :, 0], range=(-3, 5)), **params) + ax2.plot(*pseudopdf(dpost[i, :, 0], range=(-5, 8)), **params) + + # Axis and labels + ax1.set_title('Model Parameter Domain') + ax1.set_xlabel('x') + ax1.set_ylim(ylim) + ax1.set_xlim([-3, 5]) + ax1.legend() + ax2.set_title('Data Domain') + ax2.set_xlabel('y') + ax2.set_xlim([-5, 8]) + ax2.legend() + fig.show() + + +############################################################################### +# Linear case +# ----------- +# +# Prior model parameters and ES-MDA parameters +# '''''''''''''''''''''''''''''''''''''''''''' +# +# In reality, the prior would be :math:`j` models provided by, e.g., the +# geologists. Here we create $j$ realizations using a normal distribution of a +# defined mean and standard deviation. 
+ +# Point of our "observation" +xlocation = -1.0 + +# Ensemble size +ne = int(1e7) + +# Data standard deviation: ones (for this scenario) +obs_std = 1.0 + +# Prior: Let's start with ones as our prior guess +mprior = resmda.utils.rng().normal(loc=1.0, scale=obs_std, size=(ne, 1)) + + +############################################################################### +# Run ES-MDA and plot +# ''''''''''''''''''' + +def lin_fwd(x): + """Linear forward model.""" + return forward(x, beta=0.0) + + +# Sample an "observation". +l_dobs = lin_fwd(xlocation) + +lm_post, ld_post = resmda.esmda( + model_prior=mprior, + forward=lin_fwd, + data_obs=l_dobs, + sigma=obs_std, + alphas=10, + return_steps=True, # To get intermediate results +) + + +############################################################################### + +plot_result(lm_post, ld_post, l_dobs, title='Linear Case', ylim=[0, 0.6]) + + +############################################################################### +# Original figure from Geir's presentation +# '''''''''''''''''''''''''''''''''''''''' +# +# .. image:: ../_static/figures/Geir-IrisTalk-2017-34.png + + +############################################################################### +# Nonlinear case +# -------------- + +def nonlin_fwd(x): + """Nonlinear forward model.""" + return forward(x, beta=0.2) + + +# Sample a nonlinear observation; the rest of the parameters remains the same. +n_dobs = nonlin_fwd(xlocation) +nm_post, nd_post = resmda.esmda( + model_prior=mprior, + forward=nonlin_fwd, + data_obs=n_dobs, + sigma=obs_std, + alphas=10, + return_steps=True, +) + +############################################################################### + +plot_result(nm_post, nd_post, n_dobs, title='Nonlinear Case', ylim=[0, 0.7]) + + +############################################################################### +# Original figure from Geir's presentation +# '''''''''''''''''''''''''''''''''''''''' +# +# .. 
image:: ../_static/figures/Geir-IrisTalk-2017-38.png + + +############################################################################### + +resmda.Report() diff --git a/examples/example.py b/examples/basicreservoir.py similarity index 95% rename from examples/example.py rename to examples/basicreservoir.py index 7f10e36..debfebe 100644 --- a/examples/example.py +++ b/examples/basicreservoir.py @@ -1,6 +1,6 @@ r""" -Minimum example of resmda -========================= +2D Reservoir ESMDA example +========================== Data Assimilation using ESMDA in Reservoir Simulation ----------------------------------------------------- @@ -8,10 +8,9 @@ *Advanced Data Assimilation using Ensemble Smoother Multiple Data Assimilation (ESMDA) in Reservoir Simulation.* -$$ -m_j^a = m_j^f + C_{MD}^f (C_{DD}^f + \alpha_i C_D)^{-1} (d_{uc,j} - -d_j^f);\quad \text{for} \quad j=1, 2, \dots, N_e -$$ +.. math:: + m_j^a = m_j^f + C_{MD}^f (C_{DD}^f + \alpha_i C_D)^{-1} (d_{uc,j} - + d_j^f);\quad \text{for} \quad j=1, 2, \dots, N_e - Prior model: M (Ne, Nx, Ny) - Prior data: D (Ne, Nt) @@ -90,6 +89,7 @@ ax.set_xlabel('x-direction') for ax in axs[:, 0]: ax.set_ylabel('y-direction') +fig.show() ############################################################################### @@ -119,6 +119,7 @@ def sim(x): ax.legend() ax.set_xlabel('Time (???)') ax.set_ylabel('Pressure (???)') +fig.show() ############################################################################### @@ -158,6 +159,7 @@ def restrict_permeability(x): ax[1].imshow(perm_post.mean(axis=0).T) ax[2].set_title('"Truth"') ax[2].imshow(perm_true.T) +fig.show() ############################################################################### @@ -177,8 +179,8 @@ def restrict_permeability(x): ax.plot(time, data_obs, 'C3o') ax.set_xlabel('Time (???)') ax.set_ylabel('Pressure (???)') +fig.show() -plt.show() ############################################################################### diff --git a/resmda/data_assimilation.py 
b/resmda/data_assimilation.py index 8950184..eb34f89 100644 --- a/resmda/data_assimilation.py +++ b/resmda/data_assimilation.py @@ -26,10 +26,12 @@ def __dir__(): def esmda(model_prior, forward, data_obs, sigma, alphas=4, data_prior=None, - callback_post=None, return_post_data=False, random=None, - enable_localization=False, localization_matrix=None): + callback_post=None, return_post_data=True, return_steps=False, + random=None, enable_localization=False, localization_matrix=None): """ESMDA algorithm (Emerick and Reynolds, 2013) with optional localization. + ES-MDA as presented by [EmRe13]_. + Parameters ---------- @@ -51,8 +53,11 @@ def esmda(model_prior, forward, data_obs, sigma, alphas=4, data_prior=None, Prior data ensemble, of shape (ne, nd). callback_post : function, default: None Function to be executed after each ESMDA iteration. - return_post_data : bool, default: False + return_post_data : bool, default: True If true, returns data + return_steps : bool, default: False + If true, returns model (and data) of all ESMDA steps. + Setting ``return_steps`` to True will enforce ``return_post_data``. random : {None, int, np.random.Generator}, default: None Seed or random generator for reproducibility. enable_localization : bool, default: False @@ -88,7 +93,7 @@ def esmda(model_prior, forward, data_obs, sigma, alphas=4, data_prior=None, # Loop over alphas for i, alpha in enumerate(alphas): - print(f"ESMDA step {i+1}; α={alpha}") + print(f"ESMDA step {i+1: 3d}; α={alpha}") # == Step (a) of Emerick & Reynolds, 2013 == # Run the ensemble from time zero. @@ -142,8 +147,22 @@ def esmda(model_prior, forward, data_obs, sigma, alphas=4, data_prior=None, if callback_post: callback_post(model_post) - # Return posterior model and corresponding data - return model_post, forward(model_post) + if return_steps: + if i == 0: + all_models = np.zeros((alphas.size+1, *model_post.shape)) + all_models[0, ...] 
= model_prior + all_data = np.zeros((alphas.size+1, *data_prior.shape)) + all_models[i+1, ...] = model_post.copy() + all_data[i, ...] = data_prior + + # Return posterior model and corresponding data (if wanted) + if return_steps: + all_data[-1, ...] = forward(model_post) + return all_models, all_data + elif return_post_data: + return model_post, forward(model_post) + else: + return model_post def convert_positions_to_indices(positions, grid_dimensions): diff --git a/resmda/utils.py b/resmda/utils.py index 270a8a6..b617ef2 100644 --- a/resmda/utils.py +++ b/resmda/utils.py @@ -32,10 +32,6 @@ def __dir__(): return __all__ -# Instantiate a random number generator ONCE -RANDOM_NUMBER_GENERATOR = np.random.default_rng() - - def rng(random=None): """Module-wide Random Number Generator. @@ -65,7 +61,9 @@ def rng(random=None): elif isinstance(random, np.random.Generator): return random else: - return RANDOM_NUMBER_GENERATOR + if not hasattr(rng, '_rng'): + rng._rng = np.random.default_rng() + return rng._rng class Report(ScoobyReport):