Merge pull request #283 from uafgeotools/dc_plot_update
Update to double-couple plots (random-grid compatibility)
thurinj authored Nov 27, 2024
2 parents b202347 + 59a5967 commit 959d5d6
Showing 3 changed files with 186 additions and 33 deletions.
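As a usage note: with this change, the public double-couple plotting functions accept randomly drawn grids (DataFrames) in addition to regular grids. Below is a minimal sketch of the new path; the DataFrame layout (kappa, sigma, h columns plus a misfit column) follows the binning code in this PR, but the data values are toy numbers, and whether a bare pandas DataFrame passes mtuq's internal _check is an assumption here. The nbins, clip_interval, and colorbar_label keywords are the ones introduced or exposed by this change.

import numpy as np
from pandas import DataFrame
from mtuq.graphics import plot_misfit_dc

# Toy randomly drawn double-couple orientations with a stand-in misfit column
rng = np.random.default_rng(0)
n = 10000
df = DataFrame({
    'kappa': rng.uniform(0., 360., n),   # strike (degrees)
    'sigma': rng.uniform(-90., 90., n),  # slip/rake (degrees)
    'h': rng.uniform(0., 1., n),         # cos(dip)
    'misfit': rng.random(n),             # hypothetical waveform misfit values
})

# DataFrame input now takes the binned random-grid path instead of warning and returning
plot_misfit_dc('misfit_dc.png', df, nbins=25, clip_interval=[0, 90],
    colorbar_label='L2 misfit')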
3 changes: 2 additions & 1 deletion mtuq/graphics/__init__.py
@@ -20,7 +20,8 @@
_plot_vw, _product_vw

from mtuq.graphics.uq.double_couple import\
plot_misfit_dc, plot_likelihood_dc, plot_marginal_dc, _plot_dc
plot_misfit_dc, plot_likelihood_dc, plot_marginal_dc, _plot_dc, \
plot_variance_reduction_dc

from mtuq.graphics.uq.force import\
plot_misfit_force, plot_likelihood_force, plot_marginal_force,\
60 changes: 44 additions & 16 deletions mtuq/graphics/uq/_matplotlib.py
@@ -24,7 +24,7 @@ def cbformat(st, pos):
# It should behave as similarly as possible to the GMT backend
# and take the same input arguments
def _plot_lune_matplotlib(filename, longitude, latitude, values,
best_vw=None, lune_array=None, colormap='viridis', plot_type='contour', **kwargs):
best_vw=None, lune_array=None, colormap='viridis', plot_type='contour', clip_interval=[0,100], **kwargs):

""" Plots DataArray values on the eigenvalue lune (requires matplotlib)
@@ -61,6 +61,9 @@ def _plot_lune_matplotlib(filename, longitude, latitude, values,
if _nothing_to_plot(values):
return

if not isinstance(clip_interval, list) or len(clip_interval) != 2:
raise Exception('clip_interval must be a list of two floats')

_development_warning()

# Check plot_type. Can be either contour or colormesh
@@ -75,9 +78,9 @@ def _plot_lune_matplotlib(filename, longitude, latitude, values,
x, y = _hammer_projection(x, y)

# Plot data
# Use the percentile method to filter out outliers (Will alway clip the 10% greater values)
# Use the percentile method to filter out outliers based on clip_interval (in percentage)
if plot_type == 'colormesh':
vmin, vmax = np.nanpercentile(np.asarray(values), [0,75])
vmin, vmax = np.nanpercentile(np.asarray(values), clip_interval)
im = ax.pcolormesh(x, y, values, cmap=colormap, vmin=vmin, vmax=vmax, shading='nearest', zorder=10)
elif plot_type == 'contour':
# Plot using contourf
@@ -143,7 +146,8 @@ def _plot_lune_matplotlib(filename, longitude, latitude, values,
pyplot.savefig(filename, dpi=300)
pyplot.close()

def _plot_force_matplotlib(filename, phi, h, values, best_force=None, colormap='viridis', title=None, plot_type='contour', **kwargs):
def _plot_force_matplotlib(filename, phi, h, values, best_force=None, colormap='viridis',
title=None, plot_type='contour', clip_interval=[0,100], **kwargs):
""" Plots DataArray values on the force sphere (requires matplotlib)
.. rubric :: Keyword arguments
@@ -176,6 +180,9 @@ def _plot_force_matplotlib(filename, phi, h, values, best_force=None, colormap='
if _nothing_to_plot(values):
return

if not isinstance(clip_interval, list) or len(clip_interval) != 2:
raise Exception('clip_interval must be a list of two floats')

_development_warning()

# Check plot_type. Can be either contour or colormesh
@@ -210,7 +217,7 @@ def _plot_force_matplotlib(filename, phi, h, values, best_force=None, colormap='
x2, y2 = np.meshgrid(lon2, latitude)
x1, y1 = _hammer_projection(x1, y1)
x2, y2 = _hammer_projection(x2, y2)
vmin, vmax = np.nanpercentile(np.asarray(values), [0,75])
vmin, vmax = np.nanpercentile(np.asarray(values), clip_interval)
im = ax.pcolormesh(x1, y1, values1, cmap=colormap, vmin=vmin, vmax=vmax, shading='auto', zorder=10)
im = ax.pcolormesh(x2, y2, values2, cmap=colormap, vmin=vmin, vmax=vmax, shading='auto', zorder=10)

@@ -266,24 +273,26 @@ def _plot_force_matplotlib(filename, phi, h, values, best_force=None, colormap='


def _plot_dc_matplotlib(filename, coords,
values_h_kappa, values_sigma_kappa, values_sigma_h,
title=None, best_dc=None, figsize=(8., 8.), fontsize=14, **kwargs):
values_h_kappa, values_sigma_kappa, values_sigma_h, colormap='viridis',
title=None, best_dc=None, plot_colorbar=True, figsize=(8., 8.), fontsize=14, clip_interval=[0,100], **kwargs):

_development_warning()
if not isinstance(clip_interval, list) or len(clip_interval) != 2:
raise Exception('clip_interval must be a list of two floats')

colormap = kwargs.get('colormap', 'viridis')
_development_warning()

# prepare axes
# Prepare axes
fig, axes = pyplot.subplots(2, 2,
figsize=figsize,
)
)

pyplot.subplots_adjust(
wspace=0.4,
hspace=0.4,
)
right=0.88
)

# parse colormap
# Parse colormap
if exists(_cpt_path(colormap)):
colormap = read_cpt(_cpt_path(colormap))

@@ -295,10 +304,10 @@ def _plot_dc_matplotlib(filename, coords,
vals = np.append(np.append(values_h_kappa.ravel(), values_sigma_kappa.ravel()), values_sigma_h.ravel())
# Plot data
# Use the percentile method to filter out outliers based on clip_interval (in percentage)
vmin, vmax = np.nanpercentile(vals, [0,75])
vmin, vmax = np.nanpercentile(vals, clip_interval)

# plot surfaces
_pcolor(axes[0][0], coords['h'], coords['kappa'], values_h_kappa, colormap, vmin=vmin, vmax=vmax)
im0 = _pcolor(axes[0][0], coords['h'], coords['kappa'], values_h_kappa, colormap, vmin=vmin, vmax=vmax)

_pcolor(axes[0][1], coords['sigma'], coords['kappa'], values_sigma_kappa, colormap, vmin=vmin, vmax=vmax)

@@ -313,7 +322,23 @@

_set_dc_labels(axes, fontsize=fontsize)

pyplot.savefig(filename)
if plot_colorbar:
cbar_ax = fig.add_axes([0.91, 0.13, 0.0125, 0.27]) # [left, bottom, width, height]
cb = pyplot.colorbar(im0, cax=cbar_ax, orientation='vertical')
formatter = ticker.ScalarFormatter()
formatter.set_powerlimits((-2, 2)) # Avoid scientific notation between 10^-2 and 10^2
cb.ax.yaxis.set_major_formatter(formatter)
if 'colorbar_label' in kwargs:
cb.set_label(kwargs['colorbar_label'])
else:
cb.set_label('l2-misfit')


# Set the title if provided
if title:
fig.suptitle(title, fontsize=fontsize + 2)

pyplot.savefig(filename, dpi=300)
pyplot.close()


@@ -618,6 +643,9 @@ def _pcolor(axis, x, y, values, colormap, **kwargs):
except:
axis.pcolor(x, y, values, cmap=colormap, **kwargs)

# Return a mappable object
return axis.collections[0]


def _set_dc_labels(axes, **kwargs):

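A note on the clip_interval keyword threaded through the three backends above: it is a plain percentile clip on the color range. Stripped of plotting details, the mechanism reduces to the self-contained sketch below (toy data, not from this PR); vmin and vmax are then passed to pcolormesh exactly as in the backends, so the default [0, 100] spans the full data range while the old hard-coded [0, 75] clipping remains available on request.

import numpy as np

values = np.append(np.random.rand(1000), [50., 100.])  # toy field with two outliers
clip_interval = [0, 95]                                 # keep the 0th-95th percentile range

# Values beyond the upper percentile saturate at vmax instead of stretching the color scale
vmin, vmax = np.nanpercentile(values, clip_interval)
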
156 changes: 140 additions & 16 deletions mtuq/graphics/uq/double_couple.py
@@ -44,9 +44,7 @@ def plot_misfit_dc(filename, ds, **kwargs):
misfit = _misfit_dc_regular(ds)

elif issubclass(type(ds), DataFrame):
warn('plot_misfit_dc() not implemented for irregularly-spaced grids.\n'
'No figure will be generated.')
return
misfit = _misfit_dc_random(ds, **kwargs)

_plot_dc(filename, misfit, **kwargs)

@@ -84,9 +82,7 @@ def plot_likelihood_dc(filename, ds, var, **kwargs):
likelihoods = _likelihoods_dc_regular(ds, var)

elif issubclass(type(ds), DataFrame):
warn('plot_likelihood_dc() not implemented for irregularly-spaced grids.\n'
'No figure will be generated.')
return
likelihoods = _likelihoods_dc_random(ds, var, **kwargs)

_plot_dc(filename, likelihoods, **kwargs)

@@ -124,9 +120,7 @@ def plot_marginal_dc(filename, ds, var, **kwargs):
marginals = _marginals_dc_regular(ds, var)

elif issubclass(type(ds), DataFrame):
warn('plot_marginal_dc() not implemented for irregularly-spaced grids.\n'
'No figure will be generated.')
return
marginals = _marginals_dc_random(ds, var, **kwargs)

_plot_dc(filename, marginals, **kwargs)

@@ -161,19 +155,17 @@ def plot_variance_reduction_dc(filename, ds, data_norm, **kwargs):
_check(ds)

if issubclass(type(ds), DataArray):
variance_reduction = _variance_reduction_dc_regular(ds, var)
variance_reduction = _variance_reduction_dc_regular(ds, data_norm)

elif issubclass(type(ds), DataFrame):
warn('plot_variance_reduction_dc() not implemented for irregularly-spaced grids.\n'
'No figure will be generated.')
return
variance_reduction = _variance_reduction_dc_random(ds, data_norm, **kwargs)

_plot_dc(filename, variance_reduction, **kwargs)



def _plot_dc(filename, da, show_best=True, colormap='hot',
backend=_plot_dc_matplotlib, squeeze='min', **kwargs):
def _plot_dc(filename, da, show_best=True, backend=_plot_dc_matplotlib,
squeeze='min', **kwargs):

""" Plots DataArray values over strike, dip, slip
@@ -260,6 +252,20 @@ def _misfit_dc_regular(da):
'best_dc': _min_dc(da),
})

def _misfit_dc_random(df, **kwargs):
df = df.copy()
df = df.reset_index()

# Ensure 'misfit' column exists
if 'misfit' not in df.columns:
df['misfit'] = df.iloc[:, -1]

da = _bin_dc_regular(df, lambda group: group['misfit'].min(), **kwargs)

return da.assign_attrs({
'best_dc': _min_dc(da),
})


def _likelihoods_dc_regular(da, var):
""" For each moment tensor orientation, calculate maximum likelihood
@@ -278,6 +284,31 @@ def _likelihoods_dc_regular(da, var):
'maximum_likelihood_estimate': dataarray_idxmax(likelihoods).values(),
})

def _likelihoods_dc_random(df, var, **kwargs):
"""
Calculate max likelihood from random dataset bins, ensuring global normalization.
"""

likelihoods = df.copy().reset_index()

# Ensure 'misfit' column exists
if 'misfit' not in likelihoods.columns:
likelihoods.rename(columns={likelihoods.columns[-1]: 'misfit'}, inplace=True)

# Compute likelihoods globally and normalize BEFORE binning
likelihoods['likelihood'] = np.exp(-likelihoods['misfit'] / (2. * var))
likelihoods['likelihood'] /= likelihoods['likelihood'].sum() # Global normalization

# Apply binning, operating on globally normalized likelihoods
binned_likelihoods = _bin_dc_regular(
likelihoods, lambda group: group['likelihood'].max(), **kwargs
)

return binned_likelihoods.assign_attrs({
'best_dc': _max_dc(binned_likelihoods),
'maximum_likelihood_estimate': dataarray_idxmax(binned_likelihoods).values(),
})


def _marginals_dc_regular(da, var):
""" For each moment tensor orientation, calculate marginal likelihood
Expand All @@ -293,6 +324,30 @@ def _marginals_dc_regular(da, var):
'best_dc': _max_dc(marginals),
})

def _marginals_dc_random(df, var, **kwargs):
"""
Calculate marginal likelihoods for random bins with global normalization.
"""

likelihoods = df.copy().reset_index()

if 'misfit' not in likelihoods.columns:
likelihoods.rename(columns={likelihoods.columns[-1]: 'misfit'}, inplace=True)

# Compute likelihoods and normalize globally
likelihoods['likelihood'] = np.exp(-likelihoods['misfit'] / (2. * var))
likelihoods['likelihood'] /= likelihoods['likelihood'].sum() # Global normalization

# Sum within bins after global normalization
marginals = _bin_dc_regular(
likelihoods, lambda group: group['likelihood'].sum(), **kwargs
)

# No need for further normalization, already globally adjusted
return marginals.assign_attrs({
'best_dc': _max_dc(marginals),
})
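
The two random-grid helpers above differ only in their per-bin reduction: _likelihoods_dc_random keeps the best sample in each bin (max), while _marginals_dc_random accumulates probability mass (sum); both normalize globally before binning. A toy sketch of the shared pattern, with hypothetical misfits and a single integer bin label standing in for the (kappa, sigma, h) indices:

import numpy as np
import pandas as pd

var = 0.5
df = pd.DataFrame({'bin': [0, 0, 1, 1], 'misfit': [1.0, 2.0, 1.5, 3.0]})

# Global normalization BEFORE binning, as in the code above
df['likelihood'] = np.exp(-df['misfit'] / (2. * var))
df['likelihood'] /= df['likelihood'].sum()

per_bin_max = df.groupby('bin')['likelihood'].max()  # likelihood-surface reduction
per_bin_sum = df.groupby('bin')['likelihood'].sum()  # marginal reduction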


def _variance_reduction_dc_regular(da, data_norm):
""" For each moment tensor orientation, extracts maximum variance reduction
@@ -308,9 +363,22 @@ def _variance_reduction_dc_regular(da, data_norm):
return variance_reduction.assign_attrs({
'best_mt': _min_mt(da),
'best_dc': _min_dc(da),
'lune_array': _lune_array(da),
})

def _variance_reduction_dc_random(df, data_norm, **kwargs):
df = df.copy()
df = df.reset_index()

# Ensure 'misfit' column exists
if 'misfit' not in df.columns:
df['misfit'] = df.iloc[:, -1]

da = _bin_dc_regular(df, lambda group: 1. - group['misfit'].min()/data_norm, **kwargs)

return da.assign_attrs({
'best_dc': _max_dc(da),
})
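
Since the per-bin handle above computes 1 - min(misfit)/data_norm, larger values are better, which is why the best orientation is taken with _max_dc here rather than _min_dc. Assuming an L2 misfit, a one-line worked example:

# Hypothetical numbers: misfit ||d - s||^2 = 0.3, squared data norm ||d||^2 = 1.2
misfit, data_norm = 0.3, 1.2
variance_reduction = 1. - misfit / data_norm  # 0.75, i.e. 75% of the data energy explained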


#
# utility functions
@@ -351,6 +419,62 @@ def _max_dc(da):
return dc_vals


def _bin_dc_regular(df, handle, nbins=25, **kwargs):
"""Bins irregularly-spaced moment tensor orientations into regular grids for plotting."""
# Orientation bins
kappa_min, kappa_max = 0, 360
sigma_min, sigma_max = -90, 90
h_min, h_max = 0, 1

kappa_edges = np.linspace(kappa_min, kappa_max, nbins + 1)
sigma_edges = np.linspace(sigma_min, sigma_max, nbins + 1)
h_edges = np.linspace(h_min, h_max, nbins + 1)

kappa_centers = 0.5 * (kappa_edges[:-1] + kappa_edges[1:])
sigma_centers = 0.5 * (sigma_edges[:-1] + sigma_edges[1:])
h_centers = 0.5 * (h_edges[:-1] + h_edges[1:])

# Prepare the data arrays
kappa_vals = df['kappa'].values
sigma_vals = df['sigma'].values
h_vals = df['h'].values

# Compute bin indices for each data point
kappa_indices = np.digitize(kappa_vals, kappa_edges) - 1
sigma_indices = np.digitize(sigma_vals, sigma_edges) - 1
h_indices = np.digitize(h_vals, h_edges) - 1

# Ensure indices are within valid range
kappa_indices = np.clip(kappa_indices, 0, nbins - 1)
sigma_indices = np.clip(sigma_indices, 0, nbins - 1)
h_indices = np.clip(h_indices, 0, nbins - 1)

# Add bin indices to DataFrame
df = df.copy()
df['kappa_idx'] = kappa_indices
df['sigma_idx'] = sigma_indices
df['h_idx'] = h_indices

# Group by bin indices
# grouped = df.groupby(['h_idx', 'sigma_idx', 'kappa_idx'])
grouped = df.groupby(['kappa_idx', 'sigma_idx', 'h_idx'])

# Initialize the output array with appropriate data type
binned = np.full((nbins, nbins, nbins), np.nan)

# Process each group
for (k_idx, s_idx, h_idx), group in grouped:
# Apply the handle function to the group DataFrame
binned[k_idx, s_idx, h_idx] = handle(group)

# Create the DataArray
da = DataArray(
data=binned,
dims=('kappa', 'sigma', 'h'),
coords={'kappa': kappa_centers, 'sigma': sigma_centers, 'h': h_centers},
)

return da
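
A usage sketch for the binning helper above (a private helper, so the import path and behavior are subject to change; the DataFrame is toy data):

import numpy as np
import pandas as pd
from mtuq.graphics.uq.double_couple import _bin_dc_regular

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'kappa': rng.uniform(0., 360., 5000),
    'sigma': rng.uniform(-90., 90., 5000),
    'h': rng.uniform(0., 1., 5000),
    'misfit': rng.random(5000),
})

# Reduce each (kappa, sigma, h) cell to its minimum misfit, as _misfit_dc_random does;
# unsampled cells stay NaN
da = _bin_dc_regular(df, lambda group: group['misfit'].min(), nbins=10)
print(da.dims, da.shape)  # ('kappa', 'sigma', 'h') (10, 10, 10)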

def _check(ds):
""" Checks data structures
