feat: utility to match fit results to arbitrary model (#288)
* added model_utils.match_fit_results function to match fit results to a different model
* switched from np.diag to np.diagflat
* added warning in model_utils.prediction for mismatched parameter names between fit results and model
* updated pre-commit hooks
alexander-held authored Oct 1, 2021
1 parent b01edb3 commit 054c2ef
Showing 4 changed files with 152 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
   - id: flake8
     additional_dependencies: [flake8-bugbear, flake8-import-order, flake8-print]
 - repo: https://github.com/asottile/pyupgrade
-  rev: v2.26.0
+  rev: v2.29.0
   hooks:
   - id: pyupgrade
     args: ["--py37-plus"]
4 changes: 2 additions & 2 deletions README.md
@@ -2,9 +2,9 @@

 [![CI status](https://github.com/scikit-hep/cabinetry/workflows/CI/badge.svg)](https://github.com/scikit-hep/cabinetry/actions?query=workflow%3ACI)
 [![Documentation Status](https://readthedocs.org/projects/cabinetry/badge/?version=latest)](https://cabinetry.readthedocs.io/en/latest/?badge=latest)
-[![codecov](https://codecov.io/gh/scikit-hep/cabinetry/branch/master/graph/badge.svg)](https://codecov.io/gh/scikit-hep/cabinetry)
+[![Codecov](https://codecov.io/gh/scikit-hep/cabinetry/branch/master/graph/badge.svg)](https://codecov.io/gh/scikit-hep/cabinetry)
 [![PyPI version](https://badge.fury.io/py/cabinetry.svg)](https://badge.fury.io/py/cabinetry)
-[![python version](https://img.shields.io/pypi/pyversions/cabinetry.svg)](https://pypi.org/project/cabinetry/)
+[![Python version](https://img.shields.io/pypi/pyversions/cabinetry.svg)](https://pypi.org/project/cabinetry/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4742752.svg)](https://doi.org/10.5281/zenodo.4742752)
72 changes: 71 additions & 1 deletion src/cabinetry/model_utils.py
@@ -269,7 +269,7 @@ def yield_stdev(

     labels = model.config.par_names()
     # continue with off-diagonal contributions if there are any
-    if np.count_nonzero(corr_mat - np.diag(np.ones_like(parameters))) > 0:
+    if np.count_nonzero(corr_mat - np.diagflat(np.ones_like(parameters))) > 0:
         # loop over pairs of parameters
         for i_par in range(model.config.npars):
             for j_par in range(model.config.npars):
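For 1-D input the two calls are equivalent, so this change is presumably defensive against non-1-D inputs; a minimal sketch of the distinction (not part of the commit):

```python
import numpy as np

# for 1-D input, np.diag and np.diagflat agree: both construct a diagonal matrix
vec = np.ones(3)
assert np.array_equal(np.diag(vec), np.diagflat(vec))

# for 2-D input they diverge: np.diag extracts the diagonal as a 1-D array,
# while np.diagflat first flattens its input and still constructs a matrix
mat = np.ones((2, 2))
print(np.diag(mat).shape)      # (2,)   -> diagonal extracted
print(np.diagflat(mat).shape)  # (4, 4) -> diagonal matrix constructed
```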
@@ -336,6 +336,8 @@ def prediction(
         ModelPrediction: model, yields and uncertainties per bin and channel
     """
     if fit_results is not None:
+        if fit_results.labels != model.config.par_names():
+            log.warning("parameter names in fit results and model do not match")
         # fit results specified, so they are used
         param_values = fit_results.bestfit
         param_uncertainty = fit_results.uncertainty
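A short sketch of when this warning fires; `model` and the labels "a"/"b" are illustrative assumptions, not part of the commit:

```python
import numpy as np

from cabinetry import model_utils
from cabinetry.fit import FitResults

# `model` is assumed to be a pyhf.pdf.Model built elsewhere

# hypothetical fit results whose labels do not match model.config.par_names()
fit_results = FitResults(
    np.asarray([1.01, 1.1]),               # best-fit parameter values
    np.asarray([0.03, 0.1]),               # parameter uncertainties
    ["a", "b"],                            # labels differing from the model's
    np.asarray([[1.0, 0.2], [0.2, 1.0]]),  # correlation matrix
    0.0,                                   # best-fit -2 log(likelihood)
)

# logs "parameter names in fit results and model do not match"
model_utils.prediction(model, fit_results=fit_results)
```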
@@ -484,3 +486,71 @@ def _filter_channels(
         )

     return filtered_channels
+
+
+def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:
+    """Matches results from a fit to a model by adding or removing parameters as needed.
+
+    If the fit results contain parameters that are missing in the model, these
+    parameters are not included in the returned fit results. If the fit results do
+    not include parameters used in the model, they are added to the fit results. The
+    best-fit values for such parameters are the Asimov values as returned by
+    ``asimov_parameters`` (initial parameter settings for unconstrained parameters),
+    and the associated uncertainties are those given by ``prefit_uncertainties``
+    (zero uncertainty for unconstrained or fixed parameters). These parameters are
+    furthermore assumed to have no correlation with any other parameters. If
+    required, parameters are re-ordered to match the target model.
+
+    Args:
+        model (pyhf.pdf.Model): model to match fit results to
+        fit_results (FitResults): fit results to be updated in order to match model
+
+    Returns:
+        FitResults: fit results matching the model
+    """
+    # start with the assumption that the provided fit results contain no relevant
+    # information for the target model at all, and initialize everything accordingly;
+    # information contained in the provided fit results is then injected, and the new
+    # fit results matching the target model are returned at the end
+
+    bestfit = asimov_parameters(model)  # Asimov parameter values for target model
+    uncertainty = prefit_uncertainties(model)  # pre-fit uncertainties for target model
+    labels = model.config.par_names()  # labels for target model
+
+    # indices of parameters in current fit results, or None if they are missing
+    indices_for_corr: List[Optional[int]] = [None] * len(labels)
+
+    # loop over all required parameters
+    for target_idx, target_label in enumerate(labels):
+        # if a parameter is missing in the fit results, no further action is needed
+        # here, as all relevant objects have been initialized with that assumption
+        if target_label in fit_results.labels:
+            # fit results contain the parameter, find its position
+            idx_in_fit_results = fit_results.labels.index(target_label)
+            # override the pre-fit values with the actual fit results
+            bestfit[target_idx] = fit_results.bestfit[idx_in_fit_results]
+            uncertainty[target_idx] = fit_results.uncertainty[idx_in_fit_results]
+            # update indices for correlation matrix reconstruction
+            indices_for_corr[target_idx] = idx_in_fit_results
+
+    # re-build correlation matrix: start with a diagonal matrix (assuming the fit
+    # results contain no relevant info), then insert the values they provide
+    corr_mat = np.diagflat(np.ones_like(labels, dtype=float))
+    for i_target, i_orig in enumerate(indices_for_corr):
+        for j_target, j_orig in enumerate(indices_for_corr):
+            # i_target and j_target are positions in the matched correlation matrix,
+            # i_orig and j_orig are positions in the old matrix (None if missing)
+            if i_orig is not None and j_orig is not None:
+                # if i_orig or j_orig is None, one of the parameters is not part of
+                # the original fit results, and no update is needed
+                corr_mat[i_target][j_target] = fit_results.corr_mat[i_orig][j_orig]
+
+    fit_results_matched = FitResults(
+        np.asarray(bestfit),
+        np.asarray(uncertainty),
+        labels,
+        corr_mat,
+        fit_results.best_twice_nll,
+        fit_results.goodness_of_fit,
+    )
+    return fit_results_matched
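A hedged usage sketch of the new utility; `model_b` and `fit_results` are illustrative assumptions (fit results from a fit to some other model `model_a` that shares only part of its parameters with `model_b`):

```python
from cabinetry import model_utils

# assumption: fit_results stems from a fit to model_a, while model_b drops,
# adds, or reorders some of model_a's parameters
matched = model_utils.match_fit_results(model_b, fit_results)

# parameters present in both models keep their fitted values, uncertainties,
# and mutual correlations; parameters only present in model_b fall back to
# Asimov values, pre-fit uncertainties, and zero correlation with the rest
assert matched.labels == model_b.config.par_names()

# using the matched results avoids the parameter-name mismatch warning
pred = model_utils.prediction(model_b, fit_results=matched)
```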
83 changes: 78 additions & 5 deletions tests/test_model_utils.py
@@ -164,7 +164,7 @@ def test_yield_stdev(example_spec, example_spec_multibin):
     # pre-fit
     parameters = np.asarray([1.0, 1.0])
     uncertainty = np.asarray([0.0495665682, 0.0])
-    diag_corr_mat = np.diag([1.0, 1.0])
+    diag_corr_mat = np.diagflat([1.0, 1.0])
     total_stdev_bin, total_stdev_chan = model_utils.yield_stdev(
         model, parameters, uncertainty, diag_corr_mat
     )
@@ -213,7 +213,8 @@
     "cabinetry.model_utils.prefit_uncertainties", return_value=([0.04956657, 0.0])
 )
 @mock.patch("cabinetry.model_utils.asimov_parameters", return_value=([1.0, 1.0]))
-def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
+def test_prediction(mock_asimov, mock_unc, mock_stdev, caplog, example_spec):
+    caplog.set_level(logging.DEBUG)
     model = pyhf.Workspace(example_spec).model()

     # pre-fit prediction
@@ -242,7 +243,7 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     fit_results = FitResults(
         np.asarray([1.01, 1.1]),
         np.asarray([0.03, 0.1]),
-        [],
+        ["staterror_Signal-Region[0]", "Signal strength"],
         np.asarray([[1.0, 0.2], [0.2, 1.0]]),
         0.0,
     )
@@ -252,6 +253,9 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     assert model_pred.total_stdev_model_bins == [[0.3]]  # from mock
     assert model_pred.total_stdev_model_channels == [0.3]  # from mock
     assert model_pred.label == "post-fit"
+    assert "parameter names in fit results and model do not match" not in [
+        rec.message for rec in caplog.records
+    ]

     assert mock_asimov.call_count == 1  # no new call
     assert mock_unc.call_count == 1  # no new call
@@ -266,9 +270,22 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     )
     assert mock_stdev.call_args_list[1][1] == {}

-    # custom label
-    model_pred = model_utils.prediction(model, label="abc")
+    caplog.clear()
+
+    # custom prediction label, mismatch in parameter names
+    fit_results = FitResults(
+        np.asarray([1.01, 1.1]),
+        np.asarray([0.03, 0.1]),
+        ["a", "b"],
+        np.asarray([[1.0, 0.2], [0.2, 1.0]]),
+        0.0,
+    )
+    model_pred = model_utils.prediction(model, fit_results=fit_results, label="abc")
+    assert "parameter names in fit results and model do not match" in [
+        rec.message for rec in caplog.records
+    ]
     assert model_pred.label == "abc"
+    caplog.clear()


 def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor):
@@ -342,3 +359,59 @@ def test__filter_channels(example_spec_multibin, caplog):
         "['region_1', 'region_2']" in [rec.message for rec in caplog.records]
     )
     caplog.clear()
+
+
+@mock.patch(
+    "cabinetry.model_utils.prefit_uncertainties",
+    side_effect=[[0.4, 0.5, 0.6], [0.4, 0.5], [0.4, 0.5, 0.6]],
+)
+@mock.patch(
+    "cabinetry.model_utils.asimov_parameters",
+    side_effect=[[4.0, 5.0, 6.0], [4.0, 5.0], [4.0, 5.0, 6.0]],
+)
+def test_match_fit_results(mock_pars, mock_uncs):
+    mock_model = mock.MagicMock()
+    fit_results = FitResults(
+        np.asarray([1.0, 2.0, 3.0]),
+        np.asarray([0.1, 0.2, 0.3]),
+        ["par_a", "par_b", "par_c"],
+        np.asarray([[1.0, 0.2, 0.5], [0.2, 1.0, 0.1], [0.5, 0.1, 1.0]]),
+        5.0,
+        0.1,
+    )
+
+    # remove par_a, flip par_b and par_c, add par_d
+    mock_model.config.par_names.return_value = ["par_c", "par_d", "par_b"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert mock_pars.call_args_list == [((mock_model,), {})]
+    assert mock_uncs.call_args_list == [((mock_model,), {})]
+    assert np.allclose(matched_fit_res.bestfit, [3.0, 5.0, 2.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.3, 0.5, 0.2])
+    assert matched_fit_res.labels == ["par_c", "par_d", "par_b"]
+    assert np.allclose(
+        matched_fit_res.corr_mat, [[1.0, 0.0, 0.1], [0.0, 1.0, 0.0], [0.1, 0.0, 1.0]]
+    )
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1
+
+    # all parameters are new
+    mock_model.config.par_names.return_value = ["par_d", "par_e"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert np.allclose(matched_fit_res.bestfit, [4.0, 5.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.4, 0.5])
+    assert matched_fit_res.labels == ["par_d", "par_e"]
+    assert np.allclose(matched_fit_res.corr_mat, [[1.0, 0.0], [0.0, 1.0]])
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1
+
+    # fit results already match the model exactly
+    mock_model.config.par_names.return_value = ["par_a", "par_b", "par_c"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert np.allclose(matched_fit_res.bestfit, [1.0, 2.0, 3.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.1, 0.2, 0.3])
+    assert matched_fit_res.labels == ["par_a", "par_b", "par_c"]
+    assert np.allclose(
+        matched_fit_res.corr_mat, [[1.0, 0.2, 0.5], [0.2, 1.0, 0.1], [0.5, 0.1, 1.0]]
+    )
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1