From 054c2efdd6de132f50ebaf5b59de7f295ffbc8ac Mon Sep 17 00:00:00 2001
From: Alexander Held <45009355+alexander-held@users.noreply.github.com>
Date: Fri, 1 Oct 2021 14:29:46 +0200
Subject: [PATCH] feat: utility to match fit results to arbitrary model (#288)

* added model_utils.match_fit_results function to match fit results to a different model
* switched from use of np.diag to np.diagflat
* added warning to model_utils.prediction for mismatch of parameter names in fit results and model
* updated pre-commit
---
 .pre-commit-config.yaml      |  2 +-
 README.md                    |  4 +-
 src/cabinetry/model_utils.py | 72 ++++++++++++++++++++++++++++++-
 tests/test_model_utils.py    | 83 +++++++++++++++++++++++++++++++++---
 4 files changed, 152 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1bcebd5b..6486c5f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
   - id: flake8
     additional_dependencies: [flake8-bugbear, flake8-import-order, flake8-print]
 - repo: https://github.com/asottile/pyupgrade
-  rev: v2.26.0
+  rev: v2.29.0
   hooks:
   - id: pyupgrade
     args: ["--py37-plus"]
diff --git a/README.md b/README.md
index 042d7ffc..949f9c57 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 [![CI status](https://github.com/scikit-hep/cabinetry/workflows/CI/badge.svg)](https://github.com/scikit-hep/cabinetry/actions?query=workflow%3ACI)
 [![Documentation Status](https://readthedocs.org/projects/cabinetry/badge/?version=latest)](https://cabinetry.readthedocs.io/en/latest/?badge=latest)
-[![codecov](https://codecov.io/gh/scikit-hep/cabinetry/branch/master/graph/badge.svg)](https://codecov.io/gh/scikit-hep/cabinetry)
+[![Codecov](https://codecov.io/gh/scikit-hep/cabinetry/branch/master/graph/badge.svg)](https://codecov.io/gh/scikit-hep/cabinetry)
 [![PyPI version](https://badge.fury.io/py/cabinetry.svg)](https://badge.fury.io/py/cabinetry)
-[![python version](https://img.shields.io/pypi/pyversions/cabinetry.svg)](https://pypi.org/project/cabinetry/)
+[![Python version](https://img.shields.io/pypi/pyversions/cabinetry.svg)](https://pypi.org/project/cabinetry/)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4742752.svg)](https://doi.org/10.5281/zenodo.4742752)
diff --git a/src/cabinetry/model_utils.py b/src/cabinetry/model_utils.py
index b62bb90f..b9ed58b1 100644
--- a/src/cabinetry/model_utils.py
+++ b/src/cabinetry/model_utils.py
@@ -269,7 +269,7 @@ def yield_stdev(
     labels = model.config.par_names()

     # continue with off-diagonal contributions if there are any
-    if np.count_nonzero(corr_mat - np.diag(np.ones_like(parameters))) > 0:
+    if np.count_nonzero(corr_mat - np.diagflat(np.ones_like(parameters))) > 0:
         # loop over pairs of parameters
         for i_par in range(model.config.npars):
             for j_par in range(model.config.npars):
@@ -336,6 +336,8 @@ def prediction(
         ModelPrediction: model, yields and uncertainties per bin and channel
     """
     if fit_results is not None:
+        if fit_results.labels != model.config.par_names():
+            log.warning("parameter names in fit results and model do not match")
         # fit results specified, so they are used
         param_values = fit_results.bestfit
         param_uncertainty = fit_results.uncertainty
@@ -484,3 +486,71 @@ def _filter_channels(
     )

     return filtered_channels
+
+
+def match_fit_results(model: pyhf.pdf.Model, fit_results: FitResults) -> FitResults:
+    """Matches results from a fit to a model by adding or removing parameters as needed.
+
+    If the fit results contain parameters that are missing in the model, these
+    parameters are not included in the returned fit results. If the fit results do not
+    include parameters used in the model, they are added to the fit results. The
+    best-fit values for such parameters are the Asimov values as returned by
+    ``asimov_parameters`` (initial parameter settings for unconstrained parameters),
+    and the associated uncertainties are those given by ``prefit_uncertainties`` (zero
+    uncertainty for unconstrained or fixed parameters). These parameters are
+    furthermore assumed to have no correlation with any other parameters. If required,
+    parameters are re-ordered to match the target model.
+
+    Args:
+        model (pyhf.pdf.Model): model to match fit results to
+        fit_results (FitResults): fit results to be updated in order to match the model
+
+    Returns:
+        FitResults: fit results matching the model
+    """
+    # start with the assumption that the provided fit results contain no relevant
+    # information for the target model at all, and initialize everything accordingly,
+    # then inject the information contained in the provided fit results and return the
+    # new fit results matching the target model at the end
+
+    bestfit = asimov_parameters(model)  # Asimov parameter values for target model
+    uncertainty = prefit_uncertainties(model)  # pre-fit uncertainties for target model
+    labels = model.config.par_names()  # labels for target model
+
+    # indices of parameters in current fit results, or None if they are missing
+    indices_for_corr: List[Optional[int]] = [None] * len(labels)
+
+    # loop over all required parameters
+    for target_idx, target_label in enumerate(labels):
+        # if a parameter is missing in the fit results, no further action is needed
+        # here, as all relevant objects have been initialized with that assumption
+        if target_label in fit_results.labels:
+            # fit results contain the parameter, find its position
+            idx_in_fit_results = fit_results.labels.index(target_label)
+            # override the pre-fit values with the actual fit results for the parameter
+            bestfit[target_idx] = fit_results.bestfit[idx_in_fit_results]
+            uncertainty[target_idx] = fit_results.uncertainty[idx_in_fit_results]
+            # update indices for correlation matrix reconstruction
+            indices_for_corr[target_idx] = idx_in_fit_results
+
+    # re-build the correlation matrix: start with a diagonal matrix (assuming the fit
+    # results contain no relevant information), then insert the values provided in the
+    # fit results
+    corr_mat = np.diagflat(np.ones_like(labels, dtype=float))
+    for i_target, i_orig in enumerate(indices_for_corr):
+        for j_target, j_orig in enumerate(indices_for_corr):
+            # i_target and j_target are positions in the matched correlation matrix,
+            # i_orig and j_orig are positions in the old matrix (None if the parameter
+            # is not part of the original fit results, in which case no update to the
+            # correlation matrix is needed)
+            if i_orig is not None and j_orig is not None:
+                corr_mat[i_target][j_target] = fit_results.corr_mat[i_orig][j_orig]
+
+    fit_results_matched = FitResults(
+        np.asarray(bestfit),
+        np.asarray(uncertainty),
+        labels,
+        corr_mat,
+        fit_results.best_twice_nll,
+        fit_results.goodness_of_fit,
+    )
+    return fit_results_matched
diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py
index 4bb12425..86cc28e0 100644
--- a/tests/test_model_utils.py
+++ b/tests/test_model_utils.py
@@ -164,7 +164,7 @@ def test_yield_stdev(example_spec, example_spec_multibin):
     # pre-fit
     parameters = np.asarray([1.0, 1.0])
     uncertainty = np.asarray([0.0495665682, 0.0])
-    diag_corr_mat = np.diag([1.0, 1.0])
+    diag_corr_mat = np.diagflat([1.0, 1.0])
     total_stdev_bin, total_stdev_chan = model_utils.yield_stdev(
         model, parameters, uncertainty, diag_corr_mat
     )
@@ -213,7 +213,8 @@ def test_yield_stdev(example_spec, example_spec_multibin):
     "cabinetry.model_utils.prefit_uncertainties", return_value=([0.04956657, 0.0])
 )
 @mock.patch("cabinetry.model_utils.asimov_parameters", return_value=([1.0, 1.0]))
-def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
+def test_prediction(mock_asimov, mock_unc, mock_stdev, caplog, example_spec):
+    caplog.set_level(logging.DEBUG)
     model = pyhf.Workspace(example_spec).model()

     # pre-fit prediction
@@ -242,7 +243,7 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     fit_results = FitResults(
         np.asarray([1.01, 1.1]),
         np.asarray([0.03, 0.1]),
-        [],
+        ["staterror_Signal-Region[0]", "Signal strength"],
         np.asarray([[1.0, 0.2], [0.2, 1.0]]),
         0.0,
     )
@@ -252,6 +253,9 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     assert model_pred.total_stdev_model_bins == [[0.3]]  # from mock
     assert model_pred.total_stdev_model_channels == [0.3]  # from mock
     assert model_pred.label == "post-fit"
+    assert "parameter names in fit results and model do not match" not in [
+        rec.message for rec in caplog.records
+    ]

     assert mock_asimov.call_count == 1  # no new call
     assert mock_unc.call_count == 1  # no new call
@@ -266,9 +270,22 @@ def test_prediction(mock_asimov, mock_unc, mock_stdev, example_spec):
     )
     assert mock_stdev.call_args_list[1][1] == {}

-    # custom label
-    model_pred = model_utils.prediction(model, label="abc")
+    caplog.clear()
+
+    # custom prediction label, mismatch in parameter names
+    fit_results = FitResults(
+        np.asarray([1.01, 1.1]),
+        np.asarray([0.03, 0.1]),
+        ["a", "b"],
+        np.asarray([[1.0, 0.2], [0.2, 1.0]]),
+        0.0,
+    )
+    model_pred = model_utils.prediction(model, fit_results=fit_results, label="abc")
+    assert "parameter names in fit results and model do not match" in [
+        rec.message for rec in caplog.records
+    ]
     assert model_pred.label == "abc"
+    caplog.clear()


 def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor):
@@ -342,3 +359,59 @@ def test__filter_channels(example_spec_multibin, caplog):
         "['region_1', 'region_2']" in [rec.message for rec in caplog.records]
     )
     caplog.clear()
+
+
+@mock.patch(
+    "cabinetry.model_utils.prefit_uncertainties",
+    side_effect=[[0.4, 0.5, 0.6], [0.4, 0.5], [0.4, 0.5, 0.6]],
+)
+@mock.patch(
+    "cabinetry.model_utils.asimov_parameters",
+    side_effect=[[4.0, 5.0, 6.0], [4.0, 5.0], [4.0, 5.0, 6.0]],
+)
+def test_match_fit_results(mock_pars, mock_uncs):
+    mock_model = mock.MagicMock()
+    fit_results = FitResults(
+        np.asarray([1.0, 2.0, 3.0]),
+        np.asarray([0.1, 0.2, 0.3]),
+        ["par_a", "par_b", "par_c"],
+        np.asarray([[1.0, 0.2, 0.5], [0.2, 1.0, 0.1], [0.5, 0.1, 1.0]]),
+        5.0,
+        0.1,
+    )
+
+    # remove par_a, flip par_b and par_c, add par_d
+    mock_model.config.par_names.return_value = ["par_c", "par_d", "par_b"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert mock_pars.call_args_list == [((mock_model,), {})]
+    assert mock_uncs.call_args_list == [((mock_model,), {})]
+    assert np.allclose(matched_fit_res.bestfit, [3.0, 5.0, 2.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.3, 0.5, 0.2])
+    assert matched_fit_res.labels == ["par_c", "par_d", "par_b"]
+    assert np.allclose(
+        matched_fit_res.corr_mat, [[1.0, 0.0, 0.1], [0.0, 1.0, 0.0], [0.1, 0.0, 1.0]]
+    )
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1
+
+    # all parameters are new
+    mock_model.config.par_names.return_value = ["par_d", "par_e"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert np.allclose(matched_fit_res.bestfit, [4.0, 5.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.4, 0.5])
+    assert matched_fit_res.labels == ["par_d", "par_e"]
+    assert np.allclose(matched_fit_res.corr_mat, [[1.0, 0.0], [0.0, 1.0]])
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1
+
+    # fit results already match the model exactly
+    mock_model.config.par_names.return_value = ["par_a", "par_b", "par_c"]
+    matched_fit_res = model_utils.match_fit_results(mock_model, fit_results)
+    assert np.allclose(matched_fit_res.bestfit, [1.0, 2.0, 3.0])
+    assert np.allclose(matched_fit_res.uncertainty, [0.1, 0.2, 0.3])
+    assert matched_fit_res.labels == ["par_a", "par_b", "par_c"]
+    assert np.allclose(
+        matched_fit_res.corr_mat, [[1.0, 0.2, 0.5], [0.2, 1.0, 0.1], [0.5, 0.1, 1.0]]
+    )
+    assert matched_fit_res.best_twice_nll == 5.0
+    assert matched_fit_res.goodness_of_fit == 0.1
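
For reference, a minimal usage sketch of the new model_utils.match_fit_results utility (not part of the patch above): the pyhf.simplemodels model, the parameter name "norm_bkg", and all numeric values are made up for illustration, and the FitResults import path may differ slightly depending on the cabinetry version. Matching fit results to the target model also avoids the parameter-name mismatch warning that this patch adds to model_utils.prediction.

import numpy as np
import pyhf

from cabinetry import model_utils
from cabinetry.fit import FitResults  # import path may vary between cabinetry versions

# a small stand-in model with parameters "mu" and "uncorr_bkguncrt[0]"
model = pyhf.simplemodels.uncorrelated_background(
    signal=[5.0], bkg=[50.0], bkg_uncertainty=[7.0]
)

# hypothetical results from a fit of a different model: "mu" is shared with the model
# above, "norm_bkg" is not part of it, and "uncorr_bkguncrt[0]" is missing here
fit_results = FitResults(
    np.asarray([1.2, 0.9]),                # best-fit parameter values
    np.asarray([0.3, 0.1]),                # parameter uncertainties
    ["mu", "norm_bkg"],                    # parameter names
    np.asarray([[1.0, 0.4], [0.4, 1.0]]),  # correlation matrix
    17.5,                                  # best-fit -2 log(likelihood)
)

# "norm_bkg" is dropped, "uncorr_bkguncrt[0]" is added with its Asimov value and
# pre-fit uncertainty, and everything is ordered according to the target model
matched = model_utils.match_fit_results(model, fit_results)
print(matched.labels)   # parameter names of the target model
print(matched.bestfit)  # "mu" carried over (1.2), Asimov value for the added parameter

# the matched results can then be used for a post-fit prediction of the target model
# without triggering the parameter-name mismatch warning added in this patch
model_pred = model_utils.prediction(model, fit_results=matched)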