From 9cbde04fac818b309dd07dabcfbd0fcb435511e9 Mon Sep 17 00:00:00 2001
From: Kin Ho Lo
Date: Sat, 6 Apr 2024 11:48:08 +0200
Subject: [PATCH 1/9] Enable external_predictions for short model in benchmarks

---
 doubleml/double_ml.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index b47a5ace..320320da 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1735,7 +1735,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True
                                 fill=fill)
         return fig
 
-    def sensitivity_benchmark(self, benchmarking_set):
+    def sensitivity_benchmark(self, benchmarking_set, fit_args={}):
         """
         Computes a benchmark for a given set of features.
         Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates.
@@ -1757,12 +1757,15 @@ def sensitivity_benchmark(self, benchmarking_set):
         if not set(benchmarking_set) <= set(x_list_long):
             raise ValueError(f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
                              f'{str(benchmarking_set)} was passed.')
+        if not isinstance(fit_args, dict):
+            raise TypeError('fit_args must be a dict. '
+                            f'{str(fit_args)} of type {type(fit_args)} was passed.')
 
         # refit short form of the model
         x_list_short = [x for x in x_list_long if x not in benchmarking_set]
         dml_short = copy.deepcopy(self)
         dml_short._dml_data.x_cols = x_list_short
-        dml_short.fit()
+        dml_short.fit(**fit_args)
 
         benchmark_dict = gain_statistics(dml_long=self, dml_short=dml_short)
         df_benchmark = pd.DataFrame(benchmark_dict, index=self._dml_data.d_cols)

From 8b7d9380508e20a2e923903668eb4dbc04b6ced6 Mon Sep 17 00:00:00 2001
From: Kin Ho Lucien Lo
Date: Mon, 8 Apr 2024 14:29:49 +0200
Subject: [PATCH 2/9] Added unit test
 test_sensitivity_benchmark_external_prediction_exception in
 test_exceptions_ext_preds.py

---
 doubleml/tests/test_exceptions_ext_preds.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/doubleml/tests/test_exceptions_ext_preds.py b/doubleml/tests/test_exceptions_ext_preds.py
index 395d8bf5..d72cd45b 100644
--- a/doubleml/tests/test_exceptions_ext_preds.py
+++ b/doubleml/tests/test_exceptions_ext_preds.py
@@ -1,8 +1,10 @@
 import pytest
-from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData
+from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLIRM, DoubleMLData
 from doubleml.datasets import make_irm_data
 from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
 
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+
 df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame")
 ext_predictions = {"d": {}}
 
@@ -21,3 +23,13 @@ def test_qte_external_prediction_exception():
     with pytest.raises(NotImplementedError, match=msg):
         qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), DMLDummyClassifier(), DMLDummyClassifier())
         qte.fit(external_predictions=ext_predictions)
+
+@pytest.mark.ci
+def test_sensitivity_benchmark_external_prediction_exception():
+    msg = "fit_args must be a dict. "
+    with pytest.raises(TypeError, match=msg):
+        fit_args = []
+        irm = DoubleMLIRM(DoubleMLData(df_irm, "y", "d"), RandomForestRegressor(), RandomForestClassifier())
+        irm.fit()
+        irm.sensitivity_analysis()
+        irm.sensitivity_benchmark(benchmarking_set=["X1"], fit_args=fit_args)

From 519bae63d05b5935e48bc63f00d15e85b2b22a30 Mon Sep 17 00:00:00 2001
From: Kin Ho Lucien Lo
Date: Mon, 8 Apr 2024 23:19:59 +0200
Subject: [PATCH 3/9] Change default value of fit_args to be None

---
 doubleml/double_ml.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 320320da..8ea773a4 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1735,7 +1735,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True
                                 fill=fill)
         return fig
 
-    def sensitivity_benchmark(self, benchmarking_set, fit_args={}):
+    def sensitivity_benchmark(self, benchmarking_set, fit_args=None):
         """
         Computes a benchmark for a given set of features.
         Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates.
@@ -1757,7 +1757,7 @@ def sensitivity_benchmark(self, benchmarking_set, fit_args={}):
         if not set(benchmarking_set) <= set(x_list_long):
             raise ValueError(f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
                              f'{str(benchmarking_set)} was passed.')
-        if not isinstance(fit_args, dict):
+        if fit_args is not None and not isinstance(fit_args, dict):
             raise TypeError('fit_args must be a dict. '
                             f'{str(fit_args)} of type {type(fit_args)} was passed.')
 
@@ -1765,7 +1765,10 @@ def sensitivity_benchmark(self, benchmarking_set, fit_args={}):
         x_list_short = [x for x in x_list_long if x not in benchmarking_set]
         dml_short = copy.deepcopy(self)
         dml_short._dml_data.x_cols = x_list_short
-        dml_short.fit(**fit_args)
+        if fit_args is not None:
+            dml_short.fit(**fit_args)
+        else:
+            dml_short.fit()
 
         benchmark_dict = gain_statistics(dml_long=self, dml_short=dml_short)
         df_benchmark = pd.DataFrame(benchmark_dict, index=self._dml_data.d_cols)

From d996fdad7e54bf368240ce0155f1e93796707449 Mon Sep 17 00:00:00 2001
From: Kin Ho Lucien Lo
Date: Tue, 9 Apr 2024 16:23:50 +0200
Subject: [PATCH 4/9] Added test_dml_benchmark_fixture in test_sensitivity.py

---
 doubleml/tests/test_sensitivity.py | 49 ++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index b1277b78..8bb73936 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -1,12 +1,21 @@
 import pytest
+import math
 import numpy as np
+import copy
 
 import doubleml as dml
+from doubleml import DoubleMLIRM, DoubleMLData
+from doubleml.datasets import make_irm_data
 from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_manual, \
     doubleml_sensitivity_benchmark_manual
 
+@pytest.fixture(scope="module", params=[["X1"],["X2"],["X3"]])
+def benchmarking_set(request):
+    return request.param
+
 
 @pytest.fixture(scope='module',
                 params=[1, 3])
@@ -99,3 +108,43 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
     assert all(dml_sensitivity_multitreat_fixture['benchmark'].index ==
                dml_sensitivity_multitreat_fixture['d_cols'])
     assert dml_sensitivity_multitreat_fixture['benchmark'].equals(dml_sensitivity_multitreat_fixture['benchmark_manual'])
+
+@pytest.fixture(scope="module")
+def test_dml_benchmark_fixture(benchmarking_set,n_rep):
+
+    random_state = 42
+
+    x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
+
+    classifier_class = RandomForestClassifier
+    regressor_class = RandomForestRegressor
+
+    np.random.seed(3141)
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    x_list_long = copy.deepcopy(dml_data.x_cols)
+    dml_int = DoubleMLIRM(dml_data, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(random_state=random_state), n_folds=2)
+    dml_int.fit(store_predictions=True)
+    dml_int.sensitivity_analysis()
+    dml_ext = copy.deepcopy(dml_int)
+    df_bm = dml_int.sensitivity_benchmark(benchmarking_set=benchmarking_set)
+
+    np.random.seed(3141)
+    dml_short = copy.deepcopy(dml_ext)
+    dml_data_short = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
+    dml_short = DoubleMLIRM(dml_data_short, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(random_state=random_state), n_folds=2)
+    dml_short.fit(store_predictions=True)
+    fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
+                                               "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
+                                               "ml_g1": dml_short.predictions["ml_g1"][:, :, 0],}},}
+    dml_ext.sensitivity_analysis()
+    df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set,fit_args=fit_args)
+
+    res_dict = {"default_benchmark": df_bm.loc["d","delta_theta"],
+                "external_benchmark": df_bm_ext.loc["d","delta_theta"]}
+
+    return res_dict
+
+@pytest.mark.ci
+def test_dml_sensitivity_external_predictions(test_dml_benchmark_fixture):
+    assert math.isclose(test_dml_benchmark_fixture["default_benchmark"], test_dml_benchmark_fixture["external_benchmark"], rel_tol=1e-9, abs_tol=1e-4)
From d68143eb2c028a0250c67c325aa7e52e86ad8ca4 Mon Sep 17 00:00:00 2001
From: Kin Ho Lucien Lo
Date: Tue, 9 Apr 2024 16:31:07 +0200
Subject: [PATCH 5/9] Remove trailing line in test_sensitivity.py

---
 doubleml/tests/test_sensitivity.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index 8bb73936..e67ab6ae 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -110,8 +110,7 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
     assert dml_sensitivity_multitreat_fixture['benchmark'].equals(dml_sensitivity_multitreat_fixture['benchmark_manual'])
 
 @pytest.fixture(scope="module")
-def test_dml_benchmark_fixture(benchmarking_set,n_rep):
-
+def test_dml_benchmark_fixture(benchmarking_set,n_rep): 
     random_state = 42
 
     x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
From 96f5d4c989c759b778f1de5886d2a89910f2a7dd Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 11 Apr 2024 14:49:36 +0200
Subject: [PATCH 6/9] remove trailing whitespaces from test_sensitivity

---
 doubleml/tests/test_sensitivity.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index e67ab6ae..6424158d 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -110,9 +110,8 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
     assert dml_sensitivity_multitreat_fixture['benchmark'].equals(dml_sensitivity_multitreat_fixture['benchmark_manual'])
 
 @pytest.fixture(scope="module")
-def test_dml_benchmark_fixture(benchmarking_set,n_rep): 
+def test_dml_benchmark_fixture(benchmarking_set,n_rep):
     random_state = 42
-
     x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
 
     classifier_class = RandomForestClassifier

From 3160b7532c60f811abfb4305f6e5dba4268eb25b Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:36:25 +0200
Subject: [PATCH 7/9] format files

---
 doubleml/tests/test_exceptions_ext_preds.py |  1 +
 doubleml/tests/test_sensitivity.py          | 37 +++++++++++++++------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/doubleml/tests/test_exceptions_ext_preds.py b/doubleml/tests/test_exceptions_ext_preds.py
index d72cd45b..4a61361d 100644
--- a/doubleml/tests/test_exceptions_ext_preds.py
+++ b/doubleml/tests/test_exceptions_ext_preds.py
@@ -24,6 +24,7 @@ def test_qte_external_prediction_exception():
         qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), DMLDummyClassifier(), DMLDummyClassifier())
         qte.fit(external_predictions=ext_predictions)
 
+
 @pytest.mark.ci
 def test_sensitivity_benchmark_external_prediction_exception():
     msg = "fit_args must be a dict. "
diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index 6424158d..c994eda2 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -12,7 +12,8 @@
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_manual, \
     doubleml_sensitivity_benchmark_manual
 
-@pytest.fixture(scope="module", params=[["X1"],["X2"],["X3"]])
+
+@pytest.fixture(scope="module", params=[["X1"], ["X2"], ["X3"]])
 def benchmarking_set(request):
     return request.param
 
@@ -109,40 +110,54 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
                dml_sensitivity_multitreat_fixture['d_cols'])
     assert dml_sensitivity_multitreat_fixture['benchmark'].equals(dml_sensitivity_multitreat_fixture['benchmark_manual'])
 
+
 @pytest.fixture(scope="module")
-def test_dml_benchmark_fixture(benchmarking_set,n_rep):
+def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     random_state = 42
     x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
 
     classifier_class = RandomForestClassifier
     regressor_class = RandomForestRegressor
-
+
     np.random.seed(3141)
     dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
     x_list_long = copy.deepcopy(dml_data.x_cols)
-    dml_int = DoubleMLIRM(dml_data, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(random_state=random_state), n_folds=2)
+    dml_int = DoubleMLIRM(dml_data,
+                          ml_m=classifier_class(random_state=random_state),
+                          ml_g=regressor_class(random_state=random_state),
+                          n_folds=2)
     dml_int.fit(store_predictions=True)
     dml_int.sensitivity_analysis()
     dml_ext = copy.deepcopy(dml_int)
     df_bm = dml_int.sensitivity_benchmark(benchmarking_set=benchmarking_set)
-
+
    np.random.seed(3141)
     dml_short = copy.deepcopy(dml_ext)
     dml_data_short = DoubleMLData.from_arrays(x=x, y=y, d=d)
     dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
-    dml_short = DoubleMLIRM(dml_data_short, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(random_state=random_state), n_folds=2)
+    dml_short = DoubleMLIRM(dml_data_short,
+                            ml_m=classifier_class(random_state=random_state),
+                            ml_g=regressor_class(random_state=random_state),
+                            n_folds=2)
     dml_short.fit(store_predictions=True)
     fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
                                                "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
-                                               "ml_g1": dml_short.predictions["ml_g1"][:, :, 0],}},}
+                                               "ml_g1": dml_short.predictions["ml_g1"][:, :, 0],
+                                               }
+                                         },
+                }
     dml_ext.sensitivity_analysis()
-    df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set,fit_args=fit_args)
+    df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set, fit_args=fit_args)
 
-    res_dict = {"default_benchmark": df_bm.loc["d","delta_theta"],
-                "external_benchmark": df_bm_ext.loc["d","delta_theta"]}
+    res_dict = {"default_benchmark": df_bm.loc["d", "delta_theta"],
+                "external_benchmark": df_bm_ext.loc["d", "delta_theta"]}
 
     return res_dict
 
+
 @pytest.mark.ci
 def test_dml_sensitivity_external_predictions(test_dml_benchmark_fixture):
-    assert math.isclose(test_dml_benchmark_fixture["default_benchmark"], test_dml_benchmark_fixture["external_benchmark"], rel_tol=1e-9, abs_tol=1e-4)
+    assert math.isclose(test_dml_benchmark_fixture["default_benchmark"],
+                        test_dml_benchmark_fixture["external_benchmark"],
+                        rel_tol=1e-9,
+                        abs_tol=1e-4)

From c0cbf41a6a1e339dd9d29c189853f3e7dee7b829 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:40:45 +0200
Subject: [PATCH 8/9] remove additional dml_short definition

---
 doubleml/tests/test_sensitivity.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index c994eda2..d4a379f4 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -132,7 +132,6 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     df_bm = dml_int.sensitivity_benchmark(benchmarking_set=benchmarking_set)
 
     np.random.seed(3141)
-    dml_short = copy.deepcopy(dml_ext)
     dml_data_short = DoubleMLData.from_arrays(x=x, y=y, d=d)
     dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
     dml_short = DoubleMLIRM(dml_data_short,

From d9807a85ebf46c65537587c78b0cf6a016401c9c Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 11 Apr 2024 16:56:15 +0200
Subject: [PATCH 9/9] extend external predictions benchmarking to multiple
 repetitions

---
 doubleml/tests/test_sensitivity.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index d4a379f4..9c9ca9f3 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -1,13 +1,11 @@
 import pytest
-import math
 import numpy as np
 import copy
 
 import doubleml as dml
 from doubleml import DoubleMLIRM, DoubleMLData
 from doubleml.datasets import make_irm_data
-from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.linear_model import LinearRegression, LogisticRegression
 
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_manual, \
     doubleml_sensitivity_benchmark_manual
@@ -114,18 +112,19 @@ def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
 @pytest.fixture(scope="module")
 def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     random_state = 42
-    x, y, d = make_irm_data(n_obs=10, dim_x=5, theta=0.5, return_type="np.array")
+    x, y, d = make_irm_data(n_obs=50, dim_x=5, theta=0, return_type="np.array")
 
-    classifier_class = RandomForestClassifier
-    regressor_class = RandomForestRegressor
+    classifier_class = LogisticRegression
+    regressor_class = LinearRegression
 
     np.random.seed(3141)
     dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
     x_list_long = copy.deepcopy(dml_data.x_cols)
     dml_int = DoubleMLIRM(dml_data,
                           ml_m=classifier_class(random_state=random_state),
-                          ml_g=regressor_class(random_state=random_state),
-                          n_folds=2)
+                          ml_g=regressor_class(),
+                          n_folds=2,
+                          n_rep=n_rep)
     dml_int.fit(store_predictions=True)
     dml_int.sensitivity_analysis()
     dml_ext = copy.deepcopy(dml_int)
@@ -136,8 +135,9 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
     dml_short = DoubleMLIRM(dml_data_short,
                             ml_m=classifier_class(random_state=random_state),
-                            ml_g=regressor_class(random_state=random_state),
-                            n_folds=2)
+                            ml_g=regressor_class(),
+                            n_folds=2,
+                            n_rep=n_rep)
     dml_short.fit(store_predictions=True)
     fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
                                                "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
@@ -148,15 +148,15 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
     dml_ext.sensitivity_analysis()
     df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set, fit_args=fit_args)
 
-    res_dict = {"default_benchmark": df_bm.loc["d", "delta_theta"],
-                "external_benchmark": df_bm_ext.loc["d", "delta_theta"]}
+    res_dict = {"default_benchmark": df_bm,
+                "external_benchmark": df_bm_ext}
 
     return res_dict
 
 
 @pytest.mark.ci
 def test_dml_sensitivity_external_predictions(test_dml_benchmark_fixture):
-    assert math.isclose(test_dml_benchmark_fixture["default_benchmark"],
-                        test_dml_benchmark_fixture["external_benchmark"],
-                        rel_tol=1e-9,
-                        abs_tol=1e-4)
+    assert np.allclose(test_dml_benchmark_fixture["default_benchmark"],
+                       test_dml_benchmark_fixture["external_benchmark"],
+                       rtol=1e-9,
+                       atol=1e-4)
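
For reviewers: a minimal end-to-end sketch of the workflow this series enables, namely benchmarking against a short model whose nuisance predictions are supplied through the new fit_args parameter of sensitivity_benchmark. The sketch is illustrative and not part of any patch; the learners, the sample size, and the benchmarked feature "X1" are assumptions, while the API calls mirror the tests above.

# Illustrative sketch, not part of the patch series. Learners, sample size,
# and the benchmarked feature "X1" are assumptions; calls mirror the tests.
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from doubleml import DoubleMLIRM, DoubleMLData
from doubleml.datasets import make_irm_data

np.random.seed(3141)
x, y, d = make_irm_data(n_obs=500, dim_x=5, theta=0.5, return_type="np.array")
dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)

# Long model: fit with stored predictions and run the sensitivity analysis.
dml_long = DoubleMLIRM(dml_data, ml_g=LinearRegression(),
                       ml_m=LogisticRegression(), n_folds=2)
dml_long.fit(store_predictions=True)
dml_long.sensitivity_analysis()

# Short model: drop the benchmarked feature and fit it externally.
# Re-seeding aligns the fold split with the long model, as in the tests.
np.random.seed(3141)
dml_data_short = DoubleMLData.from_arrays(x=x, y=y, d=d)
dml_data_short.x_cols = [c for c in dml_data.x_cols if c != "X1"]
dml_short = DoubleMLIRM(dml_data_short, ml_g=LinearRegression(),
                        ml_m=LogisticRegression(), n_folds=2)
dml_short.fit(store_predictions=True)

# Hand the short model's cross-fitted predictions to sensitivity_benchmark
# via fit_args, so the internal refit reuses them instead of re-estimating.
fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
                                           "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
                                           "ml_g1": dml_short.predictions["ml_g1"][:, :, 0]}}}
df_bm = dml_long.sensitivity_benchmark(benchmarking_set=["X1"], fit_args=fit_args)
print(df_bm)

This is the behaviour test_dml_sensitivity_external_predictions pins down: the benchmark computed from externally supplied predictions should agree with the default internal refit up to numerical tolerance.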