diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf626aa7..6f4949e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,17 @@ Unreleased
- Add `model_info` class to better capture model information.
- Test `/examples` Jupyter notebooks within normal test suite.
+v1.10.1 (2023-08-24)
+----------
+**Improvements**
+- Introduced ability to specify the target index of a binary model when creating score code.
+  - Index can be specified in `pzmm.import_model.ImportModel.import_model()`.
+ - Relevant examples updated to include target_index.
+
+**Bugfixes**
+- Reworked `write_score_code.py` to allow for proper execution of single line scoring.
+- Added template files for `assess_model_bias.py` to allow for proper execution.
+
v1.10 (2023-08-31)
----------
**Improvements**
diff --git a/examples/pzmm_binary_classification_model_import.ipynb b/examples/pzmm_binary_classification_model_import.ipynb
index c68074b0..e540b703 100644
--- a/examples/pzmm_binary_classification_model_import.ipynb
+++ b/examples/pzmm_binary_classification_model_import.ipynb
@@ -814,7 +814,8 @@
" predict_method=[dtc.predict_proba, [int, int]], # What is the predict method and what does it return?\n",
" score_metrics=score_metrics, # What are the output variables?\n",
" overwrite_model=True, # Overwrite the model if it already exists?\n",
- " target_values=[\"1\", \"0\"], # What are the expected values of the target variable?\n",
+ " target_values=[\"0\", \"1\"], # What are the expected values of the target variable?\n",
+ " target_index=1, # What is the index of the target value in target_values?\n",
" model_file_name=prefix + \".pickle\", # How was the model file serialized?\n",
" missing_values=True # Does the data include missing values?\n",
" )\n",
diff --git a/examples/pzmm_h2o_model_import.ipynb b/examples/pzmm_h2o_model_import.ipynb
index 2609b51f..76982ef9 100644
--- a/examples/pzmm_h2o_model_import.ipynb
+++ b/examples/pzmm_h2o_model_import.ipynb
@@ -87,7 +87,9 @@
"outputs": [
{
"data": {
- "text/plain": "'3.38.0.4'"
+ "text/plain": [
+ "'3.40.0.3'"
+ ]
},
"execution_count": 3,
"metadata": {},
@@ -114,14 +116,116 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n",
- "Warning: Your H2O cluster version is too old (5 months and 8 days)!Please download and install the latest version from http://h2o.ai/download/\n"
+ "Checking whether there is an H2O instance running at http://localhost:54321..... not found.\n",
+ "Attempting to start a local H2O server...\n",
+ " Java Version: openjdk version \"11.0.13\" 2021-10-19; OpenJDK Runtime Environment JBR-11.0.13.7-1751.21-jcef (build 11.0.13+7-b1751.21); OpenJDK 64-Bit Server VM JBR-11.0.13.7-1751.21-jcef (build 11.0.13+7-b1751.21, mixed mode)\n",
+ " Starting server from /Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/h2o/backend/bin/h2o.jar\n",
+ " Ice root: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_\n",
+ " JVM stdout: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_/h2o_dalmoo_started_from_python.out\n",
+ " JVM stderr: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_/h2o_dalmoo_started_from_python.err\n",
+ " Server is running at http://127.0.0.1:54321\n",
+ "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n",
+ "Warning: Your H2O cluster version is (6 months and 1 day) old. There may be a newer version available.\n",
+ "Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html\n"
]
},
{
"data": {
- "text/plain": "-------------------------- -----------------------------\nH2O_cluster_uptime: 1 day 4 hours 9 mins\nH2O_cluster_timezone: America/New_York\nH2O_data_parsing_timezone: UTC\nH2O_cluster_version: 3.38.0.4\nH2O_cluster_version_age: 5 months and 8 days !!!\nH2O_cluster_name: H2O_from_python_demo_uqabco\nH2O_cluster_total_nodes: 1\nH2O_cluster_free_memory: 15.93 Gb\nH2O_cluster_total_cores: 16\nH2O_cluster_allowed_cores: 16\nH2O_cluster_status: locked, healthy\nH2O_connection_url: http://localhost:54321\nH2O_connection_proxy: {\"http\": null, \"https\": null}\nH2O_internal_security: False\nPython_version: 3.8.16 final\n-------------------------- -----------------------------",
- "text/html": "\n \n
\n
\n \n \n H2O_cluster_uptime: | \n1 day 4 hours 9 mins |
\nH2O_cluster_timezone: | \nAmerica/New_York |
\nH2O_data_parsing_timezone: | \nUTC |
\nH2O_cluster_version: | \n3.38.0.4 |
\nH2O_cluster_version_age: | \n5 months and 8 days !!! |
\nH2O_cluster_name: | \nH2O_from_python_demo_uqabco |
\nH2O_cluster_total_nodes: | \n1 |
\nH2O_cluster_free_memory: | \n15.93 Gb |
\nH2O_cluster_total_cores: | \n16 |
\nH2O_cluster_allowed_cores: | \n16 |
\nH2O_cluster_status: | \nlocked, healthy |
\nH2O_connection_url: | \nhttp://localhost:54321 |
\nH2O_connection_proxy: | \n{\"http\": null, \"https\": null} |
\nH2O_internal_security: | \nFalse |
\nPython_version: | \n3.8.16 final |
\n
\n
\n"
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " H2O_cluster_uptime: | \n",
+ "02 secs |
\n",
+ "H2O_cluster_timezone: | \n",
+ "America/Chicago |
\n",
+ "H2O_data_parsing_timezone: | \n",
+ "UTC |
\n",
+ "H2O_cluster_version: | \n",
+ "3.40.0.3 |
\n",
+ "H2O_cluster_version_age: | \n",
+ "6 months and 1 day |
\n",
+ "H2O_cluster_name: | \n",
+ "H2O_from_python_dalmoo_6awy1u |
\n",
+ "H2O_cluster_total_nodes: | \n",
+ "1 |
\n",
+ "H2O_cluster_free_memory: | \n",
+ "4 Gb |
\n",
+ "H2O_cluster_total_cores: | \n",
+ "10 |
\n",
+ "H2O_cluster_allowed_cores: | \n",
+ "10 |
\n",
+ "H2O_cluster_status: | \n",
+ "locked, healthy |
\n",
+ "H2O_connection_url: | \n",
+ "http://127.0.0.1:54321 |
\n",
+ "H2O_connection_proxy: | \n",
+ "{\"http\": null, \"https\": null} |
\n",
+ "H2O_internal_security: | \n",
+ "False |
\n",
+ "Python_version: | \n",
+ "3.8.16 final |
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "-------------------------- -----------------------------\n",
+ "H2O_cluster_uptime: 02 secs\n",
+ "H2O_cluster_timezone: America/Chicago\n",
+ "H2O_data_parsing_timezone: UTC\n",
+ "H2O_cluster_version: 3.40.0.3\n",
+ "H2O_cluster_version_age: 6 months and 1 day\n",
+ "H2O_cluster_name: H2O_from_python_dalmoo_6awy1u\n",
+ "H2O_cluster_total_nodes: 1\n",
+ "H2O_cluster_free_memory: 4 Gb\n",
+ "H2O_cluster_total_cores: 10\n",
+ "H2O_cluster_allowed_cores: 10\n",
+ "H2O_cluster_status: locked, healthy\n",
+ "H2O_connection_url: http://127.0.0.1:54321\n",
+ "H2O_connection_proxy: {\"http\": null, \"https\": null}\n",
+ "H2O_internal_security: False\n",
+ "Python_version: 3.8.16 final\n",
+ "-------------------------- -----------------------------"
+ ]
},
"metadata": {},
"output_type": "display_data"
@@ -156,7 +260,9 @@
},
{
"data": {
- "text/plain": "(5960, 13)"
+ "text/plain": [
+ "(5960, 13)"
+ ]
},
"execution_count": 5,
"metadata": {},
@@ -219,8 +325,1926 @@
},
{
"data": {
- "text/plain": "Model Details\n=============\nH2OGeneralizedLinearEstimator : Generalized Linear Modeling\nModel Key: glmfit\n\n\nGLM Model: summary\n family link regularization lambda_search number_of_predictors_total number_of_active_predictors number_of_iterations training_frame\n-- -------- ------ --------------------------------------------- ---------------------------------------------------------------------------- ---------------------------- ----------------------------- ---------------------- ----------------\n binomial logit Elastic Net (alpha = 0.5, lambda = 9.244E-4 ) nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 18 17 93 py_3_sid_80dd\n\nModelMetricsBinomialGLM: glm\n** Reported on train data. **\n\nMSE: 0.12410460050039399\nRMSE: 0.3522848286548741\nLogLoss: 0.40208193579455126\nAUC: 0.78494005994006\nAUCPR: 0.569122888911077\nGini: 0.56988011988012\nNull degrees of freedom: 3580\nResidual degrees of freedom: 3563\nNull deviance: 3597.150438148382\nResidual deviance: 2879.710824160576\nAIC: 2915.710824160576\n\nConfusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574175263537824\n 0 1 Error Rate\n----- ---- --- ------- --------------\n0 2446 414 0.1448 (414.0/2860.0)\n1 314 407 0.4355 (314.0/721.0)\nTotal 2760 821 0.2033 (728.0/3581.0)\n\nMaximum Metrics: Maximum metrics at their respective thresholds\nmetric threshold value idx\n--------------------------- ----------- -------- -----\nmax f1 0.257418 0.527886 208\nmax f2 0.152297 0.626112 279\nmax f0point5 0.388608 0.557126 145\nmax accuracy 0.567264 0.835242 91\nmax precision 0.998809 1 0\nmax recall 0.00123756 1 399\nmax specificity 0.998809 1 0\nmax absolute_mcc 0.266615 0.402098 204\nmax min_per_class_accuracy 0.178278 0.706643 258\nmax mean_per_class_accuracy 0.214316 0.719503 234\nmax tns 0.998809 2860 0\nmax fns 0.998809 717 0\nmax fps 0.00626186 2860 398\nmax tps 0.00123756 721 399\nmax tnr 0.998809 1 0\nmax fnr 0.998809 0.994452 0\nmax fpr 0.00626186 1 
398\nmax tpr 0.00123756 1 399\n\nGains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\ngroup cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n1 0.0100531 0.946685 4.96671 4.96671 1 0.980508 1 0.980508 0.0499307 0.0499307 396.671 396.671 0.0499307\n2 0.0201061 0.852347 4.82875 4.89773 0.972222 0.898798 0.986111 0.939653 0.0485437 0.0984743 382.875 389.773 0.0981247\n3 0.0301592 0.747959 3.863 4.55282 0.777778 0.798413 0.916667 0.892573 0.038835 0.137309 286.3 355.282 0.134162\n4 0.0402122 0.691238 3.17318 4.20791 0.638889 0.715252 0.847222 0.848243 0.0319001 0.169209 217.318 320.791 0.161517\n5 0.0502653 0.626742 3.58707 4.08374 0.722222 0.655898 0.822222 0.809774 0.036061 0.20527 258.707 308.374 0.194082\n6 0.100251 0.430243 2.44174 3.26503 0.49162 0.519979 0.657382 0.66528 0.122053 0.327323 144.174 226.503 0.284316\n7 0.150237 0.345914 2.05328 2.86186 0.413408 0.383859 0.576208 0.571647 0.102635 0.429958 105.328 186.186 0.350238\n8 0.200223 0.284427 1.72031 2.57687 0.346369 0.311905 0.518828 0.506802 0.0859917 0.51595 72.0314 157.687 0.395321\n9 0.300195 0.209845 1.33186 2.16225 0.268156 0.243116 0.435349 0.418988 0.133148 0.649098 33.1856 116.225 0.436861\n10 0.400168 0.169575 0.887904 1.84389 0.178771 0.188252 0.371249 0.361345 0.0887656 0.737864 -11.2096 84.3888 0.422829\n11 0.50014 0.138031 0.679801 1.6112 0.136872 0.152531 0.3244 0.319605 0.0679612 0.805825 -32.0199 61.1201 0.382748\n12 0.600112 0.114618 0.541066 1.43293 0.108939 0.125883 0.288506 0.287333 0.0540915 0.859917 -45.8934 43.2928 0.325301\n13 0.700084 0.0945695 0.527193 1.30359 0.106145 0.104777 
0.262465 0.261264 0.0527046 0.912621 -47.2807 30.3589 0.266118\n14 0.800056 0.0736142 0.388458 1.18924 0.0782123 0.0841117 0.239442 0.239128 0.038835 0.951456 -61.1542 18.9237 0.189568\n15 0.900028 0.0521778 0.235849 1.08334 0.047486 0.0627678 0.21812 0.219538 0.0235784 0.975035 -76.4151 8.33382 0.0939158\n16 1 0.00111651 0.249723 1 0.0502793 0.0375143 0.20134 0.201341 0.0249653 1 -75.0277 0 0\n\nModelMetricsBinomialGLM: glm\n** Reported on validation data. **\n\nMSE: 0.11783296119533457\nRMSE: 0.3432680602609782\nLogLoss: 0.38278517425537495\nAUC: 0.8173716250592095\nAUCPR: 0.5982146215209964\nGini: 0.6347432501184189\nNull degrees of freedom: 1195\nResidual degrees of freedom: 1178\nNull deviance: 1193.6689319811862\nResidual deviance: 915.6221368188569\nAIC: 951.6221368188569\n\nConfusion Matrix (Act/Pred) for max f1 @ threshold = 0.31271322943274704\n 0 1 Error Rate\n----- --- --- ------- --------------\n0 861 97 0.1013 (97.0/958.0)\n1 107 131 0.4496 (107.0/238.0)\nTotal 968 228 0.1706 (204.0/1196.0)\n\nMaximum Metrics: Maximum metrics at their respective thresholds\nmetric threshold value idx\n--------------------------- ----------- -------- -----\nmax f1 0.312713 0.562232 155\nmax f2 0.187748 0.655148 230\nmax f0point5 0.429616 0.60794 107\nmax accuracy 0.443857 0.846154 105\nmax precision 0.993986 1 0\nmax recall 0.00965731 1 398\nmax specificity 0.993986 1 0\nmax absolute_mcc 0.312713 0.456535 155\nmax min_per_class_accuracy 0.195405 0.747899 224\nmax mean_per_class_accuracy 0.191452 0.750803 227\nmax tns 0.993986 958 0\nmax fns 0.993986 237 0\nmax fps 0.00749696 958 399\nmax tps 0.00965731 238 398\nmax tnr 0.993986 1 0\nmax fnr 0.993986 0.995798 0\nmax fpr 0.00749696 1 399\nmax tpr 0.00965731 1 398\n\nGains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\ngroup cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain 
kolmogorov_smirnov\n------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n1 0.0100334 0.942287 5.02521 5.02521 1 0.969817 1 0.969817 0.0504202 0.0504202 402.521 402.521 0.0504202\n2 0.0200669 0.88595 4.60644 4.81583 0.916667 0.91333 0.958333 0.941574 0.0462185 0.0966387 360.644 381.583 0.0955948\n3 0.0301003 0.833395 3.76891 4.46685 0.75 0.866256 0.888889 0.916468 0.0378151 0.134454 276.891 346.685 0.130278\n4 0.0401338 0.784863 3.76891 4.29237 0.75 0.815408 0.854167 0.891203 0.0378151 0.172269 276.891 329.237 0.164962\n5 0.0501672 0.721202 2.51261 3.93641 0.5 0.7506 0.783333 0.863082 0.0252101 0.197479 151.261 293.641 0.183909\n6 0.100334 0.494711 3.26639 3.6014 0.65 0.598425 0.716667 0.730754 0.163866 0.361345 226.639 260.14 0.325854\n7 0.150502 0.358189 2.01008 3.07096 0.4 0.413083 0.611111 0.624864 0.10084 0.462185 101.008 207.096 0.389116\n8 0.200669 0.30021 2.01008 2.80574 0.4 0.327422 0.558333 0.550503 0.10084 0.563025 101.008 180.574 0.452378\n9 0.300167 0.22369 1.1824 2.26764 0.235294 0.257801 0.451253 0.453479 0.117647 0.680672 18.2402 126.764 0.475036\n10 0.400502 0.176142 1.04692 1.96183 0.208333 0.196384 0.390397 0.389071 0.105042 0.785714 4.69188 96.1825 0.480913\n11 0.5 0.143863 0.548973 1.68067 0.109244 0.158272 0.334448 0.343143 0.0546218 0.840336 -45.1027 68.0672 0.424887\n12 0.600334 0.121439 0.502521 1.48377 0.1 0.132591 0.295265 0.307953 0.0504202 0.890756 -49.7479 48.3767 0.362573\n13 0.699833 0.0997811 0.506744 1.34486 0.10084 0.110054 0.267622 0.279817 0.0504202 0.941176 -49.3256 34.4859 0.301302\n14 0.800167 0.0798711 0.251261 1.20773 0.05 0.0891164 0.240334 0.255905 0.0252101 0.966387 -74.8739 20.7731 0.207514\n15 0.899666 0.0556177 0.211143 1.09751 0.0420168 0.0677358 0.218401 0.235094 0.0210084 0.987395 -78.8857 9.75134 0.109524\n16 1 0.00749696 0.12563 
1 0.025 0.0397561 0.198997 0.215495 0.012605 1 -87.437 0 0\n\nScoring History: \n timestamp duration iteration lambda predictors deviance_train deviance_test alpha iterations training_rmse training_logloss training_r2 training_auc training_pr_auc training_lift training_classification_error validation_rmse validation_logloss validation_r2 validation_auc validation_pr_auc validation_lift validation_classification_error\n--- ------------------- ---------- ----------- -------- ------------ ------------------ ------------------ ------- ------------ ------------------ ------------------- ------------------- ---------------- ----------------- ----------------- ------------------------------- ------------------ -------------------- ------------------- ------------------ ------------------- ----------------- ---------------------------------\n 2023-06-14 13:11:07 0.000 sec 1 .25E0 1 1.0045100357856414 0.9980509464725633 0.5\n 2023-06-14 13:11:07 0.002 sec 3 .22E0 2 0.9946354860817235 0.9864126793254803 0.5\n 2023-06-14 13:11:07 0.004 sec 5 .2E0 2 0.9859035403430394 0.9760418435888577 0.5\n 2023-06-14 13:11:07 0.006 sec 7 .19E0 2 0.9781697250290138 0.9667699237562073 0.5\n 2023-06-14 13:11:07 0.007 sec 9 .17E0 3 0.9686784218176837 0.9544752889432682 0.5\n 2023-06-14 13:11:07 0.009 sec 11 .15E0 3 0.9587676036112142 0.9411531794664827 0.5\n 2023-06-14 13:11:07 0.011 sec 13 .14E0 3 0.9500952786201631 0.9293708568093557 0.5\n 2023-06-14 13:11:07 0.013 sec 15 .13E0 3 0.9424603199349194 0.918884378267228 0.5\n 2023-06-14 13:11:07 0.015 sec 17 .12E0 5 0.9341430965161417 0.9080921324097249 0.5\n 2023-06-14 13:11:07 0.017 sec 19 .11E0 5 0.9236448771247916 0.8955093341627816 0.5\n--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n 2023-06-14 13:11:07 0.119 sec 89 .13E-2 18 0.8045918660168828 0.7656443950528371 0.5\n 2023-06-14 13:11:07 0.120 sec 90 .12E-2 18 0.8044580073891262 0.7656094511752577 0.5\n 2023-06-14 13:11:07 0.121 sec 91 
.11E-2 18 0.8043435286223151 0.7655877168108034 0.5\n 2023-06-14 13:11:07 0.122 sec 92 .1E-2 18 0.8042461026983848 0.765575731924841 0.5\n 2023-06-14 13:11:07 0.123 sec 93 .92E-3 18 0.8041638715891026 0.7655703485107499 0.5\n 2023-06-14 13:11:07 0.125 sec 94 .84E-3 18 0.8040938870788785 0.7655716226141344 0.5\n 2023-06-14 13:11:07 0.126 sec 95 .77E-3 18 0.8040346859016411 0.7655772387299105 0.5\n 2023-06-14 13:11:07 0.127 sec 96 .7E-3 18 0.8039849610395213 0.7655871006325123 0.5\n 2023-06-14 13:11:07 0.128 sec 97 .64E-3 18 0.8039427444913662 0.7655986856252318 0.5\n 2023-06-14 13:11:07 0.130 sec 98 .58E-3 18 0.8039071366484086 0.7656131564793512 0.5 98 0.3522848286548741 0.40208193579455126 0.22821696997302088 0.78494005994006 0.569122888911077 4.966712898751734 0.20329516894722144 0.3432680602609782 0.38278517425537495 0.26075791204897436 0.8173716250592095 0.5982146215209964 5.025210084033613 0.1705685618729097\n[66 rows x 24 columns]\n\n\nVariable Importances: \nvariable relative_importance scaled_importance percentage\n-------------- --------------------- ------------------- ------------\nDELINQ 0.797015 1 0.147301\nJOB.Sales 0.747835 0.938295 0.138211\nJOB.Office 0.510924 0.641047 0.0944266\nJOB.Self 0.45714 0.573565 0.0844865\nCLAGE 0.448498 0.562723 0.0828894\nDEBTINC 0.44676 0.560542 0.0825681\nDEROG 0.426324 0.534901 0.0787912\nNINQ 0.303735 0.381091 0.056135\nVALUE 0.241267 0.302713 0.0445898\nMORTDUE 0.238454 0.299183 0.0440699\nLOAN 0.21809 0.273634 0.0403064\nREASON.HomeImp 0.186297 0.233744 0.0344306\nCLNO 0.148527 0.186354 0.02745\nREASON.DebtCon 0.130812 0.164128 0.0241762\nYOJ 0.0438346 0.0549984 0.0081013\nJOB.Other 0.0353206 0.0443161 0.00652779\nJOB.Mgr 0.0299719 0.0376052 0.00553927\nJOB.ProfExe 0 0 0\n\n[tips]\nUse `model.explain()` to inspect the model.\n--\nUse `h2o.display.toggle_user_tips()` to switch on/off this section.",
- "text/html": "Model Details\n=============\nH2OGeneralizedLinearEstimator : Generalized Linear Modeling\nModel Key: glmfit\n
\n\n \n
\n
\n GLM Model: summary\n | \nfamily | \nlink | \nregularization | \nlambda_search | \nnumber_of_predictors_total | \nnumber_of_active_predictors | \nnumber_of_iterations | \ntraining_frame |
\n | \nbinomial | \nlogit | \nElastic Net (alpha = 0.5, lambda = 9.244E-4 ) | \nnlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 | \n18 | \n17 | \n93 | \npy_3_sid_80dd |
\n
\n
\n
\nModelMetricsBinomialGLM: glm\n** Reported on train data. **\n\nMSE: 0.12410460050039399\nRMSE: 0.3522848286548741\nLogLoss: 0.40208193579455126\nAUC: 0.78494005994006\nAUCPR: 0.569122888911077\nGini: 0.56988011988012\nNull degrees of freedom: 3580\nResidual degrees of freedom: 3563\nNull deviance: 3597.150438148382\nResidual deviance: 2879.710824160576\nAIC: 2915.710824160576
\n
\n \n
\n
\n Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574175263537824\n | \n0 | \n1 | \nError | \nRate |
\n 0 | \n2446.0 | \n414.0 | \n0.1448 | \n (414.0/2860.0) |
\n1 | \n314.0 | \n407.0 | \n0.4355 | \n (314.0/721.0) |
\nTotal | \n2760.0 | \n821.0 | \n0.2033 | \n (728.0/3581.0) |
\n
\n
\n
\n
\n \n
\n
\n Maximum Metrics: Maximum metrics at their respective thresholds\n metric | \nthreshold | \nvalue | \nidx |
\n max f1 | \n0.2574175 | \n0.5278859 | \n208.0 |
\nmax f2 | \n0.1522974 | \n0.6261121 | \n279.0 |
\nmax f0point5 | \n0.3886078 | \n0.5571256 | \n145.0 |
\nmax accuracy | \n0.5672639 | \n0.8352416 | \n91.0 |
\nmax precision | \n0.9988094 | \n1.0 | \n0.0 |
\nmax recall | \n0.0012376 | \n1.0 | \n399.0 |
\nmax specificity | \n0.9988094 | \n1.0 | \n0.0 |
\nmax absolute_mcc | \n0.2666152 | \n0.4020977 | \n204.0 |
\nmax min_per_class_accuracy | \n0.1782785 | \n0.7066434 | \n258.0 |
\nmax mean_per_class_accuracy | \n0.2143163 | \n0.7195026 | \n234.0 |
\nmax tns | \n0.9988094 | \n2860.0 | \n0.0 |
\nmax fns | \n0.9988094 | \n717.0 | \n0.0 |
\nmax fps | \n0.0062619 | \n2860.0 | \n398.0 |
\nmax tps | \n0.0012376 | \n721.0 | \n399.0 |
\nmax tnr | \n0.9988094 | \n1.0 | \n0.0 |
\nmax fnr | \n0.9988094 | \n0.9944521 | \n0.0 |
\nmax fpr | \n0.0062619 | \n1.0 | \n398.0 |
\nmax tpr | \n0.0012376 | \n1.0 | \n399.0 |
\n
\n
\n
\n
\n \n
\n
\n Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\n group | \ncumulative_data_fraction | \nlower_threshold | \nlift | \ncumulative_lift | \nresponse_rate | \nscore | \ncumulative_response_rate | \ncumulative_score | \ncapture_rate | \ncumulative_capture_rate | \ngain | \ncumulative_gain | \nkolmogorov_smirnov |
\n 1 | \n0.0100531 | \n0.9466854 | \n4.9667129 | \n4.9667129 | \n1.0 | \n0.9805079 | \n1.0 | \n0.9805079 | \n0.0499307 | \n0.0499307 | \n396.6712899 | \n396.6712899 | \n0.0499307 |
\n2 | \n0.0201061 | \n0.8523470 | \n4.8287487 | \n4.8977308 | \n0.9722222 | \n0.8987982 | \n0.9861111 | \n0.9396530 | \n0.0485437 | \n0.0984743 | \n382.8748652 | \n389.7730775 | \n0.0981247 |
\n3 | \n0.0301592 | \n0.7479588 | \n3.8629989 | \n4.5528202 | \n0.7777778 | \n0.7984125 | \n0.9166667 | \n0.8925729 | \n0.0388350 | \n0.1373093 | \n286.2998921 | \n355.2820157 | \n0.1341624 |
\n4 | \n0.0402122 | \n0.6912384 | \n3.1731777 | \n4.2079095 | \n0.6388889 | \n0.7152522 | \n0.8472222 | \n0.8482427 | \n0.0319001 | \n0.1692094 | \n217.3177685 | \n320.7909539 | \n0.1615171 |
\n5 | \n0.0502653 | \n0.6267417 | \n3.5870704 | \n4.0837417 | \n0.7222222 | \n0.6558977 | \n0.8222222 | \n0.8097737 | \n0.0360610 | \n0.2052705 | \n258.7070427 | \n308.3741717 | \n0.1940816 |
\n6 | \n0.1002513 | \n0.4302426 | \n2.4417360 | \n3.2650257 | \n0.4916201 | \n0.5199788 | \n0.6573816 | \n0.6652798 | \n0.1220527 | \n0.3273232 | \n144.1735950 | \n226.5025750 | \n0.2843162 |
\n7 | \n0.1502374 | \n0.3459143 | \n2.0532780 | \n2.8618606 | \n0.4134078 | \n0.3838592 | \n0.5762082 | \n0.5716473 | \n0.1026352 | \n0.4299584 | \n105.3277958 | \n186.1860592 | \n0.3502381 |
\n8 | \n0.2002234 | \n0.2844270 | \n1.7203140 | \n2.5768720 | \n0.3463687 | \n0.3119048 | \n0.5188285 | \n0.5068022 | \n0.0859917 | \n0.5159501 | \n72.0313965 | \n157.6871964 | \n0.3953207 |
\n9 | \n0.3001955 | \n0.2098447 | \n1.3318560 | \n2.1622527 | \n0.2681564 | \n0.2431157 | \n0.4353488 | \n0.4189885 | \n0.1331484 | \n0.6490985 | \n33.1855973 | \n116.2252685 | \n0.4368607 |
\n10 | \n0.4001676 | \n0.1695745 | \n0.8879040 | \n1.8438878 | \n0.1787709 | \n0.1882518 | \n0.3712491 | \n0.3613446 | \n0.0887656 | \n0.7378641 | \n-11.2096018 | \n84.3887831 | \n0.4228291 |
\n11 | \n0.5001396 | \n0.1380308 | \n0.6798015 | \n1.6112006 | \n0.1368715 | \n0.1525308 | \n0.3243998 | \n0.3196051 | \n0.0679612 | \n0.8058252 | \n-32.0198514 | \n61.1200555 | \n0.3827483 |
\n12 | \n0.6001117 | \n0.1146180 | \n0.5410665 | \n1.4329279 | \n0.1089385 | \n0.1258826 | \n0.2885063 | \n0.2873331 | \n0.0540915 | \n0.8599168 | \n-45.8933511 | \n43.2927872 | \n0.3253014 |
\n13 | \n0.7000838 | \n0.0945695 | \n0.5271930 | \n1.3035888 | \n0.1061453 | \n0.1047775 | \n0.2624651 | \n0.2612641 | \n0.0527046 | \n0.9126214 | \n-47.2807011 | \n30.3588786 | \n0.2661179 |
\n14 | \n0.8000559 | \n0.0736142 | \n0.3884580 | \n1.1892374 | \n0.0782123 | \n0.0841117 | \n0.2394415 | \n0.2391278 | \n0.0388350 | \n0.9514563 | \n-61.1542008 | \n18.9237364 | \n0.1895682 |
\n15 | \n0.9000279 | \n0.0521778 | \n0.2358495 | \n1.0833382 | \n0.0474860 | \n0.0627678 | \n0.2181198 | \n0.2195383 | \n0.0235784 | \n0.9750347 | \n-76.4150505 | \n8.3338246 | \n0.0939158 |
\n16 | \n1.0 | \n0.0011165 | \n0.2497230 | \n1.0 | \n0.0502793 | \n0.0375143 | \n0.2013404 | \n0.2013410 | \n0.0249653 | \n1.0 | \n-75.0277005 | \n0.0 | \n0.0 |
\n
\n
\n
\nModelMetricsBinomialGLM: glm\n** Reported on validation data. **\n\nMSE: 0.11783296119533457\nRMSE: 0.3432680602609782\nLogLoss: 0.38278517425537495\nAUC: 0.8173716250592095\nAUCPR: 0.5982146215209964\nGini: 0.6347432501184189\nNull degrees of freedom: 1195\nResidual degrees of freedom: 1178\nNull deviance: 1193.6689319811862\nResidual deviance: 915.6221368188569\nAIC: 951.6221368188569
\n
\n \n
\n
\n Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.31271322943274704\n | \n0 | \n1 | \nError | \nRate |
\n 0 | \n861.0 | \n97.0 | \n0.1013 | \n (97.0/958.0) |
\n1 | \n107.0 | \n131.0 | \n0.4496 | \n (107.0/238.0) |
\nTotal | \n968.0 | \n228.0 | \n0.1706 | \n (204.0/1196.0) |
\n
\n
\n
\n
\n \n
\n
\n Maximum Metrics: Maximum metrics at their respective thresholds\n metric | \nthreshold | \nvalue | \nidx |
\n max f1 | \n0.3127132 | \n0.5622318 | \n155.0 |
\nmax f2 | \n0.1877475 | \n0.6551476 | \n230.0 |
\nmax f0point5 | \n0.4296159 | \n0.6079404 | \n107.0 |
\nmax accuracy | \n0.4438566 | \n0.8461538 | \n105.0 |
\nmax precision | \n0.9939862 | \n1.0 | \n0.0 |
\nmax recall | \n0.0096573 | \n1.0 | \n398.0 |
\nmax specificity | \n0.9939862 | \n1.0 | \n0.0 |
\nmax absolute_mcc | \n0.3127132 | \n0.4565355 | \n155.0 |
\nmax min_per_class_accuracy | \n0.1954052 | \n0.7478992 | \n224.0 |
\nmax mean_per_class_accuracy | \n0.1914520 | \n0.7508026 | \n227.0 |
\nmax tns | \n0.9939862 | \n958.0 | \n0.0 |
\nmax fns | \n0.9939862 | \n237.0 | \n0.0 |
\nmax fps | \n0.0074970 | \n958.0 | \n399.0 |
\nmax tps | \n0.0096573 | \n238.0 | \n398.0 |
\nmax tnr | \n0.9939862 | \n1.0 | \n0.0 |
\nmax fnr | \n0.9939862 | \n0.9957983 | \n0.0 |
\nmax fpr | \n0.0074970 | \n1.0 | \n399.0 |
\nmax tpr | \n0.0096573 | \n1.0 | \n398.0 |
\n
\n
\n
\n
\n \n
\n
\n Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\n group | \ncumulative_data_fraction | \nlower_threshold | \nlift | \ncumulative_lift | \nresponse_rate | \nscore | \ncumulative_response_rate | \ncumulative_score | \ncapture_rate | \ncumulative_capture_rate | \ngain | \ncumulative_gain | \nkolmogorov_smirnov |
\n 1 | \n0.0100334 | \n0.9422870 | \n5.0252101 | \n5.0252101 | \n1.0 | \n0.9698172 | \n1.0 | \n0.9698172 | \n0.0504202 | \n0.0504202 | \n402.5210084 | \n402.5210084 | \n0.0504202 |
\n2 | \n0.0200669 | \n0.8859499 | \n4.6064426 | \n4.8158263 | \n0.9166667 | \n0.9133302 | \n0.9583333 | \n0.9415737 | \n0.0462185 | \n0.0966387 | \n360.6442577 | \n381.5826331 | \n0.0955948 |
\n3 | \n0.0301003 | \n0.8333951 | \n3.7689076 | \n4.4668534 | \n0.75 | \n0.8662565 | \n0.8888889 | \n0.9164679 | \n0.0378151 | \n0.1344538 | \n276.8907563 | \n346.6853408 | \n0.1302784 |
\n4 | \n0.0401338 | \n0.7848628 | \n3.7689076 | \n4.2923669 | \n0.75 | \n0.8154078 | \n0.8541667 | \n0.8912029 | \n0.0378151 | \n0.1722689 | \n276.8907563 | \n329.2366947 | \n0.1649620 |
\n5 | \n0.0501672 | \n0.7212019 | \n2.5126050 | \n3.9364146 | \n0.5 | \n0.7506003 | \n0.7833333 | \n0.8630824 | \n0.0252101 | \n0.1974790 | \n151.2605042 | \n293.6414566 | \n0.1839091 |
\n6 | \n0.1003344 | \n0.4947107 | \n3.2663866 | \n3.6014006 | \n0.65 | \n0.5984250 | \n0.7166667 | \n0.7307537 | \n0.1638655 | \n0.3613445 | \n226.6386555 | \n260.1400560 | \n0.3258539 |
\n7 | \n0.1505017 | \n0.3581892 | \n2.0100840 | \n3.0709617 | \n0.4 | \n0.4130834 | \n0.6111111 | \n0.6248636 | \n0.1008403 | \n0.4621849 | \n101.0084034 | \n207.0961718 | \n0.3891160 |
\n8 | \n0.2006689 | \n0.3002103 | \n2.0100840 | \n2.8057423 | \n0.4 | \n0.3274215 | \n0.5583333 | \n0.5505031 | \n0.1008403 | \n0.5630252 | \n101.0084034 | \n180.5742297 | \n0.4523780 |
\n9 | \n0.3001672 | \n0.2236898 | \n1.1824024 | \n2.2676435 | \n0.2352941 | \n0.2578008 | \n0.4512535 | \n0.4534792 | \n0.1176471 | \n0.6806723 | \n18.2402373 | \n126.7643548 | \n0.4750355 |
\n10 | \n0.4005017 | \n0.1761419 | \n1.0469188 | \n1.9618252 | \n0.2083333 | \n0.1963836 | \n0.3903967 | \n0.3890711 | \n0.1050420 | \n0.7857143 | \n4.6918768 | \n96.1825231 | \n0.4809126 |
\n11 | \n0.5 | \n0.1438627 | \n0.5489725 | \n1.6806723 | \n0.1092437 | \n0.1582716 | \n0.3344482 | \n0.3431428 | \n0.0546218 | \n0.8403361 | \n-45.1027470 | \n68.0672269 | \n0.4248873 |
\n12 | \n0.6003344 | \n0.1214392 | \n0.5025210 | \n1.4837668 | \n0.1 | \n0.1325910 | \n0.2952646 | \n0.3079531 | \n0.0504202 | \n0.8907563 | \n-49.7478992 | \n48.3766766 | \n0.3625726 |
\n13 | \n0.6998328 | \n0.0997811 | \n0.5067439 | \n1.3448591 | \n0.1008403 | \n0.1100543 | \n0.2676225 | \n0.2798169 | \n0.0504202 | \n0.9411765 | \n-49.3256126 | \n34.4859091 | \n0.3013017 |
\n14 | \n0.8001672 | \n0.0798711 | \n0.2512605 | \n1.2077307 | \n0.05 | \n0.0891164 | \n0.2403344 | \n0.2559046 | \n0.0252101 | \n0.9663866 | \n-74.8739496 | \n20.7730741 | \n0.2075139 |
\n15 | \n0.8996656 | \n0.0556177 | \n0.2111433 | \n1.0975134 | \n0.0420168 | \n0.0677358 | \n0.2184015 | \n0.2350941 | \n0.0210084 | \n0.9873950 | \n-78.8856719 | \n9.7513355 | \n0.1095244 |
\n16 | \n1.0 | \n0.0074970 | \n0.1256303 | \n1.0 | \n0.025 | \n0.0397561 | \n0.1989967 | \n0.2154950 | \n0.0126050 | \n1.0 | \n-87.4369748 | \n0.0 | \n0.0 |
\n
\n
\n
\n\n \n
\n
\n Scoring History: \n | \ntimestamp | \nduration | \niteration | \nlambda | \npredictors | \ndeviance_train | \ndeviance_test | \nalpha | \niterations | \ntraining_rmse | \ntraining_logloss | \ntraining_r2 | \ntraining_auc | \ntraining_pr_auc | \ntraining_lift | \ntraining_classification_error | \nvalidation_rmse | \nvalidation_logloss | \nvalidation_r2 | \nvalidation_auc | \nvalidation_pr_auc | \nvalidation_lift | \nvalidation_classification_error |
\n | \n2023-06-14 13:11:07 | \n 0.000 sec | \n1 | \n.25E0 | \n1 | \n1.0045100 | \n0.9980509 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.002 sec | \n3 | \n.22E0 | \n2 | \n0.9946355 | \n0.9864127 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.004 sec | \n5 | \n.2E0 | \n2 | \n0.9859035 | \n0.9760418 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.006 sec | \n7 | \n.19E0 | \n2 | \n0.9781697 | \n0.9667699 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.007 sec | \n9 | \n.17E0 | \n3 | \n0.9686784 | \n0.9544753 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.009 sec | \n11 | \n.15E0 | \n3 | \n0.9587676 | \n0.9411532 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.011 sec | \n13 | \n.14E0 | \n3 | \n0.9500953 | \n0.9293709 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.013 sec | \n15 | \n.13E0 | \n3 | \n0.9424603 | \n0.9188844 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.015 sec | \n17 | \n.12E0 | \n5 | \n0.9341431 | \n0.9080921 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.017 sec | \n19 | \n.11E0 | \n5 | \n0.9236449 | \n0.8955093 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- | \n--- |
\n | \n2023-06-14 13:11:07 | \n 0.119 sec | \n89 | \n.13E-2 | \n18 | \n0.8045919 | \n0.7656444 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.120 sec | \n90 | \n.12E-2 | \n18 | \n0.8044580 | \n0.7656095 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.121 sec | \n91 | \n.11E-2 | \n18 | \n0.8043435 | \n0.7655877 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.122 sec | \n92 | \n.1E-2 | \n18 | \n0.8042461 | \n0.7655757 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.123 sec | \n93 | \n.92E-3 | \n18 | \n0.8041639 | \n0.7655703 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.125 sec | \n94 | \n.84E-3 | \n18 | \n0.8040939 | \n0.7655716 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.126 sec | \n95 | \n.77E-3 | \n18 | \n0.8040347 | \n0.7655772 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.127 sec | \n96 | \n.7E-3 | \n18 | \n0.8039850 | \n0.7655871 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.128 sec | \n97 | \n.64E-3 | \n18 | \n0.8039427 | \n0.7655987 | \n0.5 | \nNone | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
\n | \n2023-06-14 13:11:07 | \n 0.130 sec | \n98 | \n.58E-3 | \n18 | \n0.8039071 | \n0.7656132 | \n0.5 | \n98 | \n0.3522848 | \n0.4020819 | \n0.2282170 | \n0.7849401 | \n0.5691229 | \n4.9667129 | \n0.2032952 | \n0.3432681 | \n0.3827852 | \n0.2607579 | \n0.8173716 | \n0.5982146 | \n5.0252101 | \n0.1705686 |
\n
\n
\n
[66 rows x 24 columns]
\n\n \n
\n
\n Variable Importances: \n variable | \nrelative_importance | \nscaled_importance | \npercentage |
\n DELINQ | \n0.7970149 | \n1.0 | \n0.1473006 |
\nJOB.Sales | \n0.7478347 | \n0.9382946 | \n0.1382114 |
\nJOB.Office | \n0.5109240 | \n0.6410470 | \n0.0944266 |
\nJOB.Self | \n0.4571400 | \n0.5735652 | \n0.0844865 |
\nCLAGE | \n0.4484982 | \n0.5627225 | \n0.0828894 |
\nDEBTINC | \n0.4467601 | \n0.5605417 | \n0.0825681 |
\nDEROG | \n0.4263238 | \n0.5349007 | \n0.0787912 |
\nNINQ | \n0.3037353 | \n0.3810912 | \n0.0561350 |
\nVALUE | \n0.2412667 | \n0.3027129 | \n0.0445898 |
\nMORTDUE | \n0.2384536 | \n0.2991833 | \n0.0440699 |
\nLOAN | \n0.2180902 | \n0.2736338 | \n0.0403064 |
\nREASON.HomeImp | \n0.1862974 | \n0.2337439 | \n0.0344306 |
\nCLNO | \n0.1485267 | \n0.1863538 | \n0.0274500 |
\nREASON.DebtCon | \n0.1308125 | \n0.1641280 | \n0.0241762 |
\nYOJ | \n0.0438346 | \n0.0549984 | \n0.0081013 |
\nJOB.Other | \n0.0353206 | \n0.0443161 | \n0.0065278 |
\nJOB.Mgr | \n0.0299719 | \n0.0376052 | \n0.0055393 |
\nJOB.ProfExe | \n0.0 | \n0.0 | \n0.0 |
\n
\n
\n
\n\n[tips]\nUse `model.explain()` to inspect the model.\n--\nUse `h2o.display.toggle_user_tips()` to switch on/off this section.
"
+ "text/html": [
+ "Model Details\n",
+ "=============\n",
+ "H2OGeneralizedLinearEstimator : Generalized Linear Modeling\n",
+ "Model Key: glmfit\n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " GLM Model: summary\n",
+ " | \n",
+ "family | \n",
+ "link | \n",
+ "regularization | \n",
+ "lambda_search | \n",
+ "number_of_predictors_total | \n",
+ "number_of_active_predictors | \n",
+ "number_of_iterations | \n",
+ "training_frame |
\n",
+ " | \n",
+ "binomial | \n",
+ "logit | \n",
+ "Elastic Net (alpha = 0.5, lambda = 9.244E-4 ) | \n",
+ "nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 | \n",
+ "18 | \n",
+ "17 | \n",
+ "93 | \n",
+ "py_3_sid_a269 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "ModelMetricsBinomialGLM: glm\n",
+ "** Reported on train data. **\n",
+ "\n",
+ "MSE: 0.12410463474559494\n",
+ "RMSE: 0.3522848772592927\n",
+ "LogLoss: 0.40208197287665776\n",
+ "AUC: 0.7849415147958837\n",
+ "AUCPR: 0.5691248967099318\n",
+ "Gini: 0.5698830295917674\n",
+ "Null degrees of freedom: 3580\n",
+ "Residual degrees of freedom: 3563\n",
+ "Null deviance: 3597.150438148379\n",
+ "Residual deviance: 2879.711089742623\n",
+ "AIC: 2915.711089742623
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574171649920362\n",
+ " | \n",
+ "0 | \n",
+ "1 | \n",
+ "Error | \n",
+ "Rate |
\n",
+ " 0 | \n",
+ "2446.0 | \n",
+ "414.0 | \n",
+ "0.1448 | \n",
+ " (414.0/2860.0) |
\n",
+ "1 | \n",
+ "314.0 | \n",
+ "407.0 | \n",
+ "0.4355 | \n",
+ " (314.0/721.0) |
\n",
+ "Total | \n",
+ "2760.0 | \n",
+ "821.0 | \n",
+ "0.2033 | \n",
+ " (728.0/3581.0) |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Maximum Metrics: Maximum metrics at their respective thresholds\n",
+ " metric | \n",
+ "threshold | \n",
+ "value | \n",
+ "idx |
\n",
+ " max f1 | \n",
+ "0.2574172 | \n",
+ "0.5278859 | \n",
+ "207.0 |
\n",
+ "max f2 | \n",
+ "0.1522967 | \n",
+ "0.6261121 | \n",
+ "279.0 |
\n",
+ "max f0point5 | \n",
+ "0.3886058 | \n",
+ "0.5571256 | \n",
+ "144.0 |
\n",
+ "max accuracy | \n",
+ "0.5672592 | \n",
+ "0.8352416 | \n",
+ "90.0 |
\n",
+ "max precision | \n",
+ "0.9988092 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max recall | \n",
+ "0.0012377 | \n",
+ "1.0 | \n",
+ "399.0 |
\n",
+ "max specificity | \n",
+ "0.9988092 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max absolute_mcc | \n",
+ "0.2661852 | \n",
+ "0.4020977 | \n",
+ "203.0 |
\n",
+ "max min_per_class_accuracy | \n",
+ "0.1782796 | \n",
+ "0.7066434 | \n",
+ "258.0 |
\n",
+ "max mean_per_class_accuracy | \n",
+ "0.2143151 | \n",
+ "0.7195026 | \n",
+ "234.0 |
\n",
+ "max tns | \n",
+ "0.9988092 | \n",
+ "2860.0 | \n",
+ "0.0 |
\n",
+ "max fns | \n",
+ "0.9988092 | \n",
+ "717.0 | \n",
+ "0.0 |
\n",
+ "max fps | \n",
+ "0.0062627 | \n",
+ "2860.0 | \n",
+ "398.0 |
\n",
+ "max tps | \n",
+ "0.0012377 | \n",
+ "721.0 | \n",
+ "399.0 |
\n",
+ "max tnr | \n",
+ "0.9988092 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max fnr | \n",
+ "0.9988092 | \n",
+ "0.9944521 | \n",
+ "0.0 |
\n",
+ "max fpr | \n",
+ "0.0062627 | \n",
+ "1.0 | \n",
+ "398.0 |
\n",
+ "max tpr | \n",
+ "0.0012377 | \n",
+ "1.0 | \n",
+ "399.0 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\n",
+ " group | \n",
+ "cumulative_data_fraction | \n",
+ "lower_threshold | \n",
+ "lift | \n",
+ "cumulative_lift | \n",
+ "response_rate | \n",
+ "score | \n",
+ "cumulative_response_rate | \n",
+ "cumulative_score | \n",
+ "capture_rate | \n",
+ "cumulative_capture_rate | \n",
+ "gain | \n",
+ "cumulative_gain | \n",
+ "kolmogorov_smirnov |
\n",
+ " 1 | \n",
+ "0.0100531 | \n",
+ "0.9466824 | \n",
+ "4.9667129 | \n",
+ "4.9667129 | \n",
+ "1.0 | \n",
+ "0.9805069 | \n",
+ "1.0 | \n",
+ "0.9805069 | \n",
+ "0.0499307 | \n",
+ "0.0499307 | \n",
+ "396.6712899 | \n",
+ "396.6712899 | \n",
+ "0.0499307 |
\n",
+ "2 | \n",
+ "0.0201061 | \n",
+ "0.8523402 | \n",
+ "4.8287487 | \n",
+ "4.8977308 | \n",
+ "0.9722222 | \n",
+ "0.8987941 | \n",
+ "0.9861111 | \n",
+ "0.9396505 | \n",
+ "0.0485437 | \n",
+ "0.0984743 | \n",
+ "382.8748652 | \n",
+ "389.7730775 | \n",
+ "0.0981247 |
\n",
+ "3 | \n",
+ "0.0301592 | \n",
+ "0.7479558 | \n",
+ "3.8629989 | \n",
+ "4.5528202 | \n",
+ "0.7777778 | \n",
+ "0.7984072 | \n",
+ "0.9166667 | \n",
+ "0.8925694 | \n",
+ "0.0388350 | \n",
+ "0.1373093 | \n",
+ "286.2998921 | \n",
+ "355.2820157 | \n",
+ "0.1341624 |
\n",
+ "4 | \n",
+ "0.0402122 | \n",
+ "0.6912340 | \n",
+ "3.1731777 | \n",
+ "4.2079095 | \n",
+ "0.6388889 | \n",
+ "0.7152449 | \n",
+ "0.8472222 | \n",
+ "0.8482383 | \n",
+ "0.0319001 | \n",
+ "0.1692094 | \n",
+ "217.3177685 | \n",
+ "320.7909539 | \n",
+ "0.1615171 |
\n",
+ "5 | \n",
+ "0.0502653 | \n",
+ "0.6267302 | \n",
+ "3.5870704 | \n",
+ "4.0837417 | \n",
+ "0.7222222 | \n",
+ "0.6558936 | \n",
+ "0.8222222 | \n",
+ "0.8097693 | \n",
+ "0.0360610 | \n",
+ "0.2052705 | \n",
+ "258.7070427 | \n",
+ "308.3741717 | \n",
+ "0.1940816 |
\n",
+ "6 | \n",
+ "0.1002513 | \n",
+ "0.4302463 | \n",
+ "2.4417360 | \n",
+ "3.2650257 | \n",
+ "0.4916201 | \n",
+ "0.5199749 | \n",
+ "0.6573816 | \n",
+ "0.6652757 | \n",
+ "0.1220527 | \n",
+ "0.3273232 | \n",
+ "144.1735950 | \n",
+ "226.5025750 | \n",
+ "0.2843162 |
\n",
+ "7 | \n",
+ "0.1502374 | \n",
+ "0.3459178 | \n",
+ "2.0532780 | \n",
+ "2.8618606 | \n",
+ "0.4134078 | \n",
+ "0.3838564 | \n",
+ "0.5762082 | \n",
+ "0.5716436 | \n",
+ "0.1026352 | \n",
+ "0.4299584 | \n",
+ "105.3277958 | \n",
+ "186.1860592 | \n",
+ "0.3502381 |
\n",
+ "8 | \n",
+ "0.2002234 | \n",
+ "0.2844205 | \n",
+ "1.7203140 | \n",
+ "2.5768720 | \n",
+ "0.3463687 | \n",
+ "0.3119030 | \n",
+ "0.5188285 | \n",
+ "0.5067990 | \n",
+ "0.0859917 | \n",
+ "0.5159501 | \n",
+ "72.0313965 | \n",
+ "157.6871964 | \n",
+ "0.3953207 |
\n",
+ "9 | \n",
+ "0.3001955 | \n",
+ "0.2098441 | \n",
+ "1.3318560 | \n",
+ "2.1622527 | \n",
+ "0.2681564 | \n",
+ "0.2431147 | \n",
+ "0.4353488 | \n",
+ "0.4189860 | \n",
+ "0.1331484 | \n",
+ "0.6490985 | \n",
+ "33.1855973 | \n",
+ "116.2252685 | \n",
+ "0.4368607 |
\n",
+ "10 | \n",
+ "0.4001676 | \n",
+ "0.1695731 | \n",
+ "0.8879040 | \n",
+ "1.8438878 | \n",
+ "0.1787709 | \n",
+ "0.1882520 | \n",
+ "0.3712491 | \n",
+ "0.3613428 | \n",
+ "0.0887656 | \n",
+ "0.7378641 | \n",
+ "-11.2096018 | \n",
+ "84.3887831 | \n",
+ "0.4228291 |
\n",
+ "11 | \n",
+ "0.5001396 | \n",
+ "0.1380318 | \n",
+ "0.6798015 | \n",
+ "1.6112006 | \n",
+ "0.1368715 | \n",
+ "0.1525314 | \n",
+ "0.3243998 | \n",
+ "0.3196038 | \n",
+ "0.0679612 | \n",
+ "0.8058252 | \n",
+ "-32.0198514 | \n",
+ "61.1200555 | \n",
+ "0.3827483 |
\n",
+ "12 | \n",
+ "0.6001117 | \n",
+ "0.1146235 | \n",
+ "0.5410665 | \n",
+ "1.4329279 | \n",
+ "0.1089385 | \n",
+ "0.1258838 | \n",
+ "0.2885063 | \n",
+ "0.2873322 | \n",
+ "0.0540915 | \n",
+ "0.8599168 | \n",
+ "-45.8933511 | \n",
+ "43.2927872 | \n",
+ "0.3253014 |
\n",
+ "13 | \n",
+ "0.7000838 | \n",
+ "0.0945753 | \n",
+ "0.5271930 | \n",
+ "1.3035888 | \n",
+ "0.1061453 | \n",
+ "0.1047786 | \n",
+ "0.2624651 | \n",
+ "0.2612635 | \n",
+ "0.0527046 | \n",
+ "0.9126214 | \n",
+ "-47.2807011 | \n",
+ "30.3588786 | \n",
+ "0.2661179 |
\n",
+ "14 | \n",
+ "0.8000559 | \n",
+ "0.0736148 | \n",
+ "0.3884580 | \n",
+ "1.1892374 | \n",
+ "0.0782123 | \n",
+ "0.0841132 | \n",
+ "0.2394415 | \n",
+ "0.2391274 | \n",
+ "0.0388350 | \n",
+ "0.9514563 | \n",
+ "-61.1542008 | \n",
+ "18.9237364 | \n",
+ "0.1895682 |
\n",
+ "15 | \n",
+ "0.9000279 | \n",
+ "0.0521793 | \n",
+ "0.2358495 | \n",
+ "1.0833382 | \n",
+ "0.0474860 | \n",
+ "0.0627694 | \n",
+ "0.2181198 | \n",
+ "0.2195382 | \n",
+ "0.0235784 | \n",
+ "0.9750347 | \n",
+ "-76.4150505 | \n",
+ "8.3338246 | \n",
+ "0.0939158 |
\n",
+ "16 | \n",
+ "1.0 | \n",
+ "0.0011167 | \n",
+ "0.2497230 | \n",
+ "1.0 | \n",
+ "0.0502793 | \n",
+ "0.0375156 | \n",
+ "0.2013404 | \n",
+ "0.2013410 | \n",
+ "0.0249653 | \n",
+ "1.0 | \n",
+ "-75.0277005 | \n",
+ "0.0 | \n",
+ "0.0 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "ModelMetricsBinomialGLM: glm\n",
+ "** Reported on validation data. **\n",
+ "\n",
+ "MSE: 0.1178330502946334\n",
+ "RMSE: 0.3432681900418875\n",
+ "LogLoss: 0.3827854417227382\n",
+ "AUC: 0.8173891686110769\n",
+ "AUCPR: 0.5982392824442148\n",
+ "Gini: 0.6347783372221538\n",
+ "Null degrees of freedom: 1195\n",
+ "Residual degrees of freedom: 1178\n",
+ "Null deviance: 1193.6689319811862\n",
+ "Residual deviance: 915.6227766007898\n",
+ "AIC: 951.6227766007898
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3129932533175332\n",
+ " | \n",
+ "0 | \n",
+ "1 | \n",
+ "Error | \n",
+ "Rate |
\n",
+ " 0 | \n",
+ "863.0 | \n",
+ "95.0 | \n",
+ "0.0992 | \n",
+ " (95.0/958.0) |
\n",
+ "1 | \n",
+ "107.0 | \n",
+ "131.0 | \n",
+ "0.4496 | \n",
+ " (107.0/238.0) |
\n",
+ "Total | \n",
+ "970.0 | \n",
+ "226.0 | \n",
+ "0.1689 | \n",
+ " (202.0/1196.0) |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Maximum Metrics: Maximum metrics at their respective thresholds\n",
+ " metric | \n",
+ "threshold | \n",
+ "value | \n",
+ "idx |
\n",
+ " max f1 | \n",
+ "0.3129933 | \n",
+ "0.5646552 | \n",
+ "155.0 |
\n",
+ "max f2 | \n",
+ "0.1877463 | \n",
+ "0.6551476 | \n",
+ "231.0 |
\n",
+ "max f0point5 | \n",
+ "0.4296125 | \n",
+ "0.6079404 | \n",
+ "107.0 |
\n",
+ "max accuracy | \n",
+ "0.4438485 | \n",
+ "0.8461538 | \n",
+ "105.0 |
\n",
+ "max precision | \n",
+ "0.9939857 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max recall | \n",
+ "0.0096581 | \n",
+ "1.0 | \n",
+ "398.0 |
\n",
+ "max specificity | \n",
+ "0.9939857 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max absolute_mcc | \n",
+ "0.3129933 | \n",
+ "0.4602072 | \n",
+ "155.0 |
\n",
+ "max min_per_class_accuracy | \n",
+ "0.1954052 | \n",
+ "0.7478992 | \n",
+ "225.0 |
\n",
+ "max mean_per_class_accuracy | \n",
+ "0.1914540 | \n",
+ "0.7508026 | \n",
+ "228.0 |
\n",
+ "max tns | \n",
+ "0.9939857 | \n",
+ "958.0 | \n",
+ "0.0 |
\n",
+ "max fns | \n",
+ "0.9939857 | \n",
+ "237.0 | \n",
+ "0.0 |
\n",
+ "max fps | \n",
+ "0.0074980 | \n",
+ "958.0 | \n",
+ "399.0 |
\n",
+ "max tps | \n",
+ "0.0096581 | \n",
+ "238.0 | \n",
+ "398.0 |
\n",
+ "max tnr | \n",
+ "0.9939857 | \n",
+ "1.0 | \n",
+ "0.0 |
\n",
+ "max fnr | \n",
+ "0.9939857 | \n",
+ "0.9957983 | \n",
+ "0.0 |
\n",
+ "max fpr | \n",
+ "0.0074980 | \n",
+ "1.0 | \n",
+ "399.0 |
\n",
+ "max tpr | \n",
+ "0.0096581 | \n",
+ "1.0 | \n",
+ "398.0 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\n",
+ " group | \n",
+ "cumulative_data_fraction | \n",
+ "lower_threshold | \n",
+ "lift | \n",
+ "cumulative_lift | \n",
+ "response_rate | \n",
+ "score | \n",
+ "cumulative_response_rate | \n",
+ "cumulative_score | \n",
+ "capture_rate | \n",
+ "cumulative_capture_rate | \n",
+ "gain | \n",
+ "cumulative_gain | \n",
+ "kolmogorov_smirnov |
\n",
+ " 1 | \n",
+ "0.0100334 | \n",
+ "0.9422827 | \n",
+ "5.0252101 | \n",
+ "5.0252101 | \n",
+ "1.0 | \n",
+ "0.9698159 | \n",
+ "1.0 | \n",
+ "0.9698159 | \n",
+ "0.0504202 | \n",
+ "0.0504202 | \n",
+ "402.5210084 | \n",
+ "402.5210084 | \n",
+ "0.0504202 |
\n",
+ "2 | \n",
+ "0.0200669 | \n",
+ "0.8859422 | \n",
+ "4.6064426 | \n",
+ "4.8158263 | \n",
+ "0.9166667 | \n",
+ "0.9133265 | \n",
+ "0.9583333 | \n",
+ "0.9415712 | \n",
+ "0.0462185 | \n",
+ "0.0966387 | \n",
+ "360.6442577 | \n",
+ "381.5826331 | \n",
+ "0.0955948 |
\n",
+ "3 | \n",
+ "0.0301003 | \n",
+ "0.8333880 | \n",
+ "3.7689076 | \n",
+ "4.4668534 | \n",
+ "0.75 | \n",
+ "0.8662506 | \n",
+ "0.8888889 | \n",
+ "0.9164643 | \n",
+ "0.0378151 | \n",
+ "0.1344538 | \n",
+ "276.8907563 | \n",
+ "346.6853408 | \n",
+ "0.1302784 |
\n",
+ "4 | \n",
+ "0.0401338 | \n",
+ "0.7848582 | \n",
+ "3.7689076 | \n",
+ "4.2923669 | \n",
+ "0.75 | \n",
+ "0.8154020 | \n",
+ "0.8541667 | \n",
+ "0.8911987 | \n",
+ "0.0378151 | \n",
+ "0.1722689 | \n",
+ "276.8907563 | \n",
+ "329.2366947 | \n",
+ "0.1649620 |
\n",
+ "5 | \n",
+ "0.0501672 | \n",
+ "0.7211959 | \n",
+ "2.5126050 | \n",
+ "3.9364146 | \n",
+ "0.5 | \n",
+ "0.7505935 | \n",
+ "0.7833333 | \n",
+ "0.8630777 | \n",
+ "0.0252101 | \n",
+ "0.1974790 | \n",
+ "151.2605042 | \n",
+ "293.6414566 | \n",
+ "0.1839091 |
\n",
+ "6 | \n",
+ "0.1003344 | \n",
+ "0.4947058 | \n",
+ "3.2663866 | \n",
+ "3.6014006 | \n",
+ "0.65 | \n",
+ "0.5984199 | \n",
+ "0.7166667 | \n",
+ "0.7307488 | \n",
+ "0.1638655 | \n",
+ "0.3613445 | \n",
+ "226.6386555 | \n",
+ "260.1400560 | \n",
+ "0.3258539 |
\n",
+ "7 | \n",
+ "0.1505017 | \n",
+ "0.3581829 | \n",
+ "2.0100840 | \n",
+ "3.0709617 | \n",
+ "0.4 | \n",
+ "0.4130805 | \n",
+ "0.6111111 | \n",
+ "0.6248593 | \n",
+ "0.1008403 | \n",
+ "0.4621849 | \n",
+ "101.0084034 | \n",
+ "207.0961718 | \n",
+ "0.3891160 |
\n",
+ "8 | \n",
+ "0.2006689 | \n",
+ "0.3002017 | \n",
+ "2.0100840 | \n",
+ "2.8057423 | \n",
+ "0.4 | \n",
+ "0.3274207 | \n",
+ "0.5583333 | \n",
+ "0.5504997 | \n",
+ "0.1008403 | \n",
+ "0.5630252 | \n",
+ "101.0084034 | \n",
+ "180.5742297 | \n",
+ "0.4523780 |
\n",
+ "9 | \n",
+ "0.3001672 | \n",
+ "0.2236892 | \n",
+ "1.1824024 | \n",
+ "2.2676435 | \n",
+ "0.2352941 | \n",
+ "0.2577997 | \n",
+ "0.4512535 | \n",
+ "0.4534766 | \n",
+ "0.1176471 | \n",
+ "0.6806723 | \n",
+ "18.2402373 | \n",
+ "126.7643548 | \n",
+ "0.4750355 |
\n",
+ "10 | \n",
+ "0.4005017 | \n",
+ "0.1761387 | \n",
+ "1.0469188 | \n",
+ "1.9618252 | \n",
+ "0.2083333 | \n",
+ "0.1963835 | \n",
+ "0.3903967 | \n",
+ "0.3890691 | \n",
+ "0.1050420 | \n",
+ "0.7857143 | \n",
+ "4.6918768 | \n",
+ "96.1825231 | \n",
+ "0.4809126 |
\n",
+ "11 | \n",
+ "0.5 | \n",
+ "0.1438673 | \n",
+ "0.5489725 | \n",
+ "1.6806723 | \n",
+ "0.1092437 | \n",
+ "0.1582719 | \n",
+ "0.3344482 | \n",
+ "0.3431412 | \n",
+ "0.0546218 | \n",
+ "0.8403361 | \n",
+ "-45.1027470 | \n",
+ "68.0672269 | \n",
+ "0.4248873 |
\n",
+ "12 | \n",
+ "0.6003344 | \n",
+ "0.1214381 | \n",
+ "0.5025210 | \n",
+ "1.4837668 | \n",
+ "0.1 | \n",
+ "0.1325918 | \n",
+ "0.2952646 | \n",
+ "0.3079519 | \n",
+ "0.0504202 | \n",
+ "0.8907563 | \n",
+ "-49.7478992 | \n",
+ "48.3766766 | \n",
+ "0.3625726 |
\n",
+ "13 | \n",
+ "0.6998328 | \n",
+ "0.0997817 | \n",
+ "0.5067439 | \n",
+ "1.3448591 | \n",
+ "0.1008403 | \n",
+ "0.1100559 | \n",
+ "0.2676225 | \n",
+ "0.2798162 | \n",
+ "0.0504202 | \n",
+ "0.9411765 | \n",
+ "-49.3256126 | \n",
+ "34.4859091 | \n",
+ "0.3013017 |
\n",
+ "14 | \n",
+ "0.8001672 | \n",
+ "0.0798720 | \n",
+ "0.2512605 | \n",
+ "1.2077307 | \n",
+ "0.05 | \n",
+ "0.0891178 | \n",
+ "0.2403344 | \n",
+ "0.2559041 | \n",
+ "0.0252101 | \n",
+ "0.9663866 | \n",
+ "-74.8739496 | \n",
+ "20.7730741 | \n",
+ "0.2075139 |
\n",
+ "15 | \n",
+ "0.8996656 | \n",
+ "0.0556184 | \n",
+ "0.2111433 | \n",
+ "1.0975134 | \n",
+ "0.0420168 | \n",
+ "0.0677374 | \n",
+ "0.2184015 | \n",
+ "0.2350939 | \n",
+ "0.0210084 | \n",
+ "0.9873950 | \n",
+ "-78.8856719 | \n",
+ "9.7513355 | \n",
+ "0.1095244 |
\n",
+ "16 | \n",
+ "1.0 | \n",
+ "0.0074980 | \n",
+ "0.1256303 | \n",
+ "1.0 | \n",
+ "0.025 | \n",
+ "0.0397575 | \n",
+ "0.1989967 | \n",
+ "0.2154949 | \n",
+ "0.0126050 | \n",
+ "1.0 | \n",
+ "-87.4369748 | \n",
+ "0.0 | \n",
+ "0.0 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Scoring History: \n",
+ " | \n",
+ "timestamp | \n",
+ "duration | \n",
+ "iteration | \n",
+ "lambda | \n",
+ "predictors | \n",
+ "deviance_train | \n",
+ "deviance_test | \n",
+ "alpha | \n",
+ "iterations | \n",
+ "training_rmse | \n",
+ "training_logloss | \n",
+ "training_r2 | \n",
+ "training_auc | \n",
+ "training_pr_auc | \n",
+ "training_lift | \n",
+ "training_classification_error | \n",
+ "validation_rmse | \n",
+ "validation_logloss | \n",
+ "validation_r2 | \n",
+ "validation_auc | \n",
+ "validation_pr_auc | \n",
+ "validation_lift | \n",
+ "validation_classification_error |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.000 sec | \n",
+ "1 | \n",
+ ".25E0 | \n",
+ "1 | \n",
+ "1.0045100 | \n",
+ "0.9980509 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.061 sec | \n",
+ "3 | \n",
+ ".22E0 | \n",
+ "2 | \n",
+ "0.9946355 | \n",
+ "0.9864127 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.101 sec | \n",
+ "5 | \n",
+ ".2E0 | \n",
+ "2 | \n",
+ "0.9859035 | \n",
+ "0.9760418 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.130 sec | \n",
+ "7 | \n",
+ ".19E0 | \n",
+ "2 | \n",
+ "0.9781697 | \n",
+ "0.9667699 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.164 sec | \n",
+ "9 | \n",
+ ".17E0 | \n",
+ "3 | \n",
+ "0.9686784 | \n",
+ "0.9544753 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.201 sec | \n",
+ "11 | \n",
+ ".15E0 | \n",
+ "3 | \n",
+ "0.9587676 | \n",
+ "0.9411532 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.220 sec | \n",
+ "13 | \n",
+ ".14E0 | \n",
+ "3 | \n",
+ "0.9500953 | \n",
+ "0.9293709 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.268 sec | \n",
+ "15 | \n",
+ ".13E0 | \n",
+ "3 | \n",
+ "0.9424603 | \n",
+ "0.9188844 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.295 sec | \n",
+ "17 | \n",
+ ".12E0 | \n",
+ "5 | \n",
+ "0.9341431 | \n",
+ "0.9080921 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:37 | \n",
+ " 0.351 sec | \n",
+ "19 | \n",
+ ".11E0 | \n",
+ "5 | \n",
+ "0.9236449 | \n",
+ "0.8955093 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- | \n",
+ "--- |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.462 sec | \n",
+ "89 | \n",
+ ".13E-2 | \n",
+ "18 | \n",
+ "0.8045920 | \n",
+ "0.7656455 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.467 sec | \n",
+ "90 | \n",
+ ".12E-2 | \n",
+ "18 | \n",
+ "0.8044582 | \n",
+ "0.7656103 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.472 sec | \n",
+ "91 | \n",
+ ".11E-2 | \n",
+ "18 | \n",
+ "0.8043436 | \n",
+ "0.7655884 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.485 sec | \n",
+ "92 | \n",
+ ".1E-2 | \n",
+ "18 | \n",
+ "0.8042462 | \n",
+ "0.7655764 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.492 sec | \n",
+ "93 | \n",
+ ".92E-3 | \n",
+ "18 | \n",
+ "0.8041639 | \n",
+ "0.7655709 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.498 sec | \n",
+ "94 | \n",
+ ".84E-3 | \n",
+ "18 | \n",
+ "0.8040939 | \n",
+ "0.7655721 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.502 sec | \n",
+ "95 | \n",
+ ".77E-3 | \n",
+ "18 | \n",
+ "0.8040347 | \n",
+ "0.7655776 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.506 sec | \n",
+ "96 | \n",
+ ".7E-3 | \n",
+ "18 | \n",
+ "0.8039850 | \n",
+ "0.7655875 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.518 sec | \n",
+ "97 | \n",
+ ".64E-3 | \n",
+ "18 | \n",
+ "0.8039428 | \n",
+ "0.7655990 | \n",
+ "0.5 | \n",
+ "None | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " |
\n",
+ " | \n",
+ "2023-10-05 18:17:38 | \n",
+ " 1.528 sec | \n",
+ "98 | \n",
+ ".58E-3 | \n",
+ "18 | \n",
+ "0.8039072 | \n",
+ "0.7656134 | \n",
+ "0.5 | \n",
+ "98 | \n",
+ "0.3522849 | \n",
+ "0.4020820 | \n",
+ "0.2282168 | \n",
+ "0.7849415 | \n",
+ "0.5691249 | \n",
+ "4.9667129 | \n",
+ "0.2032952 | \n",
+ "0.3432682 | \n",
+ "0.3827854 | \n",
+ "0.2607574 | \n",
+ "0.8173892 | \n",
+ "0.5982393 | \n",
+ "5.0252101 | \n",
+ "0.1688963 |
\n",
+ "
\n",
+ "
\n",
+ "
[66 rows x 24 columns]
\n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Variable Importances: \n",
+ " variable | \n",
+ "relative_importance | \n",
+ "scaled_importance | \n",
+ "percentage |
\n",
+ " DELINQ | \n",
+ "0.7970053 | \n",
+ "1.0 | \n",
+ "0.1473004 |
\n",
+ "JOB.Sales | \n",
+ "0.7478484 | \n",
+ "0.9383230 | \n",
+ "0.1382154 |
\n",
+ "JOB.Office | \n",
+ "0.5109144 | \n",
+ "0.6410426 | \n",
+ "0.0944259 |
\n",
+ "JOB.Self | \n",
+ "0.4571329 | \n",
+ "0.5735632 | \n",
+ "0.0844861 |
\n",
+ "CLAGE | \n",
+ "0.4484834 | \n",
+ "0.5627107 | \n",
+ "0.0828875 |
\n",
+ "DEBTINC | \n",
+ "0.4467476 | \n",
+ "0.5605328 | \n",
+ "0.0825667 |
\n",
+ "DEROG | \n",
+ "0.4263194 | \n",
+ "0.5349016 | \n",
+ "0.0787912 |
\n",
+ "NINQ | \n",
+ "0.3037309 | \n",
+ "0.3810902 | \n",
+ "0.0561347 |
\n",
+ "VALUE | \n",
+ "0.2412600 | \n",
+ "0.3027081 | \n",
+ "0.0445890 |
\n",
+ "MORTDUE | \n",
+ "0.2384487 | \n",
+ "0.2991808 | \n",
+ "0.0440695 |
\n",
+ "LOAN | \n",
+ "0.2180768 | \n",
+ "0.2736203 | \n",
+ "0.0403044 |
\n",
+ "REASON.HomeImp | \n",
+ "0.1797686 | \n",
+ "0.2255551 | \n",
+ "0.0332244 |
\n",
+ "CLNO | \n",
+ "0.1485205 | \n",
+ "0.1863481 | \n",
+ "0.0274492 |
\n",
+ "REASON.DebtCon | \n",
+ "0.1373842 | \n",
+ "0.1723755 | \n",
+ "0.0253910 |
\n",
+ "YOJ | \n",
+ "0.0438345 | \n",
+ "0.0549990 | \n",
+ "0.0081014 |
\n",
+ "JOB.Other | \n",
+ "0.0353093 | \n",
+ "0.0443025 | \n",
+ "0.0065258 |
\n",
+ "JOB.Mgr | \n",
+ "0.0299615 | \n",
+ "0.0375926 | \n",
+ "0.0055374 |
\n",
+ "JOB.ProfExe | \n",
+ "0.0 | \n",
+ "0.0 | \n",
+ "0.0 |
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "[tips]\n",
+ "Use `model.explain()` to inspect the model.\n",
+ "--\n",
+ "Use `h2o.display.toggle_user_tips()` to switch on/off this section.
"
+ ],
+ "text/plain": [
+ "Model Details\n",
+ "=============\n",
+ "H2OGeneralizedLinearEstimator : Generalized Linear Modeling\n",
+ "Model Key: glmfit\n",
+ "\n",
+ "\n",
+ "GLM Model: summary\n",
+ " family link regularization lambda_search number_of_predictors_total number_of_active_predictors number_of_iterations training_frame\n",
+ "-- -------- ------ --------------------------------------------- ---------------------------------------------------------------------------- ---------------------------- ----------------------------- ---------------------- ----------------\n",
+ " binomial logit Elastic Net (alpha = 0.5, lambda = 9.244E-4 ) nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 18 17 93 py_3_sid_a269\n",
+ "\n",
+ "ModelMetricsBinomialGLM: glm\n",
+ "** Reported on train data. **\n",
+ "\n",
+ "MSE: 0.12410463474559494\n",
+ "RMSE: 0.3522848772592927\n",
+ "LogLoss: 0.40208197287665776\n",
+ "AUC: 0.7849415147958837\n",
+ "AUCPR: 0.5691248967099318\n",
+ "Gini: 0.5698830295917674\n",
+ "Null degrees of freedom: 3580\n",
+ "Residual degrees of freedom: 3563\n",
+ "Null deviance: 3597.150438148379\n",
+ "Residual deviance: 2879.711089742623\n",
+ "AIC: 2915.711089742623\n",
+ "\n",
+ "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574171649920362\n",
+ " 0 1 Error Rate\n",
+ "----- ---- --- ------- --------------\n",
+ "0 2446 414 0.1448 (414.0/2860.0)\n",
+ "1 314 407 0.4355 (314.0/721.0)\n",
+ "Total 2760 821 0.2033 (728.0/3581.0)\n",
+ "\n",
+ "Maximum Metrics: Maximum metrics at their respective thresholds\n",
+ "metric threshold value idx\n",
+ "--------------------------- ----------- -------- -----\n",
+ "max f1 0.257417 0.527886 207\n",
+ "max f2 0.152297 0.626112 279\n",
+ "max f0point5 0.388606 0.557126 144\n",
+ "max accuracy 0.567259 0.835242 90\n",
+ "max precision 0.998809 1 0\n",
+ "max recall 0.00123775 1 399\n",
+ "max specificity 0.998809 1 0\n",
+ "max absolute_mcc 0.266185 0.402098 203\n",
+ "max min_per_class_accuracy 0.17828 0.706643 258\n",
+ "max mean_per_class_accuracy 0.214315 0.719503 234\n",
+ "max tns 0.998809 2860 0\n",
+ "max fns 0.998809 717 0\n",
+ "max fps 0.00626272 2860 398\n",
+ "max tps 0.00123775 721 399\n",
+ "max tnr 0.998809 1 0\n",
+ "max fnr 0.998809 0.994452 0\n",
+ "max fpr 0.00626272 1 398\n",
+ "max tpr 0.00123775 1 399\n",
+ "\n",
+ "Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\n",
+ "group cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n",
+ "------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n",
+ "1 0.0100531 0.946682 4.96671 4.96671 1 0.980507 1 0.980507 0.0499307 0.0499307 396.671 396.671 0.0499307\n",
+ "2 0.0201061 0.85234 4.82875 4.89773 0.972222 0.898794 0.986111 0.93965 0.0485437 0.0984743 382.875 389.773 0.0981247\n",
+ "3 0.0301592 0.747956 3.863 4.55282 0.777778 0.798407 0.916667 0.892569 0.038835 0.137309 286.3 355.282 0.134162\n",
+ "4 0.0402122 0.691234 3.17318 4.20791 0.638889 0.715245 0.847222 0.848238 0.0319001 0.169209 217.318 320.791 0.161517\n",
+ "5 0.0502653 0.62673 3.58707 4.08374 0.722222 0.655894 0.822222 0.809769 0.036061 0.20527 258.707 308.374 0.194082\n",
+ "6 0.100251 0.430246 2.44174 3.26503 0.49162 0.519975 0.657382 0.665276 0.122053 0.327323 144.174 226.503 0.284316\n",
+ "7 0.150237 0.345918 2.05328 2.86186 0.413408 0.383856 0.576208 0.571644 0.102635 0.429958 105.328 186.186 0.350238\n",
+ "8 0.200223 0.284421 1.72031 2.57687 0.346369 0.311903 0.518828 0.506799 0.0859917 0.51595 72.0314 157.687 0.395321\n",
+ "9 0.300195 0.209844 1.33186 2.16225 0.268156 0.243115 0.435349 0.418986 0.133148 0.649098 33.1856 116.225 0.436861\n",
+ "10 0.400168 0.169573 0.887904 1.84389 0.178771 0.188252 0.371249 0.361343 0.0887656 0.737864 -11.2096 84.3888 0.422829\n",
+ "11 0.50014 0.138032 0.679801 1.6112 0.136872 0.152531 0.3244 0.319604 0.0679612 0.805825 -32.0199 61.1201 0.382748\n",
+ "12 0.600112 0.114623 0.541066 1.43293 0.108939 0.125884 0.288506 0.287332 0.0540915 0.859917 -45.8934 43.2928 0.325301\n",
+ "13 0.700084 0.0945753 0.527193 1.30359 0.106145 0.104779 0.262465 0.261263 0.0527046 0.912621 -47.2807 30.3589 0.266118\n",
+ "14 0.800056 0.0736148 0.388458 1.18924 0.0782123 0.0841132 0.239442 0.239127 0.038835 0.951456 -61.1542 18.9237 0.189568\n",
+ "15 0.900028 0.0521793 0.235849 1.08334 0.047486 0.0627694 0.21812 0.219538 0.0235784 0.975035 -76.4151 8.33382 0.0939158\n",
+ "16 1 0.00111668 0.249723 1 0.0502793 0.0375156 0.20134 0.201341 0.0249653 1 -75.0277 0 0\n",
+ "\n",
+ "ModelMetricsBinomialGLM: glm\n",
+ "** Reported on validation data. **\n",
+ "\n",
+ "MSE: 0.1178330502946334\n",
+ "RMSE: 0.3432681900418875\n",
+ "LogLoss: 0.3827854417227382\n",
+ "AUC: 0.8173891686110769\n",
+ "AUCPR: 0.5982392824442148\n",
+ "Gini: 0.6347783372221538\n",
+ "Null degrees of freedom: 1195\n",
+ "Residual degrees of freedom: 1178\n",
+ "Null deviance: 1193.6689319811862\n",
+ "Residual deviance: 915.6227766007898\n",
+ "AIC: 951.6227766007898\n",
+ "\n",
+ "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3129932533175332\n",
+ " 0 1 Error Rate\n",
+ "----- --- --- ------- --------------\n",
+ "0 863 95 0.0992 (95.0/958.0)\n",
+ "1 107 131 0.4496 (107.0/238.0)\n",
+ "Total 970 226 0.1689 (202.0/1196.0)\n",
+ "\n",
+ "Maximum Metrics: Maximum metrics at their respective thresholds\n",
+ "metric threshold value idx\n",
+ "--------------------------- ----------- -------- -----\n",
+ "max f1 0.312993 0.564655 155\n",
+ "max f2 0.187746 0.655148 231\n",
+ "max f0point5 0.429613 0.60794 107\n",
+ "max accuracy 0.443849 0.846154 105\n",
+ "max precision 0.993986 1 0\n",
+ "max recall 0.00965813 1 398\n",
+ "max specificity 0.993986 1 0\n",
+ "max absolute_mcc 0.312993 0.460207 155\n",
+ "max min_per_class_accuracy 0.195405 0.747899 225\n",
+ "max mean_per_class_accuracy 0.191454 0.750803 228\n",
+ "max tns 0.993986 958 0\n",
+ "max fns 0.993986 237 0\n",
+ "max fps 0.00749797 958 399\n",
+ "max tps 0.00965813 238 398\n",
+ "max tnr 0.993986 1 0\n",
+ "max fnr 0.993986 0.995798 0\n",
+ "max fpr 0.00749797 1 399\n",
+ "max tpr 0.00965813 1 398\n",
+ "\n",
+ "Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\n",
+ "group cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n",
+ "------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n",
+ "1 0.0100334 0.942283 5.02521 5.02521 1 0.969816 1 0.969816 0.0504202 0.0504202 402.521 402.521 0.0504202\n",
+ "2 0.0200669 0.885942 4.60644 4.81583 0.916667 0.913326 0.958333 0.941571 0.0462185 0.0966387 360.644 381.583 0.0955948\n",
+ "3 0.0301003 0.833388 3.76891 4.46685 0.75 0.866251 0.888889 0.916464 0.0378151 0.134454 276.891 346.685 0.130278\n",
+ "4 0.0401338 0.784858 3.76891 4.29237 0.75 0.815402 0.854167 0.891199 0.0378151 0.172269 276.891 329.237 0.164962\n",
+ "5 0.0501672 0.721196 2.51261 3.93641 0.5 0.750593 0.783333 0.863078 0.0252101 0.197479 151.261 293.641 0.183909\n",
+ "6 0.100334 0.494706 3.26639 3.6014 0.65 0.59842 0.716667 0.730749 0.163866 0.361345 226.639 260.14 0.325854\n",
+ "7 0.150502 0.358183 2.01008 3.07096 0.4 0.41308 0.611111 0.624859 0.10084 0.462185 101.008 207.096 0.389116\n",
+ "8 0.200669 0.300202 2.01008 2.80574 0.4 0.327421 0.558333 0.5505 0.10084 0.563025 101.008 180.574 0.452378\n",
+ "9 0.300167 0.223689 1.1824 2.26764 0.235294 0.2578 0.451253 0.453477 0.117647 0.680672 18.2402 126.764 0.475036\n",
+ "10 0.400502 0.176139 1.04692 1.96183 0.208333 0.196383 0.390397 0.389069 0.105042 0.785714 4.69188 96.1825 0.480913\n",
+ "11 0.5 0.143867 0.548973 1.68067 0.109244 0.158272 0.334448 0.343141 0.0546218 0.840336 -45.1027 68.0672 0.424887\n",
+ "12 0.600334 0.121438 0.502521 1.48377 0.1 0.132592 0.295265 0.307952 0.0504202 0.890756 -49.7479 48.3767 0.362573\n",
+ "13 0.699833 0.0997817 0.506744 1.34486 0.10084 0.110056 0.267622 0.279816 0.0504202 0.941176 -49.3256 34.4859 0.301302\n",
+ "14 0.800167 0.079872 0.251261 1.20773 0.05 0.0891178 0.240334 0.255904 0.0252101 0.966387 -74.8739 20.7731 0.207514\n",
+ "15 0.899666 0.0556184 0.211143 1.09751 0.0420168 0.0677374 0.218401 0.235094 0.0210084 0.987395 -78.8857 9.75134 0.109524\n",
+ "16 1 0.00749797 0.12563 1 0.025 0.0397575 0.198997 0.215495 0.012605 1 -87.437 0 0\n",
+ "\n",
+ "Scoring History: \n",
+ " timestamp duration iteration lambda predictors deviance_train deviance_test alpha iterations training_rmse training_logloss training_r2 training_auc training_pr_auc training_lift training_classification_error validation_rmse validation_logloss validation_r2 validation_auc validation_pr_auc validation_lift validation_classification_error\n",
+ "--- ------------------- ---------- ----------- -------- ------------ ------------------ ------------------ ------- ------------ ------------------ ------------------- ------------------- ------------------ ------------------ ----------------- ------------------------------- ------------------ -------------------- ------------------- ------------------ ------------------- ----------------- ---------------------------------\n",
+ " 2023-10-05 18:17:37 0.000 sec 1 .25E0 1 1.0045100357856405 0.9980509464725638 0.5\n",
+ " 2023-10-05 18:17:37 0.061 sec 3 .22E0 2 0.9946354860817245 0.9864126793254803 0.5\n",
+ " 2023-10-05 18:17:37 0.101 sec 5 .2E0 2 0.9859035403430406 0.9760418435888577 0.5\n",
+ " 2023-10-05 18:17:37 0.130 sec 7 .19E0 2 0.9781697250290142 0.9667699237562071 0.5\n",
+ " 2023-10-05 18:17:37 0.164 sec 9 .17E0 3 0.9686784218176835 0.9544752889432686 0.5\n",
+ " 2023-10-05 18:17:37 0.201 sec 11 .15E0 3 0.9587676036112146 0.9411531794664825 0.5\n",
+ " 2023-10-05 18:17:37 0.220 sec 13 .14E0 3 0.9500952786201636 0.9293708568093554 0.5\n",
+ " 2023-10-05 18:17:37 0.268 sec 15 .13E0 3 0.9424603199349196 0.918884378267228 0.5\n",
+ " 2023-10-05 18:17:37 0.295 sec 17 .12E0 5 0.934143096516142 0.9080921324097252 0.5\n",
+ " 2023-10-05 18:17:37 0.351 sec 19 .11E0 5 0.9236448771247918 0.8955093341627816 0.5\n",
+ "--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n",
+ " 2023-10-05 18:17:38 1.462 sec 89 .13E-2 18 0.8045920176677348 0.7656454925772931 0.5\n",
+ " 2023-10-05 18:17:38 1.467 sec 90 .12E-2 18 0.8044581526528738 0.7656102932355412 0.5\n",
+ " 2023-10-05 18:17:38 1.472 sec 91 .11E-2 18 0.804343644390629 0.7655884396559107 0.5\n",
+ " 2023-10-05 18:17:38 1.485 sec 92 .1E-2 18 0.804246194799186 0.7655763506822736 0.5\n",
+ " 2023-10-05 18:17:38 1.492 sec 93 .92E-3 18 0.8041639457533155 0.7655708834454764 0.5\n",
+ " 2023-10-05 18:17:38 1.498 sec 94 .84E-3 18 0.8040939470898276 0.7655720881120318 0.5\n",
+ " 2023-10-05 18:17:38 1.502 sec 95 .77E-3 18 0.8040347348308917 0.7655776424675973 0.5\n",
+ " 2023-10-05 18:17:38 1.506 sec 96 .7E-3 18 0.8039850013216244 0.7655874603666687 0.5\n",
+ " 2023-10-05 18:17:38 1.518 sec 97 .64E-3 18 0.8039427775722173 0.7655990044572286 0.5\n",
+ " 2023-10-05 18:17:38 1.528 sec 98 .58E-3 18 0.8039071637538451 0.7656134388669195 0.5 98 0.3522848772592927 0.40208197287665776 0.22821675700859534 0.7849415147958837 0.5691248967099318 4.966712898751734 0.20329516894722144 0.3432681900418875 0.3827854417227382 0.26075735307166414 0.8173891686110769 0.5982392824442148 5.025210084033613 0.1688963210702341\n",
+ "[66 rows x 24 columns]\n",
+ "\n",
+ "\n",
+ "Variable Importances: \n",
+ "variable relative_importance scaled_importance percentage\n",
+ "-------------- --------------------- ------------------- ------------\n",
+ "DELINQ 0.797005 1 0.1473\n",
+ "JOB.Sales 0.747848 0.938323 0.138215\n",
+ "JOB.Office 0.510914 0.641043 0.0944259\n",
+ "JOB.Self 0.457133 0.573563 0.0844861\n",
+ "CLAGE 0.448483 0.562711 0.0828875\n",
+ "DEBTINC 0.446748 0.560533 0.0825667\n",
+ "DEROG 0.426319 0.534902 0.0787912\n",
+ "NINQ 0.303731 0.38109 0.0561347\n",
+ "VALUE 0.24126 0.302708 0.044589\n",
+ "MORTDUE 0.238449 0.299181 0.0440695\n",
+ "LOAN 0.218077 0.27362 0.0403044\n",
+ "REASON.HomeImp 0.179769 0.225555 0.0332244\n",
+ "CLNO 0.14852 0.186348 0.0274492\n",
+ "REASON.DebtCon 0.137384 0.172376 0.025391\n",
+ "YOJ 0.0438345 0.054999 0.00810138\n",
+ "JOB.Other 0.0353093 0.0443025 0.00652578\n",
+ "JOB.Mgr 0.0299615 0.0375926 0.0055374\n",
+ "JOB.ProfExe 0 0 0\n",
+ "\n",
+ "[tips]\n",
+ "Use `model.explain()` to inspect the model.\n",
+ "--\n",
+ "Use `h2o.display.toggle_user_tips()` to switch on/off this section."
+ ]
},
"execution_count": 7,
"metadata": {},
@@ -243,7 +2267,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "[[0.551348008992684, 0.8486897717666948]]\n"
+ "[[0.5513512979207219, 0.8486897717666948]]\n"
]
}
],
@@ -253,6 +2277,15 @@
"print(glm_performance.accuracy())"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "glm.pre"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -321,14 +2354,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "inputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\inputVar.json\n",
- "inputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\inputVar.json\n",
- "outputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\outputVar.json\n",
- "outputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\outputVar.json\n",
- "ModelProperties.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\ModelProperties.json\n",
- "ModelProperties.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\ModelProperties.json\n",
- "fileMetadata.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\fileMetadata.json\n",
- "fileMetadata.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\fileMetadata.json\n"
+ "inputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/inputVar.json\n",
+ "inputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/inputVar.json\n",
+ "outputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/outputVar.json\n",
+ "outputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/outputVar.json\n",
+ "ModelProperties.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/ModelProperties.json\n",
+ "ModelProperties.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/ModelProperties.json\n",
+ "fileMetadata.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/fileMetadata.json\n",
+ "fileMetadata.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/fileMetadata.json\n"
]
}
],
@@ -351,7 +2384,7 @@
" model_name=model_prefix + \"_binary\",\n",
" model_desc=\"Binary H2O model.\",\n",
" target_variable=y,\n",
- " target_values=[\"1\", \"0\"],\n",
+ " target_values=[\"0\", \"1\"],\n",
" json_path=binary_folder,\n",
" modeler=\"sasdemo\"\n",
")\n",
@@ -359,7 +2392,7 @@
" model_name=model_prefix + \"_mojo\",\n",
" model_desc=\"MOJO H2O model.\",\n",
" target_variable=y,\n",
- " target_values=[\"1\", \"0\"],\n",
+ " target_values=[\"0\", \"1\"],\n",
" json_path=mojo_folder,\n",
" modeler=\"sasdemo\"\n",
")\n",
@@ -435,36 +2468,45 @@
},
{
"cell_type": "markdown",
- "source": [
- "### Run a Score Test in SAS Model Manager"
- ],
"metadata": {
"collapsed": false
- }
+ },
+ "source": [
+ "### Run a Score Test in SAS Model Manager"
+ ]
},
{
"cell_type": "code",
"execution_count": 15,
- "outputs": [],
- "source": [
- "# Publish the model to the SAS Microanalytic Score destination in SAS Model Manager\n",
- "module = publish_model(mojo_model[0], \"maslocal\", name=\"HMEQMOJO_publish\", replace=True)"
- ],
"metadata": {
- "collapsed": false,
"ExecuteTime": {
"end_time": "2023-06-14T17:18:16.283114900Z",
"start_time": "2023-06-14T17:18:08.890088300Z"
- }
- }
+ },
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Publish the model to the SAS Microanalytic Score destination in SAS Model Manager\n",
+ "module = publish_model(mojo_model[0], \"maslocal\", name=\"HMEQMOJO_publish\", replace=True)"
+ ]
},
{
"cell_type": "code",
"execution_count": 16,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2023-06-14T17:18:34.236925200Z",
+ "start_time": "2023-06-14T17:18:34.209029800Z"
+ },
+ "collapsed": false
+ },
"outputs": [
{
"data": {
- "text/plain": ""
+ "text/plain": [
+ ""
+ ]
},
"execution_count": 16,
"metadata": {},
@@ -474,18 +2516,18 @@
"source": [
"# Instantiate a API call logger to visualize score calls in realtime\n",
"sess.add_stderr_logger(level=20)"
- ],
- "metadata": {
- "collapsed": false,
- "ExecuteTime": {
- "end_time": "2023-06-14T17:18:34.236925200Z",
- "start_time": "2023-06-14T17:18:34.209029800Z"
- }
- }
+ ]
},
{
"cell_type": "code",
"execution_count": 17,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2023-06-14T17:18:47.064393600Z",
+ "start_time": "2023-06-14T17:18:37.850162200Z"
+ },
+ "collapsed": false
+ },
"outputs": [
{
"name": "stderr",
@@ -521,18 +2563,18 @@
"# Step through the rows of data and collect the score from SAS Microanalytic Score publish destination\n",
"for index, row in X.iterrows():\n",
" result.append(module.score(row))"
- ],
- "metadata": {
- "collapsed": false,
- "ExecuteTime": {
- "end_time": "2023-06-14T17:18:47.064393600Z",
- "start_time": "2023-06-14T17:18:37.850162200Z"
- }
- }
+ ]
},
{
"cell_type": "code",
"execution_count": 18,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2023-06-14T17:19:21.811834300Z",
+ "start_time": "2023-06-14T17:19:21.772752700Z"
+ },
+ "collapsed": false
+ },
"outputs": [
{
"name": "stdout",
@@ -554,33 +2596,23 @@
"source": [
"# Scoring results\n",
"pp.pprint(result)"
- ],
- "metadata": {
- "collapsed": false,
- "ExecuteTime": {
- "end_time": "2023-06-14T17:19:21.811834300Z",
- "start_time": "2023-06-14T17:19:21.772752700Z"
- }
- }
+ ]
},
{
"cell_type": "code",
"execution_count": null,
- "outputs": [],
- "source": [],
"metadata": {
"collapsed": false
- }
+ },
+ "outputs": [],
+ "source": []
}
],
"metadata": {
- "interpreter": {
- "hash": "f9708d3f38eeab835578f0695c8890716ee809285281a28db6e379a5abca1310"
- },
"kernelspec": {
- "display_name": "dev-py38",
+ "display_name": "yeehaw",
"language": "python",
- "name": "dev-py38"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
diff --git a/examples/pzmm_tensorflow_keras_model_import.ipynb b/examples/pzmm_tensorflow_keras_model_import.ipynb
index df0fa399..e45e702b 100644
--- a/examples/pzmm_tensorflow_keras_model_import.ipynb
+++ b/examples/pzmm_tensorflow_keras_model_import.ipynb
@@ -753,7 +753,7 @@
" input_data=x, # What does example input data look like?\n",
" predict_method=[model.predict, [int, int]], # What is the predict method and what does it return?\n",
" overwrite_model=True, # Overwrite model if it arleady exists\n",
- " target_values=[\"1\", \"0\"], # What are the expecte values of the target variable?\n",
+ " target_values=[\"0\", \"1\"], # What are the expected values of the target variable?\n",
" score_metrics=score_metrics, # What are the output variables?\n",
" model_file_name = model_prefix + \".h5\", # How was the model file serialized?\n",
" missing_values = True, # Does the data include missing values?\n",
diff --git a/src/sasctl/pzmm/import_model.py b/src/sasctl/pzmm/import_model.py
index 899042ce..dc66deab 100644
--- a/src/sasctl/pzmm/import_model.py
+++ b/src/sasctl/pzmm/import_model.py
@@ -198,6 +198,7 @@ def import_model(
predict_threshold: Optional[float] = None,
target_values: Optional[List[str]] = None,
overwrite_project_properties: Optional[bool] = False,
+ target_index: Optional[int] = 1,
**kwargs,
) -> Tuple[RestObj, Union[dict, str, Path]]:
"""
@@ -275,10 +276,16 @@ def import_model(
target_values : list of strings, optional
A list of target values for the target variable. This argument and the
score_metrics argument dictate the handling of the predicted values from
- the prediction method. The default value is None.
+ the prediction method. The order of the target values should reflect the
+ order of the related probabilities in the model. The default value is None.
overwrite_project_properties : bool, optional
Set whether the project properties should be overwritten when attempting to
import the model. The default value is False.
+ target_index : int, optional
+ Sets the index of success for a binary model. If target_values are given, this
+ index should match the index of the target outcome in target_values. If target_values
+ are not given, this index should indicate whether the target probability variable
+ is the first or second variable returned by the model. The default value is 1.
kwargs : dict, optional
Other keyword arguments are passed to the following function:
* sasctl.pzmm.ScoreCode.write_score_code(...,
@@ -352,6 +359,7 @@ def import_model(
target_values=target_values,
missing_values=missing_values,
score_cas=score_cas,
+ target_index=target_index,
**kwargs,
)
if score_code_dict:
@@ -451,6 +459,7 @@ def import_model(
target_values=target_values,
missing_values=missing_values,
score_cas=score_cas,
+ target_index=target_index,
**kwargs,
)
if score_code_dict:
diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py
index 7af3c3f5..056097c6 100644
--- a/src/sasctl/pzmm/write_score_code.py
+++ b/src/sasctl/pzmm/write_score_code.py
@@ -35,6 +35,7 @@ def write_score_code(
missing_values: Union[bool, list, DataFrame] = False,
score_cas: Optional[bool] = True,
score_code_path: Union[Path, str, None] = None,
+ target_index: Optional[int] = 1,
**kwargs,
) -> Union[dict, None]:
"""
@@ -129,6 +130,11 @@ def write_score_code(
score_code_path : str or Path, optional
Path for output score code file(s) to be generated. If no value is supplied
a dict is returned instead. The default value is None.
+ target_index : int, optional
+ Sets the index of success for a binary model. If target_values are given, this
+ index should match the index of the target outcome in target_values. If target_values
+ are not given, this index should indicate whether the target probability variable
+ is the first or second variable returned by the model. The default value is 1.
kwargs
Other keyword arguments are passed to one of the following functions:
* sasctl.pzmm.ScoreCode._write_imports(pickle_type, mojo_model=None,
@@ -245,6 +251,7 @@ def score(var1, var2, var3, var4):
predict_method[1],
target_values=target_values,
predict_threshold=predict_threshold,
+ target_index=target_index,
h2o_model=True,
)
else:
@@ -275,6 +282,7 @@ def score(var1, var2, var3, var4):
predict_method[1],
target_values=target_values,
predict_threshold=predict_threshold,
+ target_index=target_index,
)
if missing_values:
@@ -1082,6 +1090,7 @@ def _predictions_to_metrics(
target_values: Optional[List[str]] = None,
predict_threshold: Optional[float] = None,
h2o_model: Optional[bool] = False,
+ target_index: Optional[int] = 1,
) -> None:
"""
Using the provided arguments, write in to the score code the method for handling
@@ -1106,6 +1115,11 @@ def _predictions_to_metrics(
h2o_model : bool, optional
Flag to indicate that the model is an H2O.ai model. The default value is
False.
+ target_index : int, optional
+ Sets the index of success for a binary model. If target_values are given, this
+ index should match the index of the target outcome in target_values. If target_values
+ are not given, this index should indicate whether the target probability variable
+ is the first or second variable returned by the model. The default value is 1.
"""
if len(metrics) == 1 and isinstance(metrics, list):
# Flatten single valued list
@@ -1122,7 +1136,12 @@ def _predictions_to_metrics(
# Binary classification model
elif len(target_values) == 2:
cls._binary_target(
- metrics, target_values, predict_returns, predict_threshold, h2o_model
+ metrics,
+ target_values,
+ predict_returns,
+ predict_threshold,
+ target_index,
+ h2o_model,
)
# Multiclass classification model
elif len(target_values) > 2:
@@ -1173,27 +1192,27 @@ def _no_targets_no_thresholds(
)
"""
if input_array.shape[0] == 1:
- Classification = prediction[0]
+ Classification = prediction[1][0]
return Classification
else:
- output_table = pd.DataFrame({'Classification': prediction})
+ output_table = prediction.drop(prediction.columns[1:], axis=1)
+ output_table.columns = ['Classification']
return output_table
"""
else:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}{metrics} = prediction[0]\n{'':8}return {metrics}\n"
+ f"{'':8}{metrics} = prediction[0][0]\n{'':8}return {metrics}\n"
f"{'':4}else:\n"
f"{'':8}output_table = pd.DataFrame({{'{metrics}': prediction}})\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- Classification = prediction[1][0]
+ Classification = prediction[0][0]
return Classification
else:
- output_table = prediction.drop(prediction.columns[1:], axis=1)
- output_table.columns = ['Classification']
+ output_table = pd.DataFrame({'Classification': prediction})
return output_table
"""
else:
@@ -1228,7 +1247,7 @@ def _no_targets_no_thresholds(
else:
cls.score_code += f"{'':4}if input_array.shape[0] == 1:\n"
for i in range(len(metrics)):
- cls.score_code += f"{'':8}{metrics[i]} = prediction[{i}]\n"
+ cls.score_code += f"{'':8}{metrics[i]} = prediction[0][{i}]\n"
cls.score_code += f"\n{'':8}return {', '.join(metrics)}\n"
cls.score_code += (
f"{'':4}else:\n"
@@ -1237,10 +1256,10 @@ def _no_targets_no_thresholds(
)
"""
if input_array.shape[0] == 1:
- Classification = prediction[0]
- Proba_A = prediction[1]
- Proba_B = prediction[2]
- Proba_C = prediction[3]
+ Classification = prediction[0][0]
+ Proba_A = prediction[0][1]
+ Proba_B = prediction[0][2]
+ Proba_C = prediction[0][3]
return Classification, Proba_A, Proba_B, Proba_C
else:
@@ -1256,6 +1275,7 @@ def _binary_target(
target_values: List[str],
returns: List[Any],
threshold: Optional[float] = None,
+ target_index: Optional[int] = 1,
h2o_model: Optional[bool] = None,
) -> None:
"""
@@ -1276,6 +1296,9 @@ def _binary_target(
h2o_model : bool, optional
Flag to indicate that the model is an H2O.ai model. The default value is
False.
+ target_index : int, optional
+ Sets the index of the probability value to be returned from a binary model. The
+ default value is 1; for H2O models the probability is read from column target_index + 1.
"""
if not threshold:
# Set default threshold
@@ -1299,14 +1322,13 @@ def _binary_target(
if h2o_model:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction[1][2] > {threshold}:\n"
- f"{'':12}{metrics} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[1][{target_index+1}] > {threshold}:\n"
+ f"{'':12}{metrics} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics} = \"{target_values[1]}\"\n"
+ f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n"
f"{'':8}return {metrics}\n"
f"{'':4}else:\n"
- f"{'':8}target_values = {target_values}\n"
- f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.array(target_values)[np.argmax(prediction.iloc[0:, 1:].values, axis=1)]}})\n"
+ f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})\n"
f"{'':8}return output_table"
)
"""
@@ -1317,21 +1339,20 @@ def _binary_target(
Classification = "B"
return Classification
else:
- target_values = ['A', 'B']
- output_table = pd.DataFrame({'Classification': np.array(target_values)[np.argmax(prediction.iloc[0:, 1:].values, axis=1)]})
+ output_table = pd.DataFrame({'Classification': np.where(prediction[prediction.columns[2]] > .5, 'B', 'A')})
return output_table
"""
# One return that is the classification
elif len(returns) == 1 and returns[0]:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction\n"
+ f"{'':8}return prediction[0]\n"
f"{'':4}else:\n"
f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})"
)
"""
if input_array.shape[0] == 1:
- return prediction
+ return prediction[0]
else:
return pd.DataFrame({'Classification': prediction})
"""
@@ -1339,45 +1360,46 @@ def _binary_target(
elif len(returns) == 1 and not returns[0]:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction > {threshold}:\n"
- f"{'':12}{metrics} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0] > {threshold}:\n"
+ f"{'':12}{metrics} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics} = \"{target_values[1]}\"\n"
+ f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n"
f"{'':8}return {metrics}\n"
f"{'':4}else:\n"
- f"{'':8}return pd.DataFrame({{'{metrics}': ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]}})\n"
+ f"{'':8}return pd.DataFrame({{'{metrics}': ['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]}})\n"
)
"""
if input_array.shape[0] == 1:
- if prediction > 0.5:
+ if prediction[0] > 0.5:
Classification = "A"
else:
Classification = "B"
return Classification
else:
- return pd.DataFrame({'Classification': ['A' if p > 0.5 else 'B' for p in prediction]})
+ return pd.DataFrame({'Classification': ['B' if p > 0.5 else 'A' for p in prediction]})
"""
# Two returns from the prediction method
elif len(returns) == 2 and sum(returns) == 0:
# Only probabilities returned; return classification for larger value
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction[0] > prediction[1]:\n"
- f"{'':12}{metrics} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0][{target_index}] > {threshold}:\n"
+ f"{'':12}{metrics} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics} = \"{target_values[1]}\"\n\n"
+ f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n\n"
f"{'':8}return {metrics}\n"
f"{'':4}else:\n"
f"{'':8}target_values = {target_values}\n"
- f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array(target_values)[np.argmax(prediction, axis=1)]}})\n"
+ f"{'':8}prediction = pd.DataFrame(prediction)\n"
+ f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- if prediction[0] > prediction[1]:
- Classification = "A"
- else:
+ if prediction[0][0] > .5:
Classification = "B"
+ else:
+ Classification = "A"
return Classification
else:
@@ -1387,18 +1409,19 @@ def _binary_target(
"""
# Classification and probability returned; return classification value
elif len(returns) > 1 and sum(returns) == 1:
+ # TODO: Either figure out how to handle threshold or add warning
# Determine which return is the classification value
class_index = [i for i, x in enumerate(returns) if x][0]
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}{metrics} = prediction[{class_index}]\n{'':8}return {metrics}\n"
+ f"{'':8}{metrics} = prediction[0][{class_index}]\n{'':8}return {metrics}\n"
f"{'':4}else:\n"
f"{'':8}output_table = pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- Classification = prediction[1]
+ Classification = prediction[0][1]
return Classification
else:
output_table = pd.DataFrame({'Classification': [p[1] for p in prediction]})
@@ -1407,7 +1430,6 @@ def _binary_target(
else:
cls._invalid_predict_config()
elif len(metrics) == 2:
- # TODO: change to align with other cases and assign target_values to classification column
# H2O models with two metrics are assumed to be classification + probability
if h2o_model:
warn(
@@ -1417,19 +1439,30 @@ def _binary_target(
)
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[1][0], float(prediction[1][2])\n"
+ f"{'':8}if prediction[1][{target_index+1}] > {threshold}:\n"
+ f"{'':12}{metrics[0]} = '{target_values[target_index]}'\n"
+ f"{'':8}else:\n"
+ f"{'':12}{metrics[0]} = '{target_values[abs(target_index-1)]}'\n"
+ f"{'':8}return {metrics[0]}, float(prediction[1][{target_index+1}])\n"
f"{'':4}else:\n"
- f"{'':8}output_table = prediction.drop(prediction.columns[1], axis=1)\n"
+ f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n"
+ f"{'':8}classifications = np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n"
f"{'':8}output_table.columns = {metrics}\n"
+ f"{'':8}output_table['{metrics[0]}'] = classifications\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- return prediction[1][0], float(prediction[1][2])
+ if prediction[1][1] > 0.5:
+ EM_CLASSIFICATION = '1'
+ else:
+ EM_CLASSIFICATION = '0'
+ return EM_CLASSIFICATION, float(prediction[1][1])
else:
- output_table = prediction.drop(prediction.columns[1], axis=1)
- output_table.columns = ['Classification', 'Probability']
- return output_table
+ output_table = prediction.drop(prediction.columns[2], axis=1)
+ classifications = np.where(prediction[prediction.columns[1]] > 0.5, '0', '1')
+ output_table.columns = ['EM_CLASSIFICATION', 'EM_EVENTPROBABILITY']
+ output_table['EM_CLASSIFICATION'] = classifications
"""
# Calculate the classification; return the classification and probability
elif sum(returns) == 0 and len(returns) == 1:
@@ -1440,25 +1473,25 @@ def _binary_target(
)
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction > {threshold}:\n"
- f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0] > {threshold}:\n"
+ f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n\n"
- f"{'':8}return {metrics[0]}, prediction\n"
+ f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n\n"
+ f"{'':8}return {metrics[0]}, prediction[0]\n"
f"{'':4}else:\n"
- f"{'':8}classifications = ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]\n"
+ f"{'':8}classifications = ['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]\n"
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': prediction}})"
)
"""
if input_array.shape[0] == 1:
- if prediction > 0.5:
- Classification = "A"
- else:
+ if prediction[0] > 0.5:
Classification = "B"
+ else:
+ Classification = "A"
- return Classification, prediction
+ return Classification, prediction[0]
else:
- classifications = ['A' if p > 0.5 else 'B' for p in prediction]
+ classifications = ['B' if p > 0.5 else 'A' for p in prediction]
return pd.DataFrame({'Classification': classifications, 'Probability': prediction})
"""
# Calculate the classification; return the classification and probability
@@ -1470,41 +1503,42 @@ def _binary_target(
)
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction[0] > prediction[1]:\n"
- f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0][{target_index}] > {threshold}:\n"
+ f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n"
- f"{'':8}return {metrics[0]}, prediction[0]\n"
+ f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n"
+ f"{'':8}return {metrics[0]}, prediction[0][{target_index}]\n"
f"{'':4}else:\n"
f"{'':8}df = pd.DataFrame(prediction)\n"
- f"{'':8}proba = df[0]\n"
- f"{'':8}classifications = np.where(df[0] > df[1], '{target_values[0]}', '{target_values[1]}')\n"
+ f"{'':8}proba = df[{target_index}]\n"
+ f"{'':8}classifications = np.where(df[{target_index}] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n"
f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': proba}})"
)
"""
if input_array.shape[0] == 1:
- if prediction[0] > prediction[1]:
- Classification = "A"
- else:
+ if prediction[0][1] > .5:
Classification = "B"
- return Classification, prediction[0]
+ else:
+ Classification = "A"
+ return Classification, prediction[0][1]
else:
df = pd.DataFrame(prediction)
proba = df[0]
- classifications = np.where(df[0] > df[1], 'A', 'B')
+ classifications = np.where(df[1] > .5, 'B', 'A')
return pd.DataFrame({'Classification': classifications, 'Probability': proba})
"""
+ # TODO: Potentially add threshold
# Return classification and probability value
elif sum(returns) == 1 and len(returns) == 2:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[0], prediction[1]\n"
+ f"{'':8}return prediction[0][0], prediction[0][1]\n"
f"{'':4}else:\n"
f"{'':8}return pd.DataFrame(prediction, columns={metrics})"
)
"""
if input_array.shape[0] == 1:
- return prediction[0], prediction[1]
+ return prediction[0][0], prediction[0][1]
else:
return pd.DataFrame(prediction, columns=['Classification', 'Probability'])
"""
@@ -1517,38 +1551,42 @@ def _binary_target(
# Determine which return is the classification value
class_index = [i for i, x in enumerate(returns) if x][0]
if class_index == 0:
- metric_list = '"' + '","'.join(metrics) + '","drop"'
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[0], prediction[1]\n"
+ f"{'':8}return prediction[0][0], prediction[0][{target_index+1}]\n"
f"{'':4}else:\n"
- f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
- f"{'':8}return output_table.drop('drop', axis=1)"
+ f"{'':8}prediction = pd.DataFrame(prediction)\n"
+ f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n"
+ f"{'':8}output_table.columns = {metrics}\n"
+ f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- return prediction[0], prediction[1]
+ return prediction[0][0], prediction[0][2]
else:
- output_table = pd.DataFrame(prediction, columns=["Classification","Probability","drop"])
- return output_table.drop('drop', axis=1)
+ output_table = prediction.drop(prediction.columns[1], axis=1)
+ output_table.columns = ["Classification", "Probability"]
+ return output_table
"""
else:
- metric_list = '"' + '","drop","'.join(metrics[::-1]) + '"'
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[{class_index}], prediction[0]\n"
+ f"{'':8}return prediction[0][{class_index}], prediction[0][{target_index}]\n"
f"{'':4}else:\n"
- f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
+ f"{'':8}prediction = pd.DataFrame(prediction)\n"
+ f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)}], axis=1)\n"
f"{'':8}output_table = output_table[output_table.columns[::-1]]\n"
- f"{'':8}return output_table.drop('drop', axis=1)"
+ f"{'':8}output_table.columns = {metrics}\n"
+ f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- return prediction[2], prediction[0]
+ return prediction[0][2], prediction[0][0]
else:
- output_table = pd.DataFrame(prediction, columns=["Probability","drop","Classification"])
+ output_table = prediction.drop(prediction.columns[0], axis=1)
output_table = output_table[output_table.columns[::-1]]
+ output_table.columns = ["Classification", "Probability"]
return output_table.drop('drop', axis=1)
"""
else:
@@ -1578,59 +1616,63 @@ def _binary_target(
)
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction > {threshold}:\n"
- f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0] > {threshold}:\n"
+ f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n"
- f"{'':8}return {metrics[0]}, prediction, 1 - prediction\n"
+ f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n"
+ f"{'':8}return {metrics[0]}, prediction[0], 1 - prediction[0]\n"
f"{'':4}else:\n"
- f"{'':8}classifications = ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]\n"
+ f"{'':8}classifications = ['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]\n"
f"{'':8}output_table = pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': prediction}})\n"
f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- if prediction > 0.5:
- Classification = "A"
- else:
+ if prediction[0] > 0.5:
Classification = "B"
- return Classification, prediction, 1 - prediction
+ else:
+ Classification = "A"
+ return Classification, prediction[0], 1 - prediction[0]
else:
- classifications = ['A' if p > 0.5 else 'B' for p in prediction]
+ classifications = ['B' if p > 0.5 else 'A' for p in prediction]
output_table = pd.DataFrame({'Classification': classifications, 'Proba_0': prediction})
output_table['Proba_1'] = 1 - output_table['Proba_0']
return output_table
"""
elif sum(returns) == 0 and len(returns) == 2:
+ # TODO: Make decision on whether ordering should follow given pattern or reflect input ordering
warn(
"Due to the ambiguity of the provided metrics and prediction return"
" types, the score code assumes the return order to be: "
- "[classification, probability of event, probability of no event]."
+ "[classification, probability of event, probability of no event] "
+ "for a single return. For batch scoring, the return order of the "
+ "probabilities will mirror their return order in the model."
)
- metric_list = '"' + '","'.join(metrics[1:]) + '"'
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}if prediction[0] > prediction[1]:\n"
- f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n"
+ f"{'':8}if prediction[0][{target_index}] > {threshold}:\n"
+ f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n"
f"{'':8}else:\n"
- f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n"
- f"{'':8}return {metrics[0]}, prediction[0], prediction[1]\n"
+ f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n"
+ f"{'':8}return {metrics[0]}, prediction[0][{target_index}], prediction[0][{abs(target_index-1)}]\n"
f"{'':4}else:\n"
- f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
- f"{'':8}output_table.insert(0, '{metrics[0]}', np.array({target_values})[np.argmax(output_table.values, axis=1)])\n"
+ f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n"
+ f"{'':8}classifications = np.where(output_table[output_table.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n"
+ f"{'':8}output_table.insert(loc=0, column='{metrics[0]}', value=classifications)\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- if prediction[0] > prediction[1]:
+ if prediction[0][0] > prediction[0][1]:
Classification = "A"
else:
Classification = "B"
- return Classification, prediction[0], prediction[1]
+ return Classification, prediction[0][0], prediction[0][1]
else:
output_table = pd.DataFrame(prediction, columns=["Proba_0","Proba_1"])
- output_table.insert(0, 'Classification', np.array(['A', 'B'])[np.argmax(output_table.values, axis=1)])
+ classifications = np.where(output_table[output_table.columns[1]] > .5, 'B', 'A')
+ output_table.insert(loc=0, column='Classification', value=classifications)
return output_table
"""
# Find which return is the classification, then return probabilities
@@ -1638,37 +1680,35 @@ def _binary_target(
# Determine which return is the classification value
class_index = [i for i, x in enumerate(returns) if x][0]
if class_index == 0:
- metric_list = '"' + '","'.join(metrics[:2]) + '"'
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[0], prediction[1], 1 - prediction[1]\n"
+ f"{'':8}return prediction[0][0], prediction[0][1], 1 - prediction[0][1]\n"
f"{'':4}else:\n"
- f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
+ f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[:2]})\n"
f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- return prediction[0], prediction[1], 1 - prediction[1]
+ return prediction[0][0], prediction[0][1], 1 - prediction[0][1]
else:
output_table = pd.DataFrame(prediction, columns=["Classification","Proba_0"])
output_table['Proba_1'] = 1 - output_table['Proba_0']
return output_table
"""
else:
- metric_list = '"' + '","'.join(metrics[1::-1]) + '"'
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[1], prediction[0], 1 - prediction[0]\n"
+ f"{'':8}return prediction[0][1], prediction[0][0], 1 - prediction[0][0]\n"
f"{'':4}else:\n"
- f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n"
+ f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1::-1]})\n"
f"{'':8}output_table = output_table[output_table.columns[::-1]]\n"
f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n"
f"{'':8}return output_table"
)
"""
if input_array.shape[0] == 1:
- return prediction[1], prediction[0], 1 - prediction[0]
+ return prediction[0][1], prediction[0][0], 1 - prediction[0][0]
else:
output_table = pd.DataFrame(prediction, columns=["Proba_0","Classification"])
output_table = output_table[output_table.columns[::-1]]
@@ -1679,13 +1719,13 @@ def _binary_target(
elif sum(returns) == 1 and len(returns) == 3:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[0], prediction[1], prediction[2]\n"
+ f"{'':8}return prediction[0][0], prediction[0][1], prediction[0][2]\n"
f"{'':4}else:\n"
f"{'':8}return pd.DataFrame(prediction, columns={metrics})"
)
"""
if input_array.shape[0] == 1:
- return prediction[0], prediction[1], prediction[2]
+ return prediction[0][0], prediction[0][1], prediction[0][2]
else:
return pd.DataFrame(prediction, columns=['Classification', 'Proba_0', 'Proba_1'])
"""
@@ -1752,13 +1792,13 @@ def _nonbinary_targets(
elif len(returns) == 1:
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction\n"
+ f"{'':8}return prediction[0][0]\n"
f"{'':4}else:\n"
f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})"
)
"""
if input_array.shape[0] == 1:
- return prediction
+        return prediction[0][0]
else:
return pd.DataFrame({'Classification': prediction})
"""
@@ -1766,7 +1806,7 @@ def _nonbinary_targets(
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
f"{'':8}target_values = {target_values}\n"
- f"{'':8}return target_values[prediction.index(max(prediction))]\n"
+ f"{'':8}return target_values[prediction[0].index(max(prediction[0]))]\n"
f"{'':4}else:\n"
f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array({target_values})[np.argmax(prediction, axis=1)]}})\n"
f"{'':8}return output_table"
@@ -1774,7 +1814,7 @@ def _nonbinary_targets(
"""
if input_array.shape[0] == 1:
target_values = ['A', 'B', 'C']
- return target_values[prediction.index(max(prediction))]
+ return target_values[prediction[0].index(max(prediction[0]))]
else:
output_table = pd.DataFrame({'Classification' : np.array(['A', 'B', 'C'])[np.argmax(prediction, axis=1)]})
return output_table
@@ -1784,13 +1824,13 @@ def _nonbinary_targets(
class_index = [i for i, x in enumerate(returns) if x][0]
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[{class_index}]\n"
+ f"{'':8}return prediction[0][{class_index}]\n"
f"{'':4}else:\n"
f"{'':8}return pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})"
)
"""
if input_array.shape[0] == 1:
- return prediction[0]
+ return prediction[0][0]
else:
return pd.DataFrame({'Classification': [p[0] for p in prediction]})
"""
@@ -1821,8 +1861,8 @@ def _nonbinary_targets(
cls.score_code += (
f"{'':4}target_values = {target_values}\n"
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return target_values[prediction.index(max(prediction))], "
- f"max(prediction)\n"
+ f"{'':8}return target_values[prediction[0].index(max(prediction[0]))], "
+ f"max(prediction[0])\n"
f"{'':4}else:\n"
f"{'':8}df = pd.DataFrame(prediction)\n"
f"{'':8}index = np.argmax(df.values, axis=1)\n"
@@ -1833,7 +1873,7 @@ def _nonbinary_targets(
"""
target_values = ['A', 'B', 'C']
if input_array.shape[0] == 1:
- return target_values[prediction.index(max(prediction))], max(prediction)
+ return target_values[prediction[0].index(max(prediction[0]))], max(prediction[0])
else:
df = pd.DataFrame(prediction)
index = np.argmax(df.values, axis=1)
@@ -1846,8 +1886,8 @@ def _nonbinary_targets(
class_index = [i for i, x in enumerate(returns) if x][0]
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return prediction[{class_index}], "
- f"max(prediction[:{class_index}] + prediction[{class_index + 1}:])\n"
+ f"{'':8}return prediction[0][{class_index}], "
+ f"max(prediction[0][:{class_index}] + prediction[0][{class_index + 1}:])\n"
f"{'':4}else:\n"
f"{'':8}df = pd.DataFrame(prediction)\n"
f"{'':8}probas = df.drop({class_index}, axis=1)\n"
@@ -1856,7 +1896,7 @@ def _nonbinary_targets(
)
"""
if input_array.shape[0] == 1:
- return prediction[0], max(prediction[:0] + prediction[1:])
+ return prediction[0][0], max(prediction[0][:0] + prediction[0][1:])
else:
df = pd.DataFrame(prediction)
probas = df.drop(0, axis=1)
@@ -1907,7 +1947,7 @@ def _nonbinary_targets(
len(metrics) == (len(target_values) + 1) == len(returns)
and sum(returns) == 1
):
- proba_returns = [f"prediction[{i}]" for i in range(len(returns))]
+ proba_returns = [f"prediction[0][{i}]" for i in range(len(returns))]
cls.score_code += (
f"{'':4}if input_array.shape[0] == 1:\n"
f"{'':8}return {', '.join(proba_returns)}\n"
@@ -1917,7 +1957,7 @@ def _nonbinary_targets(
)
"""
if input_array.shape[0] == 1:
- return prediction[0], prediction[1], prediction[2]
+ return prediction[0][0], prediction[0][1], prediction[0][2]
else:
output_table = pd.DataFrame(prediction, columns=['Proba_0', 'Proba_1', 'Proba_2'])
return output_table
@@ -1925,11 +1965,11 @@ def _nonbinary_targets(
elif (len(metrics) - 1) == len(returns) == len(target_values) and sum(
returns
) == 0:
- proba_returns = [f"prediction[{i}]" for i in range(len(returns))]
+ proba_returns = [f"prediction[0][{i}]" for i in range(len(returns))]
cls.score_code += (
f"{'':4}target_values = {target_values}\n\n"
f"{'':4}if input_array.shape[0] == 1:\n"
- f"{'':8}return target_values[prediction.index(max(prediction))], "
+ f"{'':8}return target_values[prediction[0].index(max(prediction[0]))], "
f"{', '.join(proba_returns)}\n"
f"{'':4}else:\n"
f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n"
@@ -1941,7 +1981,7 @@ def _nonbinary_targets(
target_values = ['A', 'B', 'C']
if input_array.shape[0] == 1:
- return target_values[prediction.index(max(prediction))], prediction[0], prediction[1], prediction[2]
+ return target_values[prediction[0].index(max(prediction[0]))], prediction[0][0], prediction[0][1], prediction[0][2]
else:
output_table = pd.DataFrame(prediction, columns=['Proba_0', 'Proba_1', 'Proba_2'])
classifications = np.array(target_values)[np.argmax(output_table.values, axis=1)]
diff --git a/tests/unit/test_write_score_code.py b/tests/unit/test_write_score_code.py
index f94a122c..c9f35715 100644
--- a/tests/unit/test_write_score_code.py
+++ b/tests/unit/test_write_score_code.py
@@ -331,7 +331,7 @@ def test_single_metric(self):
self.sc._no_targets_no_thresholds(metrics, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.5]
+ prediction = [[0.5]]
self.assertEqual(self.execute_snippet(input_array, prediction), 0.5)
# Multi row
input_array = pd.DataFrame({"A": [0.9, 1, 1.1]})
@@ -369,7 +369,7 @@ def test_multi_metric(self):
self.sc._no_targets_no_thresholds(metrics, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["i", 0.3, 0.4, 0.5]
+ prediction = [["i", 0.3, 0.4, 0.5]]
self.assertEqual(
self.execute_snippet(input_array, prediction), ("i", 0.3, 0.4, 0.5)
)
@@ -449,7 +449,7 @@ def test_one_metric_h2o(self):
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
prediction = [[], [1, 2, 3]]
- self.assertEqual(self.execute_snippet(input_array, prediction), "A")
+ self.assertEqual(self.execute_snippet(input_array, prediction), "B")
# Multi row
input_array = pd.DataFrame({"A": [0, 1]})
prediction = pd.DataFrame(
@@ -469,7 +469,7 @@ def test_one_metric_one_return_classification(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = 0.5
+ prediction = [0.5]
self.assertEqual(self.execute_snippet(input_array, prediction), 0.5)
# Multi row
input_array = pd.DataFrame({"A": [0.9, 1, 1.1]})
@@ -488,14 +488,14 @@ def test_one_metric_one_return_probability(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = 1
- self.assertEqual(self.execute_snippet(input_array, prediction), "A")
+ prediction = [1]
+ self.assertEqual(self.execute_snippet(input_array, prediction), "B")
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [0, 1, 0]
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({metrics: ["B", "A", "B"]}),
+ pd.DataFrame({metrics: ["A", "B", "A"]}),
)
def test_one_metric_two_returns(self):
@@ -509,7 +509,7 @@ def test_one_metric_two_returns(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [1, 0]
+ prediction = [[1, 0]]
self.assertEqual(self.execute_snippet(input_array, prediction), "A")
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -528,7 +528,7 @@ def test_one_metric_three_returns(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0, "Y", "z"]
+ prediction = [[0, "Y", "z"]]
self.assertEqual(self.execute_snippet(input_array, prediction), "Y")
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -542,13 +542,15 @@ def test_two_metrics_h2o(self):
metrics = ["Classification", "Probability"]
returns = ["", int, int]
self.sc.score_code += (
- "import pandas as pd\n" "def test_snippet(input_array, prediction):\n"
+ "import pandas as pd\n"
+ "import numpy as np\n"
+ "def test_snippet(input_array, prediction):\n"
)
self.sc._binary_target(metrics, self.target_values, returns, h2o_model=True)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
prediction = [[], ["a", -1, 1]]
- self.assertEqual(self.execute_snippet(input_array, prediction), ("a", 1))
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 1.0))
# Multi row
input_array = pd.DataFrame({"A": [0, 1]})
prediction = pd.DataFrame(
@@ -556,7 +558,7 @@ def test_two_metrics_h2o(self):
)
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({"Classification": [0, 1], "Probability": [0.1, 0.8]}),
+ pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.8]}),
)
def test_two_metrics_one_return(self):
@@ -568,14 +570,14 @@ def test_two_metrics_one_return(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = 0.2
- self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2))
+ prediction = [0.2]
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.2))
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [1, -1]
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({"Classification": ["A", "B"], "Probability": [1, -1]}),
+ pd.DataFrame({"Classification": ["B", "A"], "Probability": [1, -1]}),
)
def test_two_metrics_two_returns_no_classification(self):
@@ -589,14 +591,14 @@ def test_two_metrics_two_returns_no_classification(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.2, 0.8]
- self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2))
+ prediction = [[0.2, 0.8]]
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8))
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [[0.9, 0.1], [0.4, 0.6]]
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}),
+ pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.6]}),
)
def test_two_metrics_two_returns_classification(self):
@@ -610,7 +612,7 @@ def test_two_metrics_two_returns_classification(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["B", 0.2]
+ prediction = [["B", 0.2]]
self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2))
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -631,14 +633,14 @@ def test_two_metrics_three_returns_class_first(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["B", 0.2, 0.8]
- self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2))
+ prediction = [["B", 0.2, 0.8]]
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8))
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [["A", 0.9, 0.1], ["B", 0.4, 0.6]]
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}),
+ pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.6]}),
)
def test_two_metrics_three_returns_class_last(self):
@@ -652,14 +654,14 @@ def test_two_metrics_three_returns_class_last(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.2, 0.8, "B"]
- self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2))
+ prediction = [[0.2, 0.8, "B"]]
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8))
# Multi row
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [[0.9, 0.1, "A"], [0.4, 0.6, "B"]]
pd.testing.assert_frame_equal(
self.execute_snippet(input_array, prediction),
- pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}),
+ pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.6]}),
)
def test_three_metrics_h2o(self):
@@ -695,9 +697,9 @@ def test_three_metrics_one_return(self):
print(self.sc.score_code)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = 0.9
+ prediction = [0.9]
self.assertEqual(
- self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9)
+ self.execute_snippet(input_array, prediction), ("B", 0.9, 1 - 0.9)
)
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -706,7 +708,7 @@ def test_three_metrics_one_return(self):
self.execute_snippet(input_array, prediction),
pd.DataFrame(
{
- "Classification": ["A", "B"],
+ "Classification": ["B", "A"],
"Proba_0": [0.9, 0.1],
"Proba_1": [1 - 0.9, 1 - 0.1],
}
@@ -724,8 +726,8 @@ def test_three_metrics_two_returns_no_class(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.9, 0.1]
- self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.9, 0.1))
+ prediction = [[0.9, 0.1]]
+ self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.1, 0.9))
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
prediction = [[0.9, 0.1], [0.2, 0.8]]
@@ -751,7 +753,7 @@ def test_three_metrics_two_returns_class_first(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["A", 0.9]
+ prediction = [["A", 0.9]]
self.assertEqual(
self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9)
)
@@ -780,7 +782,7 @@ def test_three_metrics_two_returns_class_last(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.9, "A"]
+ prediction = [[0.9, "A"]]
self.assertEqual(
self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9)
)
@@ -809,7 +811,7 @@ def test_three_metrics_three_returns(self):
self.sc._binary_target(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["A", 0.9, 0.1]
+ prediction = [["A", 0.9, 0.1]]
self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.9, 0.1))
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -869,7 +871,7 @@ def test_one_metric_one_return(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = "C"
+ prediction = ["C"]
self.assertEqual(self.execute_snippet(input_array, prediction), "C")
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -885,7 +887,7 @@ def test_one_metric_probability_returns(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.1, 0.2, 0.3]
+ prediction = [[0.1, 0.2, 0.3]]
self.assertEqual(self.execute_snippet(input_array, prediction), "C")
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -901,7 +903,7 @@ def test_one_metric_classification_and_probability_returns(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["C", 0.1, 0.2, 0.3]
+ prediction = [["C", 0.1, 0.2, 0.3]]
self.assertEqual(self.execute_snippet(input_array, prediction), "C")
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -935,7 +937,7 @@ def test_two_metrics_return_probabilities(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.1, 0.2, 0.3]
+ prediction = [[0.1, 0.2, 0.3]]
self.assertEqual(self.execute_snippet(input_array, prediction), ("C", 0.3))
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -951,7 +953,7 @@ def test_two_metrics_return_classification_and_probability(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["C", 0.1, 0.2, 0.3]
+ prediction = [["C", 0.1, 0.2, 0.3]]
self.assertEqual(self.execute_snippet(input_array, prediction), ("C", 0.3))
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -1014,7 +1016,7 @@ def test_return_all_probabilities(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.1, 0.2, 0.3]
+ prediction = [[0.1, 0.2, 0.3]]
self.assertEqual(self.execute_snippet(input_array, prediction), (0.1, 0.2, 0.3))
# Multiple rows
input_array = pd.DataFrame({"A": [1, 0, 1]})
@@ -1032,7 +1034,7 @@ def test_return_all_probabilities_and_classification(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = ["P", 0.1, 0.2, 0.3]
+ prediction = [["P", 0.1, 0.2, 0.3]]
self.assertEqual(
self.execute_snippet(input_array, prediction), ("P", 0.1, 0.2, 0.3)
)
@@ -1057,7 +1059,7 @@ def test_return_all_probabilities_generate_classification(self):
self.sc._nonbinary_targets(metrics, self.target_values, returns)
# Single row
input_array = pd.DataFrame([[1]], columns=["A"], index=[0])
- prediction = [0.1, 0.2, 0.3]
+ prediction = [[0.1, 0.2, 0.3]]
self.assertEqual(
self.execute_snippet(input_array, prediction), ("C", 0.1, 0.2, 0.3)
)
@@ -1103,7 +1105,7 @@ def test_predictions_to_metrics():
metrics = ["Classification", "Probability"]
target_values = ["1", "0"]
sc._predictions_to_metrics(metrics, returns, target_values)
- func.assert_called_once_with(metrics, ["1", "0"], returns, None, False)
+ func.assert_called_once_with(metrics, ["1", "0"], returns, None, 1, False)
with pytest.raises(
ValueError,