From e72132f2c11640d45efce22f86c647feb2f69af2 Mon Sep 17 00:00:00 2001 From: djm21 Date: Thu, 5 Oct 2023 02:03:10 -0500 Subject: [PATCH 1/7] Introduced option to specify target_index for score code --- src/sasctl/pzmm/import_model.py | 11 +- src/sasctl/pzmm/write_score_code.py | 250 +++++++++++++++------------- 2 files changed, 145 insertions(+), 116 deletions(-) diff --git a/src/sasctl/pzmm/import_model.py b/src/sasctl/pzmm/import_model.py index 899042ce..dc66deab 100644 --- a/src/sasctl/pzmm/import_model.py +++ b/src/sasctl/pzmm/import_model.py @@ -198,6 +198,7 @@ def import_model( predict_threshold: Optional[float] = None, target_values: Optional[List[str]] = None, overwrite_project_properties: Optional[bool] = False, + target_index: Optional[int] = None, **kwargs, ) -> Tuple[RestObj, Union[dict, str, Path]]: """ @@ -275,10 +276,16 @@ def import_model( target_values : list of strings, optional A list of target values for the target variable. This argument and the score_metrics argument dictate the handling of the predicted values from - the prediction method. The default value is None. + the prediction method. The order of the target values should reflect the + order of the related probabilities in the model. The default value is None. overwrite_project_properties : bool, optional Set whether the project properties should be overwritten when attempting to import the model. The default value is False. + target_index : int, optional + Sets the index of success for a binary model. If target_values are given, this + index should match the index of the target outcome in target_values. If target_values + are not given, this index should indicate whether the target probability variable + is the first or second variable returned by the model. The default value is 1. 
kwargs : dict, optional Other keyword arguments are passed to the following function: * sasctl.pzmm.ScoreCode.write_score_code(..., @@ -352,6 +359,7 @@ def import_model( target_values=target_values, missing_values=missing_values, score_cas=score_cas, + target_index=target_index, **kwargs, ) if score_code_dict: @@ -451,6 +459,7 @@ def import_model( target_values=target_values, missing_values=missing_values, score_cas=score_cas, + target_index=target_index, **kwargs, ) if score_code_dict: diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index 7af3c3f5..4031af03 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -35,6 +35,7 @@ def write_score_code( missing_values: Union[bool, list, DataFrame] = False, score_cas: Optional[bool] = True, score_code_path: Union[Path, str, None] = None, + target_index: Optional[int] = None, **kwargs, ) -> Union[dict, None]: """ @@ -129,6 +130,11 @@ def write_score_code( score_code_path : str or Path, optional Path for output score code file(s) to be generated. If no value is supplied a dict is returned instead. The default value is None. + target_index : int, optional + Sets the index of success for a binary model. If target_values are given, this + index should match the index of the target outcome in target_values. If target_values + are not given, this index should indicate whether the target probability variable + is the first or second variable returned by the model. The default value is 1. 
kwargs Other keyword arguments are passed to one of the following functions: * sasctl.pzmm.ScoreCode._write_imports(pickle_type, mojo_model=None, @@ -245,6 +251,7 @@ def score(var1, var2, var3, var4): predict_method[1], target_values=target_values, predict_threshold=predict_threshold, + target_index=target_index, h2o_model=True, ) else: @@ -275,6 +282,7 @@ def score(var1, var2, var3, var4): predict_method[1], target_values=target_values, predict_threshold=predict_threshold, + target_index=target_index ) if missing_values: @@ -1082,6 +1090,7 @@ def _predictions_to_metrics( target_values: Optional[List[str]] = None, predict_threshold: Optional[float] = None, h2o_model: Optional[bool] = False, + target_index: Optional[int] = 1 ) -> None: """ Using the provided arguments, write in to the score code the method for handling @@ -1106,6 +1115,11 @@ def _predictions_to_metrics( h2o_model : bool, optional Flag to indicate that the model is an H2O.ai model. The default value is False. + target_index : int, optional + Sets the index of success for a binary model. If target_values are given, this + index should match the index of the target outcome in target_values. If target_values + are not given, this index should indicate whether the target probability variable + is the first or second variable returned by the model. The default value is 1. 
""" if len(metrics) == 1 and isinstance(metrics, list): # Flatten single valued list @@ -1122,7 +1136,7 @@ def _predictions_to_metrics( # Binary classification model elif len(target_values) == 2: cls._binary_target( - metrics, target_values, predict_returns, predict_threshold, h2o_model + metrics, target_values, predict_returns, predict_threshold, target_index, h2o_model ) # Multiclass classification model elif len(target_values) > 2: @@ -1182,7 +1196,7 @@ def _no_targets_no_thresholds( else: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}{metrics} = prediction[0]\n{'':8}return {metrics}\n" + f"{'':8}{metrics} = prediction[0][0]\n{'':8}return {metrics}\n" f"{'':4}else:\n" f"{'':8}output_table = pd.DataFrame({{'{metrics}': prediction}})\n" f"{'':8}return output_table" @@ -1228,7 +1242,7 @@ def _no_targets_no_thresholds( else: cls.score_code += f"{'':4}if input_array.shape[0] == 1:\n" for i in range(len(metrics)): - cls.score_code += f"{'':8}{metrics[i]} = prediction[{i}]\n" + cls.score_code += f"{'':8}{metrics[i]} = prediction[0][{i}]\n" cls.score_code += f"\n{'':8}return {', '.join(metrics)}\n" cls.score_code += ( f"{'':4}else:\n" @@ -1237,10 +1251,10 @@ def _no_targets_no_thresholds( ) """ if input_array.shape[0] == 1: - Classification = prediction[0] - Proba_A = prediction[1] - Proba_B = prediction[2] - Proba_C = prediction[3] + Classification = prediction[0][0] + Proba_A = prediction[0][1] + Proba_B = prediction[0][2] + Proba_C = prediction[0][3] return Classification, Proba_A, Proba_B, Proba_C else: @@ -1257,6 +1271,7 @@ def _binary_target( returns: List[Any], threshold: Optional[float] = None, h2o_model: Optional[bool] = None, + target_index: Optional[int] = 1 ) -> None: """ Handle binary model prediction outputs. @@ -1276,6 +1291,9 @@ def _binary_target( h2o_model : bool, optional Flag to indicate that the model is an H2O.ai model. The default value is False. 
+ target_index : int, optional + Sets the index of the probability value to be returned from a binary model. The + default value is two for h2o models, and one otherwise. """ if not threshold: # Set default threshold @@ -1299,14 +1317,13 @@ def _binary_target( if h2o_model: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction[1][2] > {threshold}:\n" - f"{'':12}{metrics} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[1][{target_index} + 1] > {threshold}:\n" + f"{'':12}{metrics} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics} = \"{target_values[1]}\"\n" + f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n" f"{'':8}return {metrics}\n" f"{'':4}else:\n" - f"{'':8}target_values = {target_values}\n" - f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.array(target_values)[np.argmax(prediction.iloc[0:, 1:].values, axis=1)]}})\n" + f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})" f"{'':8}return output_table" ) """ @@ -1317,21 +1334,20 @@ def _binary_target( Classification = "B" return Classification else: - target_values = ['A', 'B'] - output_table = pd.DataFrame({'Classification': np.array(target_values)[np.argmax(prediction.iloc[0:, 1:].values, axis=1)]}) + output_table = pd.DataFrame({'Classification': np.where(prediction[prediction.columns[2]] > .5, 'B', 'A')}) return output_table """ # One return that is the classification elif len(returns) == 1 and returns[0]: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction\n" + f"{'':8}return prediction[0]\n" f"{'':4}else:\n" f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})" ) """ if input_array.shape[0] == 1: - return prediction + return prediction[0] else: return pd.DataFrame({'Classification': prediction}) """ @@ -1339,45 +1355,46 @@ def _binary_target( 
elif len(returns) == 1 and not returns[0]: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction > {threshold}:\n" - f"{'':12}{metrics} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0] > {threshold}:\n" + f"{'':12}{metrics} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics} = \"{target_values[1]}\"\n" + f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n" f"{'':8}return {metrics}\n" f"{'':4}else:\n" - f"{'':8}return pd.DataFrame({{'{metrics}': ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]}})\n" + f"{'':8}return pd.DataFrame({{'{metrics}': ['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]}})\n" ) """ if input_array.shape[0] == 1: - if prediction > 0.5: + if prediction[0] > 0.5: Classification = "A" else: Classification = "B" return Classification else: - return pd.DataFrame({'Classification': ['A' if p > 0.5 else 'B' for p in prediction]}) + return pd.DataFrame({'Classification': ['B' if p > 0.5 else 'A' for p in prediction]}) """ # Two returns from the prediction method elif len(returns) == 2 and sum(returns) == 0: # Only probabilities returned; return classification for larger value cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction[0] > prediction[1]:\n" - f"{'':12}{metrics} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0][{target_index}] > {threshold}:\n" + f"{'':12}{metrics} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics} = \"{target_values[1]}\"\n\n" + f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n\n" f"{'':8}return {metrics}\n" f"{'':4}else:\n" f"{'':8}target_values = {target_values}\n" - f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array(target_values)[np.argmax(prediction, axis=1)]}})\n" + f"{'':8}prediction = pd.DataFrame(prediction)" + f"{'':8}output_table = 
pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - if prediction[0] > prediction[1]: - Classification = "A" - else: + if prediction[0][0] > .5: Classification = "B" + else: + Classification = "A" return Classification else: @@ -1391,14 +1408,14 @@ def _binary_target( class_index = [i for i, x in enumerate(returns) if x][0] cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}{metrics} = prediction[{class_index}]\n{'':8}return {metrics}\n" + f"{'':8}{metrics} = prediction[0][{class_index}]\n{'':8}return {metrics}\n" f"{'':4}else:\n" f"{'':8}output_table = pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})\n" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - Classification = prediction[1] + Classification = prediction[0][1] return Classification else: output_table = pd.DataFrame({'Classification': [p[1] for p in prediction]}) @@ -1419,7 +1436,7 @@ def _binary_target( f"{'':4}if input_array.shape[0] == 1:\n" f"{'':8}return prediction[1][0], float(prediction[1][2])\n" f"{'':4}else:\n" - f"{'':8}output_table = prediction.drop(prediction.columns[1], axis=1)\n" + f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n" f"{'':8}output_table.columns = {metrics}\n" f"{'':8}return output_table" ) @@ -1440,25 +1457,25 @@ def _binary_target( ) cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction > {threshold}:\n" - f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0] > {threshold}:\n" + f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n\n" - f"{'':8}return {metrics[0]}, prediction\n" + f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n\n" + 
f"{'':8}return {metrics[0]}, prediction[0]\n" f"{'':4}else:\n" - f"{'':8}classifications = ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]\n" + f"{'':8}classifications = ['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]\n" f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': prediction}})" ) """ if input_array.shape[0] == 1: - if prediction > 0.5: - Classification = "A" - else: + if prediction[0] > 0.5: Classification = "B" + else: + Classification = "A" - return Classification, prediction + return Classification, prediction[0] else: - classifications = ['A' if p > 0.5 else 'B' for p in prediction] + classifications = ['B' if p > 0.5 else 'A' for p in prediction] return pd.DataFrame({'Classification': classifications, 'Probability': prediction}) """ # Calculate the classification; return the classification and probability @@ -1470,41 +1487,41 @@ def _binary_target( ) cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction[0] > prediction[1]:\n" - f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0][{target_index}] > {threshold}:\n" + f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n" - f"{'':8}return {metrics[0]}, prediction[0]\n" + f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n" + f"{'':8}return {metrics[0]}, prediction[0][{target_index}]\n" f"{'':4}else:\n" f"{'':8}df = pd.DataFrame(prediction)\n" f"{'':8}proba = df[0]\n" - f"{'':8}classifications = np.where(df[0] > df[1], '{target_values[0]}', '{target_values[1]}')\n" + f"{'':8}classifications = np.where(df[{target_index}] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n" f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': proba}})" ) """ if input_array.shape[0] == 
1: - if prediction[0] > prediction[1]: - Classification = "A" - else: + if prediction[0][1] > .5: Classification = "B" - return Classification, prediction[0] + else: + Classification = "A" + return Classification, prediction[0][1] else: df = pd.DataFrame(prediction) proba = df[0] - classifications = np.where(df[0] > df[1], 'A', 'B') + classifications = np.where(df[1] > .5, 'B', 'A') return pd.DataFrame({'Classification': classifications, 'Probability': proba}) """ # Return classification and probability value elif sum(returns) == 1 and len(returns) == 2: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0], prediction[1]\n" + f"{'':8}return prediction[0][0], prediction[0][1]\n" f"{'':4}else:\n" f"{'':8}return pd.DataFrame(prediction, columns={metrics})" ) """ if input_array.shape[0] == 1: - return prediction[0], prediction[1] + return prediction[0][0], prediction[0][1] else: return pd.DataFrame(prediction, columns=['Classification', 'Probability']) """ @@ -1517,38 +1534,40 @@ def _binary_target( # Determine which return is the classification value class_index = [i for i, x in enumerate(returns) if x][0] if class_index == 0: - metric_list = '"' + '","'.join(metrics) + '","drop"' cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0], prediction[1]\n" + f"{'':8}return prediction[0][0], prediction[0][{target_index} + 1]\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n" - f"{'':8}return output_table.drop('drop', axis=1)" + f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n" + f"{'':8}output_table.columns = {metrics}" + f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - return prediction[0], prediction[1] + return prediction[0][0], prediction[0][2] else: - output_table = pd.DataFrame(prediction, columns=["Classification","Probability","drop"]) - return output_table.drop('drop', axis=1) + 
output_table = prediction.drop(prediction.columns[1], axis=1) + output_table.columns = ["Classification", "Probability"] + return output_table """ else: - metric_list = '"' + '","drop","'.join(metrics[::-1]) + '"' cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[{class_index}], prediction[0]\n" + f"{'':8}return prediction[0][{class_index}], prediction[0][{target_index}]\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n" + f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)}], axis=1)\n" f"{'':8}output_table = output_table[output_table.columns[::-1]]\n" - f"{'':8}return output_table.drop('drop', axis=1)" + f"{'':8}output_table.columns = {metrics}" + f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - return prediction[2], prediction[0] + return prediction[0][2], prediction[0][0] else: - output_table = pd.DataFrame(prediction, columns=["Probability","drop","Classification"]) + output_table = prediction.drop(prediction.columns[0], axis=1) output_table = output_table[output_table.columns[::-1]] + output_table.columns = ["Classification", "Probability"] return output_table.drop('drop', axis=1) """ else: @@ -1578,26 +1597,26 @@ def _binary_target( ) cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction > {threshold}:\n" - f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0] > {threshold}:\n" + f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n" - f"{'':8}return {metrics[0]}, prediction, 1 - prediction\n" + f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n" + f"{'':8}return {metrics[0]}, prediction[0], 1 - prediction[0]\n" f"{'':4}else:\n" - f"{'':8}classifications = ['{target_values[0]}' if p > {threshold} else '{target_values[1]}' for p in prediction]\n" + f"{'':8}classifications = 
['{target_values[target_index]}' if p > {threshold} else '{target_values[abs(target_index-1)]}' for p in prediction]\n" f"{'':8}output_table = pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': prediction}})\n" f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - if prediction > 0.5: - Classification = "A" - else: + if prediction[0] > 0.5: Classification = "B" - return Classification, prediction, 1 - prediction + else: + Classification = "A" + return Classification, prediction[0], 1 - prediction[0] else: - classifications = ['A' if p > 0.5 else 'B' for p in prediction] + classifications = ['B' if p > 0.5 else 'A' for p in prediction] output_table = pd.DataFrame({'Classification': classifications, 'Proba_0': prediction}) output_table['Proba_1'] = 1 - output_table['Proba_0'] return output_table @@ -1606,31 +1625,34 @@ def _binary_target( warn( "Due to the ambiguity of the provided metrics and prediction return" " types, the score code assumes the return order to be: " - "[classification, probability of event, probability of no event]." + "[classification, probability of event, probability of no event] " + "for a single return. For batch scoring, the return order of the " + "probabilities will mirror their return order in the model." 
) - metric_list = '"' + '","'.join(metrics[1:]) + '"' cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction[0] > prediction[1]:\n" - f"{'':12}{metrics[0]} = \"{target_values[0]}\"\n" + f"{'':8}if prediction[0][{target_index}] > {threshold}:\n" + f"{'':12}{metrics[0]} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" - f"{'':12}{metrics[0]} = \"{target_values[1]}\"\n" - f"{'':8}return {metrics[0]}, prediction[0], prediction[1]\n" + f"{'':12}{metrics[0]} = \"{target_values[abs(target_index-1)]}\"\n" + f"{'':8}return {metrics[0]}, prediction[0][{target_index}], prediction[0][{abs(target_index-1)}]\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n" - f"{'':8}output_table.insert(0, '{metrics[0]}', np.array({target_values})[np.argmax(output_table.values, axis=1)])\n" + f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n" + f"{'':8}classifications = np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')" + f"{'':8}output_table.insert(loc=0, column='{metrics[0]}', value=classifications)" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - if prediction[0] > prediction[1]: + if prediction[0][0] > prediction[0][1]: Classification = "A" else: Classification = "B" - return Classification, prediction[0], prediction[1] + return Classification, prediction[0][0], prediction[0][1] else: output_table = pd.DataFrame(prediction, columns=["Proba_0","Proba_1"]) - output_table.insert(0, 'Classification', np.array(['A', 'B'])[np.argmax(output_table.values, axis=1)]) + classifications = np.where(prediction[prediction.columns[2]] > .5, 'B', 'A') + output_table.insert(loc=0, column='Classification', value=classifications) return output_table """ # Find which return is the classification, then return probabilities @@ -1638,37 +1660,35 @@ def _binary_target( # Determine which return is 
the classification value class_index = [i for i, x in enumerate(returns) if x][0] if class_index == 0: - metric_list = '"' + '","'.join(metrics[:2]) + '"' cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0], prediction[1], 1 - prediction[1]\n" + f"{'':8}return prediction[0][0], prediction[0][1], 1 - prediction[0][1]\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n" + f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[:2]})\n" f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - return prediction[0], prediction[1], 1 - prediction[1] + return prediction[0][0], prediction[0][1], 1 - prediction[0][1] else: output_table = pd.DataFrame(prediction, columns=["Classification","Proba_0"]) output_table['Proba_1'] = 1 - output_table['Proba_0'] return output_table """ else: - metric_list = '"' + '","'.join(metrics[1::-1]) + '"' cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[1], prediction[0], 1 - prediction[0]\n" + f"{'':8}return prediction[0][1], prediction[0][0], 1 - prediction[0][0]\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame(prediction, columns=[{metric_list}])\n" + f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1::-1]})\n" f"{'':8}output_table = output_table[output_table.columns[::-1]]\n" f"{'':8}output_table['{metrics[2]}'] = 1 - output_table['{metrics[1]}']\n" f"{'':8}return output_table" ) """ if input_array.shape[0] == 1: - return prediction[1], prediction[0], 1 - prediction[0] + return prediction[0][1], prediction[0][0], 1 - prediction[0][0] else: output_table = pd.DataFrame(prediction, columns=["Proba_0","Classification"]) output_table = output_table[output_table.columns[::-1]] @@ -1679,13 +1699,13 @@ def _binary_target( elif sum(returns) == 1 and len(returns) == 3: cls.score_code += ( f"{'':4}if 
input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0], prediction[1], prediction[2]\n" + f"{'':8}return prediction[0][0], prediction[0][1], prediction[0][2]\n" f"{'':4}else:\n" f"{'':8}return pd.DataFrame(prediction, columns={metrics})" ) """ if input_array.shape[0] == 1: - return prediction[0], prediction[1], prediction[2] + return prediction[0][0], prediction[0][1], prediction[0][2] else: return pd.DataFrame(prediction, columns=['Classification', 'Proba_0', 'Proba_1']) """ @@ -1752,13 +1772,13 @@ def _nonbinary_targets( elif len(returns) == 1: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction\n" + f"{'':8}return prediction[0]\n" f"{'':4}else:\n" f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})" ) """ if input_array.shape[0] == 1: - return prediction + return prediction[0] else: return pd.DataFrame({'Classification': prediction}) """ @@ -1766,7 +1786,7 @@ def _nonbinary_targets( cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" f"{'':8}target_values = {target_values}\n" - f"{'':8}return target_values[prediction.index(max(prediction))]\n" + f"{'':8}return target_values[prediction[0].index(max(prediction[0]))]\n" f"{'':4}else:\n" f"{'':8}output_table = pd.DataFrame({{'{metrics}' : np.array({target_values})[np.argmax(prediction, axis=1)]}})\n" f"{'':8}return output_table" @@ -1774,7 +1794,7 @@ def _nonbinary_targets( """ if input_array.shape[0] == 1: target_values = ['A', 'B', 'C'] - return target_values[prediction.index(max(prediction))] + return target_values[prediction[0].index(max(prediction[0]))] else: output_table = pd.DataFrame({'Classification' : np.array(['A', 'B', 'C'])[np.argmax(prediction, axis=1)]}) return output_table @@ -1784,13 +1804,13 @@ def _nonbinary_targets( class_index = [i for i, x in enumerate(returns) if x][0] cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[{class_index}]\n" + f"{'':8}return prediction[0][{class_index}]\n" 
f"{'':4}else:\n" f"{'':8}return pd.DataFrame({{'{metrics}': [p[{class_index}] for p in prediction]}})" ) """ if input_array.shape[0] == 1: - return prediction[0] + return prediction[0][0] else: return pd.DataFrame({'Classification': [p[0] for p in prediction]}) """ @@ -1821,8 +1841,8 @@ def _nonbinary_targets( cls.score_code += ( f"{'':4}target_values = {target_values}\n" f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return target_values[prediction.index(max(prediction))], " - f"max(prediction)\n" + f"{'':8}return target_values[prediction[0].index(max(prediction[0]))], " + f"max(prediction[0])\n" f"{'':4}else:\n" f"{'':8}df = pd.DataFrame(prediction)\n" f"{'':8}index = np.argmax(df.values, axis=1)\n" @@ -1833,7 +1853,7 @@ def _nonbinary_targets( """ target_values = ['A', 'B', 'C'] if input_array.shape[0] == 1: - return target_values[prediction.index(max(prediction))], max(prediction) + return target_values[prediction[0].index(max(prediction[0]))], max(prediction[0]) else: df = pd.DataFrame(prediction) index = np.argmax(df.values, axis=1) @@ -1846,8 +1866,8 @@ def _nonbinary_targets( class_index = [i for i, x in enumerate(returns) if x][0] cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[{class_index}], " - f"max(prediction[:{class_index}] + prediction[{class_index + 1}:])\n" + f"{'':8}return prediction[0][{class_index}], " + f"max(prediction[0][:{class_index}] + prediction[0][{class_index + 1}:])\n" f"{'':4}else:\n" f"{'':8}df = pd.DataFrame(prediction)\n" f"{'':8}probas = df.drop({class_index}, axis=1)\n" @@ -1856,7 +1876,7 @@ def _nonbinary_targets( ) """ if input_array.shape[0] == 1: - return prediction[0], max(prediction[:0] + prediction[1:]) + return prediction[0][0], max(prediction[0][:0] + prediction[0][1:]) else: df = pd.DataFrame(prediction) probas = df.drop(0, axis=1) @@ -1907,7 +1927,7 @@ def _nonbinary_targets( len(metrics) == (len(target_values) + 1) == len(returns) and sum(returns) == 1 ): - proba_returns = 
[f"prediction[{i}]" for i in range(len(returns))] + proba_returns = [f"prediction[0][{i}]" for i in range(len(returns))] cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" f"{'':8}return {', '.join(proba_returns)}\n" @@ -1917,7 +1937,7 @@ def _nonbinary_targets( ) """ if input_array.shape[0] == 1: - return prediction[0], prediction[1], prediction[2] + return prediction[0][0], prediction[0][1], prediction[0][2] else: output_table = pd.DataFrame(prediction, columns=['Proba_0', 'Proba_1', 'Proba_2']) return output_table @@ -1925,11 +1945,11 @@ def _nonbinary_targets( elif (len(metrics) - 1) == len(returns) == len(target_values) and sum( returns ) == 0: - proba_returns = [f"prediction[{i}]" for i in range(len(returns))] + proba_returns = [f"prediction[0][{i}]" for i in range(len(returns))] cls.score_code += ( f"{'':4}target_values = {target_values}\n\n" f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return target_values[prediction.index(max(prediction))], " + f"{'':8}return target_values[prediction[0].index(max(prediction[0]))], " f"{', '.join(proba_returns)}\n" f"{'':4}else:\n" f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n" @@ -1941,7 +1961,7 @@ def _nonbinary_targets( target_values = ['A', 'B', 'C'] if input_array.shape[0] == 1: - return target_values[prediction.index(max(prediction))], prediction[0], prediction[1], prediction[2] + return target_values[prediction[0].index(max(prediction[0]))], prediction[0][0], prediction[0][1], prediction[0][2] else: output_table = pd.DataFrame(prediction, columns=['Proba_0', 'Proba_1', 'Proba_2']) classifications = np.array(target_values)[np.argmax(output_table.values, axis=1)] From 5d577d8b26a8e40cc64e9f94b2a64c2450eb7d96 Mon Sep 17 00:00:00 2001 From: djm21 Date: Thu, 5 Oct 2023 18:03:08 -0500 Subject: [PATCH 2/7] Updated unit tests to correctly test score code changes --- src/sasctl/pzmm/write_score_code.py | 57 ++++++++++++------- tests/unit/test_write_score_code.py | 86 
+++++++++++++++-------------- 2 files changed, 80 insertions(+), 63 deletions(-) diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index 4031af03..c56c4c50 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -282,7 +282,7 @@ def score(var1, var2, var3, var4): predict_method[1], target_values=target_values, predict_threshold=predict_threshold, - target_index=target_index + target_index=target_index, ) if missing_values: @@ -1090,7 +1090,7 @@ def _predictions_to_metrics( target_values: Optional[List[str]] = None, predict_threshold: Optional[float] = None, h2o_model: Optional[bool] = False, - target_index: Optional[int] = 1 + target_index: Optional[int] = 1, ) -> None: """ Using the provided arguments, write in to the score code the method for handling @@ -1136,7 +1136,12 @@ def _predictions_to_metrics( # Binary classification model elif len(target_values) == 2: cls._binary_target( - metrics, target_values, predict_returns, predict_threshold, target_index, h2o_model + metrics, + target_values, + predict_returns, + predict_threshold, + target_index, + h2o_model, ) # Multiclass classification model elif len(target_values) > 2: @@ -1187,10 +1192,11 @@ def _no_targets_no_thresholds( ) """ if input_array.shape[0] == 1: - Classification = prediction[0] + Classification = prediction[1][0] return Classification else: - output_table = pd.DataFrame({'Classification': prediction}) + output_table = prediction.drop(prediction.columns[1:], axis=1) + output_table.columns = ['Classification'] return output_table """ else: @@ -1203,11 +1209,10 @@ def _no_targets_no_thresholds( ) """ if input_array.shape[0] == 1: - Classification = prediction[1][0] + Classification = prediction[0][0] return Classification else: - output_table = prediction.drop(prediction.columns[1:], axis=1) - output_table.columns = ['Classification'] + output_table = pd.DataFrame({'Classification': prediction}) return output_table """ else: 
@@ -1271,7 +1276,7 @@ def _binary_target( returns: List[Any], threshold: Optional[float] = None, h2o_model: Optional[bool] = None, - target_index: Optional[int] = 1 + target_index: Optional[int] = 1, ) -> None: """ Handle binary model prediction outputs. @@ -1317,13 +1322,13 @@ def _binary_target( if h2o_model: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}if prediction[1][{target_index} + 1] > {threshold}:\n" + f"{'':8}if prediction[1][{target_index+1}] > {threshold}:\n" f"{'':12}{metrics} = \"{target_values[target_index]}\"\n" f"{'':8}else:\n" f"{'':12}{metrics} = \"{target_values[abs(target_index-1)]}\"\n" f"{'':8}return {metrics}\n" f"{'':4}else:\n" - f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})" + f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})\n" f"{'':8}return output_table" ) """ @@ -1385,8 +1390,8 @@ def _binary_target( f"{'':8}return {metrics}\n" f"{'':4}else:\n" f"{'':8}target_values = {target_values}\n" - f"{'':8}prediction = pd.DataFrame(prediction)" - f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})" + f"{'':8}prediction = pd.DataFrame(prediction)\n" + f"{'':8}output_table = pd.DataFrame({{'{metrics}': np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')}})\n" f"{'':8}return output_table" ) """ @@ -1404,6 +1409,7 @@ def _binary_target( """ # Classification and probability returned; return classification value elif len(returns) > 1 and sum(returns) == 1: + # TODO: Either figure out how to 
handle threshold or add warning # Determine which return is the classification value class_index = [i for i, x in enumerate(returns) if x][0] cls.score_code += ( @@ -1424,7 +1430,6 @@ def _binary_target( else: cls._invalid_predict_config() elif len(metrics) == 2: - # TODO: change to align with other cases and assign target_values to classification column # H2O models with two metrics are assumed to be classification + probability if h2o_model: warn( @@ -1434,10 +1439,16 @@ def _binary_target( ) cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[1][0], float(prediction[1][2])\n" + f"{'':8}if prediction[1][{target_index+1}] > {threshold}:\n" + f"{'':12}{metrics[0]} = '{target_values[target_index]}'\n" + f"{'':8}else:\n" + f"{'':12}{metrics[0]} = '{target_values[abs(target_index-1)]}'\n" + f"{'':8}return {metrics[0]}, float(prediction[1][{target_index+1}])\n" f"{'':4}else:\n" f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n" + f"{'':8}classifications = np.where(prediction[prediction.columns[{target_index+1}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n" f"{'':8}output_table.columns = {metrics}\n" + f"{'':8}output_table['{metrics[0]}'] = classifications\n" f"{'':8}return output_table" ) """ @@ -1494,7 +1505,7 @@ def _binary_target( f"{'':8}return {metrics[0]}, prediction[0][{target_index}]\n" f"{'':4}else:\n" f"{'':8}df = pd.DataFrame(prediction)\n" - f"{'':8}proba = df[0]\n" + f"{'':8}proba = df[{target_index}]\n" f"{'':8}classifications = np.where(df[{target_index}] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n" f"{'':8}return pd.DataFrame({{'{metrics[0]}': classifications, '{metrics[1]}': proba}})" ) @@ -1511,6 +1522,7 @@ def _binary_target( classifications = np.where(df[1] > .5, 'B', 'A') return pd.DataFrame({'Classification': classifications, 'Probability': proba}) """ + # TODO: Potentially add 
threshold # Return classification and probability value elif sum(returns) == 1 and len(returns) == 2: cls.score_code += ( @@ -1536,10 +1548,11 @@ def _binary_target( if class_index == 0: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0][0], prediction[0][{target_index} + 1]\n" + f"{'':8}return prediction[0][0], prediction[0][{target_index+1}]\n" f"{'':4}else:\n" + f"{'':8}prediction = pd.DataFrame(prediction)\n" f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)+1}], axis=1)\n" - f"{'':8}output_table.columns = {metrics}" + f"{'':8}output_table.columns = {metrics}\n" f"{'':8}return output_table" ) @@ -1556,9 +1569,10 @@ def _binary_target( f"{'':4}if input_array.shape[0] == 1:\n" f"{'':8}return prediction[0][{class_index}], prediction[0][{target_index}]\n" f"{'':4}else:\n" + f"{'':8}prediction = pd.DataFrame(prediction)\n" f"{'':8}output_table = prediction.drop(prediction.columns[{abs(target_index-1)}], axis=1)\n" f"{'':8}output_table = output_table[output_table.columns[::-1]]\n" - f"{'':8}output_table.columns = {metrics}" + f"{'':8}output_table.columns = {metrics}\n" f"{'':8}return output_table" ) """ @@ -1622,6 +1636,7 @@ def _binary_target( return output_table """ elif sum(returns) == 0 and len(returns) == 2: + # TODO: Make decision on whether ordering should follow given pattern or reflect input ordering warn( "Due to the ambiguity of the provided metrics and prediction return" " types, the score code assumes the return order to be: " @@ -1638,8 +1653,8 @@ def _binary_target( f"{'':8}return {metrics[0]}, prediction[0][{target_index}], prediction[0][{abs(target_index-1)}]\n" f"{'':4}else:\n" f"{'':8}output_table = pd.DataFrame(prediction, columns={metrics[1:]})\n" - f"{'':8}classifications = np.where(prediction[prediction.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')" - f"{'':8}output_table.insert(loc=0, column='{metrics[0]}', 
value=classifications)" + f"{'':8}classifications = np.where(output_table[output_table.columns[{target_index}]] > {threshold}, '{target_values[target_index]}', '{target_values[abs(target_index-1)]}')\n" + f"{'':8}output_table.insert(loc=0, column='{metrics[0]}', value=classifications)\n" f"{'':8}return output_table" ) """ diff --git a/tests/unit/test_write_score_code.py b/tests/unit/test_write_score_code.py index f94a122c..c9f35715 100644 --- a/tests/unit/test_write_score_code.py +++ b/tests/unit/test_write_score_code.py @@ -331,7 +331,7 @@ def test_single_metric(self): self.sc._no_targets_no_thresholds(metrics, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.5] + prediction = [[0.5]] self.assertEqual(self.execute_snippet(input_array, prediction), 0.5) # Multi row input_array = pd.DataFrame({"A": [0.9, 1, 1.1]}) @@ -369,7 +369,7 @@ def test_multi_metric(self): self.sc._no_targets_no_thresholds(metrics, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["i", 0.3, 0.4, 0.5] + prediction = [["i", 0.3, 0.4, 0.5]] self.assertEqual( self.execute_snippet(input_array, prediction), ("i", 0.3, 0.4, 0.5) ) @@ -449,7 +449,7 @@ def test_one_metric_h2o(self): # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) prediction = [[], [1, 2, 3]] - self.assertEqual(self.execute_snippet(input_array, prediction), "A") + self.assertEqual(self.execute_snippet(input_array, prediction), "B") # Multi row input_array = pd.DataFrame({"A": [0, 1]}) prediction = pd.DataFrame( @@ -469,7 +469,7 @@ def test_one_metric_one_return_classification(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = 0.5 + prediction = [0.5] self.assertEqual(self.execute_snippet(input_array, prediction), 0.5) # Multi row input_array = pd.DataFrame({"A": [0.9, 1, 1.1]}) @@ -488,14 +488,14 @@ def 
test_one_metric_one_return_probability(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = 1 - self.assertEqual(self.execute_snippet(input_array, prediction), "A") + prediction = [1] + self.assertEqual(self.execute_snippet(input_array, prediction), "B") # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [0, 1, 0] pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({metrics: ["B", "A", "B"]}), + pd.DataFrame({metrics: ["A", "B", "A"]}), ) def test_one_metric_two_returns(self): @@ -509,7 +509,7 @@ def test_one_metric_two_returns(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [1, 0] + prediction = [[1, 0]] self.assertEqual(self.execute_snippet(input_array, prediction), "A") # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -528,7 +528,7 @@ def test_one_metric_three_returns(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0, "Y", "z"] + prediction = [[0, "Y", "z"]] self.assertEqual(self.execute_snippet(input_array, prediction), "Y") # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -542,13 +542,15 @@ def test_two_metrics_h2o(self): metrics = ["Classification", "Probability"] returns = ["", int, int] self.sc.score_code += ( - "import pandas as pd\n" "def test_snippet(input_array, prediction):\n" + "import pandas as pd\n" + "import numpy as np\n" + "def test_snippet(input_array, prediction):\n" ) self.sc._binary_target(metrics, self.target_values, returns, h2o_model=True) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) prediction = [[], ["a", -1, 1]] - self.assertEqual(self.execute_snippet(input_array, prediction), ("a", 1)) + 
self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 1.0)) # Multi row input_array = pd.DataFrame({"A": [0, 1]}) prediction = pd.DataFrame( @@ -556,7 +558,7 @@ def test_two_metrics_h2o(self): ) pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({"Classification": [0, 1], "Probability": [0.1, 0.8]}), + pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.8]}), ) def test_two_metrics_one_return(self): @@ -568,14 +570,14 @@ def test_two_metrics_one_return(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = 0.2 - self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2)) + prediction = [0.2] + self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.2)) # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [1, -1] pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({"Classification": ["A", "B"], "Probability": [1, -1]}), + pd.DataFrame({"Classification": ["B", "A"], "Probability": [1, -1]}), ) def test_two_metrics_two_returns_no_classification(self): @@ -589,14 +591,14 @@ def test_two_metrics_two_returns_no_classification(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.2, 0.8] - self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2)) + prediction = [[0.2, 0.8]] + self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8)) # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [[0.9, 0.1], [0.4, 0.6]] pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}), + pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.6]}), ) def 
test_two_metrics_two_returns_classification(self): @@ -610,7 +612,7 @@ def test_two_metrics_two_returns_classification(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["B", 0.2] + prediction = [["B", 0.2]] self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2)) # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -631,14 +633,14 @@ def test_two_metrics_three_returns_class_first(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["B", 0.2, 0.8] - self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2)) + prediction = [["B", 0.2, 0.8]] + self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8)) # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [["A", 0.9, 0.1], ["B", 0.4, 0.6]] pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}), + pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.1, 0.6]}), ) def test_two_metrics_three_returns_class_last(self): @@ -652,14 +654,14 @@ def test_two_metrics_three_returns_class_last(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.2, 0.8, "B"] - self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.2)) + prediction = [[0.2, 0.8, "B"]] + self.assertEqual(self.execute_snippet(input_array, prediction), ("B", 0.8)) # Multi row input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [[0.9, 0.1, "A"], [0.4, 0.6, "B"]] pd.testing.assert_frame_equal( self.execute_snippet(input_array, prediction), - pd.DataFrame({"Classification": ["A", "B"], "Probability": [0.9, 0.4]}), + pd.DataFrame({"Classification": ["A", "B"], 
"Probability": [0.1, 0.6]}), ) def test_three_metrics_h2o(self): @@ -695,9 +697,9 @@ def test_three_metrics_one_return(self): print(self.sc.score_code) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = 0.9 + prediction = [0.9] self.assertEqual( - self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9) + self.execute_snippet(input_array, prediction), ("B", 0.9, 1 - 0.9) ) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -706,7 +708,7 @@ def test_three_metrics_one_return(self): self.execute_snippet(input_array, prediction), pd.DataFrame( { - "Classification": ["A", "B"], + "Classification": ["B", "A"], "Proba_0": [0.9, 0.1], "Proba_1": [1 - 0.9, 1 - 0.1], } @@ -724,8 +726,8 @@ def test_three_metrics_two_returns_no_class(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.9, 0.1] - self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.9, 0.1)) + prediction = [[0.9, 0.1]] + self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.1, 0.9)) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) prediction = [[0.9, 0.1], [0.2, 0.8]] @@ -751,7 +753,7 @@ def test_three_metrics_two_returns_class_first(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["A", 0.9] + prediction = [["A", 0.9]] self.assertEqual( self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9) ) @@ -780,7 +782,7 @@ def test_three_metrics_two_returns_class_last(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.9, "A"] + prediction = [[0.9, "A"]] self.assertEqual( self.execute_snippet(input_array, prediction), ("A", 0.9, 1 - 0.9) ) @@ -809,7 +811,7 @@ def 
test_three_metrics_three_returns(self): self.sc._binary_target(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["A", 0.9, 0.1] + prediction = [["A", 0.9, 0.1]] self.assertEqual(self.execute_snippet(input_array, prediction), ("A", 0.9, 0.1)) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -869,7 +871,7 @@ def test_one_metric_one_return(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = "C" + prediction = ["C"] self.assertEqual(self.execute_snippet(input_array, prediction), "C") # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -885,7 +887,7 @@ def test_one_metric_probability_returns(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.1, 0.2, 0.3] + prediction = [[0.1, 0.2, 0.3]] self.assertEqual(self.execute_snippet(input_array, prediction), "C") # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -901,7 +903,7 @@ def test_one_metric_classification_and_probability_returns(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["C", 0.1, 0.2, 0.3] + prediction = [["C", 0.1, 0.2, 0.3]] self.assertEqual(self.execute_snippet(input_array, prediction), "C") # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -935,7 +937,7 @@ def test_two_metrics_return_probabilities(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.1, 0.2, 0.3] + prediction = [[0.1, 0.2, 0.3]] self.assertEqual(self.execute_snippet(input_array, prediction), ("C", 0.3)) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -951,7 +953,7 
@@ def test_two_metrics_return_classification_and_probability(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["C", 0.1, 0.2, 0.3] + prediction = [["C", 0.1, 0.2, 0.3]] self.assertEqual(self.execute_snippet(input_array, prediction), ("C", 0.3)) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -1014,7 +1016,7 @@ def test_return_all_probabilities(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.1, 0.2, 0.3] + prediction = [[0.1, 0.2, 0.3]] self.assertEqual(self.execute_snippet(input_array, prediction), (0.1, 0.2, 0.3)) # Multiple rows input_array = pd.DataFrame({"A": [1, 0, 1]}) @@ -1032,7 +1034,7 @@ def test_return_all_probabilities_and_classification(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = ["P", 0.1, 0.2, 0.3] + prediction = [["P", 0.1, 0.2, 0.3]] self.assertEqual( self.execute_snippet(input_array, prediction), ("P", 0.1, 0.2, 0.3) ) @@ -1057,7 +1059,7 @@ def test_return_all_probabilities_generate_classification(self): self.sc._nonbinary_targets(metrics, self.target_values, returns) # Single row input_array = pd.DataFrame([[1]], columns=["A"], index=[0]) - prediction = [0.1, 0.2, 0.3] + prediction = [[0.1, 0.2, 0.3]] self.assertEqual( self.execute_snippet(input_array, prediction), ("C", 0.1, 0.2, 0.3) ) @@ -1103,7 +1105,7 @@ def test_predictions_to_metrics(): metrics = ["Classification", "Probability"] target_values = ["1", "0"] sc._predictions_to_metrics(metrics, returns, target_values) - func.assert_called_once_with(metrics, ["1", "0"], returns, None, False) + func.assert_called_once_with(metrics, ["1", "0"], returns, None, 1, False) with pytest.raises( ValueError, From 
a511e09128a1ca7c4543d86b493ccc9e117f7aa6 Mon Sep 17 00:00:00 2001 From: djm21 Date: Thu, 5 Oct 2023 18:49:04 -0500 Subject: [PATCH 3/7] Changed ordering of target values to match new score code in examples --- ...m_binary_classification_model_import.ipynb | 3 +- examples/pzmm_h2o_model_import.ipynb | 2160 ++++++++++++++++- .../pzmm_tensorflow_keras_model_import.ipynb | 2 +- 3 files changed, 2099 insertions(+), 66 deletions(-) diff --git a/examples/pzmm_binary_classification_model_import.ipynb b/examples/pzmm_binary_classification_model_import.ipynb index c68074b0..e540b703 100644 --- a/examples/pzmm_binary_classification_model_import.ipynb +++ b/examples/pzmm_binary_classification_model_import.ipynb @@ -814,7 +814,8 @@ " predict_method=[dtc.predict_proba, [int, int]], # What is the predict method and what does it return?\n", " score_metrics=score_metrics, # What are the output variables?\n", " overwrite_model=True, # Overwrite the model if it already exists?\n", - " target_values=[\"1\", \"0\"], # What are the expected values of the target variable?\n", + " target_values=[\"0\", \"1\"], # What are the expected values of the target variable?\n", + " target_index=1, # What is the index of the target value in target_values?\n", " model_file_name=prefix + \".pickle\", # How was the model file serialized?\n", " missing_values=True # Does the data include missing values?\n", " )\n", diff --git a/examples/pzmm_h2o_model_import.ipynb b/examples/pzmm_h2o_model_import.ipynb index 2609b51f..76982ef9 100644 --- a/examples/pzmm_h2o_model_import.ipynb +++ b/examples/pzmm_h2o_model_import.ipynb @@ -87,7 +87,9 @@ "outputs": [ { "data": { - "text/plain": "'3.38.0.4'" + "text/plain": [ + "'3.40.0.3'" + ] }, "execution_count": 3, "metadata": {}, @@ -114,14 +116,116 @@ "name": "stdout", "output_type": "stream", "text": [ - "Checking whether there is an H2O instance running at http://localhost:54321 . 
connected.\n", - "Warning: Your H2O cluster version is too old (5 months and 8 days)!Please download and install the latest version from http://h2o.ai/download/\n" + "Checking whether there is an H2O instance running at http://localhost:54321..... not found.\n", + "Attempting to start a local H2O server...\n", + " Java Version: openjdk version \"11.0.13\" 2021-10-19; OpenJDK Runtime Environment JBR-11.0.13.7-1751.21-jcef (build 11.0.13+7-b1751.21); OpenJDK 64-Bit Server VM JBR-11.0.13.7-1751.21-jcef (build 11.0.13+7-b1751.21, mixed mode)\n", + " Starting server from /Users/dalmoo/opt/anaconda3/envs/yeehaw/lib/python3.8/site-packages/h2o/backend/bin/h2o.jar\n", + " Ice root: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_\n", + " JVM stdout: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_/h2o_dalmoo_started_from_python.out\n", + " JVM stderr: /var/folders/vs/np2dp7cs1y7ggk5pl92q_rb40000gn/T/tmp0_92875_/h2o_dalmoo_started_from_python.err\n", + " Server is running at http://127.0.0.1:54321\n", + "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n", + "Warning: Your H2O cluster version is (6 months and 1 day) old. 
There may be a newer version available.\n", + "Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html\n" ] }, { "data": { - "text/plain": "-------------------------- -----------------------------\nH2O_cluster_uptime: 1 day 4 hours 9 mins\nH2O_cluster_timezone: America/New_York\nH2O_data_parsing_timezone: UTC\nH2O_cluster_version: 3.38.0.4\nH2O_cluster_version_age: 5 months and 8 days !!!\nH2O_cluster_name: H2O_from_python_demo_uqabco\nH2O_cluster_total_nodes: 1\nH2O_cluster_free_memory: 15.93 Gb\nH2O_cluster_total_cores: 16\nH2O_cluster_allowed_cores: 16\nH2O_cluster_status: locked, healthy\nH2O_connection_url: http://localhost:54321\nH2O_connection_proxy: {\"http\": null, \"https\": null}\nH2O_internal_security: False\nPython_version: 3.8.16 final\n-------------------------- -----------------------------", - "text/html": "\n \n
\n \n \n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
H2O_cluster_uptime:1 day 4 hours 9 mins
H2O_cluster_timezone:America/New_York
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.38.0.4
H2O_cluster_version_age:5 months and 8 days !!!
H2O_cluster_name:H2O_from_python_demo_uqabco
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:15.93 Gb
H2O_cluster_total_cores:16
H2O_cluster_allowed_cores:16
H2O_cluster_status:locked, healthy
H2O_connection_url:http://localhost:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
Python_version:3.8.16 final
\n
\n" + "text/html": [ + "\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
H2O_cluster_uptime:02 secs
H2O_cluster_timezone:America/Chicago
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.40.0.3
H2O_cluster_version_age:6 months and 1 day
H2O_cluster_name:H2O_from_python_dalmoo_6awy1u
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:4 Gb
H2O_cluster_total_cores:10
H2O_cluster_allowed_cores:10
H2O_cluster_status:locked, healthy
H2O_connection_url:http://127.0.0.1:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
Python_version:3.8.16 final
\n", + "
\n" + ], + "text/plain": [ + "-------------------------- -----------------------------\n", + "H2O_cluster_uptime: 02 secs\n", + "H2O_cluster_timezone: America/Chicago\n", + "H2O_data_parsing_timezone: UTC\n", + "H2O_cluster_version: 3.40.0.3\n", + "H2O_cluster_version_age: 6 months and 1 day\n", + "H2O_cluster_name: H2O_from_python_dalmoo_6awy1u\n", + "H2O_cluster_total_nodes: 1\n", + "H2O_cluster_free_memory: 4 Gb\n", + "H2O_cluster_total_cores: 10\n", + "H2O_cluster_allowed_cores: 10\n", + "H2O_cluster_status: locked, healthy\n", + "H2O_connection_url: http://127.0.0.1:54321\n", + "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", + "H2O_internal_security: False\n", + "Python_version: 3.8.16 final\n", + "-------------------------- -----------------------------" + ] }, "metadata": {}, "output_type": "display_data" @@ -156,7 +260,9 @@ }, { "data": { - "text/plain": "(5960, 13)" + "text/plain": [ + "(5960, 13)" + ] }, "execution_count": 5, "metadata": {}, @@ -219,8 +325,1926 @@ }, { "data": { - "text/plain": "Model Details\n=============\nH2OGeneralizedLinearEstimator : Generalized Linear Modeling\nModel Key: glmfit\n\n\nGLM Model: summary\n family link regularization lambda_search number_of_predictors_total number_of_active_predictors number_of_iterations training_frame\n-- -------- ------ --------------------------------------------- ---------------------------------------------------------------------------- ---------------------------- ----------------------------- ---------------------- ----------------\n binomial logit Elastic Net (alpha = 0.5, lambda = 9.244E-4 ) nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 18 17 93 py_3_sid_80dd\n\nModelMetricsBinomialGLM: glm\n** Reported on train data. 
**\n\nMSE: 0.12410460050039399\nRMSE: 0.3522848286548741\nLogLoss: 0.40208193579455126\nAUC: 0.78494005994006\nAUCPR: 0.569122888911077\nGini: 0.56988011988012\nNull degrees of freedom: 3580\nResidual degrees of freedom: 3563\nNull deviance: 3597.150438148382\nResidual deviance: 2879.710824160576\nAIC: 2915.710824160576\n\nConfusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574175263537824\n 0 1 Error Rate\n----- ---- --- ------- --------------\n0 2446 414 0.1448 (414.0/2860.0)\n1 314 407 0.4355 (314.0/721.0)\nTotal 2760 821 0.2033 (728.0/3581.0)\n\nMaximum Metrics: Maximum metrics at their respective thresholds\nmetric threshold value idx\n--------------------------- ----------- -------- -----\nmax f1 0.257418 0.527886 208\nmax f2 0.152297 0.626112 279\nmax f0point5 0.388608 0.557126 145\nmax accuracy 0.567264 0.835242 91\nmax precision 0.998809 1 0\nmax recall 0.00123756 1 399\nmax specificity 0.998809 1 0\nmax absolute_mcc 0.266615 0.402098 204\nmax min_per_class_accuracy 0.178278 0.706643 258\nmax mean_per_class_accuracy 0.214316 0.719503 234\nmax tns 0.998809 2860 0\nmax fns 0.998809 717 0\nmax fps 0.00626186 2860 398\nmax tps 0.00123756 721 399\nmax tnr 0.998809 1 0\nmax fnr 0.998809 0.994452 0\nmax fpr 0.00626186 1 398\nmax tpr 0.00123756 1 399\n\nGains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\ngroup cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n1 0.0100531 0.946685 4.96671 4.96671 1 0.980508 1 0.980508 0.0499307 0.0499307 396.671 396.671 0.0499307\n2 0.0201061 0.852347 4.82875 4.89773 0.972222 0.898798 0.986111 0.939653 0.0485437 0.0984743 
382.875 389.773 0.0981247\n3 0.0301592 0.747959 3.863 4.55282 0.777778 0.798413 0.916667 0.892573 0.038835 0.137309 286.3 355.282 0.134162\n4 0.0402122 0.691238 3.17318 4.20791 0.638889 0.715252 0.847222 0.848243 0.0319001 0.169209 217.318 320.791 0.161517\n5 0.0502653 0.626742 3.58707 4.08374 0.722222 0.655898 0.822222 0.809774 0.036061 0.20527 258.707 308.374 0.194082\n6 0.100251 0.430243 2.44174 3.26503 0.49162 0.519979 0.657382 0.66528 0.122053 0.327323 144.174 226.503 0.284316\n7 0.150237 0.345914 2.05328 2.86186 0.413408 0.383859 0.576208 0.571647 0.102635 0.429958 105.328 186.186 0.350238\n8 0.200223 0.284427 1.72031 2.57687 0.346369 0.311905 0.518828 0.506802 0.0859917 0.51595 72.0314 157.687 0.395321\n9 0.300195 0.209845 1.33186 2.16225 0.268156 0.243116 0.435349 0.418988 0.133148 0.649098 33.1856 116.225 0.436861\n10 0.400168 0.169575 0.887904 1.84389 0.178771 0.188252 0.371249 0.361345 0.0887656 0.737864 -11.2096 84.3888 0.422829\n11 0.50014 0.138031 0.679801 1.6112 0.136872 0.152531 0.3244 0.319605 0.0679612 0.805825 -32.0199 61.1201 0.382748\n12 0.600112 0.114618 0.541066 1.43293 0.108939 0.125883 0.288506 0.287333 0.0540915 0.859917 -45.8934 43.2928 0.325301\n13 0.700084 0.0945695 0.527193 1.30359 0.106145 0.104777 0.262465 0.261264 0.0527046 0.912621 -47.2807 30.3589 0.266118\n14 0.800056 0.0736142 0.388458 1.18924 0.0782123 0.0841117 0.239442 0.239128 0.038835 0.951456 -61.1542 18.9237 0.189568\n15 0.900028 0.0521778 0.235849 1.08334 0.047486 0.0627678 0.21812 0.219538 0.0235784 0.975035 -76.4151 8.33382 0.0939158\n16 1 0.00111651 0.249723 1 0.0502793 0.0375143 0.20134 0.201341 0.0249653 1 -75.0277 0 0\n\nModelMetricsBinomialGLM: glm\n** Reported on validation data. 
**\n\nMSE: 0.11783296119533457\nRMSE: 0.3432680602609782\nLogLoss: 0.38278517425537495\nAUC: 0.8173716250592095\nAUCPR: 0.5982146215209964\nGini: 0.6347432501184189\nNull degrees of freedom: 1195\nResidual degrees of freedom: 1178\nNull deviance: 1193.6689319811862\nResidual deviance: 915.6221368188569\nAIC: 951.6221368188569\n\nConfusion Matrix (Act/Pred) for max f1 @ threshold = 0.31271322943274704\n 0 1 Error Rate\n----- --- --- ------- --------------\n0 861 97 0.1013 (97.0/958.0)\n1 107 131 0.4496 (107.0/238.0)\nTotal 968 228 0.1706 (204.0/1196.0)\n\nMaximum Metrics: Maximum metrics at their respective thresholds\nmetric threshold value idx\n--------------------------- ----------- -------- -----\nmax f1 0.312713 0.562232 155\nmax f2 0.187748 0.655148 230\nmax f0point5 0.429616 0.60794 107\nmax accuracy 0.443857 0.846154 105\nmax precision 0.993986 1 0\nmax recall 0.00965731 1 398\nmax specificity 0.993986 1 0\nmax absolute_mcc 0.312713 0.456535 155\nmax min_per_class_accuracy 0.195405 0.747899 224\nmax mean_per_class_accuracy 0.191452 0.750803 227\nmax tns 0.993986 958 0\nmax fns 0.993986 237 0\nmax fps 0.00749696 958 399\nmax tps 0.00965731 238 398\nmax tnr 0.993986 1 0\nmax fnr 0.993986 0.995798 0\nmax fpr 0.00749696 1 399\nmax tpr 0.00965731 1 398\n\nGains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\ngroup cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n1 0.0100334 0.942287 5.02521 5.02521 1 0.969817 1 0.969817 0.0504202 0.0504202 402.521 402.521 0.0504202\n2 0.0200669 0.88595 4.60644 4.81583 0.916667 0.91333 0.958333 0.941574 0.0462185 0.0966387 360.644 
381.583 0.0955948\n3 0.0301003 0.833395 3.76891 4.46685 0.75 0.866256 0.888889 0.916468 0.0378151 0.134454 276.891 346.685 0.130278\n4 0.0401338 0.784863 3.76891 4.29237 0.75 0.815408 0.854167 0.891203 0.0378151 0.172269 276.891 329.237 0.164962\n5 0.0501672 0.721202 2.51261 3.93641 0.5 0.7506 0.783333 0.863082 0.0252101 0.197479 151.261 293.641 0.183909\n6 0.100334 0.494711 3.26639 3.6014 0.65 0.598425 0.716667 0.730754 0.163866 0.361345 226.639 260.14 0.325854\n7 0.150502 0.358189 2.01008 3.07096 0.4 0.413083 0.611111 0.624864 0.10084 0.462185 101.008 207.096 0.389116\n8 0.200669 0.30021 2.01008 2.80574 0.4 0.327422 0.558333 0.550503 0.10084 0.563025 101.008 180.574 0.452378\n9 0.300167 0.22369 1.1824 2.26764 0.235294 0.257801 0.451253 0.453479 0.117647 0.680672 18.2402 126.764 0.475036\n10 0.400502 0.176142 1.04692 1.96183 0.208333 0.196384 0.390397 0.389071 0.105042 0.785714 4.69188 96.1825 0.480913\n11 0.5 0.143863 0.548973 1.68067 0.109244 0.158272 0.334448 0.343143 0.0546218 0.840336 -45.1027 68.0672 0.424887\n12 0.600334 0.121439 0.502521 1.48377 0.1 0.132591 0.295265 0.307953 0.0504202 0.890756 -49.7479 48.3767 0.362573\n13 0.699833 0.0997811 0.506744 1.34486 0.10084 0.110054 0.267622 0.279817 0.0504202 0.941176 -49.3256 34.4859 0.301302\n14 0.800167 0.0798711 0.251261 1.20773 0.05 0.0891164 0.240334 0.255905 0.0252101 0.966387 -74.8739 20.7731 0.207514\n15 0.899666 0.0556177 0.211143 1.09751 0.0420168 0.0677358 0.218401 0.235094 0.0210084 0.987395 -78.8857 9.75134 0.109524\n16 1 0.00749696 0.12563 1 0.025 0.0397561 0.198997 0.215495 0.012605 1 -87.437 0 0\n\nScoring History: \n timestamp duration iteration lambda predictors deviance_train deviance_test alpha iterations training_rmse training_logloss training_r2 training_auc training_pr_auc training_lift training_classification_error validation_rmse validation_logloss validation_r2 validation_auc validation_pr_auc validation_lift validation_classification_error\n--- ------------------- ---------- 
----------- -------- ------------ ------------------ ------------------ ------- ------------ ------------------ ------------------- ------------------- ---------------- ----------------- ----------------- ------------------------------- ------------------ -------------------- ------------------- ------------------ ------------------- ----------------- ---------------------------------\n 2023-06-14 13:11:07 0.000 sec 1 .25E0 1 1.0045100357856414 0.9980509464725633 0.5\n 2023-06-14 13:11:07 0.002 sec 3 .22E0 2 0.9946354860817235 0.9864126793254803 0.5\n 2023-06-14 13:11:07 0.004 sec 5 .2E0 2 0.9859035403430394 0.9760418435888577 0.5\n 2023-06-14 13:11:07 0.006 sec 7 .19E0 2 0.9781697250290138 0.9667699237562073 0.5\n 2023-06-14 13:11:07 0.007 sec 9 .17E0 3 0.9686784218176837 0.9544752889432682 0.5\n 2023-06-14 13:11:07 0.009 sec 11 .15E0 3 0.9587676036112142 0.9411531794664827 0.5\n 2023-06-14 13:11:07 0.011 sec 13 .14E0 3 0.9500952786201631 0.9293708568093557 0.5\n 2023-06-14 13:11:07 0.013 sec 15 .13E0 3 0.9424603199349194 0.918884378267228 0.5\n 2023-06-14 13:11:07 0.015 sec 17 .12E0 5 0.9341430965161417 0.9080921324097249 0.5\n 2023-06-14 13:11:07 0.017 sec 19 .11E0 5 0.9236448771247916 0.8955093341627816 0.5\n--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n 2023-06-14 13:11:07 0.119 sec 89 .13E-2 18 0.8045918660168828 0.7656443950528371 0.5\n 2023-06-14 13:11:07 0.120 sec 90 .12E-2 18 0.8044580073891262 0.7656094511752577 0.5\n 2023-06-14 13:11:07 0.121 sec 91 .11E-2 18 0.8043435286223151 0.7655877168108034 0.5\n 2023-06-14 13:11:07 0.122 sec 92 .1E-2 18 0.8042461026983848 0.765575731924841 0.5\n 2023-06-14 13:11:07 0.123 sec 93 .92E-3 18 0.8041638715891026 0.7655703485107499 0.5\n 2023-06-14 13:11:07 0.125 sec 94 .84E-3 18 0.8040938870788785 0.7655716226141344 0.5\n 2023-06-14 13:11:07 0.126 sec 95 .77E-3 18 0.8040346859016411 0.7655772387299105 0.5\n 2023-06-14 13:11:07 0.127 sec 96 .7E-3 18 0.8039849610395213 
0.7655871006325123 0.5\n 2023-06-14 13:11:07 0.128 sec 97 .64E-3 18 0.8039427444913662 0.7655986856252318 0.5\n 2023-06-14 13:11:07 0.130 sec 98 .58E-3 18 0.8039071366484086 0.7656131564793512 0.5 98 0.3522848286548741 0.40208193579455126 0.22821696997302088 0.78494005994006 0.569122888911077 4.966712898751734 0.20329516894722144 0.3432680602609782 0.38278517425537495 0.26075791204897436 0.8173716250592095 0.5982146215209964 5.025210084033613 0.1705685618729097\n[66 rows x 24 columns]\n\n\nVariable Importances: \nvariable relative_importance scaled_importance percentage\n-------------- --------------------- ------------------- ------------\nDELINQ 0.797015 1 0.147301\nJOB.Sales 0.747835 0.938295 0.138211\nJOB.Office 0.510924 0.641047 0.0944266\nJOB.Self 0.45714 0.573565 0.0844865\nCLAGE 0.448498 0.562723 0.0828894\nDEBTINC 0.44676 0.560542 0.0825681\nDEROG 0.426324 0.534901 0.0787912\nNINQ 0.303735 0.381091 0.056135\nVALUE 0.241267 0.302713 0.0445898\nMORTDUE 0.238454 0.299183 0.0440699\nLOAN 0.21809 0.273634 0.0403064\nREASON.HomeImp 0.186297 0.233744 0.0344306\nCLNO 0.148527 0.186354 0.02745\nREASON.DebtCon 0.130812 0.164128 0.0241762\nYOJ 0.0438346 0.0549984 0.0081013\nJOB.Other 0.0353206 0.0443161 0.00652779\nJOB.Mgr 0.0299719 0.0376052 0.00553927\nJOB.ProfExe 0 0 0\n\n[tips]\nUse `model.explain()` to inspect the model.\n--\nUse `h2o.display.toggle_user_tips()` to switch on/off this section.", - "text/html": "
Model Details\n=============\nH2OGeneralizedLinearEstimator : Generalized Linear Modeling\nModel Key: glmfit\n
\n
\n \n
\n \n \n \n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n
GLM Model: summary
familylinkregularizationlambda_searchnumber_of_predictors_totalnumber_of_active_predictorsnumber_of_iterationstraining_frame
binomiallogitElastic Net (alpha = 0.5, lambda = 9.244E-4 )nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0181793py_3_sid_80dd
\n
\n
\n
ModelMetricsBinomialGLM: glm\n** Reported on train data. **\n\nMSE: 0.12410460050039399\nRMSE: 0.3522848286548741\nLogLoss: 0.40208193579455126\nAUC: 0.78494005994006\nAUCPR: 0.569122888911077\nGini: 0.56988011988012\nNull degrees of freedom: 3580\nResidual degrees of freedom: 3563\nNull deviance: 3597.150438148382\nResidual deviance: 2879.710824160576\nAIC: 2915.710824160576
\n
\n \n
\n \n \n \n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574175263537824
01ErrorRate
02446.0414.00.1448 (414.0/2860.0)
1314.0407.00.4355 (314.0/721.0)
Total2760.0821.00.2033 (728.0/3581.0)
\n
\n
\n
\n \n
\n \n \n \n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Maximum Metrics: Maximum metrics at their respective thresholds
metricthresholdvalueidx
max f10.25741750.5278859208.0
max f20.15229740.6261121279.0
max f0point50.38860780.5571256145.0
max accuracy0.56726390.835241691.0
max precision0.99880941.00.0
max recall0.00123761.0399.0
max specificity0.99880941.00.0
max absolute_mcc0.26661520.4020977204.0
max min_per_class_accuracy0.17827850.7066434258.0
max mean_per_class_accuracy0.21431630.7195026234.0
max tns0.99880942860.00.0
max fns0.9988094717.00.0
max fps0.00626192860.0398.0
max tps0.0012376721.0399.0
max tnr0.99880941.00.0
max fnr0.99880940.99445210.0
max fpr0.00626191.0398.0
max tpr0.00123761.0399.0
\n
\n
\n
\n \n
\n \n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %
groupcumulative_data_fractionlower_thresholdliftcumulative_liftresponse_ratescorecumulative_response_ratecumulative_scorecapture_ratecumulative_capture_rategaincumulative_gainkolmogorov_smirnov
10.01005310.94668544.96671294.96671291.00.98050791.00.98050790.04993070.0499307396.6712899396.67128990.0499307
20.02010610.85234704.82874874.89773080.97222220.89879820.98611110.93965300.04854370.0984743382.8748652389.77307750.0981247
30.03015920.74795883.86299894.55282020.77777780.79841250.91666670.89257290.03883500.1373093286.2998921355.28201570.1341624
40.04021220.69123843.17317774.20790950.63888890.71525220.84722220.84824270.03190010.1692094217.3177685320.79095390.1615171
50.05026530.62674173.58707044.08374170.72222220.65589770.82222220.80977370.03606100.2052705258.7070427308.37417170.1940816
60.10025130.43024262.44173603.26502570.49162010.51997880.65738160.66527980.12205270.3273232144.1735950226.50257500.2843162
70.15023740.34591432.05327802.86186060.41340780.38385920.57620820.57164730.10263520.4299584105.3277958186.18605920.3502381
80.20022340.28442701.72031402.57687200.34636870.31190480.51882850.50680220.08599170.515950172.0313965157.68719640.3953207
90.30019550.20984471.33185602.16225270.26815640.24311570.43534880.41898850.13314840.649098533.1855973116.22526850.4368607
100.40016760.16957450.88790401.84388780.17877090.18825180.37124910.36134460.08876560.7378641-11.209601884.38878310.4228291
110.50013960.13803080.67980151.61120060.13687150.15253080.32439980.31960510.06796120.8058252-32.019851461.12005550.3827483
120.60011170.11461800.54106651.43292790.10893850.12588260.28850630.28733310.05409150.8599168-45.893351143.29278720.3253014
130.70008380.09456950.52719301.30358880.10614530.10477750.26246510.26126410.05270460.9126214-47.280701130.35887860.2661179
140.80005590.07361420.38845801.18923740.07821230.08411170.23944150.23912780.03883500.9514563-61.154200818.92373640.1895682
150.90002790.05217780.23584951.08333820.04748600.06276780.21811980.21953830.02357840.9750347-76.41505058.33382460.0939158
161.00.00111650.24972301.00.05027930.03751430.20134040.20134100.02496531.0-75.02770050.00.0
\n
\n
\n
ModelMetricsBinomialGLM: glm\n** Reported on validation data. **\n\nMSE: 0.11783296119533457\nRMSE: 0.3432680602609782\nLogLoss: 0.38278517425537495\nAUC: 0.8173716250592095\nAUCPR: 0.5982146215209964\nGini: 0.6347432501184189\nNull degrees of freedom: 1195\nResidual degrees of freedom: 1178\nNull deviance: 1193.6689319811862\nResidual deviance: 915.6221368188569\nAIC: 951.6221368188569
\n
\n \n
\n \n \n \n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.31271322943274704
01ErrorRate
0861.097.00.1013 (97.0/958.0)
1107.0131.00.4496 (107.0/238.0)
Total968.0228.00.1706 (204.0/1196.0)
\n
\n
\n
\n \n
\n \n \n \n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Maximum Metrics: Maximum metrics at their respective thresholds
metricthresholdvalueidx
max f10.31271320.5622318155.0
max f20.18774750.6551476230.0
max f0point50.42961590.6079404107.0
max accuracy0.44385660.8461538105.0
max precision0.99398621.00.0
max recall0.00965731.0398.0
max specificity0.99398621.00.0
max absolute_mcc0.31271320.4565355155.0
max min_per_class_accuracy0.19540520.7478992224.0
max mean_per_class_accuracy0.19145200.7508026227.0
max tns0.9939862958.00.0
max fns0.9939862237.00.0
max fps0.0074970958.0399.0
max tps0.0096573238.0398.0
max tnr0.99398621.00.0
max fnr0.99398620.99579830.0
max fpr0.00749701.0399.0
max tpr0.00965731.0398.0
\n
\n
\n
\n \n
\n \n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %
groupcumulative_data_fractionlower_thresholdliftcumulative_liftresponse_ratescorecumulative_response_ratecumulative_scorecapture_ratecumulative_capture_rategaincumulative_gainkolmogorov_smirnov
10.01003340.94228705.02521015.02521011.00.96981721.00.96981720.05042020.0504202402.5210084402.52100840.0504202
20.02006690.88594994.60644264.81582630.91666670.91333020.95833330.94157370.04621850.0966387360.6442577381.58263310.0955948
30.03010030.83339513.76890764.46685340.750.86625650.88888890.91646790.03781510.1344538276.8907563346.68534080.1302784
40.04013380.78486283.76890764.29236690.750.81540780.85416670.89120290.03781510.1722689276.8907563329.23669470.1649620
50.05016720.72120192.51260503.93641460.50.75060030.78333330.86308240.02521010.1974790151.2605042293.64145660.1839091
60.10033440.49471073.26638663.60140060.650.59842500.71666670.73075370.16386550.3613445226.6386555260.14005600.3258539
70.15050170.35818922.01008403.07096170.40.41308340.61111110.62486360.10084030.4621849101.0084034207.09617180.3891160
80.20066890.30021032.01008402.80574230.40.32742150.55833330.55050310.10084030.5630252101.0084034180.57422970.4523780
90.30016720.22368981.18240242.26764350.23529410.25780080.45125350.45347920.11764710.680672318.2402373126.76435480.4750355
100.40050170.17614191.04691881.96182520.20833330.19638360.39039670.38907110.10504200.78571434.691876896.18252310.4809126
110.50.14386270.54897251.68067230.10924370.15827160.33444820.34314280.05462180.8403361-45.102747068.06722690.4248873
120.60033440.12143920.50252101.48376680.10.13259100.29526460.30795310.05042020.8907563-49.747899248.37667660.3625726
130.69983280.09978110.50674391.34485910.10084030.11005430.26762250.27981690.05042020.9411765-49.325612634.48590910.3013017
140.80016720.07987110.25126051.20773070.050.08911640.24033440.25590460.02521010.9663866-74.873949620.77307410.2075139
150.89966560.05561770.21114331.09751340.04201680.06773580.21840150.23509410.02100840.9873950-78.88567199.75133550.1095244
161.00.00749700.12563031.00.0250.03975610.19899670.21549500.01260501.0-87.43697480.00.0
\n
\n
\n
\n \n
\n \n \n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Scoring History:
timestampdurationiterationlambdapredictorsdeviance_traindeviance_testalphaiterationstraining_rmsetraining_loglosstraining_r2training_auctraining_pr_auctraining_lifttraining_classification_errorvalidation_rmsevalidation_loglossvalidation_r2validation_aucvalidation_pr_aucvalidation_liftvalidation_classification_error
2023-06-14 13:11:07 0.000 sec1.25E011.00451000.99805090.5None
2023-06-14 13:11:07 0.002 sec3.22E020.99463550.98641270.5None
2023-06-14 13:11:07 0.004 sec5.2E020.98590350.97604180.5None
2023-06-14 13:11:07 0.006 sec7.19E020.97816970.96676990.5None
2023-06-14 13:11:07 0.007 sec9.17E030.96867840.95447530.5None
2023-06-14 13:11:07 0.009 sec11.15E030.95876760.94115320.5None
2023-06-14 13:11:07 0.011 sec13.14E030.95009530.92937090.5None
2023-06-14 13:11:07 0.013 sec15.13E030.94246030.91888440.5None
2023-06-14 13:11:07 0.015 sec17.12E050.93414310.90809210.5None
2023-06-14 13:11:07 0.017 sec19.11E050.92364490.89550930.5None
------------------------------------------------------------------------
2023-06-14 13:11:07 0.119 sec89.13E-2180.80459190.76564440.5None
2023-06-14 13:11:07 0.120 sec90.12E-2180.80445800.76560950.5None
2023-06-14 13:11:07 0.121 sec91.11E-2180.80434350.76558770.5None
2023-06-14 13:11:07 0.122 sec92.1E-2180.80424610.76557570.5None
2023-06-14 13:11:07 0.123 sec93.92E-3180.80416390.76557030.5None
2023-06-14 13:11:07 0.125 sec94.84E-3180.80409390.76557160.5None
2023-06-14 13:11:07 0.126 sec95.77E-3180.80403470.76557720.5None
2023-06-14 13:11:07 0.127 sec96.7E-3180.80398500.76558710.5None
2023-06-14 13:11:07 0.128 sec97.64E-3180.80394270.76559870.5None
2023-06-14 13:11:07 0.130 sec98.58E-3180.80390710.76561320.5980.35228480.40208190.22821700.78494010.56912294.96671290.20329520.34326810.38278520.26075790.81737160.59821465.02521010.1705686
\n
\n
[66 rows x 24 columns]
\n
\n \n
\n \n \n \n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
Variable Importances:
variablerelative_importancescaled_importancepercentage
DELINQ0.79701491.00.1473006
JOB.Sales0.74783470.93829460.1382114
JOB.Office0.51092400.64104700.0944266
JOB.Self0.45714000.57356520.0844865
CLAGE0.44849820.56272250.0828894
DEBTINC0.44676010.56054170.0825681
DEROG0.42632380.53490070.0787912
NINQ0.30373530.38109120.0561350
VALUE0.24126670.30271290.0445898
MORTDUE0.23845360.29918330.0440699
LOAN0.21809020.27363380.0403064
REASON.HomeImp0.18629740.23374390.0344306
CLNO0.14852670.18635380.0274500
REASON.DebtCon0.13081250.16412800.0241762
YOJ0.04383460.05499840.0081013
JOB.Other0.03532060.04431610.0065278
JOB.Mgr0.02997190.03760520.0055393
JOB.ProfExe0.00.00.0
\n
\n
\n\n[tips]\nUse `model.explain()` to inspect the model.\n--\nUse `h2o.display.toggle_user_tips()` to switch on/off this section.
" + "text/html": [ + "
Model Details\n",
+       "=============\n",
+       "H2OGeneralizedLinearEstimator : Generalized Linear Modeling\n",
+       "Model Key: glmfit\n",
+       "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
GLM Model: summary
familylinkregularizationlambda_searchnumber_of_predictors_totalnumber_of_active_predictorsnumber_of_iterationstraining_frame
binomiallogitElastic Net (alpha = 0.5, lambda = 9.244E-4 )nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0181793py_3_sid_a269
\n", + "
\n", + "
\n", + "
ModelMetricsBinomialGLM: glm\n",
+       "** Reported on train data. **\n",
+       "\n",
+       "MSE: 0.12410463474559494\n",
+       "RMSE: 0.3522848772592927\n",
+       "LogLoss: 0.40208197287665776\n",
+       "AUC: 0.7849415147958837\n",
+       "AUCPR: 0.5691248967099318\n",
+       "Gini: 0.5698830295917674\n",
+       "Null degrees of freedom: 3580\n",
+       "Residual degrees of freedom: 3563\n",
+       "Null deviance: 3597.150438148379\n",
+       "Residual deviance: 2879.711089742623\n",
+       "AIC: 2915.711089742623
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574171649920362
01ErrorRate
02446.0414.00.1448 (414.0/2860.0)
1314.0407.00.4355 (314.0/721.0)
Total2760.0821.00.2033 (728.0/3581.0)
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Maximum Metrics: Maximum metrics at their respective thresholds
metricthresholdvalueidx
max f10.25741720.5278859207.0
max f20.15229670.6261121279.0
max f0point50.38860580.5571256144.0
max accuracy0.56725920.835241690.0
max precision0.99880921.00.0
max recall0.00123771.0399.0
max specificity0.99880921.00.0
max absolute_mcc0.26618520.4020977203.0
max min_per_class_accuracy0.17827960.7066434258.0
max mean_per_class_accuracy0.21431510.7195026234.0
max tns0.99880922860.00.0
max fns0.9988092717.00.0
max fps0.00626272860.0398.0
max tps0.0012377721.0399.0
max tnr0.99880921.00.0
max fnr0.99880920.99445210.0
max fpr0.00626271.0398.0
max tpr0.00123771.0399.0
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %
groupcumulative_data_fractionlower_thresholdliftcumulative_liftresponse_ratescorecumulative_response_ratecumulative_scorecapture_ratecumulative_capture_rategaincumulative_gainkolmogorov_smirnov
10.01005310.94668244.96671294.96671291.00.98050691.00.98050690.04993070.0499307396.6712899396.67128990.0499307
20.02010610.85234024.82874874.89773080.97222220.89879410.98611110.93965050.04854370.0984743382.8748652389.77307750.0981247
30.03015920.74795583.86299894.55282020.77777780.79840720.91666670.89256940.03883500.1373093286.2998921355.28201570.1341624
40.04021220.69123403.17317774.20790950.63888890.71524490.84722220.84823830.03190010.1692094217.3177685320.79095390.1615171
50.05026530.62673023.58707044.08374170.72222220.65589360.82222220.80976930.03606100.2052705258.7070427308.37417170.1940816
60.10025130.43024632.44173603.26502570.49162010.51997490.65738160.66527570.12205270.3273232144.1735950226.50257500.2843162
70.15023740.34591782.05327802.86186060.41340780.38385640.57620820.57164360.10263520.4299584105.3277958186.18605920.3502381
80.20022340.28442051.72031402.57687200.34636870.31190300.51882850.50679900.08599170.515950172.0313965157.68719640.3953207
90.30019550.20984411.33185602.16225270.26815640.24311470.43534880.41898600.13314840.649098533.1855973116.22526850.4368607
100.40016760.16957310.88790401.84388780.17877090.18825200.37124910.36134280.08876560.7378641-11.209601884.38878310.4228291
110.50013960.13803180.67980151.61120060.13687150.15253140.32439980.31960380.06796120.8058252-32.019851461.12005550.3827483
120.60011170.11462350.54106651.43292790.10893850.12588380.28850630.28733220.05409150.8599168-45.893351143.29278720.3253014
130.70008380.09457530.52719301.30358880.10614530.10477860.26246510.26126350.05270460.9126214-47.280701130.35887860.2661179
140.80005590.07361480.38845801.18923740.07821230.08411320.23944150.23912740.03883500.9514563-61.154200818.92373640.1895682
150.90002790.05217930.23584951.08333820.04748600.06276940.21811980.21953820.02357840.9750347-76.41505058.33382460.0939158
161.00.00111670.24972301.00.05027930.03751560.20134040.20134100.02496531.0-75.02770050.00.0
\n", + "
\n", + "
\n", + "
ModelMetricsBinomialGLM: glm\n",
+       "** Reported on validation data. **\n",
+       "\n",
+       "MSE: 0.1178330502946334\n",
+       "RMSE: 0.3432681900418875\n",
+       "LogLoss: 0.3827854417227382\n",
+       "AUC: 0.8173891686110769\n",
+       "AUCPR: 0.5982392824442148\n",
+       "Gini: 0.6347783372221538\n",
+       "Null degrees of freedom: 1195\n",
+       "Residual degrees of freedom: 1178\n",
+       "Null deviance: 1193.6689319811862\n",
+       "Residual deviance: 915.6227766007898\n",
+       "AIC: 951.6227766007898
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3129932533175332
01ErrorRate
0863.095.00.0992 (95.0/958.0)
1107.0131.00.4496 (107.0/238.0)
Total970.0226.00.1689 (202.0/1196.0)
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Maximum Metrics: Maximum metrics at their respective thresholds
metricthresholdvalueidx
max f10.31299330.5646552155.0
max f20.18774630.6551476231.0
max f0point50.42961250.6079404107.0
max accuracy0.44384850.8461538105.0
max precision0.99398571.00.0
max recall0.00965811.0398.0
max specificity0.99398571.00.0
max absolute_mcc0.31299330.4602072155.0
max min_per_class_accuracy0.19540520.7478992225.0
max mean_per_class_accuracy0.19145400.7508026228.0
max tns0.9939857958.00.0
max fns0.9939857237.00.0
max fps0.0074980958.0399.0
max tps0.0096581238.0398.0
max tnr0.99398571.00.0
max fnr0.99398570.99579830.0
max fpr0.00749801.0399.0
max tpr0.00965811.0398.0
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %
groupcumulative_data_fractionlower_thresholdliftcumulative_liftresponse_ratescorecumulative_response_ratecumulative_scorecapture_ratecumulative_capture_rategaincumulative_gainkolmogorov_smirnov
10.01003340.94228275.02521015.02521011.00.96981591.00.96981590.05042020.0504202402.5210084402.52100840.0504202
20.02006690.88594224.60644264.81582630.91666670.91332650.95833330.94157120.04621850.0966387360.6442577381.58263310.0955948
30.03010030.83338803.76890764.46685340.750.86625060.88888890.91646430.03781510.1344538276.8907563346.68534080.1302784
40.04013380.78485823.76890764.29236690.750.81540200.85416670.89119870.03781510.1722689276.8907563329.23669470.1649620
50.05016720.72119592.51260503.93641460.50.75059350.78333330.86307770.02521010.1974790151.2605042293.64145660.1839091
60.10033440.49470583.26638663.60140060.650.59841990.71666670.73074880.16386550.3613445226.6386555260.14005600.3258539
70.15050170.35818292.01008403.07096170.40.41308050.61111110.62485930.10084030.4621849101.0084034207.09617180.3891160
80.20066890.30020172.01008402.80574230.40.32742070.55833330.55049970.10084030.5630252101.0084034180.57422970.4523780
90.30016720.22368921.18240242.26764350.23529410.25779970.45125350.45347660.11764710.680672318.2402373126.76435480.4750355
100.40050170.17613871.04691881.96182520.20833330.19638350.39039670.38906910.10504200.78571434.691876896.18252310.4809126
110.50.14386730.54897251.68067230.10924370.15827190.33444820.34314120.05462180.8403361-45.102747068.06722690.4248873
120.60033440.12143810.50252101.48376680.10.13259180.29526460.30795190.05042020.8907563-49.747899248.37667660.3625726
130.69983280.09978170.50674391.34485910.10084030.11005590.26762250.27981620.05042020.9411765-49.325612634.48590910.3013017
140.80016720.07987200.25126051.20773070.050.08911780.24033440.25590410.02521010.9663866-74.873949620.77307410.2075139
150.89966560.05561840.21114331.09751340.04201680.06773740.21840150.23509390.02100840.9873950-78.88567199.75133550.1095244
161.00.00749800.12563031.00.0250.03975750.19899670.21549490.01260501.0-87.43697480.00.0
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Scoring History:
timestampdurationiterationlambdapredictorsdeviance_traindeviance_testalphaiterationstraining_rmsetraining_loglosstraining_r2training_auctraining_pr_auctraining_lifttraining_classification_errorvalidation_rmsevalidation_loglossvalidation_r2validation_aucvalidation_pr_aucvalidation_liftvalidation_classification_error
2023-10-05 18:17:37 0.000 sec1.25E011.00451000.99805090.5None
2023-10-05 18:17:37 0.061 sec3.22E020.99463550.98641270.5None
2023-10-05 18:17:37 0.101 sec5.2E020.98590350.97604180.5None
2023-10-05 18:17:37 0.130 sec7.19E020.97816970.96676990.5None
2023-10-05 18:17:37 0.164 sec9.17E030.96867840.95447530.5None
2023-10-05 18:17:37 0.201 sec11.15E030.95876760.94115320.5None
2023-10-05 18:17:37 0.220 sec13.14E030.95009530.92937090.5None
2023-10-05 18:17:37 0.268 sec15.13E030.94246030.91888440.5None
2023-10-05 18:17:37 0.295 sec17.12E050.93414310.90809210.5None
2023-10-05 18:17:37 0.351 sec19.11E050.92364490.89550930.5None
------------------------------------------------------------------------
2023-10-05 18:17:38 1.462 sec89.13E-2180.80459200.76564550.5None
2023-10-05 18:17:38 1.467 sec90.12E-2180.80445820.76561030.5None
2023-10-05 18:17:38 1.472 sec91.11E-2180.80434360.76558840.5None
2023-10-05 18:17:38 1.485 sec92.1E-2180.80424620.76557640.5None
2023-10-05 18:17:38 1.492 sec93.92E-3180.80416390.76557090.5None
2023-10-05 18:17:38 1.498 sec94.84E-3180.80409390.76557210.5None
2023-10-05 18:17:38 1.502 sec95.77E-3180.80403470.76557760.5None
2023-10-05 18:17:38 1.506 sec96.7E-3180.80398500.76558750.5None
2023-10-05 18:17:38 1.518 sec97.64E-3180.80394280.76559900.5None
2023-10-05 18:17:38 1.528 sec98.58E-3180.80390720.76561340.5980.35228490.40208200.22821680.78494150.56912494.96671290.20329520.34326820.38278540.26075740.81738920.59823935.02521010.1688963
\n", + "
\n", + "
[66 rows x 24 columns]
\n", + "
\n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Variable Importances:
variablerelative_importancescaled_importancepercentage
DELINQ0.79700531.00.1473004
JOB.Sales0.74784840.93832300.1382154
JOB.Office0.51091440.64104260.0944259
JOB.Self0.45713290.57356320.0844861
CLAGE0.44848340.56271070.0828875
DEBTINC0.44674760.56053280.0825667
DEROG0.42631940.53490160.0787912
NINQ0.30373090.38109020.0561347
VALUE0.24126000.30270810.0445890
MORTDUE0.23844870.29918080.0440695
LOAN0.21807680.27362030.0403044
REASON.HomeImp0.17976860.22555510.0332244
CLNO0.14852050.18634810.0274492
REASON.DebtCon0.13738420.17237550.0253910
YOJ0.04383450.05499900.0081014
JOB.Other0.03530930.04430250.0065258
JOB.Mgr0.02996150.03759260.0055374
JOB.ProfExe0.00.00.0
\n", + "
\n", + "
\n",
+       "\n",
+       "[tips]\n",
+       "Use `model.explain()` to inspect the model.\n",
+       "--\n",
+       "Use `h2o.display.toggle_user_tips()` to switch on/off this section.
" + ], + "text/plain": [ + "Model Details\n", + "=============\n", + "H2OGeneralizedLinearEstimator : Generalized Linear Modeling\n", + "Model Key: glmfit\n", + "\n", + "\n", + "GLM Model: summary\n", + " family link regularization lambda_search number_of_predictors_total number_of_active_predictors number_of_iterations training_frame\n", + "-- -------- ------ --------------------------------------------- ---------------------------------------------------------------------------- ---------------------------- ----------------------------- ---------------------- ----------------\n", + " binomial logit Elastic Net (alpha = 0.5, lambda = 9.244E-4 ) nlambda = 100, lambda.max = 0.2455, lambda.min = 9.244E-4, lambda.1se = -1.0 18 17 93 py_3_sid_a269\n", + "\n", + "ModelMetricsBinomialGLM: glm\n", + "** Reported on train data. **\n", + "\n", + "MSE: 0.12410463474559494\n", + "RMSE: 0.3522848772592927\n", + "LogLoss: 0.40208197287665776\n", + "AUC: 0.7849415147958837\n", + "AUCPR: 0.5691248967099318\n", + "Gini: 0.5698830295917674\n", + "Null degrees of freedom: 3580\n", + "Residual degrees of freedom: 3563\n", + "Null deviance: 3597.150438148379\n", + "Residual deviance: 2879.711089742623\n", + "AIC: 2915.711089742623\n", + "\n", + "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2574171649920362\n", + " 0 1 Error Rate\n", + "----- ---- --- ------- --------------\n", + "0 2446 414 0.1448 (414.0/2860.0)\n", + "1 314 407 0.4355 (314.0/721.0)\n", + "Total 2760 821 0.2033 (728.0/3581.0)\n", + "\n", + "Maximum Metrics: Maximum metrics at their respective thresholds\n", + "metric threshold value idx\n", + "--------------------------- ----------- -------- -----\n", + "max f1 0.257417 0.527886 207\n", + "max f2 0.152297 0.626112 279\n", + "max f0point5 0.388606 0.557126 144\n", + "max accuracy 0.567259 0.835242 90\n", + "max precision 0.998809 1 0\n", + "max recall 0.00123775 1 399\n", + "max specificity 0.998809 1 0\n", + "max absolute_mcc 0.266185 0.402098 203\n", + "max 
min_per_class_accuracy 0.17828 0.706643 258\n", + "max mean_per_class_accuracy 0.214315 0.719503 234\n", + "max tns 0.998809 2860 0\n", + "max fns 0.998809 717 0\n", + "max fps 0.00626272 2860 398\n", + "max tps 0.00123775 721 399\n", + "max tnr 0.998809 1 0\n", + "max fnr 0.998809 0.994452 0\n", + "max fpr 0.00626272 1 398\n", + "max tpr 0.00123775 1 399\n", + "\n", + "Gains/Lift Table: Avg response rate: 20.13 %, avg score: 20.13 %\n", + "group cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n", + "------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n", + "1 0.0100531 0.946682 4.96671 4.96671 1 0.980507 1 0.980507 0.0499307 0.0499307 396.671 396.671 0.0499307\n", + "2 0.0201061 0.85234 4.82875 4.89773 0.972222 0.898794 0.986111 0.93965 0.0485437 0.0984743 382.875 389.773 0.0981247\n", + "3 0.0301592 0.747956 3.863 4.55282 0.777778 0.798407 0.916667 0.892569 0.038835 0.137309 286.3 355.282 0.134162\n", + "4 0.0402122 0.691234 3.17318 4.20791 0.638889 0.715245 0.847222 0.848238 0.0319001 0.169209 217.318 320.791 0.161517\n", + "5 0.0502653 0.62673 3.58707 4.08374 0.722222 0.655894 0.822222 0.809769 0.036061 0.20527 258.707 308.374 0.194082\n", + "6 0.100251 0.430246 2.44174 3.26503 0.49162 0.519975 0.657382 0.665276 0.122053 0.327323 144.174 226.503 0.284316\n", + "7 0.150237 0.345918 2.05328 2.86186 0.413408 0.383856 0.576208 0.571644 0.102635 0.429958 105.328 186.186 0.350238\n", + "8 0.200223 0.284421 1.72031 2.57687 0.346369 0.311903 0.518828 0.506799 0.0859917 0.51595 72.0314 157.687 0.395321\n", + "9 0.300195 0.209844 1.33186 2.16225 0.268156 0.243115 0.435349 0.418986 0.133148 0.649098 33.1856 116.225 0.436861\n", + "10 
0.400168 0.169573 0.887904 1.84389 0.178771 0.188252 0.371249 0.361343 0.0887656 0.737864 -11.2096 84.3888 0.422829\n", + "11 0.50014 0.138032 0.679801 1.6112 0.136872 0.152531 0.3244 0.319604 0.0679612 0.805825 -32.0199 61.1201 0.382748\n", + "12 0.600112 0.114623 0.541066 1.43293 0.108939 0.125884 0.288506 0.287332 0.0540915 0.859917 -45.8934 43.2928 0.325301\n", + "13 0.700084 0.0945753 0.527193 1.30359 0.106145 0.104779 0.262465 0.261263 0.0527046 0.912621 -47.2807 30.3589 0.266118\n", + "14 0.800056 0.0736148 0.388458 1.18924 0.0782123 0.0841132 0.239442 0.239127 0.038835 0.951456 -61.1542 18.9237 0.189568\n", + "15 0.900028 0.0521793 0.235849 1.08334 0.047486 0.0627694 0.21812 0.219538 0.0235784 0.975035 -76.4151 8.33382 0.0939158\n", + "16 1 0.00111668 0.249723 1 0.0502793 0.0375156 0.20134 0.201341 0.0249653 1 -75.0277 0 0\n", + "\n", + "ModelMetricsBinomialGLM: glm\n", + "** Reported on validation data. **\n", + "\n", + "MSE: 0.1178330502946334\n", + "RMSE: 0.3432681900418875\n", + "LogLoss: 0.3827854417227382\n", + "AUC: 0.8173891686110769\n", + "AUCPR: 0.5982392824442148\n", + "Gini: 0.6347783372221538\n", + "Null degrees of freedom: 1195\n", + "Residual degrees of freedom: 1178\n", + "Null deviance: 1193.6689319811862\n", + "Residual deviance: 915.6227766007898\n", + "AIC: 951.6227766007898\n", + "\n", + "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3129932533175332\n", + " 0 1 Error Rate\n", + "----- --- --- ------- --------------\n", + "0 863 95 0.0992 (95.0/958.0)\n", + "1 107 131 0.4496 (107.0/238.0)\n", + "Total 970 226 0.1689 (202.0/1196.0)\n", + "\n", + "Maximum Metrics: Maximum metrics at their respective thresholds\n", + "metric threshold value idx\n", + "--------------------------- ----------- -------- -----\n", + "max f1 0.312993 0.564655 155\n", + "max f2 0.187746 0.655148 231\n", + "max f0point5 0.429613 0.60794 107\n", + "max accuracy 0.443849 0.846154 105\n", + "max precision 0.993986 1 0\n", + "max recall 0.00965813 1 398\n", + 
"max specificity 0.993986 1 0\n", + "max absolute_mcc 0.312993 0.460207 155\n", + "max min_per_class_accuracy 0.195405 0.747899 225\n", + "max mean_per_class_accuracy 0.191454 0.750803 228\n", + "max tns 0.993986 958 0\n", + "max fns 0.993986 237 0\n", + "max fps 0.00749797 958 399\n", + "max tps 0.00965813 238 398\n", + "max tnr 0.993986 1 0\n", + "max fnr 0.993986 0.995798 0\n", + "max fpr 0.00749797 1 399\n", + "max tpr 0.00965813 1 398\n", + "\n", + "Gains/Lift Table: Avg response rate: 19.90 %, avg score: 21.55 %\n", + "group cumulative_data_fraction lower_threshold lift cumulative_lift response_rate score cumulative_response_rate cumulative_score capture_rate cumulative_capture_rate gain cumulative_gain kolmogorov_smirnov\n", + "------- -------------------------- ----------------- -------- ----------------- --------------- --------- -------------------------- ------------------ -------------- ------------------------- -------- ----------------- --------------------\n", + "1 0.0100334 0.942283 5.02521 5.02521 1 0.969816 1 0.969816 0.0504202 0.0504202 402.521 402.521 0.0504202\n", + "2 0.0200669 0.885942 4.60644 4.81583 0.916667 0.913326 0.958333 0.941571 0.0462185 0.0966387 360.644 381.583 0.0955948\n", + "3 0.0301003 0.833388 3.76891 4.46685 0.75 0.866251 0.888889 0.916464 0.0378151 0.134454 276.891 346.685 0.130278\n", + "4 0.0401338 0.784858 3.76891 4.29237 0.75 0.815402 0.854167 0.891199 0.0378151 0.172269 276.891 329.237 0.164962\n", + "5 0.0501672 0.721196 2.51261 3.93641 0.5 0.750593 0.783333 0.863078 0.0252101 0.197479 151.261 293.641 0.183909\n", + "6 0.100334 0.494706 3.26639 3.6014 0.65 0.59842 0.716667 0.730749 0.163866 0.361345 226.639 260.14 0.325854\n", + "7 0.150502 0.358183 2.01008 3.07096 0.4 0.41308 0.611111 0.624859 0.10084 0.462185 101.008 207.096 0.389116\n", + "8 0.200669 0.300202 2.01008 2.80574 0.4 0.327421 0.558333 0.5505 0.10084 0.563025 101.008 180.574 0.452378\n", + "9 0.300167 0.223689 1.1824 2.26764 0.235294 0.2578 0.451253 
0.453477 0.117647 0.680672 18.2402 126.764 0.475036\n", + "10 0.400502 0.176139 1.04692 1.96183 0.208333 0.196383 0.390397 0.389069 0.105042 0.785714 4.69188 96.1825 0.480913\n", + "11 0.5 0.143867 0.548973 1.68067 0.109244 0.158272 0.334448 0.343141 0.0546218 0.840336 -45.1027 68.0672 0.424887\n", + "12 0.600334 0.121438 0.502521 1.48377 0.1 0.132592 0.295265 0.307952 0.0504202 0.890756 -49.7479 48.3767 0.362573\n", + "13 0.699833 0.0997817 0.506744 1.34486 0.10084 0.110056 0.267622 0.279816 0.0504202 0.941176 -49.3256 34.4859 0.301302\n", + "14 0.800167 0.079872 0.251261 1.20773 0.05 0.0891178 0.240334 0.255904 0.0252101 0.966387 -74.8739 20.7731 0.207514\n", + "15 0.899666 0.0556184 0.211143 1.09751 0.0420168 0.0677374 0.218401 0.235094 0.0210084 0.987395 -78.8857 9.75134 0.109524\n", + "16 1 0.00749797 0.12563 1 0.025 0.0397575 0.198997 0.215495 0.012605 1 -87.437 0 0\n", + "\n", + "Scoring History: \n", + " timestamp duration iteration lambda predictors deviance_train deviance_test alpha iterations training_rmse training_logloss training_r2 training_auc training_pr_auc training_lift training_classification_error validation_rmse validation_logloss validation_r2 validation_auc validation_pr_auc validation_lift validation_classification_error\n", + "--- ------------------- ---------- ----------- -------- ------------ ------------------ ------------------ ------- ------------ ------------------ ------------------- ------------------- ------------------ ------------------ ----------------- ------------------------------- ------------------ -------------------- ------------------- ------------------ ------------------- ----------------- ---------------------------------\n", + " 2023-10-05 18:17:37 0.000 sec 1 .25E0 1 1.0045100357856405 0.9980509464725638 0.5\n", + " 2023-10-05 18:17:37 0.061 sec 3 .22E0 2 0.9946354860817245 0.9864126793254803 0.5\n", + " 2023-10-05 18:17:37 0.101 sec 5 .2E0 2 0.9859035403430406 0.9760418435888577 0.5\n", + " 2023-10-05 18:17:37 
0.130 sec 7 .19E0 2 0.9781697250290142 0.9667699237562071 0.5\n", + " 2023-10-05 18:17:37 0.164 sec 9 .17E0 3 0.9686784218176835 0.9544752889432686 0.5\n", + " 2023-10-05 18:17:37 0.201 sec 11 .15E0 3 0.9587676036112146 0.9411531794664825 0.5\n", + " 2023-10-05 18:17:37 0.220 sec 13 .14E0 3 0.9500952786201636 0.9293708568093554 0.5\n", + " 2023-10-05 18:17:37 0.268 sec 15 .13E0 3 0.9424603199349196 0.918884378267228 0.5\n", + " 2023-10-05 18:17:37 0.295 sec 17 .12E0 5 0.934143096516142 0.9080921324097252 0.5\n", + " 2023-10-05 18:17:37 0.351 sec 19 .11E0 5 0.9236448771247918 0.8955093341627816 0.5\n", + "--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n", + " 2023-10-05 18:17:38 1.462 sec 89 .13E-2 18 0.8045920176677348 0.7656454925772931 0.5\n", + " 2023-10-05 18:17:38 1.467 sec 90 .12E-2 18 0.8044581526528738 0.7656102932355412 0.5\n", + " 2023-10-05 18:17:38 1.472 sec 91 .11E-2 18 0.804343644390629 0.7655884396559107 0.5\n", + " 2023-10-05 18:17:38 1.485 sec 92 .1E-2 18 0.804246194799186 0.7655763506822736 0.5\n", + " 2023-10-05 18:17:38 1.492 sec 93 .92E-3 18 0.8041639457533155 0.7655708834454764 0.5\n", + " 2023-10-05 18:17:38 1.498 sec 94 .84E-3 18 0.8040939470898276 0.7655720881120318 0.5\n", + " 2023-10-05 18:17:38 1.502 sec 95 .77E-3 18 0.8040347348308917 0.7655776424675973 0.5\n", + " 2023-10-05 18:17:38 1.506 sec 96 .7E-3 18 0.8039850013216244 0.7655874603666687 0.5\n", + " 2023-10-05 18:17:38 1.518 sec 97 .64E-3 18 0.8039427775722173 0.7655990044572286 0.5\n", + " 2023-10-05 18:17:38 1.528 sec 98 .58E-3 18 0.8039071637538451 0.7656134388669195 0.5 98 0.3522848772592927 0.40208197287665776 0.22821675700859534 0.7849415147958837 0.5691248967099318 4.966712898751734 0.20329516894722144 0.3432681900418875 0.3827854417227382 0.26075735307166414 0.8173891686110769 0.5982392824442148 5.025210084033613 0.1688963210702341\n", + "[66 rows x 24 columns]\n", + "\n", + "\n", + "Variable Importances: \n", + "variable 
relative_importance scaled_importance percentage\n", + "-------------- --------------------- ------------------- ------------\n", + "DELINQ 0.797005 1 0.1473\n", + "JOB.Sales 0.747848 0.938323 0.138215\n", + "JOB.Office 0.510914 0.641043 0.0944259\n", + "JOB.Self 0.457133 0.573563 0.0844861\n", + "CLAGE 0.448483 0.562711 0.0828875\n", + "DEBTINC 0.446748 0.560533 0.0825667\n", + "DEROG 0.426319 0.534902 0.0787912\n", + "NINQ 0.303731 0.38109 0.0561347\n", + "VALUE 0.24126 0.302708 0.044589\n", + "MORTDUE 0.238449 0.299181 0.0440695\n", + "LOAN 0.218077 0.27362 0.0403044\n", + "REASON.HomeImp 0.179769 0.225555 0.0332244\n", + "CLNO 0.14852 0.186348 0.0274492\n", + "REASON.DebtCon 0.137384 0.172376 0.025391\n", + "YOJ 0.0438345 0.054999 0.00810138\n", + "JOB.Other 0.0353093 0.0443025 0.00652578\n", + "JOB.Mgr 0.0299615 0.0375926 0.0055374\n", + "JOB.ProfExe 0 0 0\n", + "\n", + "[tips]\n", + "Use `model.explain()` to inspect the model.\n", + "--\n", + "Use `h2o.display.toggle_user_tips()` to switch on/off this section." 
+ ] }, "execution_count": 7, "metadata": {}, @@ -243,7 +2267,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0.551348008992684, 0.8486897717666948]]\n" + "[[0.5513512979207219, 0.8486897717666948]]\n" ] } ], @@ -253,6 +2277,15 @@ "print(glm_performance.accuracy())" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "glm.pre" + ] + }, { "cell_type": "markdown", "metadata": { @@ -321,14 +2354,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "inputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\inputVar.json\n", - "inputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\inputVar.json\n", - "outputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\outputVar.json\n", - "outputVar.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\outputVar.json\n", - "ModelProperties.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\ModelProperties.json\n", - "ModelProperties.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\ModelProperties.json\n", - "fileMetadata.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OBinaryGLM\\fileMetadata.json\n", - "fileMetadata.json was successfully written and saved to ~\\examples\\data\\hmeqModels\\H2OMOJOGLM\\fileMetadata.json\n" + "inputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/inputVar.json\n", + "inputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/inputVar.json\n", + "outputVar.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/outputVar.json\n", + "outputVar.json was 
successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/outputVar.json\n", + "ModelProperties.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/ModelProperties.json\n", + "ModelProperties.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/ModelProperties.json\n", + "fileMetadata.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OBinaryGLM/fileMetadata.json\n", + "fileMetadata.json was successfully written and saved to /Users/dalmoo/Documents/GitHub/python-sasctl/examples/data/hmeqModels/H2OMOJOGLM/fileMetadata.json\n" ] } ], @@ -351,7 +2384,7 @@ " model_name=model_prefix + \"_binary\",\n", " model_desc=\"Binary H2O model.\",\n", " target_variable=y,\n", - " target_values=[\"1\", \"0\"],\n", + " target_values=[\"0\", \"1\"],\n", " json_path=binary_folder,\n", " modeler=\"sasdemo\"\n", ")\n", @@ -359,7 +2392,7 @@ " model_name=model_prefix + \"_mojo\",\n", " model_desc=\"MOJO H2O model.\",\n", " target_variable=y,\n", - " target_values=[\"1\", \"0\"],\n", + " target_values=[\"0\", \"1\"],\n", " json_path=mojo_folder,\n", " modeler=\"sasdemo\"\n", ")\n", @@ -435,36 +2468,45 @@ }, { "cell_type": "markdown", - "source": [ - "### Run a Score Test in SAS Model Manager" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Run a Score Test in SAS Model Manager" + ] }, { "cell_type": "code", "execution_count": 15, - "outputs": [], - "source": [ - "# Publish the model to the SAS Microanalytic Score destination in SAS Model Manager\n", - "module = publish_model(mojo_model[0], \"maslocal\", name=\"HMEQMOJO_publish\", replace=True)" - ], "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2023-06-14T17:18:16.283114900Z", "start_time": "2023-06-14T17:18:08.890088300Z" - } - } + }, + 
"collapsed": false + }, + "outputs": [], + "source": [ + "# Publish the model to the SAS Microanalytic Score destination in SAS Model Manager\n", + "module = publish_model(mojo_model[0], \"maslocal\", name=\"HMEQMOJO_publish\", replace=True)" + ] }, { "cell_type": "code", "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-14T17:18:34.236925200Z", + "start_time": "2023-06-14T17:18:34.209029800Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "" + "text/plain": [ + "" + ] }, "execution_count": 16, "metadata": {}, @@ -474,18 +2516,18 @@ "source": [ "# Instantiate a API call logger to visualize score calls in realtime\n", "sess.add_stderr_logger(level=20)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-06-14T17:18:34.236925200Z", - "start_time": "2023-06-14T17:18:34.209029800Z" - } - } + ] }, { "cell_type": "code", "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-14T17:18:47.064393600Z", + "start_time": "2023-06-14T17:18:37.850162200Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stderr", @@ -521,18 +2563,18 @@ "# Step through the rows of data and collect the score from SAS Microanalytic Score publish destination\n", "for index, row in X.iterrows():\n", " result.append(module.score(row))" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-06-14T17:18:47.064393600Z", - "start_time": "2023-06-14T17:18:37.850162200Z" - } - } + ] }, { "cell_type": "code", "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-14T17:19:21.811834300Z", + "start_time": "2023-06-14T17:19:21.772752700Z" + }, + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -554,33 +2596,23 @@ "source": [ "# Scoring results\n", "pp.pprint(result)" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-06-14T17:19:21.811834300Z", - "start_time": "2023-06-14T17:19:21.772752700Z" - } - } + 
] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { "collapsed": false - } + }, + "outputs": [], + "source": [] } ], "metadata": { - "interpreter": { - "hash": "f9708d3f38eeab835578f0695c8890716ee809285281a28db6e379a5abca1310" - }, "kernelspec": { - "display_name": "dev-py38", + "display_name": "yeehaw", "language": "python", - "name": "dev-py38" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/examples/pzmm_tensorflow_keras_model_import.ipynb b/examples/pzmm_tensorflow_keras_model_import.ipynb index df0fa399..e45e702b 100644 --- a/examples/pzmm_tensorflow_keras_model_import.ipynb +++ b/examples/pzmm_tensorflow_keras_model_import.ipynb @@ -753,7 +753,7 @@ " input_data=x, # What does example input data look like?\n", " predict_method=[model.predict, [int, int]], # What is the predict method and what does it return?\n", " overwrite_model=True, # Overwrite model if it arleady exists\n", - " target_values=[\"1\", \"0\"], # What are the expecte values of the target variable?\n", + " target_values=[\"0\", \"1\"], # What are the expecte values of the target variable?\n", " score_metrics=score_metrics, # What are the output variables?\n", " model_file_name = model_prefix + \".h5\", # How was the model file serialized?\n", " missing_values = True, # Does the data include missing values?\n", From cb4975b8070bd7e7e0d0442c2229eb05f8cdf41f Mon Sep 17 00:00:00 2001 From: Scott Lindauer Date: Mon, 23 Oct 2023 14:44:53 -0400 Subject: [PATCH 4/7] Swap argument order for _binary_target so that h2o is not always True --- src/sasctl/pzmm/write_score_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index c56c4c50..1a223126 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -1275,8 +1275,8 @@ def _binary_target( target_values: List[str], returns: List[Any], threshold: 
Optional[float] = None, - h2o_model: Optional[bool] = None, target_index: Optional[int] = 1, + h2o_model: Optional[bool] = None, ) -> None: """ Handle binary model prediction outputs. From cd2082b974294bdf452c09d032d9afe9eccad9e1 Mon Sep 17 00:00:00 2001 From: djm21 Date: Tue, 24 Oct 2023 04:27:16 -0500 Subject: [PATCH 5/7] Tentative updates to CHANGELOG.md and __init__.py, as well as fixing example in write_score_code.py. --- CHANGELOG.md | 9 +++++++++ src/sasctl/__init__.py | 2 +- src/sasctl/pzmm/write_score_code.py | 15 ++++++++++----- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf626aa7..70af17f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ Unreleased - Add `model_info` class to better capture model information. - Test `/examples` Jupyter notebooks within normal test suite. +v1.10.1 (2023-08-24) +---------- +**Improvements** +- Introduced ability to specify the target index of a binary model when creating score code. + - index can be specified in `pzmm.import_model.ImportModel.import_model()` + +**Bugfixes** +- Reworked `write_score_code.py` to allow for proper execution of single line scoring. + v1.10 (2023-08-31) ---------- **Improvements** diff --git a/src/sasctl/__init__.py b/src/sasctl/__init__.py index 15f71c6d..e0216d92 100644 --- a/src/sasctl/__init__.py +++ b/src/sasctl/__init__.py @@ -4,7 +4,7 @@ # Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 -__version__ = "1.10.0" +__version__ = "1.10.1" __author__ = "SAS" __credits__ = [ "Yi Jian Ching", diff --git a/src/sasctl/pzmm/write_score_code.py b/src/sasctl/pzmm/write_score_code.py index 1a223126..056097c6 100644 --- a/src/sasctl/pzmm/write_score_code.py +++ b/src/sasctl/pzmm/write_score_code.py @@ -1453,11 +1453,16 @@ def _binary_target( ) """ if input_array.shape[0] == 1: - return prediction[1][0], float(prediction[1][2]) + if prediction[1][1] > 0.5: + Classification = '1' + else: + Classification = '0' + return EM_CLASSIFICATION, float(prediction[1][1]) else: - output_table = prediction.drop(prediction.columns[1], axis=1) - output_table.columns = ['Classification', 'Probability'] - return output_table + output_table = prediction.drop(prediction.columns[2], axis=1) + classifications = np.where(prediction[prediction.columns[1]] > 0.5, '0', '1') + output_table.columns = ['EM_CLASSIFICATION', 'EM_EVENTPROBABILITY'] + output_table['EM_CLASSIFICATION'] = classifications """ # Calculate the classification; return the classification and probability elif sum(returns) == 0 and len(returns) == 1: @@ -1787,7 +1792,7 @@ def _nonbinary_targets( elif len(returns) == 1: cls.score_code += ( f"{'':4}if input_array.shape[0] == 1:\n" - f"{'':8}return prediction[0]\n" + f"{'':8}return prediction[0][0]\n" f"{'':4}else:\n" f"{'':8}return pd.DataFrame({{'{metrics}': prediction}})" ) From 31e1bbcab1445ed6e9b9199564b9ffba73859b95 Mon Sep 17 00:00:00 2001 From: djm21 Date: Wed, 25 Oct 2023 13:34:10 -0500 Subject: [PATCH 6/7] minor updates to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70af17f1..6f4949e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,11 @@ v1.10.1 (2023-08-24) **Improvements** - Introduced ability to specify the target index of a binary model when creating score code. 
- index can be specified in `pzmm.import_model.ImportModel.import_model()` + - Relevant examples updated to include target_index. **Bugfixes** - Reworked `write_score_code.py` to allow for proper execution of single line scoring. +- Added template files for `assess_model_bias.py` to allow for proper execution v1.10 (2023-08-31) ---------- From bb40fa0cf46d4a329f4e5f35379f5cf7ab84a55f Mon Sep 17 00:00:00 2001 From: djm21 Date: Wed, 25 Oct 2023 14:01:58 -0500 Subject: [PATCH 7/7] unchange __init__.py to allow for prep for release commit --- src/sasctl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sasctl/__init__.py b/src/sasctl/__init__.py index e0216d92..15f71c6d 100644 --- a/src/sasctl/__init__.py +++ b/src/sasctl/__init__.py @@ -4,7 +4,7 @@ # Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -__version__ = "1.10.1" +__version__ = "1.10.0" __author__ = "SAS" __credits__ = [ "Yi Jian Ching",