Skip to content

Commit

Permalink
Merge pull request #3 from getml/parameter-revision
Browse files Browse the repository at this point in the history
patched score function
  • Loading branch information
jan-meyer-1986 authored Nov 21, 2024
2 parents 7bb016b + 4caf022 commit df9ad57
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 14 deletions.
2 changes: 0 additions & 2 deletions mlflow/getml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,7 @@ def _load_model(path):
import getml

Check failure on line 305 in mlflow/getml/__init__.py

View workflow job for this annotation

GitHub Actions / lint

[*] Import block is un-sorted or un-formatted. Run `ruff --fix .` or comment `/autoformat` to fix this error.
import shutil

import pdb

pdb.set_trace()
with open(os.path.join(path, "getml.yaml")) as f:
getml_settings = yaml.safe_load(f.read())

Expand Down
70 changes: 58 additions & 12 deletions mlflow/getml/autologging.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from mlflow.utils.autologging_utils.client import MlflowAutologgingQueueingClient



@dataclass
class LogInfo:
params: dict[str, Any] = field(default_factory=dict)
Expand Down Expand Up @@ -50,7 +51,7 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
"feature_learners",
"feature_selectors",
"predictors",
"loss_function",
"share_selected_features",
)
pipeline_informations = {}

Expand All @@ -63,15 +64,27 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
for field in fields(v):
field_value = getattr(v, field.name)
if isinstance(field_value, (frozenset, set)):
field_value = json.dumps(list(field_value))
try:
field_value = json.dumps(list(field_value))
except:
print("Error in converting frozenset to list")
elif isinstance(field_value, getml.feature_learning.FastProp):
field_value = field_value.__class__.__name__
elif not isinstance(field_value, str):
field_value = json.dumps(field_value)
try:
field_value = json.dumps(field_value)
except:
print("Error in converting field_value to json")
print(field_value)

pipeline_informations[f"{parameter_name}.{name}.{field.name}"] = (
field_value
)
# else:
# value_name = values.__class__.__name__
# pipeline_informations[parameter_name] = value_name
elif isinstance(values, str):
pipeline_informations[parameter_name] = values
else:
value_name = values.__class__.__name__
pipeline_informations[parameter_name] = value_name
tags = [str(t) for t in getml_pipeline.tags]
return LogInfo(params=pipeline_informations, tags=dict(zip(tags, tags)))

Expand All @@ -85,14 +98,14 @@ def _extract_fitted_pipeline_informations(getml_pipeline: getml.Pipeline) -> Log
scores = getml_pipeline.scores

if getml_pipeline.is_classification:
metrics["auc"] = scores.auc
metrics["accuracy"] = scores.accuracy
metrics["cross_entropy"] = scores.cross_entropy
metrics["train_auc"] = round(scores.auc,2)
metrics["train_accuracy"] = round(scores.accuracy, 2)
metrics["train_cross_entropy"] = round(scores.cross_entropy, 4)

if getml_pipeline.is_regression:
metrics["mae"] = scores.mae
metrics["rmse"] = scores.rmse
metrics["rsquared"] = scores.rsquared
metrics["train_mae"] = scores.mae
metrics["train_rmse"] = scores.rmse
metrics["train_rsquared"] = round(scores.rsquared, 2)

# for feature in getml_pipeline.features:
# metrics[f"{feature.name}.importance"] = json.dumps(feature.importance)
Expand Down Expand Up @@ -150,6 +163,10 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
assert (active_run := mlflow.active_run())
run_id = active_run.info.run_id
pipeline_log_info = _extract_pipeline_informations(self)
# with open("my_dict.json", "w") as f:
# json.dump(pipeline_log_info.params, f)
# mlflow.log_artifact("my_dict.json")
# mlflow.log_dict(pipeline_log_info.params, 'params.json')
autologging_client.log_params(
run_id=run_id,
params=pipeline_log_info.params,
Expand Down Expand Up @@ -186,6 +203,27 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):

autologging_client.flush(synchronous=True)
return fit_output

def patched_score_method(original, self: getml.Pipeline, *args, **kwargs):
    """Patched ``getml.Pipeline.score`` that also logs an MLflow evaluation.

    Builds a pandas DataFrame from the population table of the container in
    ``args[0]``, attaches this pipeline's predictions, runs
    ``mlflow.evaluate`` on it, then delegates to the original ``score``.

    Args:
        original: The unpatched ``Pipeline.score`` method.
        self: The fitted getML pipeline being scored.
        *args: Positional args forwarded to ``predict``/``score``; ``args[0]``
            must be a container exposing a ``population`` with ``to_pandas()``.
        **kwargs: Keyword args forwarded to the original ``score``.

    Returns:
        Whatever the original ``score`` call returns, unchanged.
    """
    # First target column of the population — assumes the data model defines
    # at least one target role.
    target = self.data_model.population.roles.target[0]
    pop_df = args[0].population.to_pandas()
    pop_df["predictions"] = self.predict(*args)

    if self.is_classification:
        # Threshold the predicted probabilities at 0.5 and compare targets as
        # booleans. This must NOT run for regression: rounding continuous
        # predictions (and the target) to bool would corrupt the regressor
        # metrics computed below.
        pop_df["predictions"] = (
            pop_df.round({"predictions": 0})["predictions"].astype(bool)
        )
        pop_df[target] = pop_df[target].astype(bool)

    mlflow.evaluate(
        data=pop_df,
        targets=target,
        predictions="predictions",
        # Plain conditional instead of the one-element-list-index idiom.
        model_type="regressor" if self.is_regression else "classifier",
        evaluators=["default"],
    )

    return original(self, *args, **kwargs)


_patch_pipeline_method(
flavor_name=flavor_name,
Expand All @@ -194,3 +232,11 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
patched_fn=patched_fit_mlflow,
manage_run=True,
)

# Monkey-patch getml.Pipeline.score with patched_score_method so each scoring
# call additionally runs mlflow.evaluate on the pipeline's predictions.
_patch_pipeline_method(
    flavor_name=flavor_name,
    class_def=getml.pipeline.Pipeline,
    func_name="score",
    patched_fn=patched_score_method,
    # NOTE(review): presumably manage_run=True wraps the call in an MLflow run
    # when none is active — confirm against _patch_pipeline_method.
    manage_run=True,
)

0 comments on commit df9ad57

Please sign in to comment.