diff --git a/mlflow/getml/__init__.py b/mlflow/getml/__init__.py
index 72e6f2752d52c..391915ff7dc88 100644
--- a/mlflow/getml/__init__.py
+++ b/mlflow/getml/__init__.py
@@ -305,9 +305,7 @@ def _load_model(path):
     import getml
     import shutil
 
-    import pdb
-    pdb.set_trace()
 
     with open(os.path.join(path, "getml.yaml")) as f:
         getml_settings = yaml.safe_load(f.read())
diff --git a/mlflow/getml/autologging.py b/mlflow/getml/autologging.py
index f5f64d9dc0adc..f7b34d268235b 100644
--- a/mlflow/getml/autologging.py
+++ b/mlflow/getml/autologging.py
@@ -9,6 +9,7 @@
 from mlflow.utils.autologging_utils.client import MlflowAutologgingQueueingClient
 
+
 @dataclass
 class LogInfo:
     params: dict[str, Any] = field(default_factory=dict)
     tags: dict[str, str] = field(default_factory=dict)
@@ -50,7 +51,7 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
         "feature_learners",
         "feature_selectors",
         "predictors",
-        "loss_function",
+        "share_selected_features",
     )
 
     pipeline_informations = {}
@@ -63,15 +64,25 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
             for field in fields(v):
                 field_value = getattr(v, field.name)
                 if isinstance(field_value, (frozenset, set)):
-                    field_value = json.dumps(list(field_value))
+                    try:
+                        field_value = json.dumps(list(field_value))
+                    except TypeError:
+                        # Non-JSON-serializable members: fall back to a readable repr.
+                        field_value = str(list(field_value))
+                elif isinstance(field_value, getml.feature_learning.FastProp):
+                    field_value = field_value.__class__.__name__
                 elif not isinstance(field_value, str):
-                    field_value = json.dumps(field_value)
+                    try:
+                        field_value = json.dumps(field_value)
+                    except TypeError:
+                        field_value = str(field_value)
                 pipeline_informations[f"{parameter_name}.{name}.{field.name}"] = (
                     field_value
                 )
-        # else:
-        #     value_name = values.__class__.__name__
-        #     pipeline_informations[parameter_name] = value_name
+        elif isinstance(values, str):
+            pipeline_informations[parameter_name] = values
+        else:
+            pipeline_informations[parameter_name] = values.__class__.__name__
 
     tags = [str(t) for t in getml_pipeline.tags]
     return LogInfo(params=pipeline_informations, tags=dict(zip(tags, tags)))
@@ -85,14 +96,14 @@ def _extract_fitted_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
     scores = getml_pipeline.scores
 
     if getml_pipeline.is_classification:
-        metrics["auc"] = scores.auc
-        metrics["accuracy"] = scores.accuracy
-        metrics["cross_entropy"] = scores.cross_entropy
+        metrics["train_auc"] = round(scores.auc, 2)
+        metrics["train_accuracy"] = round(scores.accuracy, 2)
+        metrics["train_cross_entropy"] = round(scores.cross_entropy, 4)
 
     if getml_pipeline.is_regression:
-        metrics["mae"] = scores.mae
-        metrics["rmse"] = scores.rmse
-        metrics["rsquared"] = scores.rsquared
+        metrics["train_mae"] = scores.mae
+        metrics["train_rmse"] = scores.rmse
+        metrics["train_rsquared"] = round(scores.rsquared, 2)
 
     # for feature in getml_pipeline.features:
     #     metrics[f"{feature.name}.importance"] = json.dumps(feature.importance)
@@ -186,6 +197,24 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
         autologging_client.flush(synchronous=True)
 
         return fit_output
+
+    def patched_score_method(original, self: getml.Pipeline, *args, **kwargs):
+        """Run an mlflow evaluation of the pipeline's predictions, then delegate to score()."""
+        target = self.data_model.population.roles.target[0]
+        pop_df = args[0].population.to_pandas()
+        pop_df["predictions"] = self.predict(*args)
+        if self.is_classification:
+            # mlflow's classifier evaluator expects boolean targets/predictions;
+            # regression data must pass through untouched.
+            pop_df["predictions"] = pop_df.round({"predictions": 0})["predictions"].astype(bool)
+            pop_df[target] = pop_df[target].astype(bool)
+        mlflow.evaluate(
+            data=pop_df,
+            targets=target,
+            predictions="predictions",
+            model_type="regressor" if self.is_regression else "classifier",
+            evaluators=["default"],
+        )
+        return original(self, *args, **kwargs)
 
     _patch_pipeline_method(
         flavor_name=flavor_name,
@@ -194,3 +223,11 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
         patched_fn=patched_fit_mlflow,
         manage_run=True,
     )
+
+    _patch_pipeline_method(
+        flavor_name=flavor_name,
+        class_def=getml.pipeline.Pipeline,
+        func_name="score",
+        patched_fn=patched_score_method,
+        manage_run=True,
+    )