Skip to content

Commit

Permalink
Merge pull request #3 from getml/parameter-revision
Browse files Browse the repository at this point in the history
patched score function
  • Loading branch information
jan-meyer-1986 authored Nov 21, 2024
2 parents 7bb016b + 4caf022 commit df9ad57
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 14 deletions.
2 changes: 0 additions & 2 deletions mlflow/getml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,7 @@ def _load_model(path):
import getml

Check failure on line 305 in mlflow/getml/__init__.py

View workflow job for this annotation

GitHub Actions / lint

[*] Import block is un-sorted or un-formatted. Run `ruff --fix .` or comment `/autoformat` to fix this error.
import shutil

import pdb

pdb.set_trace()
with open(os.path.join(path, "getml.yaml")) as f:
getml_settings = yaml.safe_load(f.read())

Expand Down
70 changes: 58 additions & 12 deletions mlflow/getml/autologging.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from mlflow.utils.autologging_utils.client import MlflowAutologgingQueueingClient



@dataclass
class LogInfo:
params: dict[str, Any] = field(default_factory=dict)
Expand Down Expand Up @@ -50,7 +51,7 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
"feature_learners",
"feature_selectors",
"predictors",
"loss_function",
"share_selected_features",
)
pipeline_informations = {}

Expand All @@ -63,15 +64,27 @@ def _extract_pipeline_informations(getml_pipeline: getml.Pipeline) -> LogInfo:
for field in fields(v):
field_value = getattr(v, field.name)
if isinstance(field_value, (frozenset, set)):
field_value = json.dumps(list(field_value))
try:
field_value = json.dumps(list(field_value))
except:
print("Error in converting frozenset to list")
elif isinstance(field_value, getml.feature_learning.FastProp):
field_value = field_value.__class__.__name__
elif not isinstance(field_value, str):
field_value = json.dumps(field_value)
try:
field_value = json.dumps(field_value)
except:
print("Error in converting field_value to json")
print(field_value)

pipeline_informations[f"{parameter_name}.{name}.{field.name}"] = (
field_value
)
# else:
# value_name = values.__class__.__name__
# pipeline_informations[parameter_name] = value_name
elif isinstance(values, str):
pipeline_informations[parameter_name] = values
else:
value_name = values.__class__.__name__
pipeline_informations[parameter_name] = value_name
tags = [str(t) for t in getml_pipeline.tags]
return LogInfo(params=pipeline_informations, tags=dict(zip(tags, tags)))

Expand All @@ -85,14 +98,14 @@ def _extract_fitted_pipeline_informations(getml_pipeline: getml.Pipeline) -> Log
scores = getml_pipeline.scores

if getml_pipeline.is_classification:
metrics["auc"] = scores.auc
metrics["accuracy"] = scores.accuracy
metrics["cross_entropy"] = scores.cross_entropy
metrics["train_auc"] = round(scores.auc,2)
metrics["train_accuracy"] = round(scores.accuracy, 2)
metrics["train_cross_entropy"] = round(scores.cross_entropy, 4)

if getml_pipeline.is_regression:
metrics["mae"] = scores.mae
metrics["rmse"] = scores.rmse
metrics["rsquared"] = scores.rsquared
metrics["train_mae"] = scores.mae
metrics["train_rmse"] = scores.rmse
metrics["train_rsquared"] = round(scores.rsquared, 2)

# for feature in getml_pipeline.features:
# metrics[f"{feature.name}.importance"] = json.dumps(feature.importance)
Expand Down Expand Up @@ -150,6 +163,10 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
assert (active_run := mlflow.active_run())
run_id = active_run.info.run_id
pipeline_log_info = _extract_pipeline_informations(self)
# with open("my_dict.json", "w") as f:
# json.dump(pipeline_log_info.params, f)
# mlflow.log_artifact("my_dict.json")
# mlflow.log_dict(pipeline_log_info.params, 'params.json')
autologging_client.log_params(
run_id=run_id,
params=pipeline_log_info.params,
Expand Down Expand Up @@ -186,6 +203,27 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):

autologging_client.flush(synchronous=True)
return fit_output

def patched_score_method(original, self: getml.Pipeline, *args, **kwargs):
    """Patched ``getml.Pipeline.score`` that also logs an MLflow evaluation.

    Builds a pandas DataFrame from the population table of the container in
    ``args[0]``, attaches this pipeline's predictions, runs
    ``mlflow.evaluate`` on it, then delegates to the original ``score``.

    Args:
        original: The unpatched ``Pipeline.score`` method.
        self: The fitted getML pipeline being scored.
        *args: Positional args forwarded to ``predict``/``score``; ``args[0]``
            must be a container exposing a ``population`` with ``to_pandas()``.
        **kwargs: Keyword args forwarded to the original ``score``.

    Returns:
        Whatever the original ``score`` call returns, unchanged.
    """
    # First target column of the population — assumes the data model defines
    # at least one target role.
    target = self.data_model.population.roles.target[0]
    pop_df = args[0].population.to_pandas()
    pop_df["predictions"] = self.predict(*args)

    if self.is_classification:
        # Threshold the predicted probabilities at 0.5 and compare targets as
        # booleans. This must NOT run for regression: rounding continuous
        # predictions (and the target) to bool would corrupt the regressor
        # metrics computed below.
        pop_df["predictions"] = (
            pop_df.round({"predictions": 0})["predictions"].astype(bool)
        )
        pop_df[target] = pop_df[target].astype(bool)

    mlflow.evaluate(
        data=pop_df,
        targets=target,
        predictions="predictions",
        # Plain conditional instead of the one-element-list-index idiom.
        model_type="regressor" if self.is_regression else "classifier",
        evaluators=["default"],
    )

    return original(self, *args, **kwargs)


_patch_pipeline_method(
flavor_name=flavor_name,
Expand All @@ -194,3 +232,11 @@ def patched_fit_mlflow(original, self: getml.Pipeline, *args, **kwargs):
patched_fn=patched_fit_mlflow,
manage_run=True,
)

# Monkey-patch getml.Pipeline.score with patched_score_method so each scoring
# call additionally runs mlflow.evaluate on the pipeline's predictions.
_patch_pipeline_method(
    flavor_name=flavor_name,
    class_def=getml.pipeline.Pipeline,
    func_name="score",
    patched_fn=patched_score_method,
    # NOTE(review): presumably manage_run=True wraps the call in an MLflow run
    # when none is active — confirm against _patch_pipeline_method.
    manage_run=True,
)

0 comments on commit df9ad57

Please sign in to comment.