Issue #21: Diabetes Mini MLOps notebooks: added alias to model URI for scoring plus overall updates
amesar committed Dec 5, 2023
1 parent 6549abc commit acd782a
Showing 10 changed files with 75 additions and 89 deletions.
14 changes: 8 additions & 6 deletions databricks/notebooks/_README.py
@@ -1,11 +1,13 @@
# Databricks notebook source
# MAGIC %md ## README - MLflow Model Notebooks
# MAGIC
# MAGIC * [basic]($basic/_README) - Basic MLflow model notebooks.
# MAGIC * [llama2]($llama2/_README) - Simple example for Databricks Marketplace Llama 2 model.
# MAGIC * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples.
# MAGIC * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset.
# MAGIC * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog).
# MAGIC * [includes]($includes) - Common include notebooks.
# MAGIC * [basic]($basic/_README) - Basic MLflow model example notebooks.
# MAGIC * LLM
# MAGIC   * [llama2]($llama2/_README) - Simple example for Databricks Marketplace Llama 2 model.
# MAGIC   * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples.
# MAGIC * Mini MLOps examples
# MAGIC   * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog).
# MAGIC   * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset. Uses non-UC model version stages.
# MAGIC * [./includes]($includes) - Common include notebooks.
# MAGIC
# MAGIC Last updated: 2023-12-04
6 changes: 1 addition & 5 deletions databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py
@@ -1,16 +1,12 @@
# Databricks notebook source
# MAGIC %md # MLflow quickstart: training and logging
# MAGIC %md # Train Diabetes Model
# MAGIC
# MAGIC This tutorial is based on the MLflow [ElasticNet Diabetes example](https://github.com/mlflow/mlflow/tree/master/examples/sklearn_elasticnet_diabetes). It illustrates how to use MLflow to track the model training process, including logging model parameters, metrics, the model itself, and other artifacts like plots. It also includes instructions for viewing the logged results in the MLflow tracking UI.
# MAGIC
# MAGIC This notebook uses the scikit-learn `diabetes` dataset and predicts the progression metric (a quantitative measure of disease progression after one year) based on BMI, blood pressure, and other measurements. It uses the scikit-learn ElasticNet linear regression model, varying the `alpha` and `l1_ratio` parameters for tuning. For more information on ElasticNet, refer to:
# MAGIC * [Elastic net regularization](https://en.wikipedia.org/wiki/Elastic_net_regularization)
# MAGIC * [Regularization and Variable Selection via the Elastic Net](https://web.stanford.edu/~hastie/TALKS/enet_talk.pdf)
# MAGIC * [sklearn.datasets.load_diabetes](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html#sklearn.datasets.load_diabetes)
# MAGIC
# MAGIC ### Requirements
# MAGIC * This notebook requires Databricks Runtime ML 13.x or above.
# MAGIC
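The training cells themselves are collapsed in this diff. A minimal sketch of what the notebook describes — one MLflow run per hyperparameter combination, logging params, the `rmse` metric, and the model — assuming standard `mlflow.sklearn` calls (the parameter values below are illustrative, not the notebook's actual grid):

```python
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the scikit-learn diabetes dataset; `progression` is the label
X, y = load_diabetes(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# One MLflow run per (alpha, l1_ratio) candidate
for alpha, l1_ratio in [(0.01, 0.25), (0.05, 0.5), (0.1, 0.75)]:
    with mlflow.start_run():
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        model.fit(X_train, y_train)
        rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
        mlflow.log_params({"alpha": alpha, "l1_ratio": l1_ratio})
        mlflow.log_metric("rmse", rmse)
        mlflow.sklearn.log_model(model, "model")
```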

# COMMAND ----------

14 changes: 4 additions & 10 deletions databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py
@@ -1,12 +1,9 @@
# Databricks notebook source
# MAGIC %md ## Register Best Model Run in Model Registry
# MAGIC * Creates a new registered model if it doesn't already exist.
# MAGIC * Deletes all current model versions (optional).
# MAGIC * Finds the best model (lowest 'rmse' metric) generated from [01_Train_Model]($01_Train_Model) notebook experiment.
# MAGIC * Adds the best run as a registered model version and promotes it to the `production` stage.
# MAGIC
# MAGIC ##### Notes
# MAGIC * andre_catalog.ml_models2.diabetes_mlops
# MAGIC * Deletes any existing model versions.
# MAGIC * Finds the best model (lowest 'rmse' metric) generated from the [01_Train_Model]($01_Train_Model) notebook.
# MAGIC * Adds the best run as a registered model version and assigns the 'champ' alias to it.
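The lookup-and-register cells are collapsed here. A hedged sketch of the flow the bullets describe, using `mlflow_client` from the Common include; the `_experiment_name` variable is hypothetical, while `set_registered_model_alias` matches the call visible below:

```python
# Assumed: _experiment_name points at the 01_Train_Model experiment
experiment = mlflow_client.get_experiment_by_name(_experiment_name)

# Best run = lowest rmse
best_run = mlflow_client.search_runs(
    [experiment.experiment_id], order_by=["metrics.rmse ASC"], max_results=1
)[0]

# Register the best run's model, then point the alias at the new version
version = mlflow.register_model(f"runs:/{best_run.info.run_id}/model", model_name)
mlflow_client.set_registered_model_alias(model_name, _alias, version.version)
```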

# COMMAND ----------

@@ -97,11 +94,8 @@

# COMMAND ----------

_alias

# COMMAND ----------

mlflow_client.set_registered_model_alias(model_name, _alias, version.version)
_alias
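As a sanity check (not part of the original cells), the alias can be resolved back to a version:

```python
# Assumed check: confirm the alias now resolves to the newly registered version
mv = mlflow_client.get_model_version_by_alias(model_name, _alias)
print(f"alias '{_alias}' -> version {mv.version}")
```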

# COMMAND ----------

37 changes: 29 additions & 8 deletions databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py
@@ -6,7 +6,7 @@

# COMMAND ----------

# MAGIC %md ### Setup
# MAGIC %md #### Setup

# COMMAND ----------

@@ -22,12 +22,16 @@
table_name = dbutils.widgets.get("2. Table")
table_name = table_name or None

dbutils.widgets.text("3. Alias", _alias)
alias = dbutils.widgets.get("3. Alias")

print("model_name:", model_name)
print("table_name:", table_name)
print("alias:", alias)

# COMMAND ----------

# MAGIC %md ### Prepare scoring data
# MAGIC %md #### Prepare scoring data
# MAGIC * Drop the label column `progression`
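The prep cell is collapsed in this diff. A minimal sketch, assuming the table widget feeds a Spark read and `data` is the working name:

```python
# Hypothetical: read the scoring table and drop the label column
data = spark.table(table_name).drop("progression")
display(data)
```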

# COMMAND ----------
@@ -37,15 +41,28 @@

# COMMAND ----------

# MAGIC %md #### Prepare model URI
# MAGIC * A model URI can reference a model version either by its version number or by an alias.
# MAGIC * With version number: `models:/my_catalog.models.diabetes_mlops/1`
# MAGIC * With alias: `models:/my_catalog.models.diabetes_mlops@alias`

# COMMAND ----------

if alias:
    model_uri = f"models:/{model_name}@{alias}"
else:
    model_uri = f"models:/{model_name}/1"
model_uri

# COMMAND ----------

# MAGIC
# MAGIC %md ### Score with native Sklearn flavor
# MAGIC * Executes only on the driver node of the cluster
# MAGIC %md #### Score with native Sklearn flavor
# MAGIC * Executes only on the driver node

# COMMAND ----------

import pandas as pd
model_uri = f"models:/{model_name}/1"
model_uri
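The predict cell is collapsed; a hedged sketch of sklearn-flavor scoring with the URI prepared above (`data` as in the prep sketch earlier):

```python
import mlflow.sklearn
import pandas as pd

# Load the raw scikit-learn model and score a pandas DataFrame on the driver
model = mlflow.sklearn.load_model(model_uri)
predictions = model.predict(data.toPandas())
display(pd.DataFrame(predictions, columns=["prediction"]))
```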

# COMMAND ----------

@@ -55,7 +72,11 @@

# COMMAND ----------

# MAGIC %md ### Score with Pyfunc flavor


# COMMAND ----------

# MAGIC %md #### Score with Pyfunc flavor
# MAGIC * Executes only on the driver node of the cluster
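A corresponding hedged sketch for the pyfunc flavor — same URI, flavor-agnostic `predict()`:

```python
import mlflow.pyfunc
import pandas as pd

# Pyfunc wraps any logged flavor behind a uniform pandas-in/pandas-out API
model = mlflow.pyfunc.load_model(model_uri)
predictions = model.predict(data.toPandas())
display(pd.DataFrame(predictions, columns=["prediction"]))
```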

# COMMAND ----------
@@ -68,7 +89,7 @@

# COMMAND ----------

# MAGIC %md ### Distributed scoring with UDF
# MAGIC %md #### Distributed scoring with UDF
# MAGIC * Executes on all worker nodes of the cluster.
# MAGIC * UDF wraps the Sklearn model.
# MAGIC * Pass a Spark dataframe to the UDF.
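A sketch of the UDF path under the same assumptions (`data` is the Spark DataFrame prepared earlier):

```python
import mlflow.pyfunc

# Wrap the model as a Spark UDF so scoring fans out across worker nodes
udf = mlflow.pyfunc.spark_udf(spark, model_uri)
predictions = data.withColumn("prediction", udf(*data.columns))
display(predictions)
```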
databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py
@@ -19,42 +19,28 @@

# COMMAND ----------

#dbutils.widgets.removeAll()

# COMMAND ----------

dbutils.widgets.text("1. Registered model", "")
model_name = dbutils.widgets.get("1. Registered model")

dbutils.widgets.text("2. Model version", "")
version = dbutils.widgets.get("2. Model version")

dbutils.widgets.text("3. Model serving endpoint", _endpoint_name)
endpoint_name = dbutils.widgets.get("3. Model serving endpoint")
dbutils.widgets.text("2. Model serving endpoint", _endpoint_name)
endpoint_name = dbutils.widgets.get("2. Model serving endpoint")

print("model_name:", model_name)
print("version:", version)
print("endpoint_name:", endpoint_name)
print("_alias:", _alias)

# COMMAND ----------

assert_widget(model_name, "1. Registered model")
assert_widget(model_name, "2. Model version")
assert_widget(model_name, "3. Model serving endpoint")
assert_widget(endpoint_name, "2. Model serving endpoint")

# COMMAND ----------

# MAGIC %md #### List all endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

import pandas as pd

endpoints = model_serving_client.list_endpoints()
if len(endpoints) > 0:
    lst = [ ( e["name"], e["creator"] ) for e in endpoints ]
    df = pd.DataFrame(lst, columns = ["Name","Creator"])
    display(spark.createDataFrame(df))
list_model_serving_endpoints()

# COMMAND ----------

@@ -85,21 +71,17 @@

# COMMAND ----------

_alias

# COMMAND ----------

model = mlflow_client.get_registered_model(model_name)
dump_obj(model)

# COMMAND ----------

version = model.aliases[_alias]
version
print("version:", version)

# COMMAND ----------

# MAGIC %md #### Define endpoint spec
# MAGIC %md #### Define endpoint config spec

# COMMAND ----------
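The spec cell itself is collapsed in this diff. A hypothetical config in the Databricks serving API's `served_entities` shape — field values are illustrative, not the notebook's actual spec:

```python
spec = {
    "name": endpoint_name,
    "config": {
        "served_entities": [
            {
                "entity_name": model_name,    # UC model: catalog.schema.model
                "entity_version": version,    # resolved from the alias above
                "workload_size": "Small",
                "scale_to_zero_enabled": True,
            }
        ]
    },
}
```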

databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py
@@ -59,12 +59,12 @@
import json
import requests

data = json.dumps(data)

headers = { "Authorization": f"Bearer {token}", "Content-Type": "application/json" }
headers = { "Authorization": f"Bearer {_token}", "Content-Type": "application/json" }
rsp = requests.post(endpoint_uri, headers=headers, data=data, timeout=15)
rsp.text
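For reference, `data` is expected in Databricks' `dataframe_split` serving format; an illustrative payload for the ten diabetes features (values made up — the real `data` is built in the collapsed cells above):

```python
data = {
    "dataframe_split": {
        "columns": ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"],
        "data": [[0.038, 0.051, 0.062, 0.022, -0.044, -0.034, -0.043, -0.003, 0.020, -0.018]],
    }
}
```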

# COMMAND ----------

# MAGIC %md ### Next notebook
# MAGIC
# MAGIC When finished scoring, go to the **[04c_RT_Serving_Stop]($04c_RT_Serving_Stop)** notebook to shut down the serving endpoint.
# MAGIC When finished scoring, go to the **[04c_Model_Serving_Stop]($04c_Model_Serving_Stop)** notebook to shut down the serving endpoint.
16 changes: 6 additions & 10 deletions databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py
@@ -24,17 +24,15 @@

# COMMAND ----------

# MAGIC %md ### Display endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

endpoints = model_serving_client.list_endpoints()
for e in endpoints:
    print(f"{e['name']} - {e['creator']}")
list_model_serving_endpoints()

# COMMAND ----------

# MAGIC %md ### Stop endpoint
# MAGIC %md #### Stop endpoint

# COMMAND ----------

@@ -46,20 +44,18 @@

# COMMAND ----------

# MAGIC %md ### Display endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

model_serving_client.get_endpoint(endpoint_name)

# COMMAND ----------

endpoints = model_serving_client.list_endpoints()
for e in endpoints:
    print(f"{e['name']} - {e['creator']}")
list_model_serving_endpoints()

# COMMAND ----------

# MAGIC %md ### Next notebook
# MAGIC %md #### No next notebook
# MAGIC
# MAGIC **_Congratulations!_** You have finished your Diabetes Mini MLOps example. There is no next notebook.
18 changes: 1 addition & 17 deletions databricks/notebooks/diabetes_mini_mlops/_README.py
@@ -4,7 +4,7 @@
# MAGIC ##### Overview
# MAGIC
# MAGIC * Trains several model runs with different hyperparameters.
# MAGIC * Registers the best run's model in the model registry with a production alias 'champ'.
# MAGIC * Registers the best run's model in the Unity Catalog model registry with a production alias 'champ'.
# MAGIC * Scores with either:
# MAGIC * Batch scoring with Spark.
# MAGIC * Real-time model scoring with Serverless Model Serving ([AWS](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html) - [Azure](https://learn.microsoft.com/en-us/azure/databricks/machine-learning/model-serving/)).
@@ -28,12 +28,6 @@

# COMMAND ----------

# MAGIC %md #### Databricks documentation resources
# MAGIC * https://docs.databricks.com/en/mlflow/model-example.html
# MAGIC * https://docs.databricks.com/en/mlflow/scikit-learn-model-deployment-on-sagemaker.html

# COMMAND ----------

# MAGIC %md
# MAGIC **Batch Scoring Pipeline**
# MAGIC
@@ -42,13 +36,3 @@
# MAGIC **Real-time Scoring Pipeline**
# MAGIC
# MAGIC <img src="https://github.com/amesar/mlflow-examples/blob/master/python/e2e-ml-pipeline/e2e_ml_realtime_pipeline.png?raw=true" width="700" />

# COMMAND ----------

# MAGIC %md ### WIP - TODO
# MAGIC
# MAGIC * Real-time Model Serving endpoint - using [Serverless Model Serving](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html).
# MAGIC * [04a_RT_Serving_Start]($04a_RT_Serving_Start) - Start endpoint.
# MAGIC * [04b_RT_Serving_Score]($04b_RT_Serving_Score) - Score endpoint.
# MAGIC * [04c_RT_Serving_Stop]($04c_RT_Serving_Stop) - Stop endpoint.
# MAGIC
7 changes: 2 additions & 5 deletions databricks/notebooks/diabetes_mini_mlops/includes/Common.py
Expand Up @@ -15,6 +15,7 @@
import mlflow
mlflow.set_registry_uri("databricks-uc")
mlflow_client = mlflow.MlflowClient()
print("mlflow_client.registry_uri:", mlflow_client._registry_uri)

# COMMAND ----------

@@ -112,12 +113,8 @@ def create_registered_model(model_name, delete_registered_model=True):
# COMMAND ----------

_host_name = _get_notebook_tag("browserHostName")
print("_host_name:", _host_name)

# COMMAND ----------

_token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
dbutils.fs.put("file:///root/.databrickscfg",f"[DEFAULT]\nhost=https://{_host_name}\ntoken = "+_token,overwrite=True)
print("_host_name:", _host_name)

# COMMAND ----------

14 changes: 14 additions & 0 deletions databricks/notebooks/includes/ModelServingClient.py
@@ -55,3 +55,17 @@ def wait_until(self, endpoint_name, max=20, sleep_time=2):
# COMMAND ----------

model_serving_client = ModelServingClient()

# COMMAND ----------

from pyspark.sql.functions import col, from_unixtime

def list_model_serving_endpoints():
    endpoints = model_serving_client.list_endpoints()
    if len(endpoints) == 0:
        print("No model serving endpoints")
    else:
        data = [ ( e["name"], e["creator"], e["creation_timestamp"] ) for e in endpoints ]
        df = spark.createDataFrame(data=data, schema=["name", "creator", "creation_timestamp"])
        # creation_timestamp is epoch millis; HH gives a 24-hour clock
        df = df.withColumn("creation_timestamp", from_unixtime(col("creation_timestamp")/1000, "yyyy-MM-dd HH:mm:ss"))
        display(df)
