Issue #21: Diabetes Mini MLOps notebooks: added alias to model URI for scoring plus overall updates
amesar committed Dec 5, 2023
1 parent 6549abc commit acd782a
Showing 10 changed files with 75 additions and 89 deletions.
14 changes: 8 additions & 6 deletions databricks/notebooks/_README.py
@@ -1,11 +1,13 @@
# Databricks notebook source
# MAGIC %md ## README - MLflow Model Notebooks
# MAGIC
# MAGIC * [basic]($basic/_README) - Basic MLflow model notebooks.
# MAGIC * [llama2]($llama2/_README) - Simple example for Databricks Marketplace Llama 2 model.
# MAGIC * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples.
# MAGIC * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset.
# MAGIC * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog).
# MAGIC * [includes]($includes) - Common include notebooks.
# MAGIC * [basic]($basic/_README) - Basic MLflow model example notebooks.
# MAGIC * LLM
# MAGIC   * [llama2]($llama2/_README) - Simple example for Databricks Marketplace Llama 2 model.
# MAGIC   * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples.
# MAGIC * Mini MLOps examples
# MAGIC   * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog).
# MAGIC   * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset. Uses non-UC model version stages.
# MAGIC * [./includes]($includes) - Common include notebooks.
# MAGIC
# MAGIC Last updated: 2023-12-04
6 changes: 1 addition & 5 deletions databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py
@@ -1,16 +1,12 @@
# Databricks notebook source
# MAGIC %md # MLflow quickstart: training and logging
# MAGIC %md # Train Diabetes Model
# MAGIC
# MAGIC This tutorial is based on the MLflow [ElasticNet Diabetes example](https://github.com/mlflow/mlflow/tree/master/examples/sklearn_elasticnet_diabetes). It illustrates how to use MLflow to track the model training process, including logging model parameters, metrics, the model itself, and other artifacts like plots. It also includes instructions for viewing the logged results in the MLflow tracking UI.
# MAGIC
# MAGIC This notebook uses the scikit-learn `diabetes` dataset and predicts the progression metric (a quantitative measure of disease progression after one year) based on BMI, blood pressure, and other measurements. It uses the scikit-learn ElasticNet linear regression model, varying the `alpha` and `l1_ratio` parameters for tuning. For more information on ElasticNet, refer to:
# MAGIC * [Elastic net regularization](https://en.wikipedia.org/wiki/Elastic_net_regularization)
# MAGIC * [Regularization and Variable Selection via the Elastic Net](https://web.stanford.edu/~hastie/TALKS/enet_talk.pdf)
# MAGIC * [sklearn.datasets.load_diabetes](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html#sklearn.datasets.load_diabetes)
# MAGIC
# MAGIC ### Requirements
# MAGIC * This notebook requires Databricks Runtime ML 13.x or above.
# MAGIC
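The training cells themselves are collapsed in this diff. A minimal sketch of what the notebook describes — one MLflow run per hyperparameter combination, logging params, the `rmse` metric, and the model — assuming standard `mlflow.sklearn` calls (the parameter values below are illustrative, not the notebook's actual grid):

```python
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the scikit-learn diabetes dataset; `progression` is the label
X, y = load_diabetes(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# One MLflow run per (alpha, l1_ratio) candidate
for alpha, l1_ratio in [(0.01, 0.25), (0.05, 0.5), (0.1, 0.75)]:
    with mlflow.start_run():
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        model.fit(X_train, y_train)
        rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
        mlflow.log_params({"alpha": alpha, "l1_ratio": l1_ratio})
        mlflow.log_metric("rmse", rmse)
        mlflow.sklearn.log_model(model, "model")
```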

# COMMAND ----------

14 changes: 4 additions & 10 deletions databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py
@@ -1,12 +1,9 @@
# Databricks notebook source
# MAGIC %md ## Register Best Model Run in Model Registry
# MAGIC * Creates a new registered model if it doesn't already exist.
# MAGIC * Deletes all current model versions (optional).
# MAGIC * Finds the best model (lowest 'rmse' metric) generated from [01_Train_Model]($01_Train_Model) notebook experiment.
# MAGIC * Adds the best run as a registered model version and promotes it to the `production` stage.
# MAGIC
# MAGIC ##### Notes
# MAGIC * andre_catalog.ml_models2.diabetes_mlops
# MAGIC * Deletes any existing model versions.
# MAGIC * Finds the best model (lowest 'rmse' metric) generated from the [01_Train_Model]($01_Train_Model) notebook.
# MAGIC * Adds the best run as a registered model version and assigns the 'champ' alias to it.
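The lookup-and-register cells are collapsed here. A hedged sketch of the flow the bullets describe, using `mlflow_client` from the Common include; the `_experiment_name` variable is hypothetical, while `set_registered_model_alias` matches the call visible below:

```python
# Assumed: _experiment_name points at the 01_Train_Model experiment
experiment = mlflow_client.get_experiment_by_name(_experiment_name)

# Best run = lowest rmse
best_run = mlflow_client.search_runs(
    [experiment.experiment_id], order_by=["metrics.rmse ASC"], max_results=1
)[0]

# Register the best run's model, then point the alias at the new version
version = mlflow.register_model(f"runs:/{best_run.info.run_id}/model", model_name)
mlflow_client.set_registered_model_alias(model_name, _alias, version.version)
```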

# COMMAND ----------

@@ -97,11 +94,8 @@

# COMMAND ----------

_alias

# COMMAND ----------

mlflow_client.set_registered_model_alias(model_name, _alias, version.version)
_alias
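As a sanity check (not part of the original cells), the alias can be resolved back to a version:

```python
# Assumed check: confirm the alias now resolves to the newly registered version
mv = mlflow_client.get_model_version_by_alias(model_name, _alias)
print(f"alias '{_alias}' -> version {mv.version}")
```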

# COMMAND ----------

37 changes: 29 additions & 8 deletions databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py
@@ -6,7 +6,7 @@

# COMMAND ----------

# MAGIC %md ### Setup
# MAGIC %md #### Setup

# COMMAND ----------

@@ -22,12 +22,16 @@
table_name = dbutils.widgets.get("2. Table")
table_name = table_name or None

dbutils.widgets.text("3. Alias", _alias)
alias = dbutils.widgets.get("3. Alias")

print("model_name:", model_name)
print("table_name:", table_name)
print("alias:", alias)

# COMMAND ----------

# MAGIC %md ### Prepare scoring data
# MAGIC %md #### Prepare scoring data
# MAGIC * Drop the label column `progression`
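The prep cell is collapsed in this diff. A minimal sketch, assuming the table widget feeds a Spark read and `data` is the working name:

```python
# Hypothetical: read the scoring table and drop the label column
data = spark.table(table_name).drop("progression")
display(data)
```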

# COMMAND ----------
@@ -37,15 +41,28 @@

# COMMAND ----------

# MAGIC %md #### Prepare model URI
# MAGIC * A model URI can reference a model version either by its version number or by an alias.
# MAGIC * With version number: `models:/my_catalog.models.diabetes_mlops/1`
# MAGIC * With alias: `models:/my_catalog.models.diabetes_mlops@alias`

# COMMAND ----------

if alias:
    model_uri = f"models:/{model_name}@{alias}"
else:
    model_uri = f"models:/{model_name}/1"
model_uri

# COMMAND ----------

# MAGIC
# MAGIC %md ### Score with native Sklearn flavor
# MAGIC * Executes only on the driver node of the cluster
# MAGIC %md #### Score with native Sklearn flavor
# MAGIC * Executes only on the driver node

# COMMAND ----------

import pandas as pd
model_uri = f"models:/{model_name}/1"
model_uri
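The predict cell is collapsed; a hedged sketch of sklearn-flavor scoring with the URI prepared above (`data` as in the prep sketch earlier):

```python
import mlflow.sklearn
import pandas as pd

# Load the raw scikit-learn model and score a pandas DataFrame on the driver
model = mlflow.sklearn.load_model(model_uri)
predictions = model.predict(data.toPandas())
display(pd.DataFrame(predictions, columns=["prediction"]))
```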

# COMMAND ----------

@@ -55,7 +72,11 @@

# COMMAND ----------

# MAGIC %md ### Score with Pyfunc flavor


# COMMAND ----------

# MAGIC %md #### Score with Pyfunc flavor
# MAGIC * Executes only on the driver node of the cluster
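A corresponding hedged sketch for the pyfunc flavor — same URI, flavor-agnostic `predict()`:

```python
import mlflow.pyfunc
import pandas as pd

# Pyfunc wraps any logged flavor behind a uniform pandas-in/pandas-out API
model = mlflow.pyfunc.load_model(model_uri)
predictions = model.predict(data.toPandas())
display(pd.DataFrame(predictions, columns=["prediction"]))
```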

# COMMAND ----------
@@ -68,7 +89,7 @@

# COMMAND ----------

# MAGIC %md ### Distributed scoring with UDF
# MAGIC %md #### Distributed scoring with UDF
# MAGIC * Executes on all worker nodes of the cluster.
# MAGIC * UDF wraps the Sklearn model.
# MAGIC * Pass a Spark dataframe to the UDF.
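A sketch of the UDF path under the same assumptions (`data` is the Spark DataFrame prepared earlier):

```python
import mlflow.pyfunc

# Wrap the model as a Spark UDF so scoring fans out across worker nodes
udf = mlflow.pyfunc.spark_udf(spark, model_uri)
predictions = data.withColumn("prediction", udf(*data.columns))
display(predictions)
```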
databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py
@@ -19,42 +19,28 @@

# COMMAND ----------

#dbutils.widgets.removeAll()

# COMMAND ----------

dbutils.widgets.text("1. Registered model", "")
model_name = dbutils.widgets.get("1. Registered model")

dbutils.widgets.text("2. Model version", "")
version = dbutils.widgets.get("2. Model version")

dbutils.widgets.text("3. Model serving endpoint", _endpoint_name)
endpoint_name = dbutils.widgets.get("3. Model serving endpoint")
dbutils.widgets.text("2. Model serving endpoint", _endpoint_name)
endpoint_name = dbutils.widgets.get("2. Model serving endpoint")

print("model_name:", model_name)
print("version:", version)
print("endpoint_name:", endpoint_name)
print("_alias:", _alias)

# COMMAND ----------

assert_widget(model_name, "1. Registered model")
assert_widget(model_name, "2. Model version")
assert_widget(model_name, "3. Model serving endpoint")
assert_widget(endpoint_name, "2. Model serving endpoint")

# COMMAND ----------

# MAGIC %md #### List all endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

import pandas as pd

endpoints = model_serving_client.list_endpoints()
if len(endpoints) > 0:
    lst = [ ( e["name"], e["creator"] ) for e in endpoints ]
    df = pd.DataFrame(lst, columns = ["Name","Creator"])
    display(spark.createDataFrame(df))
list_model_serving_endpoints()

# COMMAND ----------

@@ -85,21 +71,17 @@

# COMMAND ----------

_alias

# COMMAND ----------

model = mlflow_client.get_registered_model(model_name)
dump_obj(model)

# COMMAND ----------

version = model.aliases[_alias]
version
print("version:", version)

# COMMAND ----------

# MAGIC %md #### Define endpoint spec
# MAGIC %md #### Define endpoint config spec

# COMMAND ----------
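The spec cell itself is collapsed in this diff. A hypothetical config in the Databricks serving API's `served_entities` shape — field values are illustrative, not the notebook's actual spec:

```python
spec = {
    "name": endpoint_name,
    "config": {
        "served_entities": [
            {
                "entity_name": model_name,    # UC model: catalog.schema.model
                "entity_version": version,    # resolved from the alias above
                "workload_size": "Small",
                "scale_to_zero_enabled": True,
            }
        ]
    },
}
```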

databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py
@@ -59,12 +59,12 @@
import json
import requests

data = json.dumps(data)

headers = { "Authorization": f"Bearer {token}", "Content-Type": "application/json" }
headers = { "Authorization": f"Bearer {_token}", "Content-Type": "application/json" }
rsp = requests.post(endpoint_uri, headers=headers, data=data, timeout=15)
rsp.text
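For reference, `data` is expected in Databricks' `dataframe_split` serving format; an illustrative payload for the ten diabetes features (values made up — the real `data` is built in the collapsed cells above):

```python
data = {
    "dataframe_split": {
        "columns": ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"],
        "data": [[0.038, 0.051, 0.062, 0.022, -0.044, -0.034, -0.043, -0.003, 0.020, -0.018]],
    }
}
```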

# COMMAND ----------

# MAGIC %md ### Next notebook
# MAGIC
# MAGIC When finished scoring, go to the **[04c_RT_Serving_Stop]($04c_RT_Serving_Stop)** notebook to shut down the serving endpoint.
# MAGIC When finished scoring, go to the **[04c_Model_Serving_Stop]($04c_Model_Serving_Stop)** notebook to shut down the serving endpoint.
16 changes: 6 additions & 10 deletions databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py
@@ -24,17 +24,15 @@

# COMMAND ----------

# MAGIC %md ### Display endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

endpoints = model_serving_client.list_endpoints()
for e in endpoints:
    print(f"{e['name']} - {e['creator']}")
list_model_serving_endpoints()

# COMMAND ----------

# MAGIC %md ### Stop endpoint
# MAGIC %md #### Stop endpoint

# COMMAND ----------

@@ -46,20 +44,18 @@

# COMMAND ----------

# MAGIC %md ### Display endpoints
# MAGIC %md #### List endpoints

# COMMAND ----------

model_serving_client.get_endpoint(endpoint_name)

# COMMAND ----------

endpoints = model_serving_client.list_endpoints()
for e in endpoints:
    print(f"{e['name']} - {e['creator']}")
list_model_serving_endpoints()

# COMMAND ----------

# MAGIC %md ### Next notebook
# MAGIC %md #### No next notebook
# MAGIC
# MAGIC **_Congratulations!_** You have finished your Diabetes Mini MLOps example. There is no next notebook.
18 changes: 1 addition & 17 deletions databricks/notebooks/diabetes_mini_mlops/_README.py
@@ -4,7 +4,7 @@
# MAGIC ##### Overview
# MAGIC
# MAGIC * Trains several model runs with different hyperparameters.
# MAGIC * Registers the best run's model in the model registry with a production alias 'champ'.
# MAGIC * Registers the best run's model in the Unity Catalog model registry with a production alias 'champ'.
# MAGIC * Scores with either:
# MAGIC * Batch scoring with Spark.
# MAGIC * Real-time model scoring with Serverless Model Serving ([AWS](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html) - [Azure](https://learn.microsoft.com/en-us/azure/databricks/machine-learning/model-serving/)).
@@ -28,12 +28,6 @@

# COMMAND ----------

# MAGIC %md #### Databricks documentation resources
# MAGIC * https://docs.databricks.com/en/mlflow/model-example.html
# MAGIC * https://docs.databricks.com/en/mlflow/scikit-learn-model-deployment-on-sagemaker.html

# COMMAND ----------

# MAGIC %md
# MAGIC **Batch Scoring Pipeline**
# MAGIC
@@ -42,13 +36,3 @@
# MAGIC **Real-time Scoring Pipeline**
# MAGIC
# MAGIC <img src="https://github.com/amesar/mlflow-examples/blob/master/python/e2e-ml-pipeline/e2e_ml_realtime_pipeline.png?raw=true" width="700" />

# COMMAND ----------

# MAGIC %md ### WIP - TODO
# MAGIC
# MAGIC * Real-time Model Serving endpoint - using [Serverless Model Serving](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html).
# MAGIC * [04a_RT_Serving_Start]($04a_RT_Serving_Start) - Start endpoint.
# MAGIC * [04b_RT_Serving_Score]($04b_RT_Serving_Score) - Score endpoint.
# MAGIC * [04c_RT_Serving_Stop]($04c_RT_Serving_Stop) - Stop endpoint.
# MAGIC
7 changes: 2 additions & 5 deletions databricks/notebooks/diabetes_mini_mlops/includes/Common.py
Expand Up @@ -15,6 +15,7 @@
import mlflow
mlflow.set_registry_uri("databricks-uc")
mlflow_client = mlflow.MlflowClient()
print("mlflow_client.registry_uri:", mlflow_client._registry_uri)

# COMMAND ----------

@@ -112,12 +113,8 @@ def create_registered_model(model_name, delete_registered_model=True):
# COMMAND ----------

_host_name = _get_notebook_tag("browserHostName")
print("_host_name:", _host_name)

# COMMAND ----------

_token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
dbutils.fs.put("file:///root/.databrickscfg",f"[DEFAULT]\nhost=https://{_host_name}\ntoken = "+_token,overwrite=True)
print("_host_name:", _host_name)

# COMMAND ----------

14 changes: 14 additions & 0 deletions databricks/notebooks/includes/ModelServingClient.py
@@ -55,3 +55,17 @@ def wait_until(self, endpoint_name, max=20, sleep_time=2):
# COMMAND ----------

model_serving_client = ModelServingClient()

# COMMAND ----------

from pyspark.sql.functions import col, from_unixtime

def list_model_serving_endpoints():
    endpoints = model_serving_client.list_endpoints()
    if len(endpoints) == 0:
        print("No model serving endpoints")
    else:
        data = [ ( e["name"], e["creator"], e["creation_timestamp"] ) for e in endpoints ]
        df = spark.createDataFrame(data=data, schema=["name", "creator", "creation_timestamp"])
        # creation_timestamp is epoch millis; HH gives a 24-hour clock
        df = df.withColumn("creation_timestamp", from_unixtime(col("creation_timestamp")/1000, "yyyy-MM-dd HH:mm:ss"))
        display(df)
