From acd782a5dc6f75edfa7a5e3038a7ff8edfc325a7 Mon Sep 17 00:00:00 2001 From: amesar Date: Tue, 5 Dec 2023 00:28:31 +0000 Subject: [PATCH] Issue #21: Diabetes Mini MLOps notebooks: added alias to model URI for scoring plus overall updates --- databricks/notebooks/_README.py | 14 ++++--- .../diabetes_mini_mlops/01_Train_Model.py | 6 +-- .../diabetes_mini_mlops/02_Register_Model.py | 14 ++----- .../diabetes_mini_mlops/03_Batch_Scoring.py | 37 +++++++++++++++---- .../04a_Model_Serving_Start.py | 34 ++++------------- .../04b_Model_Serving_Score.py | 4 +- .../04c_Model_Serving_Stop.py | 16 +++----- .../notebooks/diabetes_mini_mlops/_README.py | 18 +-------- .../diabetes_mini_mlops/includes/Common.py | 7 +--- .../notebooks/includes/ModelServingClient.py | 14 +++++++ 10 files changed, 75 insertions(+), 89 deletions(-) diff --git a/databricks/notebooks/_README.py b/databricks/notebooks/_README.py index fad708b..6d2ce45 100644 --- a/databricks/notebooks/_README.py +++ b/databricks/notebooks/_README.py @@ -1,11 +1,13 @@ # Databricks notebook source # MAGIC %md ## README - MLflow Model Notebooks # MAGIC -# MAGIC * [basic]($basic/_README) - Basic MLflow model notebooks. -# MAGIC * [llama2]($llama2/_README) - Simple example for Databruicks Marketplace LLama2 model. -# MAGIC * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples. -# MAGIC * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset. -# MAGIC * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog). -# MAGIC * [includes]($includes) - Common include notebooks. +# MAGIC * [basic]($basic/_README) - Basic MLflow model example notebooks. +# MAGIC * LLM +# MAGIC * [llama2]($llama2/_README) - Simple example for Databricks Marketplace LLama2 model. +# MAGIC * [transformers]($transformers/_README) - Basic Hugging Face transformer task examples. 
+# MAGIC * Mini MLOps examples +# MAGIC * [diabetes_mini_mlops]($diabetes_mini_mlops/_README) - Mini MLOps example using diabetes dataset (Unity Catalog). +# MAGIC * [Mini_MLOps_Pipeline]($Mini_MLOps_Pipeline/_README) - Simple end-to-end "mini" MLOps pipeline using wine quality dataset. Uses non-UC model version stages. +# MAGIC * [./includes]($includes) - Common include notebooks. # MAGIC # MAGIC Last updated: 2023-12-04 diff --git a/databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py b/databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py index 079f06c..11304fa 100644 --- a/databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py +++ b/databricks/notebooks/diabetes_mini_mlops/01_Train_Model.py @@ -1,5 +1,5 @@ # Databricks notebook source -# MAGIC %md # MLflow quickstart: training and logging +# MAGIC %md # Train Diabetes Model # MAGIC # MAGIC This tutorial is based on the MLflow [ElasticNet Diabetes example](https://github.com/mlflow/mlflow/tree/master/examples/sklearn_elasticnet_diabetes). It illustrates how to use MLflow to track the model training process, including logging model parameters, metrics, the model itself, and other artifacts like plots. It also includes instructions for viewing the logged results in the MLflow tracking UI. # MAGIC @@ -7,10 +7,6 @@ # MAGIC * [Elastic net regularization](https://en.wikipedia.org/wiki/Elastic_net_regularization) # MAGIC * [Regularization and Variable Selection via the Elastic Net](https://web.stanford.edu/~hastie/TALKS/enet_talk.pdf) # MAGIC * [sklearn.datasets.load_diabetes](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html#sklearn.datasets.load_diabetes) -# MAGIC -# MAGIC ### Requirements -# MAGIC * This notebook requires Databricks Runtime ML 13.x or above. 
-# MAGIC # COMMAND ---------- diff --git a/databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py b/databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py index 8d06426..e44df7f 100644 --- a/databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py +++ b/databricks/notebooks/diabetes_mini_mlops/02_Register_Model.py @@ -1,12 +1,9 @@ # Databricks notebook source # MAGIC %md ## Register Best Model Run in Model Registry # MAGIC * Creates a new registered model if it doesn't already exist. -# MAGIC * Deletes all current model versions (optional). -# MAGIC * Finds the best model (lowest 'rmse' metric) generated from [01_Train_Model]($01_Train_Model) notebook experiment. -# MAGIC * Adds the best run as a registered model version and promotes it to the `production` stage. -# MAGIC -# MAGIC ##### Notes -# MAGIC * andre_catalog.ml_models2.diabetes_mlops +# MAGIC * Deletes any existing model versions. +# MAGIC * Finds the best model (lowest 'rmse' metric) generated from the [01_Train_Model]($01_Train_Model) notebook. +# MAGIC * Adds the best run as a registered model version and assigns the 'champ' alias to it. # COMMAND ---------- @@ -97,11 +94,8 @@ # COMMAND ---------- -_alias - -# COMMAND ---------- - mlflow_client.set_registered_model_alias(model_name, _alias, version.version) +_alias # COMMAND ---------- diff --git a/databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py b/databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py index f67f316..994883c 100644 --- a/databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py +++ b/databricks/notebooks/diabetes_mini_mlops/03_Batch_Scoring.py @@ -6,7 +6,7 @@ # COMMAND ---------- -# MAGIC %md ### Setup +# MAGIC %md #### Setup # COMMAND ---------- @@ -22,12 +22,16 @@ table_name = dbutils.widgets.get("2. Table") table_name = table_name or None +dbutils.widgets.text("3. Alias", _alias) +alias = dbutils.widgets.get("3. 
Alias") + print("model_name:", model_name) print("table_name:", table_name) +print("alias:", alias) # COMMAND ---------- -# MAGIC %md ### Prepare scoring data +# MAGIC %md #### Prepare scoring data # MAGIC * Drop the label column `progression` # COMMAND ---------- @@ -37,15 +41,28 @@ # COMMAND ---------- +# MAGIC %md #### Prepare model URI +# MAGIC * A `model URI` can use either a model version's version or alias. +# MAGIC * With version number: `models:/my_catalog.models.diabetes_mlops/1` +# MAGIC * With alias: `models:/my_catalog.models.diabetes_mlops@alias` + +# COMMAND ---------- + +if alias: + model_uri = f"models:/{model_name}@{alias}" +else: + model_uri = f"models:/{model_name}/1" +model_uri + +# COMMAND ---------- + # MAGIC -# MAGIC %md ### Score with native Sklearn flavor -# MAGIC * Executes only on the driver node of the cluster +# MAGIC %md #### Score with native Sklearn flavor +# MAGIC * Executes only on the driver node # COMMAND ---------- import pandas as pd -model_uri = f"models:/{model_name}/1" -model_uri # COMMAND ---------- @@ -55,7 +72,11 @@ # COMMAND ---------- -# MAGIC %md ### Score with Pyfunc flavor + + +# COMMAND ---------- + +# MAGIC %md #### Score with Pyfunc flavor # MAGIC * Executes only on the driver node of the cluster # COMMAND ---------- @@ -68,7 +89,7 @@ # COMMAND ---------- -# MAGIC %md ### Distributed scoring with UDF +# MAGIC %md #### Distributed scoring with UDF # MAGIC * Executes on all worker nodes of the cluster. # MAGIC * UDF wraps the Sklearn model. # MAGIC * Pass a Spark dataframe to the UDF. 
diff --git a/databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py b/databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py index 630747d..bc65938 100644 --- a/databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py +++ b/databricks/notebooks/diabetes_mini_mlops/04a_Model_Serving_Start.py @@ -19,42 +19,28 @@ # COMMAND ---------- -#dbutils.widgets.removeAll() - -# COMMAND ---------- - dbutils.widgets.text("1. Registered model", "") model_name = dbutils.widgets.get("1. Registered model") -dbutils.widgets.text("2. Model version", "") -version = dbutils.widgets.get("2. Model version") - -dbutils.widgets.text("3. Model serving endpoint", _endpoint_name) -endpoint_name = dbutils.widgets.get("3. Model serving endpoint") +dbutils.widgets.text("2. Model serving endpoint", _endpoint_name) +endpoint_name = dbutils.widgets.get("2. Model serving endpoint") print("model_name:", model_name) -print("version:", version) print("endpoint_name:", endpoint_name) +print("_alias:", _alias) # COMMAND ---------- assert_widget(model_name, "1. Registered model") -assert_widget(model_name, "2. Model version") -assert_widget(model_name, "3. Model serving endpoint") +assert_widget(endpoint_name, "2. 
Model serving endpoint") # COMMAND ---------- -# MAGIC %md #### List all endpoints +# MAGIC %md #### List endpoints # COMMAND ---------- -import pandas as pd - -endpoints = model_serving_client.list_endpoints() -if len(endpoints) > 0: - lst = [ ( e["name"], e["creator"] ) for e in endpoints ] - df = pd.DataFrame(lst, columns = ["Name","Creator"]) - display(spark.createDataFrame(df)) +list_model_serving_endpoints() # COMMAND ---------- @@ -85,21 +71,17 @@ # COMMAND ---------- -_alias - -# COMMAND ---------- - model = mlflow_client.get_registered_model(model_name) dump_obj(model) # COMMAND ---------- version = model.aliases[_alias] -version +print("version:", version) # COMMAND ---------- -# MAGIC %md #### Define endpoint spec +# MAGIC %md #### Define endpoint config spec # COMMAND ---------- diff --git a/databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py b/databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py index b0edaf2..5acf2d7 100644 --- a/databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py +++ b/databricks/notebooks/diabetes_mini_mlops/04b_Model_Serving_Score.py @@ -59,7 +59,7 @@ import json data = json.dumps(data) -headers = { "Authorization": f"Bearer {token}", "Content-Type": "application/json" } +headers = { "Authorization": f"Bearer {_token}", "Content-Type": "application/json" } rsp = requests.post(endpoint_uri, headers=headers, data=data, timeout=15) rsp.text @@ -67,4 +67,4 @@ # MAGIC %md ### Next notebook # MAGIC -# MAGIC When finished scoring, go to the **[04c_RT_Serving_Stop]($04c_RT_Serving_Stop)** notebook to shut down the serving endpoint. +# MAGIC When finished scoring, go to the **[04c_Model_Serving_Stop]($04c_Model_Serving_Stop)** notebook to shut down the serving endpoint. 
diff --git a/databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py b/databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py index 61eb2cc..0381282 100644 --- a/databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py +++ b/databricks/notebooks/diabetes_mini_mlops/04c_Model_Serving_Stop.py @@ -24,17 +24,15 @@ # COMMAND ---------- -# MAGIC %md ### Display endpoints +# MAGIC %md #### List endpoints # COMMAND ---------- -endpoints = model_serving_client.list_endpoints() -for e in endpoints: - print(f"{e['name']} - {e['creator']}") +list_model_serving_endpoints() # COMMAND ---------- -# MAGIC %md ### Stop endpoint +# MAGIC %md #### Stop endpoint # COMMAND ---------- @@ -46,7 +44,7 @@ # COMMAND ---------- -# MAGIC %md ### Display endpoints +# MAGIC %md #### List endpoints # COMMAND ---------- @@ -54,12 +52,10 @@ # COMMAND ---------- -endpoints = model_serving_client.list_endpoints() -for e in endpoints: - print(f"{e['name']} - {e['creator']}") +list_model_serving_endpoints() # COMMAND ---------- -# MAGIC %md ### Next notebook +# MAGIC %md #### No next notebook # MAGIC # MAGIC **_Congratulations!_** You have finished your Diabetes Mini MLOps example. There is no next notebook. diff --git a/databricks/notebooks/diabetes_mini_mlops/_README.py b/databricks/notebooks/diabetes_mini_mlops/_README.py index 5c89992..34aa949 100644 --- a/databricks/notebooks/diabetes_mini_mlops/_README.py +++ b/databricks/notebooks/diabetes_mini_mlops/_README.py @@ -4,7 +4,7 @@ # MAGIC ##### Overview # MAGIC # MAGIC * Trains several model runs with different hyperparameters. -# MAGIC * Registers the best run's model in the model registry with a production alias 'champ'. +# MAGIC * Registers the best run's model in the Unity Catalog model registry with a production alias 'champ'. # MAGIC * Scores with either: # MAGIC * Batch scoring with Spark. 
# MAGIC * Real-time model scoring with Serverless Model Serving ([AWS](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html) - [Azure](https://learn.microsoft.com/en-us/azure/databricks/machine-learning/model-serving/)). @@ -28,12 +28,6 @@ # COMMAND ---------- -# MAGIC %md #### Databricks documentation resources -# MAGIC * https://docs.databricks.com/en/mlflow/model-example.html -# MAGIC * https://docs.databricks.com/en/mlflow/scikit-learn-model-deployment-on-sagemaker.html - -# COMMAND ---------- - # MAGIC %md # MAGIC **Batch Scoring Pipeline** # MAGIC @@ -42,13 +36,3 @@ # MAGIC **Real-time Scoring Pipeline** # MAGIC # MAGIC - -# COMMAND ---------- - -# MAGIC %md ### WIP - TODO -# MAGIC -# MAGIC * Real-time Model Serving endpoint - using [Serverless Model Serving](https://docs.databricks.com/machine-learning/model-inference/serverless/serverless-real-time-inference.html). -# MAGIC * [04a_RT_Serving_Start]($04a_RT_Serving_Start) - Start endpoint. -# MAGIC * [04b_RT_Serving_Score]($04b_RT_Serving_Score) - Score endpoint. -# MAGIC * [04c_RT_Serving_Stop]($04c_RT_Serving_Stop) - Stop endpoint. 
-# MAGIC diff --git a/databricks/notebooks/diabetes_mini_mlops/includes/Common.py b/databricks/notebooks/diabetes_mini_mlops/includes/Common.py index 88b11a2..9b3c9c7 100644 --- a/databricks/notebooks/diabetes_mini_mlops/includes/Common.py +++ b/databricks/notebooks/diabetes_mini_mlops/includes/Common.py @@ -15,6 +15,7 @@ import mlflow mlflow.set_registry_uri("databricks-uc") mlflow_client = mlflow.MlflowClient() +print("mlflow_client.registry_uri:", mlflow_client._registry_uri) # COMMAND ---------- @@ -112,12 +113,8 @@ def create_registered_model(model_name, delete_registered_model=True): # COMMAND ---------- _host_name = _get_notebook_tag("browserHostName") -print("_host_name:", _host_name) - -# COMMAND ---------- - _token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get() -dbutils.fs.put("file:///root/.databrickscfg",f"[DEFAULT]\nhost=https://{_host_name}\ntoken = "+_token,overwrite=True) +print("_host_name:", _host_name) # COMMAND ---------- diff --git a/databricks/notebooks/includes/ModelServingClient.py b/databricks/notebooks/includes/ModelServingClient.py index fdc1027..7b5bbad 100644 --- a/databricks/notebooks/includes/ModelServingClient.py +++ b/databricks/notebooks/includes/ModelServingClient.py @@ -55,3 +55,17 @@ def wait_until(self, endpoint_name, max=20, sleep_time=2): # COMMAND ---------- model_serving_client = ModelServingClient() + +# COMMAND ---------- + +from pyspark.sql.functions import * + +def list_model_serving_endpoints(): + endpoints = model_serving_client.list_endpoints() + if len(endpoints) == 0: + print("No model serving endpoints") + else: + data = [ ( e["name"], e["creator"], e["creation_timestamp"] ) for e in endpoints ] + df = spark.createDataFrame(data=data, schema = ["name","creator","creation_timestamp"]) + df = df.withColumn("creation_timestamp",from_unixtime(col("creation_timestamp")/1000, "yyyy-MM-dd HH:mm:ss")) + display(df)