-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Added ai example files * Updated Readme * Added formatting changes * Updated examples * Added formatting changes * Removed unneeded lines * Made fixes * Added changes to call open ai by mlflow gateway and creating gateway routes * Removed extra code and added example notebook for creating mlflow gateway routes * formatting changes * fixed name * Updates notes * Made changes as per Review comments --------- Co-authored-by: souravg-db <souravg-db>
- Loading branch information
1 parent
7c462df
commit c73465d
Showing
4 changed files
with
393 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Databricks notebook source | ||
# MAGIC %md | ||
# MAGIC # Create MLflow Gateway Routes for MosaicML & OpenAI | ||
# MAGIC This notebook provides examples of creating MLflow Gateway routes for MosaicML & OpenAI. | ||
# MAGIC | ||
# MAGIC **NOTE**: | ||
# MAGIC - This notebook requires >= DBR 13.3 LTS ML Runtime | ||
# MAGIC - Please refer to [configuring-the-ai-gateway](https://mlflow.org/docs/latest/gateway/index.html#configuring-the-ai-gateway) for more info | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ### Install dependencies | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %pip install mlflow[gateway] | ||
# MAGIC dbutils.library.restartPython() | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Setup widgets | ||
|
||
# COMMAND ---------- | ||
|
||
# Register one text widget per gateway route name (MosaicML and OpenAI). | ||
# NOTE(review): arguments look like (widget name, default value, label) - confirm against the dbutils.widgets docs. | ||
# NOTE(review): the widget name is spelled "moasicml" (sic); the later dbutils.widgets.get call uses the same spelling. | ||
dbutils.widgets.text("moasicml_route_name", "discoverx-mosaicml-llama2-70b-completions", "mosaicml route name") | ||
dbutils.widgets.text("openai_route_name", "discoverx-openai-gpt-3.5-completions", "openai route name") | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Import required libs and initialize variables | ||
|
||
# COMMAND ---------- | ||
|
||
import mlflow | ||
from mlflow import gateway | ||
|
||
# COMMAND ---------- | ||
|
||
# Read the route names from the notebook widgets; they are used below to look up or create each gateway route. | ||
# NOTE(review): "moasicml" (sic) must match the spelling used when the widget was registered. | ||
moasicml_route_name = dbutils.widgets.get("moasicml_route_name") | ||
openai_route_name = dbutils.widgets.get("openai_route_name") | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ### Create MLflow gateway route for MosaicML (llama2 model) | ||
|
||
# COMMAND ---------- | ||
|
||
# get or create mosaicml route | ||
import mlflow | ||
from mlflow import gateway | ||
|
||
# Direct the MLflow gateway client to the "databricks" gateway URI.
gateway.set_gateway_uri(gateway_uri="databricks")

try:
    # Reuse the route when one is already registered under this name.
    route = gateway.get_route(moasicml_route_name)
except Exception:
    # The lookup failed, so register a completions route backed by MosaicML.
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, so the cell can still be cancelled.
    print(f"Creating the route {moasicml_route_name}")
    # Keep the created route in `route` so the variable is bound on both paths.
    route = gateway.create_route(
        name=moasicml_route_name,
        route_type="llm/v1/completions",
        model={
            "name": "llama2-70b-chat",
            "provider": "mosaicml",
            "mosaicml_config": {
                # The API key is read from the "discoverx" secret scope.
                "mosaicml_api_key": dbutils.secrets.get(scope="discoverx", key="mosaic_ml_api_key")
            },
        },
    )
    print(route)
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ### Create MLflow gateway route for Open AI (GPT 3.5 model) | ||
|
||
# COMMAND ---------- | ||
|
||
# get or create openai route | ||
import mlflow | ||
from mlflow import gateway | ||
|
||
# Direct the MLflow gateway client to the "databricks" gateway URI.
gateway.set_gateway_uri(gateway_uri="databricks")

try:
    # Reuse the route when one is already registered under this name.
    route = gateway.get_route(openai_route_name)
except Exception:
    # The lookup failed, so register a completions route backed by OpenAI.
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, so the cell can still be cancelled.
    print(f"Creating the route {openai_route_name}")
    # Keep the created route in `route` so the variable is bound on both paths.
    route = gateway.create_route(
        name=openai_route_name,
        route_type="llm/v1/completions",
        model={
            "name": "gpt-35-turbo",
            "provider": "openai",
            "openai_config": {
                # Key, base URL and deployment name are read from the
                # "discoverx" secret scope; the API type is set to "azure".
                "openai_api_key": dbutils.secrets.get(scope="discoverx", key="openaikey"),
                "openai_api_base": dbutils.secrets.get(scope="discoverx", key="openaibase"),
                "openai_deployment_name": dbutils.secrets.get(scope="discoverx", key="openai_deployment_name"),
                "openai_api_type": "azure",
                "openai_api_version": "2023-05-15",
            },
        },
    )
    print(route)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# Databricks notebook source | ||
# MAGIC %md | ||
# MAGIC # Text analysis with DiscoverX, MosaicML & Databricks MLflow | ||
# MAGIC | ||
# MAGIC This notebook uses [DiscoverX](https://github.com/databrickslabs/discoverx) to analyze text with [MosaicML](https://www.mosaicml.com/blog/llama2-inference) over a set of tables in Unity Catalog. | ||
# MAGIC | ||
# MAGIC The notebook will: | ||
# MAGIC 1. Use DiscoverX to sample a set of tables from Unity Catalog and unpivot all string columns into a long format dataset | ||
# MAGIC 2. Run text analysis with MosaicML llama2-70b model & Databricks MLflow | ||
# MAGIC | ||
# MAGIC **NOTE**: | ||
# MAGIC - This notebook requires >= DBR 13.3 LTS ML Runtime | ||
# MAGIC - This notebook requires an MLflow Gateway route for MosaicML. For examples of creating routes, please refer to the [README.md](https://github.com/databrickslabs/discoverx/blob/master/README.md) file. | ||
# MAGIC - For detailed information about the cost of API hits, please refer to [MosaicML Inference](https://www.mosaicml.com/inference) | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Install dependencies | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %pip install mlflow[gateway] | ||
# MAGIC dbutils.library.restartPython() | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Setup widgets | ||
|
||
# COMMAND ---------- | ||
|
||
# Register text widgets for the table-selection pattern and the MosaicML gateway route name. | ||
# NOTE(review): arguments look like (widget name, default value, label) - confirm against the dbutils.widgets docs. | ||
# NOTE(review): the widget name is spelled "moasicml" (sic); the later dbutils.widgets.get call uses the same spelling. | ||
dbutils.widgets.text("from_tables", "discoverx_sample.*.*", "from tables") | ||
dbutils.widgets.text("moasicml_route_name", "discoverx-mosaicml-llama2-70b-completions", "mosaicml route name") | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Import required libs and initialize variables | ||
|
||
# COMMAND ---------- | ||
|
||
import pandas as pd | ||
from pyspark.sql.functions import ( | ||
pandas_udf, | ||
col, | ||
concat, | ||
lit, | ||
explode, | ||
count, | ||
avg, | ||
min, | ||
max, | ||
sum, | ||
collect_set, | ||
concat_ws, | ||
) | ||
from pyspark.sql.types import ArrayType, StringType, StructType, FloatType, StructField | ||
from typing import Iterator | ||
|
||
# COMMAND ---------- | ||
|
||
# Read the table-selection pattern and the gateway route name from the notebook widgets. | ||
# NOTE(review): "moasicml" (sic) must match the spelling used when the widget was registered. | ||
from_tables = dbutils.widgets.get("from_tables") | ||
moasicml_route_name = dbutils.widgets.get("moasicml_route_name") | ||
|
||
# Set the sample rows size (passed as sample_size to unpivot_string_columns below) | ||
sample_size = 100 | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Initialize discoverx | ||
|
||
# COMMAND ---------- | ||
|
||
from discoverx import DX | ||
|
||
# Create the DiscoverX object whose from_tables() call selects the tables to sample below. | ||
dx = DX() | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ## Transform all sampled tables | ||
|
||
# COMMAND ---------- | ||
|
||
# Select the tables matching the widget pattern, then unpivot their string
# columns from a sample of `sample_size` rows.
selected_tables = dx.from_tables(from_tables)
unpivot_step = selected_tables.unpivot_string_columns(sample_size=sample_size)

# Checkpointing to reduce the query plan size
unpivoted_df = unpivot_step.apply().localCheckpoint()
|
||
# COMMAND ---------- | ||
|
||
# Show the unpivoted sample before any predictions are added. | ||
display(unpivoted_df) | ||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ### Define udf to use MosaicML model | ||
|
||
# COMMAND ---------- | ||
|
||
import mlflow | ||
from mlflow import gateway | ||
|
||
|
||
@pandas_udf(StringType())
def predict_value_udf(s: pd.Series) -> pd.Series:
    """Ask the MosaicML gateway route whether each text concerns an acquisition/merger.

    One gateway query is issued per value in the batch.

    Args:
        s: Batch of text values to classify.

    Returns:
        A Series holding, for each input value, the text of the first
        candidate returned by the route. The prompt asks for YES or NO, but
        the reply is passed through unmodified.
    """

    def predict_value(text):
        # Llama-2 chat prompt: system instruction inside <<SYS>>, user turn inside [INST].
        data = {
            "prompt": f""" [INST]
<<SYS>>
Reply with either YES or NO
<</SYS>>
Is this news article related to acquisition/merger ?
News Article: {text}
[/INST]
"""
        }
        response = mlflow.gateway.query(route=moasicml_route_name, data=data)
        # Only the first candidate's text is kept.
        return response["candidates"][0]["text"]

    return s.apply(predict_value)
|
||
|
||
# COMMAND ---------- | ||
|
||
# MAGIC %md | ||
# MAGIC ### Run Predictions | ||
|
||
# COMMAND ---------- | ||
|
||
# Add the model's reply for each sampled string value as a new column.
# The column name was previously misspelled as "is_realted_to_aquisition".
df_with_prediction = unpivoted_df.withColumn("is_related_to_acquisition", predict_value_udf(col("string_value")))
|
||
# COMMAND ---------- | ||
|
||
# Show the sampled values together with the prediction column. | ||
display(df_with_prediction) | ||
Oops, something went wrong.