-
Notifications
You must be signed in to change notification settings - Fork 55
/
predict.py
29 lines (22 loc) · 899 Bytes
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from __future__ import print_function
import sys
import mlflow
import mlflow.spark as mlflow_spark
from pyspark.sql import SparkSession
# Diagnostics emitted as soon as the module is loaded (these statements sit
# outside the __main__ guard): the MLflow version, then the tracking URI.
print("{} {}".format("MLflow Version:", mlflow.version.VERSION))
print("{} {}".format("Tracking URI:", mlflow.tracking.get_tracking_uri()))
if __name__ == "__main__":
    # Script entry point: load a Spark model logged under an MLflow run,
    # score the sample libsvm data set with it, and display the rows whose
    # prediction differs from the indexed label.
    #
    # Usage: <script> <run_id>

    # The run ID is the only command-line argument and it is required; fail
    # with a readable usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        print("Usage: {} <run_id>".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    run_id = sys.argv[1]
    print("run_id:", run_id)

    spark = SparkSession.builder.appName("Predict").getOrCreate()

    # NOTE: the path is relative, so it is resolved against the current
    # working directory rather than this file's location.
    data_path = "../data/sample_libsvm_data.txt"
    print("data_path:", data_path)
    data = spark.read.format("libsvm").load(data_path)

    # "spark-model" is the artifact path passed to load_model together with
    # the run ID (presumably the path the training script logged the model
    # under -- confirm against the training code).
    model = mlflow_spark.load_model("spark-model", run_id=run_id)
    predictions = model.transform(data)
    print("Prediction Dataframe")
    predictions.printSchema()

    # Keep only the rows where the predicted value and the indexed label
    # disagree ("<>" is the SQL inequality operator).
    print("Filtered Prediction Dataframe")
    df = predictions.select("prediction", "indexedLabel", "probability").filter(
        "prediction <> indexedLabel"
    )
    df.printSchema()
    # Show at most five rows; False turns off column truncation.
    df.show(5, False)