From 2b650884cdd8ab70e59106040539a51d51ed61d5 Mon Sep 17 00:00:00 2001
From: Sean Friedowitz <sean@mozilla.ai>
Date: Tue, 9 Apr 2024 11:34:15 -0700
Subject: [PATCH] Fix OpenAI client serialization bug in Prometheus evaluation

---
 pyproject.toml                             |  2 +-
 src/lm_buddy/jobs/evaluation/prometheus.py | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1576db4..07958f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "lm-buddy"
-version = "0.10.0"
+version = "0.10.1"
 authors = [
     { name = "Sean Friedowitz", email = "sean@mozilla.ai" },
     { name = "Aaron Gonzales", email = "aaron@mozilla.ai" },
diff --git a/src/lm_buddy/jobs/evaluation/prometheus.py b/src/lm_buddy/jobs/evaluation/prometheus.py
index f916b76..78827dd 100644
--- a/src/lm_buddy/jobs/evaluation/prometheus.py
+++ b/src/lm_buddy/jobs/evaluation/prometheus.py
@@ -111,12 +111,8 @@ def get_response_with_retries(
 
 
 def run_eval(config: PrometheusJobConfig) -> Path:
-    # Instantiate OpenAI client to speak with the vLLM endpoint
-    client = OpenAI(base_url=config.prometheus.inference.base_url)
-
-    hf_loader = HuggingFaceAssetLoader()
-
     # Resolve the engine model
+    hf_loader = HuggingFaceAssetLoader()
     engine_path = hf_loader.resolve_asset_path(config.prometheus.inference.engine)
 
     # Load dataset from W&B artifact
@@ -135,6 +131,11 @@ def run_eval(config: PrometheusJobConfig) -> Path:
 
     # Generator that iterates over samples and yields new rows with the prometheus outputs
     def data_generator():
+        # Instantiate OpenAI client to speak with the vLLM endpoint
+        # The client is not serializable, so it must be instantiated inside this generator
+        # Reference: https://huggingface.co/docs/datasets/en/troubleshoot#pickling-issues
+        client = OpenAI(base_url=config.prometheus.inference.base_url)
+
         for sample in dataset_iterable:
             # convert instructions from the dataset (`text_field` in a dict) to
             # prompts that prometheus accepts