Website | Schedule Demo | Signup for a Free Account | API Docs | Clarifai Community | Python SDK Docs | Examples | Colab Notebooks | Discord
Install from Source:
git clone https://github.com/Clarifai/clarifai-model-utils
cd clarifai-model-utils
python3 -m venv env
source env/bin/activate
pip install -e .
Set your Clarifai Personal Access Token (PAT) as the CLARIFAI_PAT environment variable, e.g. `export CLARIFAI_PAT=<your_pat>`.
Quick demo of LLM Evaluation
from clarifai_model_utils import ClarifaiEvaluator
from clarifai_model_utils.llm_eval.constant import JUDGE_LLMS
from clarifai.client.model import Model
from clarifai.client.dataset import Dataset
model = Model(model_url)
ds = Dataset(ds_url)
evaluator = ClarifaiEvaluator(predictor=model)
out = evaluator.evaluate(
template="llm_as_judge",
judge_llm_url=JUDGE_LLMS.GPT3_5_TURBO,
upload=True,
dataset=ds,
)
print(out)
Hugging Face 🤗 Datasets are also supported; the dataset must have columns named `question` and `answer`.
from clarifai_model_utils import ClarifaiEvaluator
from clarifai_model_utils.llm_eval.constant import JUDGE_LLMS
from clarifai.client.model import Model
+ from datasets import load_dataset
- from clarifai.client.dataset import Dataset
model = Model(model_url)
+ ds = load_dataset("stanfordnlp/coqa", split="train").rename_columns({"questions": "question", "answers": "answer"})
- ds = Dataset(ds_url)
evaluator = ClarifaiEvaluator(predictor=model)
out = evaluator.evaluate(
template="llm_as_judge",
judge_llm_url=JUDGE_LLMS.GPT3_5_TURBO,
upload=True,
dataset=ds,
)
print(out)
Given a dataset that contains only contexts (text chunks), questions and answers can be generated through the RAGAS integration by passing `generate_qa=True`. The resulting dataset can be used directly in the evaluator.
from clarifai_model_utils import ClarifaiEvaluator
from clarifai_model_utils.llm_eval.constant import JUDGE_LLMS
from clarifai.client.model import Model
from clarifai.client.dataset import Dataset
model = Model(model_url)
ds = Dataset(ds_url) ## This dataset only has text chunks from source. There are no questions or answers yet.
evaluator = ClarifaiEvaluator(predictor=model)
out = evaluator.evaluate(
template="llm_as_judge",
judge_llm_url=JUDGE_LLMS.GPT3_5_TURBO,
upload=True,
dataset=ds,
+ generate_qa=True
)
print(out)