[llama stack ui] add native eval & inspect distro & playground pages #541

Open · wants to merge 26 commits into base: main
6 changes: 6 additions & 0 deletions llama_stack/distribution/ui/README.md
@@ -2,6 +2,12 @@

[!NOTE] This is a work in progress.

+## Prerequisite
+- Start up Llama Stack Server
+```
+llama stack run
+```
+
## Running Streamlit App

```
…
```
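Note (not part of the diff): `llama stack run` expects the run configuration of a previously built distribution. An illustrative invocation, with a hypothetical config path:

```
llama stack run ~/.llama/distributions/together/together-run.yaml
```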
196 changes: 40 additions & 156 deletions llama_stack/distribution/ui/app.py
@@ -3,170 +3,54 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-import json
-
-import pandas as pd
-
import streamlit as st

-from modules.api import LlamaStackEvaluation
-
-from modules.utils import process_dataset
-
-EVALUATION_API = LlamaStackEvaluation()


def main():
-    # Add collapsible sidebar
-    with st.sidebar:
-        # Add collapse button
-        if "sidebar_state" not in st.session_state:
-            st.session_state.sidebar_state = True
-
-        if st.session_state.sidebar_state:
-            st.title("Navigation")
-            page = st.radio(
-                "Select a Page",
-                ["Application Evaluation"],
-                index=0,
-            )
-        else:
-            page = "Application Evaluation"  # Default page when sidebar is collapsed
-
-    # Main content area
-    st.title("🦙 Llama Stack Evaluations")
-
-    if page == "Application Evaluation":
-        application_evaluation_page()
-
-
-def application_evaluation_page():
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
-
-    if uploaded_file is None:
-        st.error("No file uploaded")
-        return
-
-    # Process uploaded file
-    df = process_dataset(uploaded_file)
-    if df is None:
-        st.error("Error processing file")
-        return
-
-    # Display dataset information
-    st.success("Dataset loaded successfully!")
-
-    # Display dataframe preview
-    st.subheader("Dataset Preview")
-    st.dataframe(df)
-
-    # Select Scoring Functions to Run Evaluation On
-    st.subheader("Select Scoring Functions")
-    scoring_functions = EVALUATION_API.list_scoring_functions()
-    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
-    scoring_functions_names = list(scoring_functions.keys())
-    selected_scoring_functions = st.multiselect(
-        "Choose one or more scoring functions",
-        options=scoring_functions_names,
-        help="Choose one or more scoring functions.",
+    # Evaluation pages
+    application_evaluation_page = st.Page(
+        "page/evaluations/app_eval.py",
+        title="Evaluations (Scoring)",
+        icon="📊",
+        default=False,
    )
+    native_evaluation_page = st.Page(
+        "page/evaluations/native_eval.py",
+        title="Evaluations (Generation + Scoring)",
+        icon="📊",
+        default=False,
+    )

-    available_models = EVALUATION_API.list_models()
-    available_models = [m.identifier for m in available_models]
-
-    scoring_params = {}
-    if selected_scoring_functions:
-        st.write("Selected:")
-        for scoring_fn_id in selected_scoring_functions:
-            scoring_fn = scoring_functions[scoring_fn_id]
-            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
-            new_params = None
-            if scoring_fn.params:
-                new_params = {}
-                for param_name, param_value in scoring_fn.params.to_dict().items():
-                    if param_name == "type":
-                        new_params[param_name] = param_value
-                        continue
-
-                    if param_name == "judge_model":
-                        value = st.selectbox(
-                            f"Select **{param_name}** for {scoring_fn_id}",
-                            options=available_models,
-                            index=0,
-                            key=f"{scoring_fn_id}_{param_name}",
-                        )
-                        new_params[param_name] = value
-                    else:
-                        value = st.text_area(
-                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
-                            value=json.dumps(param_value, indent=2),
-                            height=80,
-                        )
-                        try:
-                            new_params[param_name] = json.loads(value)
-                        except json.JSONDecodeError:
-                            st.error(
-                                f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
-                            )
-
-            st.json(new_params)
-            scoring_params[scoring_fn_id] = new_params
-
-    # Add run evaluation button & slider
-    total_rows = len(df)
-    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
-
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = df.to_dict(orient="records")
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-
-            # Run evaluation for current row
-            score_res = EVALUATION_API.run_scoring(
-                r,
-                scoring_function_ids=selected_scoring_functions,
-                scoring_params=scoring_params,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for fn_id in selected_scoring_functions:
-                if fn_id not in output_res:
-                    output_res[fn_id] = []
-                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
-
-            # Display current row results using separate containers
-            progress_text_container.write(
-                f"Expand to see current processed result ({i+1}/{len(rows)})"
-            )
-            results_container.json(
-                score_res.to_json(),
-                expanded=2,
-            )
+    # Playground pages
+    chat_page = st.Page(
+        "page/playground/chat.py", title="Chat", icon="💬", default=True
+    )
+    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)

-        progress_bar.progress(1.0, text="Evaluation complete!")
+    # Distribution pages
+    resources_page = st.Page(
+        "page/distribution/resources.py", title="Resources", icon="🔍", default=False
+    )
+    provider_page = st.Page(
+        "page/distribution/providers.py",
+        title="API Providers",
+        icon="🔍",
+        default=False,
+    )

-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
+    pg = st.navigation(
+        {
+            "Playground": [
+                chat_page,
+                rag_page,
+                application_evaluation_page,
+                native_evaluation_page,
+            ],
+            "Inspect": [provider_page, resources_page],
+        },
+        expanded=False,
+    )
+    pg.run()


if __name__ == "__main__":
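Note (not part of the diff): the new entrypoint uses Streamlit's multipage API, where each `st.Page` wraps a separate script and `st.navigation` renders the grouped sidebar (introduced around Streamlit 1.36). A minimal self-contained sketch of the pattern, with hypothetical page paths:

```
import streamlit as st

# Each page is its own script; the dict keys become sidebar section headers.
home = st.Page("pages/home.py", title="Home", icon="🏠", default=True)
about = st.Page("pages/about.py", title="About", icon="ℹ️")

pg = st.navigation({"Main": [home, about]})
pg.run()  # executes the script of whichever page is currently selected
```

Launched the usual way with `streamlit run app.py`.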
5 changes: 5 additions & 0 deletions llama_stack/distribution/ui/modules/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
13 changes: 4 additions & 9 deletions llama_stack/distribution/ui/modules/api.py
@@ -11,7 +11,7 @@
from llama_stack_client import LlamaStackClient


-class LlamaStackEvaluation:
+class LlamaStackApi:
    def __init__(self):
        self.client = LlamaStackClient(
            base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
@@ -22,14 +22,6 @@ def __init__(self):
            },
        )

-    def list_scoring_functions(self):
-        """List all available scoring functions"""
-        return self.client.scoring_functions.list()
-
-    def list_models(self):
-        """List all available judge models"""
-        return self.client.models.list()
-
    def run_scoring(
        self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
    ):
@@ -39,3 +31,6 @@ def run_scoring(
        return self.client.scoring.score(
            input_rows=[row], scoring_functions=scoring_params
        )
+
+
+llama_stack_api = LlamaStackApi()
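Note (not part of the diff): with the rename to `LlamaStackApi` and the module-level `llama_stack_api` singleton, pages no longer go through per-API wrapper methods; they import the shared instance and reach through `.client` directly. A hedged usage sketch, mirroring the calls removed above:

```
from modules.api import llama_stack_api

# Replaces the removed list_scoring_functions() / list_models() helpers
scoring_functions = llama_stack_api.client.scoring_functions.list()
models = llama_stack_api.client.models.list()
```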
11 changes: 11 additions & 0 deletions llama_stack/distribution/ui/modules/utils.py
@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

+import base64
import os

import pandas as pd
@@ -29,3 +30,13 @@ def process_dataset(file):
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
+
+
+def data_url_from_file(file) -> str:
+    file_content = file.getvalue()
+    base64_content = base64.b64encode(file_content).decode("utf-8")
+    mime_type = file.type
+
+    data_url = f"data:{mime_type};base64,{base64_content}"
+
+    return data_url
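Note (not part of the diff): `data_url_from_file` inlines an uploaded file as an RFC 2397 data URL. A small self-contained example using a stand-in for Streamlit's `UploadedFile` (the fake class is purely illustrative; it only needs `getvalue()` and a MIME `type`):

```
import io

from modules.utils import data_url_from_file


class FakeUpload(io.BytesIO):
    # Minimal stand-in for streamlit's UploadedFile
    type = "text/csv"


print(data_url_from_file(FakeUpload(b"id,answer\n1,yes\n")))
# -> data:text/csv;base64,aWQsYW5zd2VyCjEseWVzCg==
```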
5 changes: 5 additions & 0 deletions llama_stack/distribution/ui/page/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
19 changes: 19 additions & 0 deletions llama_stack/distribution/ui/page/distribution/datasets.py
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api


def datasets():
    st.header("Datasets")

    datasets_info = {
        d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
    }

    selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
    st.json(datasets_info[selected_dataset], expanded=True)
22 changes: 22 additions & 0 deletions llama_stack/distribution/ui/page/distribution/eval_tasks.py
@@ -0,0 +1,22 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api


def eval_tasks():
    # Eval Tasks Section
    st.header("Eval Tasks")

    eval_tasks_info = {
        d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
    }

    selected_eval_task = st.selectbox(
        "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
    )
    st.json(eval_tasks_info[selected_eval_task], expanded=True)
23 changes: 23 additions & 0 deletions llama_stack/distribution/ui/page/distribution/memory_banks.py
@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api


def memory_banks():
    st.header("Memory Banks")
    memory_banks_info = {
        m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
    }

    if len(memory_banks_info) > 0:
        selected_memory_bank = st.selectbox(
            "Select a memory bank", list(memory_banks_info.keys())
        )
        st.json(memory_banks_info[selected_memory_bank])
    else:
        st.info("No memory banks found")
19 changes: 19 additions & 0 deletions llama_stack/distribution/ui/page/distribution/models.py
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api


def models():
    # Models Section
    st.header("Models")
    models_info = {
        m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()
    }

    selected_model = st.selectbox("Select a model", list(models_info.keys()))
    st.json(models_info[selected_model])
20 changes: 20 additions & 0 deletions llama_stack/distribution/ui/page/distribution/providers.py
@@ -0,0 +1,20 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import streamlit as st
from modules.api import llama_stack_api


def providers():
    st.header("🔍 API Providers")
    apis_providers_info = llama_stack_api.client.providers.list()
    # selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
    for api in apis_providers_info.keys():
        st.markdown(f"###### {api}")
        st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)


providers()
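Note (not part of the diff): the loop above assumes `client.providers.list()` returns a mapping from API name to a list of provider records; a hedged sketch of consuming it (the `provider_id` attribute is an assumption about the record type):

```
from modules.api import llama_stack_api

# Assumed shape: {"inference": [provider_info, ...], "safety": [...], ...}
for api, providers_list in llama_stack_api.client.providers.list().items():
    print(api, [p.provider_id for p in providers_list])
```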