diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md
index a91883067..2cc352c52 100644
--- a/llama_stack/distribution/ui/README.md
+++ b/llama_stack/distribution/ui/README.md
@@ -2,6 +2,12 @@
 [!NOTE] This is a work in progress.
 
+## Prerequisites
+- Start the Llama Stack server:
+```
+llama stack run
+```
+
 ## Running Streamlit App
 
 ```
diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py
index 763b126a7..87a80e235 100644
--- a/llama_stack/distribution/ui/app.py
+++ b/llama_stack/distribution/ui/app.py
@@ -3,170 +3,54 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-import json
-
-import pandas as pd
-
 import streamlit as st
-from modules.api import LlamaStackEvaluation
-
-from modules.utils import process_dataset
-
-EVALUATION_API = LlamaStackEvaluation()
-
 
 def main():
-    # Add collapsible sidebar
-    with st.sidebar:
-        # Add collapse button
-        if "sidebar_state" not in st.session_state:
-            st.session_state.sidebar_state = True
-
-        if st.session_state.sidebar_state:
-            st.title("Navigation")
-            page = st.radio(
-                "Select a Page",
-                ["Application Evaluation"],
-                index=0,
-            )
-        else:
-            page = "Application Evaluation"  # Default page when sidebar is collapsed
-
-    # Main content area
-    st.title("🦙 Llama Stack Evaluations")
-
-    if page == "Application Evaluation":
-        application_evaluation_page()
-
-
-def application_evaluation_page():
-    # File uploader
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
-
-    if uploaded_file is None:
-        st.error("No file uploaded")
-        return
-
-    # Process uploaded file
-    df = process_dataset(uploaded_file)
-    if df is None:
-        st.error("Error processing file")
-        return
-
-    # Display dataset information
-    st.success("Dataset loaded successfully!")
-
-    # Display dataframe preview
-    st.subheader("Dataset Preview")
-    st.dataframe(df)
-
-    # Select Scoring Functions to Run Evaluation On
-    st.subheader("Select Scoring Functions")
-    scoring_functions = EVALUATION_API.list_scoring_functions()
-    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
-    scoring_functions_names = list(scoring_functions.keys())
-    selected_scoring_functions = st.multiselect(
-        "Choose one or more scoring functions",
-        options=scoring_functions_names,
-        help="Choose one or more scoring functions.",
+    # Evaluation pages
+    application_evaluation_page = st.Page(
+        "page/evaluations/app_eval.py",
+        title="Evaluations (Scoring)",
+        icon="📊",
+        default=False,
+    )
+    native_evaluation_page = st.Page(
+        "page/evaluations/native_eval.py",
+        title="Evaluations (Generation + Scoring)",
+        icon="📊",
+        default=False,
     )
 
-    available_models = EVALUATION_API.list_models()
-    available_models = [m.identifier for m in available_models]
-
-    scoring_params = {}
-    if selected_scoring_functions:
-        st.write("Selected:")
-        for scoring_fn_id in selected_scoring_functions:
-            scoring_fn = scoring_functions[scoring_fn_id]
-            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
-            new_params = None
-            if scoring_fn.params:
-                new_params = {}
-                for param_name, param_value in scoring_fn.params.to_dict().items():
-                    if param_name == "type":
-                        new_params[param_name] = param_value
-                        continue
-
-                    if param_name == "judge_model":
-                        value = st.selectbox(
-                            f"Select **{param_name}** for {scoring_fn_id}",
-                            options=available_models,
-                            index=0,
-                            key=f"{scoring_fn_id}_{param_name}",
-                        )
-                        new_params[param_name] = value
-                    else:
-                        value = st.text_area(
-                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
-                            value=json.dumps(param_value, indent=2),
-                            height=80,
-                        )
-                        try:
-                            new_params[param_name] = json.loads(value)
-                        except json.JSONDecodeError:
-                            st.error(
-                                f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
-                            )
-
-            st.json(new_params)
-            scoring_params[scoring_fn_id] = new_params
-
-    # Add run evaluation button & slider
-    total_rows = len(df)
-    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
-
-    if st.button("Run Evaluation"):
-        progress_text = "Running evaluation..."
-        progress_bar = st.progress(0, text=progress_text)
-        rows = df.to_dict(orient="records")
-        if num_rows < total_rows:
-            rows = rows[:num_rows]
-
-        # Create separate containers for progress text and results
-        progress_text_container = st.empty()
-        results_container = st.empty()
-        output_res = {}
-        for i, r in enumerate(rows):
-            # Update progress
-            progress = i / len(rows)
-            progress_bar.progress(progress, text=progress_text)
-
-            # Run evaluation for current row
-            score_res = EVALUATION_API.run_scoring(
-                r,
-                scoring_function_ids=selected_scoring_functions,
-                scoring_params=scoring_params,
-            )
-
-            for k in r.keys():
-                if k not in output_res:
-                    output_res[k] = []
-                output_res[k].append(r[k])
-
-            for fn_id in selected_scoring_functions:
-                if fn_id not in output_res:
-                    output_res[fn_id] = []
-                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
-
-            # Display current row results using separate containers
-            progress_text_container.write(
-                f"Expand to see current processed result ({i+1}/{len(rows)})"
-            )
-            results_container.json(
-                score_res.to_json(),
-                expanded=2,
-            )
+    # Playground pages
+    chat_page = st.Page(
+        "page/playground/chat.py", title="Chat", icon="💬", default=True
+    )
+    rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False)
 
-        progress_bar.progress(1.0, text="Evaluation complete!")
+    # Distribution pages
+    resources_page = st.Page(
+        "page/distribution/resources.py", title="Resources", icon="🔍", default=False
+    )
+    provider_page = st.Page(
+        "page/distribution/providers.py",
+        title="API Providers",
+        icon="🔍",
+        default=False,
+    )
 
-        # Display results in dataframe
-        if output_res:
-            output_df = pd.DataFrame(output_res)
-            st.subheader("Evaluation Results")
-            st.dataframe(output_df)
+    pg = st.navigation(
+        {
+            "Playground": [
+                chat_page,
+                rag_page,
+                application_evaluation_page,
+                native_evaluation_page,
+            ],
+            "Inspect": [provider_page, resources_page],
+        },
+        expanded=False,
+    )
+    pg.run()
 
 
 if __name__ == "__main__":
     main()
diff --git a/llama_stack/distribution/ui/modules/__init__.py b/llama_stack/distribution/ui/modules/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/modules/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
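For context before the `modules/api.py` change below: every page script talks to the server through one shared client rather than constructing its own. A minimal sketch of that wiring, assuming a Llama Stack server is already running and reachable (the environment variable and default port mirror the diff; nothing else is guaranteed):

```python
# Sketch: the shared-client pattern the UI relies on.
import os

from llama_stack_client import LlamaStackClient

# Same construction as LlamaStackApi.__init__ in the diff below.
client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
)

# Any page can then query registered resources, e.g. list model identifiers:
for model in client.models.list():
    print(model.identifier)
```

Exposing a module-level `llama_stack_api` singleton, as the diff does, lets each `st.Page` script import the same connection instead of re-creating one per page.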
diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py
index a8d8bf37d..d3852caee 100644
--- a/llama_stack/distribution/ui/modules/api.py
+++ b/llama_stack/distribution/ui/modules/api.py
@@ -11,7 +11,7 @@
 from llama_stack_client import LlamaStackClient
 
 
-class LlamaStackEvaluation:
+class LlamaStackApi:
     def __init__(self):
         self.client = LlamaStackClient(
             base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:5000"),
@@ -22,14 +22,6 @@ def __init__(self):
             },
         )
 
-    def list_scoring_functions(self):
-        """List all available scoring functions"""
-        return self.client.scoring_functions.list()
-
-    def list_models(self):
-        """List all available judge models"""
-        return self.client.models.list()
-
     def run_scoring(
         self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
     ):
@@ -39,3 +31,6 @@ def run_scoring(
         return self.client.scoring.score(
             input_rows=[row], scoring_functions=scoring_params
         )
+
+
+llama_stack_api = LlamaStackApi()
diff --git a/llama_stack/distribution/ui/modules/utils.py b/llama_stack/distribution/ui/modules/utils.py
index f8da2e54e..67cce98fa 100644
--- a/llama_stack/distribution/ui/modules/utils.py
+++ b/llama_stack/distribution/ui/modules/utils.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import base64
 import os
 
 import pandas as pd
@@ -29,3 +30,13 @@ def process_dataset(file):
     except Exception as e:
         st.error(f"Error processing file: {str(e)}")
         return None
+
+
+def data_url_from_file(file) -> str:
+    file_content = file.getvalue()
+    base64_content = base64.b64encode(file_content).decode("utf-8")
+    mime_type = file.type
+
+    data_url = f"data:{mime_type};base64,{base64_content}"
+
+    return data_url
diff --git a/llama_stack/distribution/ui/page/__init__.py b/llama_stack/distribution/ui/page/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/page/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/distribution/ui/page/distribution/datasets.py b/llama_stack/distribution/ui/page/distribution/datasets.py
new file mode 100644
index 000000000..44e314cde
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/datasets.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def datasets():
+    st.header("Datasets")
+
+    datasets_info = {
+        d.identifier: d.to_dict() for d in llama_stack_api.client.datasets.list()
+    }
+
+    selected_dataset = st.selectbox("Select a dataset", list(datasets_info.keys()))
+    st.json(datasets_info[selected_dataset], expanded=True)
diff --git a/llama_stack/distribution/ui/page/distribution/eval_tasks.py b/llama_stack/distribution/ui/page/distribution/eval_tasks.py
new file mode 100644
index 000000000..4957fb178
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/eval_tasks.py
@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def eval_tasks():
+    # Eval Tasks Section
+    st.header("Eval Tasks")
+
+    eval_tasks_info = {
+        d.identifier: d.to_dict() for d in llama_stack_api.client.eval_tasks.list()
+    }
+
+    selected_eval_task = st.selectbox(
+        "Select an eval task", list(eval_tasks_info.keys()), key="eval_task_inspect"
+    )
+    st.json(eval_tasks_info[selected_eval_task], expanded=True)
diff --git a/llama_stack/distribution/ui/page/distribution/memory_banks.py b/llama_stack/distribution/ui/page/distribution/memory_banks.py
new file mode 100644
index 000000000..f28010bf2
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/memory_banks.py
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def memory_banks():
+    st.header("Memory Banks")
+    memory_banks_info = {
+        m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
+    }
+
+    if len(memory_banks_info) > 0:
+        selected_memory_bank = st.selectbox(
+            "Select a memory bank", list(memory_banks_info.keys())
+        )
+        st.json(memory_banks_info[selected_memory_bank])
+    else:
+        st.info("No memory banks found")
diff --git a/llama_stack/distribution/ui/page/distribution/models.py b/llama_stack/distribution/ui/page/distribution/models.py
new file mode 100644
index 000000000..70b166f2e
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/models.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def models():
+    # Models Section
+    st.header("Models")
+    models_info = {
+        m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()
+    }
+
+    selected_model = st.selectbox("Select a model", list(models_info.keys()))
+    st.json(models_info[selected_model])
diff --git a/llama_stack/distribution/ui/page/distribution/providers.py b/llama_stack/distribution/ui/page/distribution/providers.py
new file mode 100644
index 000000000..69f6bd771
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/providers.py
@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def providers():
+    st.header("🔍 API Providers")
+    apis_providers_info = llama_stack_api.client.providers.list()
+    # selected_api = st.selectbox("Select an API", list(apis_providers_info.keys()))
+    for api in apis_providers_info.keys():
+        st.markdown(f"###### {api}")
+        st.dataframe([p.to_dict() for p in apis_providers_info[api]], width=500)
+
+
+providers()
diff --git a/llama_stack/distribution/ui/page/distribution/resources.py b/llama_stack/distribution/ui/page/distribution/resources.py
new file mode 100644
index 000000000..6b3ea0e3a
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/resources.py
@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from page.distribution.datasets import datasets
+from page.distribution.eval_tasks import eval_tasks
+from page.distribution.memory_banks import memory_banks
+from page.distribution.models import models
+from page.distribution.scoring_functions import scoring_functions
+from page.distribution.shields import shields
+
+from streamlit_option_menu import option_menu
+
+
+def resources_page():
+    options = [
+        "Models",
+        "Memory Banks",
+        "Shields",
+        "Scoring Functions",
+        "Datasets",
+        "Eval Tasks",
+    ]
+    icons = ["magic", "memory", "shield", "file-bar-graph", "database", "list-task"]
+    selected_resource = option_menu(
+        None,
+        options,
+        icons=icons,
+        orientation="horizontal",
+        styles={
+            "nav-link": {
+                "font-size": "12px",
+            },
+        },
+    )
+    if selected_resource == "Eval Tasks":
+        eval_tasks()
+    elif selected_resource == "Memory Banks":
+        memory_banks()
+    elif selected_resource == "Datasets":
+        datasets()
+    elif selected_resource == "Models":
+        models()
+    elif selected_resource == "Scoring Functions":
+        scoring_functions()
+    elif selected_resource == "Shields":
+        shields()
+
+
+resources_page()
diff --git a/llama_stack/distribution/ui/page/distribution/scoring_functions.py b/llama_stack/distribution/ui/page/distribution/scoring_functions.py
new file mode 100644
index 000000000..581ae0db7
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/scoring_functions.py
@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def scoring_functions():
+    st.header("Scoring Functions")
+
+    scoring_functions_info = {
+        s.identifier: s.to_dict()
+        for s in llama_stack_api.client.scoring_functions.list()
+    }
+
+    selected_scoring_function = st.selectbox(
+        "Select a scoring function", list(scoring_functions_info.keys())
+    )
+    st.json(scoring_functions_info[selected_scoring_function], expanded=True)
diff --git a/llama_stack/distribution/ui/page/distribution/shields.py b/llama_stack/distribution/ui/page/distribution/shields.py
new file mode 100644
index 000000000..18bbfc008
--- /dev/null
+++ b/llama_stack/distribution/ui/page/distribution/shields.py
@@ -0,0 +1,20 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def shields():
+    # Shields Section
+    st.header("Shields")
+
+    shields_info = {
+        s.identifier: s.to_dict() for s in llama_stack_api.client.shields.list()
+    }
+
+    selected_shield = st.selectbox("Select a shield", list(shields_info.keys()))
+    st.json(shields_info[selected_shield])
diff --git a/llama_stack/distribution/ui/page/evaluations/__init__.py b/llama_stack/distribution/ui/page/evaluations/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/page/evaluations/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
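The `app_eval.py` page that follows drives everything toward a single scoring call on the shared client. A sketch of that call shape, assuming a hypothetical dataset row and an illustrative scoring-function id (`basic::subset_of` is a placeholder; use whatever `scoring_functions.list()` actually returns on your server):

```python
# Sketch: one-row scoring, mirroring what the page sends per dataframe row.
from modules.api import llama_stack_api

row = {  # hypothetical column names
    "input_query": "What is Llama Stack?",
    "generated_answer": "A set of composable APIs for GenAI apps.",
    "expected_answer": "Llama Stack defines APIs for building GenAI applications.",
}

score_res = llama_stack_api.run_scoring(
    row,
    scoring_function_ids=["basic::subset_of"],
    scoring_params={"basic::subset_of": None},  # None -> use the function's registered params
)
print(score_res.results["basic::subset_of"].score_rows[0])
```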
diff --git a/llama_stack/distribution/ui/page/evaluations/app_eval.py b/llama_stack/distribution/ui/page/evaluations/app_eval.py
new file mode 100644
index 000000000..5ec47ed45
--- /dev/null
+++ b/llama_stack/distribution/ui/page/evaluations/app_eval.py
@@ -0,0 +1,148 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+import pandas as pd
+import streamlit as st
+
+from modules.api import llama_stack_api
+from modules.utils import process_dataset
+
+
+def application_evaluation_page():
+    st.set_page_config(page_title="Evaluations (Scoring)", page_icon="🦙")
+    st.title("📊 Evaluations (Scoring)")
+
+    # File uploader
+    uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx", "xls"])
+
+    if uploaded_file is None:
+        st.error("No file uploaded")
+        return
+
+    # Process uploaded file
+    df = process_dataset(uploaded_file)
+    if df is None:
+        st.error("Error processing file")
+        return
+
+    # Display dataset information
+    st.success("Dataset loaded successfully!")
+
+    # Display dataframe preview
+    st.subheader("Dataset Preview")
+    st.dataframe(df)
+
+    # Select Scoring Functions to Run Evaluation On
+    st.subheader("Select Scoring Functions")
+    scoring_functions = llama_stack_api.client.scoring_functions.list()
+    scoring_functions = {sf.identifier: sf for sf in scoring_functions}
+    scoring_functions_names = list(scoring_functions.keys())
+    selected_scoring_functions = st.multiselect(
+        "Choose one or more scoring functions",
+        options=scoring_functions_names,
+        help="Choose one or more scoring functions.",
+    )
+
+    available_models = llama_stack_api.client.models.list()
+    available_models = [m.identifier for m in available_models]
+
+    scoring_params = {}
+    if selected_scoring_functions:
+        st.write("Selected:")
+        for scoring_fn_id in selected_scoring_functions:
+            scoring_fn = scoring_functions[scoring_fn_id]
+            st.write(f"- **{scoring_fn_id}**: {scoring_fn.description}")
+            new_params = None
+            if scoring_fn.params:
+                new_params = {}
+                for param_name, param_value in scoring_fn.params.to_dict().items():
+                    if param_name == "type":
+                        new_params[param_name] = param_value
+                        continue
+
+                    if param_name == "judge_model":
+                        value = st.selectbox(
+                            f"Select **{param_name}** for {scoring_fn_id}",
+                            options=available_models,
+                            index=0,
+                            key=f"{scoring_fn_id}_{param_name}",
+                        )
+                        new_params[param_name] = value
+                    else:
+                        value = st.text_area(
+                            f"Enter value for **{param_name}** in {scoring_fn_id} in valid JSON format",
+                            value=json.dumps(param_value, indent=2),
+                            height=80,
+                        )
+                        try:
+                            new_params[param_name] = json.loads(value)
+                        except json.JSONDecodeError:
+                            st.error(
+                                f"Invalid JSON for **{param_name}** in {scoring_fn_id}"
+                            )
+
+            st.json(new_params)
+            scoring_params[scoring_fn_id] = new_params
+
+    # Add run evaluation button & slider
+    total_rows = len(df)
+    num_rows = st.slider("Number of rows to evaluate", 1, total_rows, total_rows)
+
+    if st.button("Run Evaluation"):
+        progress_text = "Running evaluation..."
+        progress_bar = st.progress(0, text=progress_text)
+        rows = df.to_dict(orient="records")
+        if num_rows < total_rows:
+            rows = rows[:num_rows]
+
+        # Create separate containers for progress text and results
+        progress_text_container = st.empty()
+        results_container = st.empty()
+        output_res = {}
+        for i, r in enumerate(rows):
+            # Update progress
+            progress = i / len(rows)
+            progress_bar.progress(progress, text=progress_text)
+
+            # Run evaluation for current row
+            score_res = llama_stack_api.run_scoring(
+                r,
+                scoring_function_ids=selected_scoring_functions,
+                scoring_params=scoring_params,
+            )
+
+            for k in r.keys():
+                if k not in output_res:
+                    output_res[k] = []
+                output_res[k].append(r[k])
+
+            for fn_id in selected_scoring_functions:
+                if fn_id not in output_res:
+                    output_res[fn_id] = []
+                output_res[fn_id].append(score_res.results[fn_id].score_rows[0])
+
+            # Display current row results using separate containers
+            progress_text_container.write(
+                f"Expand to see current processed result ({i+1}/{len(rows)})"
+            )
+            results_container.json(
+                score_res.to_json(),
+                expanded=2,
+            )
+
+        progress_bar.progress(1.0, text="Evaluation complete!")
+
+        # Display results in dataframe
+        if output_res:
+            output_df = pd.DataFrame(output_res)
+            st.subheader("Evaluation Results")
+            st.dataframe(output_df)
+
+
+application_evaluation_page()
diff --git a/llama_stack/distribution/ui/page/evaluations/native_eval.py b/llama_stack/distribution/ui/page/evaluations/native_eval.py
new file mode 100644
index 000000000..b8cc8bfa6
--- /dev/null
+++ b/llama_stack/distribution/ui/page/evaluations/native_eval.py
@@ -0,0 +1,257 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+
+import pandas as pd
+
+import streamlit as st
+
+from modules.api import llama_stack_api
+
+
+def select_eval_task_1():
+    # Select Eval Tasks
+    st.subheader("1. Choose An Eval Task")
+    eval_tasks = llama_stack_api.client.eval_tasks.list()
+    eval_tasks = {et.identifier: et for et in eval_tasks}
+    eval_tasks_names = list(eval_tasks.keys())
+    selected_eval_task = st.selectbox(
+        "Choose an eval task.",
+        options=eval_tasks_names,
+        help="Choose an eval task. Each eval task is parameterized by a dataset and a list of scoring functions.",
+    )
+    with st.expander("View Eval Task"):
+        st.json(eval_tasks[selected_eval_task], expanded=True)
+
+    st.session_state["selected_eval_task"] = selected_eval_task
+    st.session_state["eval_tasks"] = eval_tasks
+    if st.button("Confirm", key="confirm_1"):
+        st.session_state["selected_eval_task_1_next"] = True
+
+
+def define_eval_candidate_2():
+    if not st.session_state.get("selected_eval_task_1_next", None):
+        return
+
+    st.subheader("2. Define Eval Candidate")
+    st.info(
+        """
+        Define the configurations for the evaluation candidate model or agent used for generation.
+        Select "model" to run generation with the inference API, or "agent" to run generation with the agent API by specifying an AgentConfig.
+        """
+    )
+    with st.expander("Define Eval Candidate", expanded=True):
+        # Define Eval Candidate
+        candidate_type = st.radio("Candidate Type", ["model", "agent"])
+
+        available_models = llama_stack_api.client.models.list()
+        available_models = [model.identifier for model in available_models]
+        selected_model = st.selectbox(
+            "Choose a model",
+            available_models,
+            index=0,
+        )
+
+        # Sampling Parameters
+        st.markdown("##### Sampling Parameters")
+        strategy = st.selectbox(
+            "Strategy",
+            ["greedy", "top_p", "top_k"],
+            index=0,
+        )
+        temperature = st.slider(
+            "Temperature",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.0,
+            step=0.1,
+            help="Controls the randomness of the response. Higher values make the output more creative and unexpected; lower values make it more conservative and predictable",
+        )
+        top_p = st.slider(
+            "Top P",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.95,
+            step=0.1,
+        )
+        max_tokens = st.slider(
+            "Max Tokens",
+            min_value=0,
+            max_value=4096,
+            value=512,
+            step=1,
+            help="The maximum number of tokens to generate",
+        )
+        repetition_penalty = st.slider(
+            "Repetition Penalty",
+            min_value=1.0,
+            max_value=2.0,
+            value=1.0,
+            step=0.1,
+            help="Controls the likelihood of generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty; 2 strongly discourages the model from repeating words or phrases.",
+        )
+        if candidate_type == "model":
+            eval_candidate = {
+                "type": "model",
+                "model": selected_model,
+                "sampling_params": {
+                    "strategy": strategy,
+                    "temperature": temperature,
+                    "top_p": top_p,
+                    "max_tokens": max_tokens,
+                    "repetition_penalty": repetition_penalty,
+                },
+            }
+        elif candidate_type == "agent":
+            system_prompt = st.text_area(
+                "System Prompt",
+                value="You are a helpful AI assistant.",
+                help="Initial instructions given to the AI to set its behavior and context",
+            )
+            tools_json = st.text_area(
+                "Tools Configuration (JSON)",
+                value=json.dumps(
+                    [
+                        {
+                            "type": "brave_search",
+                            "engine": "brave",
+                            "api_key": "ENTER_BRAVE_API_KEY_HERE",
+                        }
+                    ]
+                ),
+                help="Enter tool configurations in JSON format. Each tool should have a name, description, and parameters.",
+                height=200,
+            )
+            try:
+                tools = json.loads(tools_json)
+            except json.JSONDecodeError:
+                st.error("Invalid JSON format for tools configuration")
+                tools = []
+            eval_candidate = {
+                "type": "agent",
+                "config": {
+                    "model": selected_model,
+                    "instructions": system_prompt,
+                    "tools": tools,
+                    "tool_choice": "auto",
+                    "tool_prompt_format": "json",
+                    "input_shields": [],
+                    "output_shields": [],
+                    "enable_session_persistence": False,
+                },
+            }
+        st.session_state["eval_candidate"] = eval_candidate
+
+    if st.button("Confirm", key="confirm_2"):
+        st.session_state["selected_eval_candidate_2_next"] = True
+
+
+def run_evaluation_3():
+    if not st.session_state.get("selected_eval_candidate_2_next", None):
+        return
+
+    st.subheader("3. Run Evaluation")
+    # Add info box to explain configurations being used
+    st.info(
+        """
+        Review the configurations that will be used for this evaluation run, make any necessary changes, and then click the "Run Evaluation" button.
+        """
+    )
+    selected_eval_task = st.session_state["selected_eval_task"]
+    eval_tasks = st.session_state["eval_tasks"]
+    eval_candidate = st.session_state["eval_candidate"]
+
+    dataset_id = eval_tasks[selected_eval_task].dataset_id
+    rows = llama_stack_api.client.datasetio.get_rows_paginated(
+        dataset_id=dataset_id,
+        rows_in_page=-1,
+    )
+    total_rows = len(rows.rows)
+    # Add number of examples control
+    num_rows = st.number_input(
+        "Number of Examples to Evaluate",
+        min_value=1,
+        max_value=total_rows,
+        value=5,
+        help="Number of examples from the dataset to evaluate.",
+    )
+
+    eval_task_config = {
+        "type": "benchmark",
+        "eval_candidate": eval_candidate,
+        "scoring_params": {},
+    }
+
+    with st.expander("View Evaluation Task", expanded=True):
+        st.json(eval_tasks[selected_eval_task], expanded=True)
+    with st.expander("View Evaluation Task Configuration", expanded=True):
+        st.json(eval_task_config, expanded=True)
+
+    # Add run button and handle evaluation
+    if st.button("Run Evaluation"):
+        progress_text = "Running evaluation..."
+        progress_bar = st.progress(0, text=progress_text)
+        rows = rows.rows
+        if num_rows < total_rows:
+            rows = rows[:num_rows]
+
+        # Create separate containers for progress text and results
+        progress_text_container = st.empty()
+        results_container = st.empty()
+        output_res = {}
+        for i, r in enumerate(rows):
+            # Update progress
+            progress = i / len(rows)
+            progress_bar.progress(progress, text=progress_text)
+            # Run evaluation for current row
+            eval_res = llama_stack_api.client.eval.evaluate_rows(
+                task_id=selected_eval_task,
+                input_rows=[r],
+                scoring_functions=eval_tasks[selected_eval_task].scoring_functions,
+                task_config=eval_task_config,
+            )
+
+            for k in r.keys():
+                if k not in output_res:
+                    output_res[k] = []
+                output_res[k].append(r[k])
+
+            for k in eval_res.generations[0].keys():
+                if k not in output_res:
+                    output_res[k] = []
+                output_res[k].append(eval_res.generations[0][k])
+
+            for scoring_fn in eval_tasks[selected_eval_task].scoring_functions:
+                if scoring_fn not in output_res:
+                    output_res[scoring_fn] = []
+                output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0])
+
+            progress_text_container.write(
+                f"Expand to see current processed result ({i+1}/{len(rows)})"
+            )
+            results_container.json(eval_res, expanded=2)
+
+        progress_bar.progress(1.0, text="Evaluation complete!")
+        # Display results in dataframe
+        if output_res:
+            output_df = pd.DataFrame(output_res)
+            st.subheader("Evaluation Results")
+            st.dataframe(output_df)
+
+
+def native_evaluation_page():
+    st.set_page_config(page_title="Evaluations (Generation + Scoring)", page_icon="🦙")
+    st.title("📊 Evaluations (Generation + Scoring)")
+
+    select_eval_task_1()
+    define_eval_candidate_2()
+    run_evaluation_3()
+
+
+native_evaluation_page()
diff --git a/llama_stack/distribution/ui/page/playground/__init__.py b/llama_stack/distribution/ui/page/playground/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/distribution/ui/page/playground/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/distribution/ui/page/playground/chat.py
new file mode 100644
index 000000000..157922d3b
--- /dev/null
+++ b/llama_stack/distribution/ui/page/playground/chat.py
@@ -0,0 +1,123 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+# Sidebar configurations
+with st.sidebar:
+    st.header("Configuration")
+    available_models = llama_stack_api.client.models.list()
+    available_models = [model.identifier for model in available_models]
+    selected_model = st.selectbox(
+        "Choose a model",
+        available_models,
+        index=0,
+    )
+
+    temperature = st.slider(
+        "Temperature",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.0,
+        step=0.1,
+        help="Controls the randomness of the response. Higher values make the output more creative and unexpected; lower values make it more conservative and predictable",
+    )
+
+    top_p = st.slider(
+        "Top P",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.95,
+        step=0.1,
+    )
+
+    max_tokens = st.slider(
+        "Max Tokens",
+        min_value=0,
+        max_value=4096,
+        value=512,
+        step=1,
+        help="The maximum number of tokens to generate",
+    )
+
+    repetition_penalty = st.slider(
+        "Repetition Penalty",
+        min_value=1.0,
+        max_value=2.0,
+        value=1.0,
+        step=0.1,
+        help="Controls the likelihood of generating the same word or phrase multiple times in the same sentence or paragraph. 1 implies no penalty; 2 strongly discourages the model from repeating words or phrases.",
+    )
+
+    stream = st.checkbox("Stream", value=True)
+    system_prompt = st.text_area(
+        "System Prompt",
+        value="You are a helpful AI assistant.",
+        help="Initial instructions given to the AI to set its behavior and context",
+    )
+
+    # Add clear chat button to sidebar
+    if st.button("Clear Chat", use_container_width=True):
+        st.session_state.messages = []
+        st.rerun()
+
+
+# Main chat interface
+st.title("🦙 Chat")
+
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+# Chat input
+if prompt := st.chat_input("Example: What is Llama Stack?"):
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Display user message
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    # Display assistant response
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+
+        response = llama_stack_api.client.inference.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt},
+            ],
+            model_id=selected_model,
+            stream=stream,
+            sampling_params={
+                "temperature": temperature,
+                "top_p": top_p,
+                "max_tokens": max_tokens,
+                "repetition_penalty": repetition_penalty,
+            },
+        )
+
+        if stream:
+            for chunk in response:
+                if chunk.event.event_type == "progress":
+                    full_response += chunk.event.delta
+                    message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(full_response)
+        else:
+            # Store the completion text, not the response object,
+            # so the chat history renders correctly on rerun
+            full_response = response.completion_message.content
+            message_placeholder.markdown(full_response)
+
+        st.session_state.messages.append(
+            {"role": "assistant", "content": full_response}
+        )
diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py
new file mode 100644
index 000000000..ffcaf1afd
--- /dev/null
+++ b/llama_stack/distribution/ui/page/playground/rag.py
@@ -0,0 +1,188 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from llama_stack_client.lib.agents.agent import Agent
+from llama_stack_client.lib.agents.event_logger import EventLogger
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.types.memory_insert_params import Document
+
+from modules.api import llama_stack_api
+from modules.utils import data_url_from_file
+
+
+def rag_chat_page():
+    st.title("🦙 RAG")
+
+    with st.sidebar:
+        # File/Directory Upload Section
+        st.subheader("Upload Documents")
+        uploaded_files = st.file_uploader(
+            "Upload file(s) or directory",
+            accept_multiple_files=True,
+            type=["txt", "pdf", "doc", "docx"],  # Add more file types as needed
+        )
+        # Process uploaded files
+        if uploaded_files:
+            st.success(f"Successfully uploaded {len(uploaded_files)} files")
+            # Add memory bank name input field
+            memory_bank_name = st.text_input(
+                "Memory Bank Name",
+                value="rag_bank",
+                help="Enter a unique identifier for this memory bank",
+            )
+            if st.button("Create Memory Bank"):
+                documents = [
+                    Document(
+                        document_id=uploaded_file.name,
+                        content=data_url_from_file(uploaded_file),
+                    )
+                    for uploaded_file in uploaded_files
+                ]
+
+                providers = llama_stack_api.client.providers.list()
+                llama_stack_api.client.memory_banks.register(
+                    memory_bank_id=memory_bank_name,  # Use the user-provided name
+                    params={
+                        "embedding_model": "all-MiniLM-L6-v2",
+                        "chunk_size_in_tokens": 512,
+                        "overlap_size_in_tokens": 64,
+                    },
+                    provider_id=providers["memory"][0].provider_id,
+                )
+
+                # insert documents using the custom bank name
+                llama_stack_api.client.memory.insert(
+                    bank_id=memory_bank_name,  # Use the user-provided name
+                    documents=documents,
+                )
+                st.success("Memory bank created successfully!")
+
+        st.subheader("Configure Agent")
+        # select memory banks
+        memory_banks = llama_stack_api.client.memory_banks.list()
+        memory_banks = [bank.identifier for bank in memory_banks]
+        selected_memory_banks = st.multiselect(
+            "Select Memory Banks",
+            memory_banks,
+        )
+        memory_bank_configs = [
+            {"bank_id": bank_id, "type": "vector"} for bank_id in selected_memory_banks
+        ]
+
+        available_models = llama_stack_api.client.models.list()
+        available_models = [model.identifier for model in available_models]
+        selected_model = st.selectbox(
+            "Choose a model",
+            available_models,
+            index=0,
+        )
+        system_prompt = st.text_area(
+            "System Prompt",
+            value="You are a helpful assistant.",
+            help="Initial instructions given to the AI to set its behavior and context",
+        )
+        temperature = st.slider(
+            "Temperature",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.0,
+            step=0.1,
+            help="Controls the randomness of the response. Higher values make the output more creative and unexpected; lower values make it more conservative and predictable",
+        )
+
+        top_p = st.slider(
+            "Top P",
+            min_value=0.0,
+            max_value=1.0,
+            value=0.95,
+            step=0.1,
+        )
+
+        # Add clear chat button to sidebar
+        if st.button("Clear Chat", use_container_width=True):
+            st.session_state.messages = []
+            st.rerun()
+
+    # Chat Interface
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    # Display chat history
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    agent_config = AgentConfig(
+        model=selected_model,
+        instructions=system_prompt,
+        sampling_params={
+            "strategy": "greedy",
+            "temperature": temperature,
+            "top_p": top_p,
+        },
+        tools=[
+            {
+                "type": "memory",
+                "memory_bank_configs": memory_bank_configs,
+                "query_generator_config": {"type": "default", "sep": " "},
+                "max_tokens_in_context": 4096,
+                "max_chunks": 10,
+            }
+        ],
+        tool_choice="auto",
+        tool_prompt_format="json",
+        input_shields=[],
+        output_shields=[],
+        enable_session_persistence=False,
+    )
+
+    agent = Agent(llama_stack_api.client, agent_config)
+    session_id = agent.create_session("rag-session")
+
+    # Chat input
+    if prompt := st.chat_input("Ask a question about your documents"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
+        # Display user message
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        response = agent.create_turn(
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt,
+                }
+            ],
+            session_id=session_id,
+        )
+
+        # Display assistant response
+        with st.chat_message("assistant"):
+            retrieval_message_placeholder = st.empty()
+            message_placeholder = st.empty()
+            full_response = ""
+            retrieval_response = ""
+            for log in EventLogger().log(response):
+                log.print()
+                if log.role == "memory_retrieval":
+                    retrieval_response += log.content.replace("====", "").strip()
+                    retrieval_message_placeholder.info(retrieval_response)
+                else:
+                    full_response += log.content
+                    message_placeholder.markdown(full_response + "▌")
+            message_placeholder.markdown(full_response)
+
+        st.session_state.messages.append(
+            {"role": "assistant", "content": full_response}
+        )
+
+
+rag_chat_page()
diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt
index c03959444..39f2b3d27 100644
--- a/llama_stack/distribution/ui/requirements.txt
+++ b/llama_stack/distribution/ui/requirements.txt
@@ -1,3 +1,4 @@
 streamlit
 pandas
 llama-stack-client>=0.0.55
+streamlit-option-menu
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index b00b9a7db..0b18bac01 100644
--- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import ScoringFn
+from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
 
 
 llm_as_judge_base = ScoringFn(
@@ -14,4 +14,8 @@
     return_type=NumberType(),
     provider_id="llm-as-judge",
     provider_resource_id="llm-as-judge-base",
+    params=LLMAsJudgeScoringFnParams(
+        judge_model="meta-llama/Llama-3.1-405B-Instruct",
+        prompt_template="Enter custom LLM as Judge Prompt Template",
+    ),
 )
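With `llm_as_judge_base` now shipping default params, callers can still override them per request. A sketch under two stated assumptions: the function's registered identifier is `llm-as-judge::base` (only `provider_resource_id` is visible in the diff), and the params dict carries a `"type"` discriminator, as the UI's `to_dict()` round-trip suggests:

```python
# Sketch: overriding the default judge params at scoring time.
from modules.api import llama_stack_api

scoring_params = {
    "llm-as-judge::base": {  # assumed identifier, see note above
        "type": "llm_as_judge",  # assumed discriminator value
        "judge_model": "meta-llama/Llama-3.1-405B-Instruct",
        "prompt_template": "Rate the answer from 0 to 5:\n{generated_answer}",
    }
}
response = llama_stack_api.client.scoring.score(
    input_rows=[{"input_query": "q", "generated_answer": "a"}],
    scoring_functions=scoring_params,
)
print(response.results["llm-as-judge::base"].score_rows[0])
```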