From 987f28559e1e1145e8b74ca0f347bbd3ae61514e Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 10:35:34 +0530 Subject: [PATCH 01/54] feat: hunyuan_video --- cookbook/agents/41_generate_hunyuan_video.py | 19 +++++++ phi/tools/hunyuan_video.py | 57 ++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 cookbook/agents/41_generate_hunyuan_video.py create mode 100644 phi/tools/hunyuan_video.py diff --git a/cookbook/agents/41_generate_hunyuan_video.py b/cookbook/agents/41_generate_hunyuan_video.py new file mode 100644 index 000000000..13e985a0f --- /dev/null +++ b/cookbook/agents/41_generate_hunyuan_video.py @@ -0,0 +1,19 @@ +from phi.agent import Agent +from phi.model.openai import OpenAIChat +from phi.tools.hunyuan_video import HunyuanVideo + +video_agent = Agent( + model=OpenAIChat(id="gpt-4o"), + tools=[HunyuanVideo()], + description="You are an AI agent that can generate videos using the ModelsLabs API.", + instructions=[ + "When the user asks you to create a video, use the `generate_video` tool to create the video.", + "Return the video URL as raw to the user.", + "Don't convert video URL to markdown or anything else.", + ], + markdown=True, + debug_mode=True, + show_tool_calls=True, +) + +video_agent.print_response("Generate a video of a cat playing with a ball") diff --git a/phi/tools/hunyuan_video.py b/phi/tools/hunyuan_video.py new file mode 100644 index 000000000..ed2b8cd05 --- /dev/null +++ b/phi/tools/hunyuan_video.py @@ -0,0 +1,57 @@ +""" +pip install fal-client +""" + +from os import getenv +from typing import Optional + +from phi.agent import Agent +from phi.tools import Toolkit +from phi.utils.log import logger + +try: + import fal_client +except ImportError: + raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") + + +class HunyuanVideo(Toolkit): + def __init__( + self, + api_key: Optional[str] = None, + ): + super().__init__(name="hunyuan_video") + + self.api_key = api_key or getenv("FAL_API_KEY") + if not self.api_key: + logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") + + self.register(self.generate_video) + + def on_queue_update(self, update): + if isinstance(update, fal_client.InProgress) and update.logs: + for log in update.logs: + logger.info(log["message"]) + + def generate_video(self, agent: Agent, prompt: str) -> str: + """ + Use this function to generate a video given a prompt. + + Args: + prompt (str): A text description of the desired video. + + Returns: + str: Generated video URL. + """ + try: + result = fal_client.subscribe( + "fal-ai/hunyuan-video", + arguments={"prompt": prompt}, + with_logs=True, + on_queue_update=self.on_queue_update, + ) + video_url = result.get("video", {}).get("url", "") + return video_url + except Exception as e: + logger.error(f"Failed to generate video: {e}") + return f"Error: {e}" From 909289791c5400d8bcfdbbc2c854b25e4a2c448a Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 10:39:35 +0530 Subject: [PATCH 02/54] fix: lint --- phi/tools/hunyuan_video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phi/tools/hunyuan_video.py b/phi/tools/hunyuan_video.py index ed2b8cd05..7190491af 100644 --- a/phi/tools/hunyuan_video.py +++ b/phi/tools/hunyuan_video.py @@ -10,7 +10,7 @@ from phi.utils.log import logger try: - import fal_client + import fal_client # type: ignore except ImportError: raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") From d9951ab57e237303a73fd060e77db101f53dee4c Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 18:41:51 +0530 Subject: [PATCH 03/54] fix: create a fal tool --- cookbook/agents/41_generate_hunyuan_video.py | 19 ------------- cookbook/agents/42_generate_fal_video.py | 20 +++++++++++++ phi/tools/{hunyuan_video.py => fal.py} | 30 ++++++++++++-------- 3 files changed, 38 insertions(+), 31 deletions(-) delete mode 100644 cookbook/agents/41_generate_hunyuan_video.py create mode 100644 cookbook/agents/42_generate_fal_video.py rename phi/tools/{hunyuan_video.py => fal.py} (58%) diff --git a/cookbook/agents/41_generate_hunyuan_video.py b/cookbook/agents/41_generate_hunyuan_video.py deleted file mode 100644 index 13e985a0f..000000000 --- a/cookbook/agents/41_generate_hunyuan_video.py +++ /dev/null @@ -1,19 +0,0 @@ -from phi.agent import Agent -from phi.model.openai import OpenAIChat -from phi.tools.hunyuan_video import HunyuanVideo - -video_agent = Agent( - model=OpenAIChat(id="gpt-4o"), - tools=[HunyuanVideo()], - description="You are an AI agent that can generate videos using the ModelsLabs API.", - instructions=[ - "When the user asks you to create a video, use the `generate_video` tool to create the video.", - "Return the video URL as raw to the user.", - "Don't convert video URL to markdown or anything else.", - ], - markdown=True, - debug_mode=True, - show_tool_calls=True, -) - -video_agent.print_response("Generate a video of a cat playing with a ball") diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py new file mode 100644 index 000000000..5aa187323 --- /dev/null +++ b/cookbook/agents/42_generate_fal_video.py @@ -0,0 +1,20 @@ +from phi.agent import Agent +from phi.model.openai import OpenAIChat +from phi.tools.fal import Fal + +fal_agent = Agent( + model=OpenAIChat(id="gpt-4o"), + tools=[Fal()], + description="You are an AI agent that can generate videos using the Fal API.", + instructions=[ + "When the user asks you to create a video, use the `run` tool to create the video.", + "Return the URL as raw to the user.", + "Don't convert video URL to markdown or anything else.", + "Use `fal-ai/hunyuan-video` model by default.", + ], + markdown=True, + debug_mode=True, + show_tool_calls=True, +) + +fal_agent.print_response("Generate video of ballon in the ocean") diff --git a/phi/tools/hunyuan_video.py b/phi/tools/fal.py similarity index 58% rename from phi/tools/hunyuan_video.py rename to phi/tools/fal.py index 7190491af..573a64397 100644 --- a/phi/tools/hunyuan_video.py +++ b/phi/tools/fal.py @@ -15,43 +15,49 @@ raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") -class HunyuanVideo(Toolkit): +class Fal(Toolkit): def __init__( self, api_key: Optional[str] = None, + model: str = "fal-ai/hunyuan-video", ): - super().__init__(name="hunyuan_video") + super().__init__(name="fal") self.api_key = api_key or getenv("FAL_API_KEY") if not self.api_key: logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") - self.register(self.generate_video) + self.register(self.run) def on_queue_update(self, update): if isinstance(update, fal_client.InProgress) and update.logs: for log in update.logs: logger.info(log["message"]) - def generate_video(self, agent: Agent, prompt: str) -> str: + def run(self, agent: Agent, prompt: str, model: str) -> str: """ - Use this function to generate a video given a prompt. + Use this function to run a model with a given prompt. Args: - prompt (str): A text description of the desired video. - + prompt (str): A text description of the task. + model (str): The model to use. Returns: - str: Generated video URL. + str: Return the result of the model. """ try: result = fal_client.subscribe( - "fal-ai/hunyuan-video", + model, arguments={"prompt": prompt}, with_logs=True, on_queue_update=self.on_queue_update, ) - video_url = result.get("video", {}).get("url", "") - return video_url + if video_url := result.get("video", {}).get("url", ""): + return video_url + elif image_url := result.get("image", {}).get("url", ""): + print(image_url) + return image_url + else: + return str(result) except Exception as e: - logger.error(f"Failed to generate video: {e}") + logger.error(f"Failed to run model: {e}") return f"Error: {e}" From 37b577e26473982116d0c7a2f90dff096874f568 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 18:44:34 +0530 Subject: [PATCH 04/54] fix: remove print --- phi/tools/fal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/phi/tools/fal.py b/phi/tools/fal.py index 573a64397..ff233d820 100644 --- a/phi/tools/fal.py +++ b/phi/tools/fal.py @@ -54,7 +54,6 @@ def run(self, agent: Agent, prompt: str, model: str) -> str: if video_url := result.get("video", {}).get("url", ""): return video_url elif image_url := result.get("image", {}).get("url", ""): - print(image_url) return image_url else: return str(result) From c9895bb21981f86e6980a4172df70f24eefc5d5c Mon Sep 17 00:00:00 2001 From: Anurag Date: Mon, 9 Dec 2024 19:16:48 +0530 Subject: [PATCH 05/54] Update cookbook/agents/42_generate_fal_video.py Co-authored-by: Dirk Brand <51947788+dirkbrnd@users.noreply.github.com> --- cookbook/agents/42_generate_fal_video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index 5aa187323..bbe00f568 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -5,7 +5,7 @@ fal_agent = Agent( model=OpenAIChat(id="gpt-4o"), tools=[Fal()], - description="You are an AI agent that can generate videos using the Fal API.", + description="You are an AI agent that can generate videos using the 'fal' API.", instructions=[ "When the user asks you to create a video, use the `run` tool to create the video.", "Return the URL as raw to the user.", From 87c1afb75b684c3ebd2e72cb4c7f52fecea82fc6 Mon Sep 17 00:00:00 2001 From: Anurag Date: Mon, 9 Dec 2024 19:16:54 +0530 Subject: [PATCH 06/54] Update cookbook/agents/42_generate_fal_video.py Co-authored-by: Dirk Brand <51947788+dirkbrnd@users.noreply.github.com> --- cookbook/agents/42_generate_fal_video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index bbe00f568..a49a8f389 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -7,7 +7,7 @@ tools=[Fal()], description="You are an AI agent that can generate videos using the 'fal' API.", instructions=[ - "When the user asks you to create a video, use the `run` tool to create the video.", + "When the user asks you to create a video, execute `run` on the tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", "Use `fal-ai/hunyuan-video` model by default.", From 3243c5b15147a7e2599f2acba2fba0464cc56afe Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 21:39:03 +0530 Subject: [PATCH 07/54] fix: move to fal tools --- cookbook/agents/42_generate_fal_video.py | 9 ++++--- phi/tools/{fal.py => fal_tools.py} | 32 +++++++++++++++++------- 2 files changed, 28 insertions(+), 13 deletions(-) rename phi/tools/{fal.py => fal_tools.py} (62%) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index a49a8f389..a979ffc10 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -1,16 +1,17 @@ from phi.agent import Agent from phi.model.openai import OpenAIChat -from phi.tools.fal import Fal +from phi.tools.fal_tools import FalTools fal_agent = Agent( model=OpenAIChat(id="gpt-4o"), - tools=[Fal()], - description="You are an AI agent that can generate videos using the 'fal' API.", + tools=[FalTools()], + description="You are an AI agent that can generate videos using the Fal API.", instructions=[ - "When the user asks you to create a video, execute `run` on the tool to create the video.", + "When the user asks you to create a video, use the `run` tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", "Use `fal-ai/hunyuan-video` model by default.", + "Also pass the type of model of the tool, it can be either `image` or `video`.", ], markdown=True, debug_mode=True, diff --git a/phi/tools/fal.py b/phi/tools/fal_tools.py similarity index 62% rename from phi/tools/fal.py rename to phi/tools/fal_tools.py index ff233d820..961145849 100644 --- a/phi/tools/fal.py +++ b/phi/tools/fal_tools.py @@ -8,6 +8,7 @@ from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger +from enum import Enum try: import fal_client # type: ignore @@ -15,46 +16,59 @@ raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") -class Fal(Toolkit): +class ModelType(Enum): + IMAGE = "image" + VIDEO = "video" + TEXT = "text" + + +class FalTools(Toolkit): def __init__( self, api_key: Optional[str] = None, model: str = "fal-ai/hunyuan-video", + type: ModelType = ModelType.VIDEO, ): super().__init__(name="fal") self.api_key = api_key or getenv("FAL_API_KEY") + self.model = model + self.type = type if not self.api_key: logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") - + self.seen_logs: set[str] = set() self.register(self.run) def on_queue_update(self, update): if isinstance(update, fal_client.InProgress) and update.logs: for log in update.logs: - logger.info(log["message"]) + message = log["message"] + if message not in self.seen_logs: + logger.info(message) + self.seen_logs.add(message) - def run(self, agent: Agent, prompt: str, model: str) -> str: + def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Optional[ModelType] = None) -> str: """ Use this function to run a model with a given prompt. Args: prompt (str): A text description of the task. model (str): The model to use. + type (ModelType): The type of the model to use. It can be either `image` or `video` or `text`. Returns: str: Return the result of the model. """ try: result = fal_client.subscribe( - model, + model or self.model, arguments={"prompt": prompt}, with_logs=True, on_queue_update=self.on_queue_update, ) - if video_url := result.get("video", {}).get("url", ""): - return video_url - elif image_url := result.get("image", {}).get("url", ""): - return image_url + if type == ModelType.VIDEO: + return result.get("video", {}).get("url", "") + elif type == ModelType.IMAGE: + return result.get("image", {}).get("url", "") else: return str(result) except Exception as e: From a59d392e96495eb1890ab8691cb8d3b9e5367fbe Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 21:49:20 +0530 Subject: [PATCH 08/54] fix: enum type --- phi/tools/fal_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 961145849..2e0a36403 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -16,7 +16,7 @@ raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") -class ModelType(Enum): +class ModelType(str, Enum): IMAGE = "image" VIDEO = "video" TEXT = "text" From 31f86fc427c5ace047d192575ec5df5606498903 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 21:51:59 +0530 Subject: [PATCH 09/54] fix: name --- cookbook/agents/42_generate_fal_video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index a979ffc10..8bfab8040 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -3,6 +3,7 @@ from phi.tools.fal_tools import FalTools fal_agent = Agent( + name="Fal Video Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[FalTools()], description="You are an AI agent that can generate videos using the Fal API.", From 7cf736dd859be8ad9169cd2e794f58b0de4e2f9b Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 22:17:00 +0530 Subject: [PATCH 10/54] fix: improve instructions --- cookbook/agents/42_generate_fal_video.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index 8bfab8040..8076508da 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -11,7 +11,6 @@ "When the user asks you to create a video, use the `run` tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", - "Use `fal-ai/hunyuan-video` model by default.", "Also pass the type of model of the tool, it can be either `image` or `video`.", ], markdown=True, From d04acf6712f1a80ace6cd0ccf541f02a1cec05e9 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 22:20:37 +0530 Subject: [PATCH 11/54] fix: return video/image url to agent --- phi/tools/fal_tools.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 2e0a36403..4ac217067 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -66,9 +66,13 @@ def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Opti on_queue_update=self.on_queue_update, ) if type == ModelType.VIDEO: - return result.get("video", {}).get("url", "") + video_url = result.get("video", {}).get("url", "") + agent.add_video(video_url) + return f"Video URL: {video_url}" elif type == ModelType.IMAGE: - return result.get("image", {}).get("url", "") + image_url = result.get("image", {}).get("url", "") + agent.add_image(image_url) + return f"Image URL: {image_url}" else: return str(result) except Exception as e: From 1b9513cdddb4bbba0ffcc32a3efb5635d0a6936b Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 22:23:22 +0530 Subject: [PATCH 12/54] fix: add fal video agent to playground --- cookbook/playground/multimodal_agent.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index 8c45ba7a0..aaa66b3ce 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -6,6 +6,7 @@ from phi.tools.models_labs import ModelsLabs from phi.playground import Playground, serve_playground_app from phi.storage.agent.sqlite import SqlAgentStorage +from phi.tools.fal_tools import FalTools image_agent_storage_file: str = "tmp/image_agent.db" @@ -46,7 +47,24 @@ storage=SqlAgentStorage(table_name="video_agent", db_file="tmp/video_agent.db"), ) -app = Playground(agents=[image_agent, video_agent]).get_app() +fal_agent = Agent( + name="Fal Video Generator Agent", + agent_id="fal_agent", + model=OpenAIChat(id="gpt-4o"), + tools=[FalTools()], + description="You are an AI agent that can generate videos using the Fal API.", + instructions=[ + "When the user asks you to create a video, use the `run` tool to create the video.", + "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", + ], + markdown=True, + debug_mode=True, + add_history_to_messages=True, + add_datetime_to_instructions=True, + storage=SqlAgentStorage(table_name="fal_agent", db_file="tmp/fal_agent.db"), +) + +app = Playground(agents=[image_agent, video_agent, fal_agent]).get_app() if __name__ == "__main__": serve_playground_app("multimodal_agent:app", reload=True) From a50da31494bd581489b5fe61e6e2fe0c143774dc Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 22:48:12 +0530 Subject: [PATCH 13/54] fix: send as schema --- phi/tools/fal_tools.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 4ac217067..117168c3e 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -9,6 +9,7 @@ from phi.tools import Toolkit from phi.utils.log import logger from enum import Enum +import json try: import fal_client # type: ignore @@ -59,6 +60,7 @@ def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Opti str: Return the result of the model. """ try: + data = [] result = fal_client.subscribe( model or self.model, arguments={"prompt": prompt}, @@ -67,11 +69,13 @@ def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Opti ) if type == ModelType.VIDEO: video_url = result.get("video", {}).get("url", "") - agent.add_video(video_url) + data.append({"url": video_url}) + agent.add_video(json.dumps(data)) return f"Video URL: {video_url}" elif type == ModelType.IMAGE: image_url = result.get("image", {}).get("url", "") - agent.add_image(image_url) + data.append({"url": image_url}) + agent.add_image(json.dumps(data)) return f"Image URL: {image_url}" else: return str(result) From cf895cc5f9ee5f97354e5b923076780e22907a9a Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 22:53:09 +0530 Subject: [PATCH 14/54] fix: return as mp4 --- phi/tools/models_labs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index a6142aec0..cafc06a22 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -60,7 +60,7 @@ def generate_video(self, agent: Agent, prompt: str) -> str: "width": 512, "num_frames": 25, "webhook": None, - "output_type": "gif", + "output_type": "mp4", "track_id": None, "negative_prompt": "low quality", "model_id": "cogvideox", From c939f7e5598bbfcf750202a2100a5915b0640f03 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 23:01:11 +0530 Subject: [PATCH 15/54] fix: add enum --- phi/tools/models_labs.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index cafc06a22..23b5ee3bf 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -6,6 +6,7 @@ from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger +from enum import Enum try: import requests @@ -13,6 +14,11 @@ raise ImportError("`requests` not installed. Please install using `pip install requests`") +class FileType(str, Enum): + MP4 = "mp4" + GIF = "gif" + + class ModelsLabs(Toolkit): def __init__( self, @@ -25,6 +31,7 @@ def __init__( add_to_eta: int = 15, # Maximum time to wait for the video to be ready max_wait_time: int = 60, + file_type: FileType = FileType.MP4, ): super().__init__(name="models_labs") @@ -33,6 +40,7 @@ def __init__( self.wait_for_completion = wait_for_completion self.add_to_eta = add_to_eta self.max_wait_time = max_wait_time + self.file_type = file_type self.api_key = api_key or getenv("MODELS_LAB_API_KEY") if not self.api_key: logger.error("MODELS_LAB_API_KEY not set. Please set the MODELS_LAB_API_KEY environment variable.") @@ -60,7 +68,7 @@ def generate_video(self, agent: Agent, prompt: str) -> str: "width": 512, "num_frames": 25, "webhook": None, - "output_type": "mp4", + "output_type": self.file_type.value, "track_id": None, "negative_prompt": "low quality", "model_id": "cogvideox", From d0cb754b904fc286fc8b0287e516b2524bbf1d29 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 23:21:54 +0530 Subject: [PATCH 16/54] fix: data --- phi/tools/fal_tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 117168c3e..2495b5838 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -70,12 +70,14 @@ def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Opti if type == ModelType.VIDEO: video_url = result.get("video", {}).get("url", "") data.append({"url": video_url}) - agent.add_video(json.dumps(data)) + result["data"] = data + agent.add_video(json.dumps(result)) return f"Video URL: {video_url}" elif type == ModelType.IMAGE: image_url = result.get("image", {}).get("url", "") data.append({"url": image_url}) - agent.add_image(json.dumps(data)) + result["data"] = data + agent.add_image(json.dumps(result)) return f"Image URL: {image_url}" else: return str(result) From 45e9e86c14817d8febc240f54a96173bca781676 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 23:29:35 +0530 Subject: [PATCH 17/54] fix: instruction for video model --- cookbook/playground/multimodal_agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index aaa66b3ce..d8b81de9e 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -56,6 +56,7 @@ instructions=[ "When the user asks you to create a video, use the `run` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", + "Use `fal-ai/hunyuan-video` model by default.", ], markdown=True, debug_mode=True, From dd75ea259250ad782e9a69ce477b6ab7fb8e5cd6 Mon Sep 17 00:00:00 2001 From: anuragts Date: Mon, 9 Dec 2024 23:33:38 +0530 Subject: [PATCH 18/54] fix: more instruction --- cookbook/playground/multimodal_agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index d8b81de9e..1509bd19d 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -57,6 +57,8 @@ "When the user asks you to create a video, use the `run` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", "Use `fal-ai/hunyuan-video` model by default.", + "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", + "Don't send video url in markdown format.", ], markdown=True, debug_mode=True, From 710aa30d01add98b7bce6b33ce79209094fe5ac4 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Mon, 9 Dec 2024 21:59:56 +0200 Subject: [PATCH 19/54] Add replicate toolkit --- .../agents/43_generate_replicate_video.py | 24 +++++++++++ phi/tools/replicate.py | 43 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 cookbook/agents/43_generate_replicate_video.py create mode 100644 phi/tools/replicate.py diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py new file mode 100644 index 000000000..88804e1b0 --- /dev/null +++ b/cookbook/agents/43_generate_replicate_video.py @@ -0,0 +1,24 @@ +from phi.agent import Agent +from phi.model.openai import OpenAIChat +from phi.tools.replicate import ReplicateToolKit + +video_agent = Agent( + name="Video Generator Agent", + model=OpenAIChat(id="gpt-4o"), + tools=[ + ReplicateToolKit( + model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405" + ) + ], + description="You are an AI agent that can generate videos using the Replicate API.", + instructions=[ + "When the user asks you to create a video, use the `generate_video` tool to create the video.", + "Return the URL as raw to the user.", + "Don't convert video URL to markdown or anything else." + ], + markdown=True, + debug_mode=True, + show_tool_calls=True, +) + +video_agent.print_response("Generate a video of a horse in the dessert.") diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py new file mode 100644 index 000000000..5b8737389 --- /dev/null +++ b/phi/tools/replicate.py @@ -0,0 +1,43 @@ +import json +from os import getenv +from typing import Optional + +from phi.agent import Agent +from phi.tools import Toolkit +from phi.utils.log import logger + +try: + import replicate +except ImportError: + raise ImportError("`replicate` not installed. Please install using `pip install replicate`.") + + +class ReplicateToolKit(Toolkit): + def __init__( + self, + model: str = "tencent/hunyuan-video", + ): + super().__init__(name="replicate_toolkit") + self.api_key = getenv("REPLICATE_API_TOKEN") + if not self.api_key: + logger.error("REPLICATE_API_TOKEN not set. Please set the REPLICATE_API_TOKEN environment variable.") + self.model = model + + self.register(self.generate_video) + + def generate_video(self, agent: Agent, prompt: str) -> str: + """ + Use this function to generate a video. + Args: + prompt (str): A text description of the task. + Returns: + str: Return a URI to the generated video. + """ + output = replicate.run( + ref=self.model, + input={ + "prompt": prompt + } + ) + return output + From 22a07bbf819913c833adc1ba2abea8e371d64103 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Mon, 9 Dec 2024 22:06:28 +0200 Subject: [PATCH 20/54] Add replicate toolkit --- .../agents/43_generate_replicate_video.py | 8 +++---- phi/tools/replicate.py | 22 ++++++------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py index 88804e1b0..a0a5c697f 100644 --- a/cookbook/agents/43_generate_replicate_video.py +++ b/cookbook/agents/43_generate_replicate_video.py @@ -6,15 +6,13 @@ name="Video Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ - ReplicateToolKit( - model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405" - ) + ReplicateToolKit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") ], description="You are an AI agent that can generate videos using the Replicate API.", instructions=[ - "When the user asks you to create a video, use the `generate_video` tool to create the video.", + "When the user asks you to create a video, use the `generate_content` tool to create the video.", "Return the URL as raw to the user.", - "Don't convert video URL to markdown or anything else." + "Don't convert video URL to markdown or anything else.", ], markdown=True, debug_mode=True, diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index 5b8737389..eb3bc44bf 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -1,6 +1,4 @@ -import json from os import getenv -from typing import Optional from phi.agent import Agent from phi.tools import Toolkit @@ -15,7 +13,7 @@ class ReplicateToolKit(Toolkit): def __init__( self, - model: str = "tencent/hunyuan-video", + model: str = "minimax/video-01", ): super().__init__(name="replicate_toolkit") self.api_key = getenv("REPLICATE_API_TOKEN") @@ -23,21 +21,15 @@ def __init__( logger.error("REPLICATE_API_TOKEN not set. Please set the REPLICATE_API_TOKEN environment variable.") self.model = model - self.register(self.generate_video) + self.register(self.generate_content) - def generate_video(self, agent: Agent, prompt: str) -> str: + def generate_content(self, agent: Agent, prompt: str) -> str: """ - Use this function to generate a video. + Use this function to generate an image or a video using a replicate model. Args: - prompt (str): A text description of the task. + prompt (str): A text description of the content. Returns: - str: Return a URI to the generated video. + str: Return a URI to the generated video or image. """ - output = replicate.run( - ref=self.model, - input={ - "prompt": prompt - } - ) + output = replicate.run(ref=self.model, input={"prompt": prompt}) return output - From b3bf340028de5a573c68aa8fbfc2c139d4b1d49c Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Mon, 9 Dec 2024 22:12:14 +0200 Subject: [PATCH 21/54] Ignore missing imports --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6bb6dda36..3033a1950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ exclude = ["phienv*", "aienv*"] check_untyped_defs = true no_implicit_optional = true warn_unused_configs = true +ignore_missing_imports = true plugins = ["pydantic.mypy"] exclude = ["phienv*", "aienv*", "scratch*", "wip*", "tmp*", "cookbook/assistants/examples/*", "phi/assistant/openai/*"] From 0a739bc707b6d4a8a71b946e529b8ab004135773 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 01:59:23 +0530 Subject: [PATCH 22/54] luma lab video generation --- cookbook/tools/lumaAgent.py | 37 +++++++++++++ phi/tools/lumalab.py | 103 ++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 cookbook/tools/lumaAgent.py create mode 100644 phi/tools/lumalab.py diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py new file mode 100644 index 000000000..a09d7c22f --- /dev/null +++ b/cookbook/tools/lumaAgent.py @@ -0,0 +1,37 @@ +from phi.agent import Agent +from phi.llm.openai import OpenAIChat +from phi.tools.lumalab import LumaLab + +"""Create an agent specialized for Luma AI video generation""" + +luma_agent = Agent( + name="Luma Video Agent", + agent_id="luma-video-agent", + llm=OpenAIChat(model="gpt-4o"), + tools=[LumaLab()], # Using the LumaLab tool we created + markdown=True, + debug_mode=True, + show_tool_calls=True, + instructions=[ + "You are an agent designed to generate videos using the Luma AI API.", + "When asked to generate a video, use the generate_video function from the LumaLab toolkit.", + "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.", + "The default parameters are:", + "- loop: False", + "- aspect_ratio: '16:9'", + "- keyframes: None", + "After generating the video, display the video URL in markdown format.", + "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.", + "If any errors occur during generation, clearly communicate them to the user.", + ], + system_message=( + "Do not modify any default parameters of the generate_video function " + "unless explicitly specified in the user's request. Always provide clear " + "feedback about the video generation status." + ), +) + +luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves") + + + diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py new file mode 100644 index 000000000..7912a1ebc --- /dev/null +++ b/phi/tools/lumalab.py @@ -0,0 +1,103 @@ +import time +import json +from os import getenv +from typing import Optional, Dict, Any + +from phi.agent import Agent +from phi.tools import Toolkit +from phi.utils.log import logger + +try: + from lumaai import LumaAI +except ImportError: + raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`") + + +class LumaLab(Toolkit): + def __init__( + self, + api_key: Optional[str] = None, + wait_for_completion: bool = True, + poll_interval: int = 3, + max_wait_time: int = 300, # 5 minutes + ): + super().__init__(name="luma_lab") + + self.wait_for_completion = wait_for_completion + self.poll_interval = poll_interval + self.max_wait_time = max_wait_time + self.api_key = api_key or getenv("LUMAAI_API_KEY") + + if not self.api_key: + logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.") + + self.client = LumaAI(auth_token=self.api_key) + self.register(self.generate_video) + + def generate_video( + self, + agent: Agent, + prompt: str, + loop: bool = False, + aspect_ratio: str = "16:9", + keyframes: Optional[Dict[str, Any]] = None, + ) -> str: + """Use this function to generate a video given a prompt. + + Args: + prompt (str): A text description of the desired video. + loop (bool, optional): Whether the video should loop. Defaults to False. + aspect_ratio (str, optional): Aspect ratio of the video. Defaults to "16:9". + keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension. + + Returns: + str: A message indicating if the video has been generated successfully or an error message. + """ + if not self.api_key: + return "Please set the LUMAAI_API_KEY" + + try: + # Create generation request + generation_params = { + "prompt": prompt, + "loop": loop, + "aspect_ratio": aspect_ratio, + } + if keyframes: + generation_params["keyframes"] = keyframes + + logger.debug(f"Generating video with params: {generation_params}") + generation = self.client.generations.create(**generation_params) + + if not self.wait_for_completion: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + + # Poll for completion + completed = False + seconds_waited = 0 + while not completed and seconds_waited < self.max_wait_time: + generation = self.client.generations.get(id=generation.id) + + if generation.state == "completed": + completed = True + video_url = generation.assets.video + agent.add_video(json.dumps({ + "id": generation.id, + "url": video_url, + "state": "completed" + })) + return f"Video generated successfully: {video_url}" + elif generation.state == "failed": + return f"Generation failed: {generation.failure_reason}" + + logger.info(f"Generation in progress... State: {generation.state}") + time.sleep(self.poll_interval) + seconds_waited += self.poll_interval + + if not completed: + return f"Video generation timed out after {self.max_wait_time} seconds" + + except Exception as e: + logger.error(f"Failed to generate video: {e}") + return f"Error: {e}" From 11adf3e9281fc7662c93f6fb1f50895971ab6288 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 09:45:54 +0200 Subject: [PATCH 23/54] Fix cookbook for replicate --- .../agents/43_generate_replicate_video.py | 6 ++++-- .../llms/vertexai/samples/multimodal.py | 2 +- .../llms/vertexai/samples/text_stream.py | 2 +- phi/agent/agent.py | 17 ++++++++------- phi/llm/google/gemini.py | 4 ++-- phi/llm/vertexai/gemini.py | 4 ++-- phi/model/content.py | 16 ++++++++++++++ phi/model/google/gemini.py | 4 ++-- phi/model/vertexai/gemini.py | 4 ++-- phi/run/response.py | 5 +++-- phi/tools/dalle.py | 10 ++++++++- phi/tools/models_labs.py | 12 +++++++---- phi/tools/replicate.py | 21 +++++++++++++++---- 13 files changed, 76 insertions(+), 31 deletions(-) create mode 100644 phi/model/content.py diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py index a0a5c697f..d64faf531 100644 --- a/cookbook/agents/43_generate_replicate_video.py +++ b/cookbook/agents/43_generate_replicate_video.py @@ -2,15 +2,17 @@ from phi.model.openai import OpenAIChat from phi.tools.replicate import ReplicateToolKit +"""Create an agent specialized for Replicate AI content generation""" + video_agent = Agent( name="Video Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ ReplicateToolKit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") ], - description="You are an AI agent that can generate videos using the Replicate API.", + description="You are an AI agent that can generate images and videos using the Replicate API.", instructions=[ - "When the user asks you to create a video, use the `generate_content` tool to create the video.", + "When the user asks you to create a video, use the `generate_media` tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", ], diff --git a/cookbook/assistants/llms/vertexai/samples/multimodal.py b/cookbook/assistants/llms/vertexai/samples/multimodal.py index 7b3f67914..92114d2a0 100644 --- a/cookbook/assistants/llms/vertexai/samples/multimodal.py +++ b/cookbook/assistants/llms/vertexai/samples/multimodal.py @@ -11,7 +11,7 @@ def multimodal_example(project: Optional[str], location: Optional[str]) -> str: # Load the model multimodal_model = GenerativeModel("gemini-1.0-pro-vision") # Query the model - response = multimodal_model.generate_content( + response = multimodal_model.generate_media( [ # Add an example image Part.from_uri("gs://generativeai-downloads/images/scones.jpg", mime_type="image/jpeg"), diff --git a/cookbook/assistants/llms/vertexai/samples/text_stream.py b/cookbook/assistants/llms/vertexai/samples/text_stream.py index 70fd96e96..fcc1d5bfb 100644 --- a/cookbook/assistants/llms/vertexai/samples/text_stream.py +++ b/cookbook/assistants/llms/vertexai/samples/text_stream.py @@ -11,7 +11,7 @@ def generate(project: Optional[str], location: Optional[str]) -> None: # Load the model model = GenerativeModel("gemini-1.0-pro-vision") # Query the model - responses: Iterable[GenerationResponse] = model.generate_content("Who are you?", stream=True) + responses: Iterable[GenerationResponse] = model.generate_media("Who are you?", stream=True) # Process the response for response in responses: print(response.text, end="") diff --git a/phi/agent/agent.py b/phi/agent/agent.py index 9cba61c55..ef0dff66e 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -28,6 +28,7 @@ from phi.document import Document from phi.agent.session import AgentSession +from phi.model.content import Image, Video from phi.reasoning.step import ReasoningStep, ReasoningSteps, NextAction from phi.run.response import RunEvent, RunResponse, RunResponseExtraData from phi.knowledge.agent import AgentKnowledge @@ -57,9 +58,9 @@ class Agent(BaseModel): # -*- Agent Data # Images associated with this agent - images: Optional[List[Union[str, Dict[str, Any]]]] = None + images: Optional[List[Image]] = None # Videos associated with this agent - videos: Optional[List[Union[str, Dict[str, Any]]]] = None + videos: Optional[List[Video]] = None # Data associated with this agent # name, model, images and videos are automatically added to the agent_data @@ -632,13 +633,13 @@ def from_agent_session(self, session: AgentSession): if "images" in session.agent_data: images_from_db = session.agent_data.get("images") if self.images is not None and isinstance(self.images, list): - self.images.extend(images_from_db) # type: ignore + self.images.extend([Image.model_validate(img) for img in images_from_db]) else: self.images = images_from_db if "videos" in session.agent_data: videos_from_db = session.agent_data.get("videos") if self.videos is not None and isinstance(self.videos, list): - self.videos.extend(videos_from_db) # type: ignore + self.videos.extend([Video.model_validate(vid) for vid in videos_from_db]) else: self.videos = videos_from_db @@ -2433,7 +2434,7 @@ def delete_session(self, session_id: str): # Handle images and videos ########################################################################### - def add_image(self, image: Union[str, Dict]) -> None: + def add_image(self, image: Image) -> None: if self.images is None: self.images = [] self.images.append(image) @@ -2442,7 +2443,7 @@ def add_image(self, image: Union[str, Dict]) -> None: self.run_response.images = [] self.run_response.images.append(image) - def add_video(self, video: Union[str, Dict]) -> None: + def add_video(self, video: Video) -> None: if self.videos is None: self.videos = [] self.videos.append(video) @@ -2451,10 +2452,10 @@ def add_video(self, video: Union[str, Dict]) -> None: self.run_response.videos = [] self.run_response.videos.append(video) - def get_images(self) -> Optional[List[Union[str, Dict]]]: + def get_images(self) -> Optional[List[Image]]: return self.images - def get_videos(self) -> Optional[List[Union[str, Dict]]]: + def get_videos(self) -> Optional[List[Video]]: return self.videos ########################################################################### diff --git a/phi/llm/google/gemini.py b/phi/llm/google/gemini.py index 00d7626d9..f4aeb134a 100644 --- a/phi/llm/google/gemini.py +++ b/phi/llm/google/gemini.py @@ -132,10 +132,10 @@ def api_kwargs(self) -> Dict[str, Any]: return kwargs def invoke(self, messages: List[Message]): - return self.client.generate_content(contents=self.conform_messages_to_gemini(messages)) + return self.client.generate_media(contents=self.conform_messages_to_gemini(messages)) def invoke_stream(self, messages: List[Message]): - yield from self.client.generate_content( + yield from self.client.generate_media( contents=self.conform_messages_to_gemini(messages), stream=True, ) diff --git a/phi/llm/vertexai/gemini.py b/phi/llm/vertexai/gemini.py index 9f7cfee7b..aa952872e 100644 --- a/phi/llm/vertexai/gemini.py +++ b/phi/llm/vertexai/gemini.py @@ -130,10 +130,10 @@ def convert_messages_to_contents(self, messages: List[Message]) -> List[Any]: return _contents def invoke(self, messages: List[Message]) -> GenerationResponse: - return self.client.generate_content(contents=self.convert_messages_to_contents(messages)) + return self.client.generate_media(contents=self.convert_messages_to_contents(messages)) def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]: - yield from self.client.generate_content( + yield from self.client.generate_media( contents=self.convert_messages_to_contents(messages), stream=True, ) diff --git a/phi/model/content.py b/phi/model/content.py new file mode 100644 index 000000000..f0ef06bd2 --- /dev/null +++ b/phi/model/content.py @@ -0,0 +1,16 @@ +from typing import Optional + +from pydantic import BaseModel + +class Video(BaseModel): + id: str + url: str + original_prompt: Optional[str] = None + revised_prompt: Optional[str] = None + eta: Optional[str] = None + +class Image(BaseModel): + id: str + url: str + original_prompt: Optional[str] = None + revised_prompt: Optional[str] = None diff --git a/phi/model/google/gemini.py b/phi/model/google/gemini.py index f0975cb49..ab9f4957c 100644 --- a/phi/model/google/gemini.py +++ b/phi/model/google/gemini.py @@ -266,7 +266,7 @@ def invoke(self, messages: List[Message]): Returns: GenerateContentResponse: The response from the model. """ - return self.get_client().generate_content(contents=self._format_messages(messages)) + return self.get_client().generate_media(contents=self._format_messages(messages)) def invoke_stream(self, messages: List[Message]): """ @@ -278,7 +278,7 @@ def invoke_stream(self, messages: List[Message]): Returns: Iterator[GenerateContentResponse]: The response from the model as a stream. """ - yield from self.get_client().generate_content( + yield from self.get_client().generate_media( contents=self._format_messages(messages), stream=True, ) diff --git a/phi/model/vertexai/gemini.py b/phi/model/vertexai/gemini.py index b416e1cc5..268065d48 100644 --- a/phi/model/vertexai/gemini.py +++ b/phi/model/vertexai/gemini.py @@ -258,7 +258,7 @@ def invoke(self, messages: List[Message]) -> GenerationResponse: Returns: GenerationResponse object containing the response content """ - return self.get_client().generate_content(contents=self._format_messages(messages)) + return self.get_client().generate_media(contents=self._format_messages(messages)) def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]: """ @@ -270,7 +270,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse] Returns: Iterator[GenerationResponse] object containing the response content """ - yield from self.get_client().generate_content( + yield from self.get_client().generate_media( contents=self._format_messages(messages), stream=True, ) diff --git a/phi/run/response.py b/phi/run/response.py index 32bd149e8..809992855 100644 --- a/phi/run/response.py +++ b/phi/run/response.py @@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field +from phi.model.content import Video, Image from phi.reasoning.step import ReasoningStep from phi.model.message import Message, MessageReferences @@ -48,8 +49,8 @@ class RunResponse(BaseModel): session_id: Optional[str] = None workflow_id: Optional[str] = None tools: Optional[List[Dict[str, Any]]] = None - images: Optional[List[Union[str, Dict[str, Any]]]] = None - videos: Optional[List[Union[str, Dict[str, Any]]]] = None + images: Optional[List[Image]] = None + videos: Optional[List[Video]] = None audio: Optional[Dict] = None extra_data: Optional[RunResponseExtraData] = None created_at: int = Field(default_factory=lambda: int(time())) diff --git a/phi/tools/dalle.py b/phi/tools/dalle.py index 662239573..cf5f08f2b 100644 --- a/phi/tools/dalle.py +++ b/phi/tools/dalle.py @@ -1,7 +1,9 @@ from os import getenv from typing import Optional, Literal +from uuid import uuid4 from phi.agent import Agent +from phi.model.content import Image from phi.tools import Toolkit from phi.utils.log import logger @@ -80,7 +82,13 @@ def create_image(self, agent: Agent, prompt: str) -> str: logger.debug("Image generated successfully") # Update the run response with the image URLs - agent.add_image(response.model_dump()) + for img in response.data: + agent.add_image(Image( + id=str(uuid4()), + url=img.url, + original_prompt=prompt, + revised_prompt=img.revised_prompt + )) return "Image has been generated successfully and will be displayed below" except Exception as e: logger.error(f"Failed to generate image: {e}") diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index a6142aec0..e18afca4b 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -4,6 +4,7 @@ from typing import Optional from phi.agent import Agent +from phi.model.content import Video from phi.tools import Toolkit from phi.utils.log import logger @@ -85,6 +86,7 @@ def generate_video(self, agent: Agent, prompt: str) -> str: logger.info(f"Video URLs: {video_url_links}") video_data = [] + logger.debug(f"Result: {result}") for video_url in video_url_links: video_data.append( { @@ -93,11 +95,13 @@ def generate_video(self, agent: Agent, prompt: str) -> str: "url": video_url, } ) - result["data"] = video_data - logger.debug(f"Result: {result}") - # Update the run response with the image URLs - agent.add_video(json.dumps(result)) + # Update the run response with the video URLs + agent.add_video(Video( + id=video_id, + url=video_url, + eta=eta + )) if self.wait_for_completion and isinstance(eta, int): video_ready = False diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index eb3bc44bf..ae8041cfb 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -1,11 +1,15 @@ +import json from os import getenv +from uuid import uuid4 from phi.agent import Agent +from phi.model.content import Video from phi.tools import Toolkit from phi.utils.log import logger try: import replicate + from replicate.helpers import FileOutput except ImportError: raise ImportError("`replicate` not installed. Please install using `pip install replicate`.") @@ -14,6 +18,8 @@ class ReplicateToolKit(Toolkit): def __init__( self, model: str = "minimax/video-01", + wait_for_completion: bool = True, + max_wait_time: int = 300, # 5 minutes ): super().__init__(name="replicate_toolkit") self.api_key = getenv("REPLICATE_API_TOKEN") @@ -21,9 +27,9 @@ def __init__( logger.error("REPLICATE_API_TOKEN not set. Please set the REPLICATE_API_TOKEN environment variable.") self.model = model - self.register(self.generate_content) + self.register(self.generate_media) - def generate_content(self, agent: Agent, prompt: str) -> str: + def generate_media(self, agent: Agent, prompt: str) -> str: """ Use this function to generate an image or a video using a replicate model. Args: @@ -31,5 +37,12 @@ def generate_content(self, agent: Agent, prompt: str) -> str: Returns: str: Return a URI to the generated video or image. """ - output = replicate.run(ref=self.model, input={"prompt": prompt}) - return output + output: FileOutput = replicate.run(ref=self.model, input={"prompt": prompt}) + + # Update the run response with the video URLs + agent.add_video(Video( + id=str(uuid4()), + url=output.url, + )) + + return f"Media generated successfully at {output.url}" From 2522dd7d2efe13325dc554c4b7a4e287d2ed008c Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 10:54:47 +0200 Subject: [PATCH 24/54] Update image/video serialization --- phi/agent/agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phi/agent/agent.py b/phi/agent/agent.py index ef0dff66e..b45a03518 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -574,9 +574,9 @@ def get_agent_data(self) -> Dict[str, Any]: if self.model is not None: agent_data["model"] = self.model.to_dict() if self.images is not None: - agent_data["images"] = self.images + agent_data["images"] = [img.model_dump() for img in self.images] if self.videos is not None: - agent_data["videos"] = self.videos + agent_data["videos"] = [vid.model_dump() for vid in self.videos] return agent_data def get_session_data(self) -> Dict[str, Any]: From d438e3a6b81a0ba667807a7f056bf403b20ff5a0 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 14:53:22 +0530 Subject: [PATCH 25/54] added image to video funcationality ,fixed formatting and mypy errors --- cookbook/tools/lumaAgent.py | 37 --------- cookbook/tools/lumalabs_tool.py | 46 +++++++++++ phi/tools/lumalab.py | 133 ++++++++++++++++++++++++-------- 3 files changed, 148 insertions(+), 68 deletions(-) delete mode 100644 cookbook/tools/lumaAgent.py create mode 100644 cookbook/tools/lumalabs_tool.py diff --git a/cookbook/tools/lumaAgent.py b/cookbook/tools/lumaAgent.py deleted file mode 100644 index a09d7c22f..000000000 --- a/cookbook/tools/lumaAgent.py +++ /dev/null @@ -1,37 +0,0 @@ -from phi.agent import Agent -from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLab - -"""Create an agent specialized for Luma AI video generation""" - -luma_agent = Agent( - name="Luma Video Agent", - agent_id="luma-video-agent", - llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLab()], # Using the LumaLab tool we created - markdown=True, - debug_mode=True, - show_tool_calls=True, - instructions=[ - "You are an agent designed to generate videos using the Luma AI API.", - "When asked to generate a video, use the generate_video function from the LumaLab toolkit.", - "Only pass the required parameters to the generate_video function unless specifically asked for other parameters.", - "The default parameters are:", - "- loop: False", - "- aspect_ratio: '16:9'", - "- keyframes: None", - "After generating the video, display the video URL in markdown format.", - "If the video generation is async (wait_for_completion=False), inform the user about the generation ID.", - "If any errors occur during generation, clearly communicate them to the user.", - ], - system_message=( - "Do not modify any default parameters of the generate_video function " - "unless explicitly specified in the user's request. Always provide clear " - "feedback about the video generation status." - ), -) - -luma_agent.run("Generate a video of a sunset over a peaceful ocean with gentle waves") - - - diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py new file mode 100644 index 000000000..bfa9f6043 --- /dev/null +++ b/cookbook/tools/lumalabs_tool.py @@ -0,0 +1,46 @@ +from phi.agent import Agent +from phi.llm.openai import OpenAIChat +from phi.tools.lumalab import LumaLab + +"""Create an agent specialized for Luma AI video generation""" + +luma_agent = Agent( + name="Luma Video Agent", + agent_id="luma-video-agent", + llm=OpenAIChat(model="gpt-4o"), + tools=[LumaLab()], # Using the LumaLab tool we created + markdown=True, + debug_mode=True, + show_tool_calls=True, + instructions=[ + "You are an agent designed to generate videos using the Luma AI API.", + "You can generate videos in two ways:", + "1. Text-to-Video Generation:", + " - Use the generate_video function for creating videos from text prompts", + " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None", + "2. Image-to-Video Generation:", + " - Use the image_to_video function when starting from one or two images", + " - Required parameters: prompt, image_url", + " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", + " - The image URLs must be publicly accessible", + "After generating any video:", + "- Display the video URL in markdown format", + "- If generation is async (wait_for_completion=False), inform about the generation ID", + "- Clearly communicate any errors that occur", + "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.", + ], + system_message=( + "Use generate_video for text-to-video requests and image_to_video for image-based " + "generation. Don't modify default parameters unless specifically requested. " + "Always provide clear feedback about the video generation status." + ), +) + +# luma_agent.run("Generate a video of a car in a city") +# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg") +luma_agent.run(""" +Create a transition video between these two images: +Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 +End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 +Make it a smooth, natural movement +""") diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 7912a1ebc..6336d2460 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,18 +1,27 @@ import time import json from os import getenv -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, Literal, TypedDict from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger try: - from lumaai import LumaAI + from lumaai import LumaAI # type: ignore except ImportError: raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`") +# Define types for keyframe structure +class KeyframeImage(TypedDict): + type: Literal["image"] + url: str + + +Keyframes = Dict[str, KeyframeImage] + + class LumaLab(Toolkit): def __init__( self, @@ -33,61 +42,124 @@ def __init__( self.client = LumaAI(auth_token=self.api_key) self.register(self.generate_video) + self.register(self.image_to_video) - def generate_video( + def image_to_video( self, agent: Agent, prompt: str, + image_url: str, + end_image_url: Optional[str] = None, loop: bool = False, - aspect_ratio: str = "16:9", - keyframes: Optional[Dict[str, Any]] = None, + aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", ) -> str: - """Use this function to generate a video given a prompt. + """Generate a video from one or two images with a prompt. Args: - prompt (str): A text description of the desired video. - loop (bool, optional): Whether the video should loop. Defaults to False. - aspect_ratio (str, optional): Aspect ratio of the video. Defaults to "16:9". - keyframes (Dict[str, Any], optional): Keyframe configuration for image-to-video or video extension. + agent: The agent instance + prompt: Text description of the desired video + image_url: URL of the starting image + end_image_url: Optional URL of the ending image + loop: Whether the video should loop + aspect_ratio: Aspect ratio of the output video Returns: - str: A message indicating if the video has been generated successfully or an error message. + str: Status message or error """ if not self.api_key: return "Please set the LUMAAI_API_KEY" try: - # Create generation request - generation_params = { + # Construct keyframes + keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}} + + # Add end image if provided + if end_image_url: + keyframes["frame1"] = {"type": "image", "url": end_image_url} + + # Create generation with keyframes + generation = self.client.generations.create( + prompt=prompt, + loop=loop, + aspect_ratio=aspect_ratio, + keyframes=keyframes, # type: ignore + ) + + if not self.wait_for_completion: + if generation and generation.id: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + return "Failed to start video generation: No generation ID received" + + # Poll for completion + seconds_waited = 0 + while seconds_waited < self.max_wait_time: + if not generation or not generation.id: + return "Failed to get generation ID" + + generation = self.client.generations.get(generation.id) + + if generation.state == "completed" and generation.assets: + video_url = generation.assets.video + if video_url: + agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + return f"Video generated successfully: {video_url}" + elif generation.state == "failed": + return f"Generation failed: {generation.failure_reason}" + + logger.info(f"Generation in progress... State: {generation.state}") + time.sleep(self.poll_interval) + seconds_waited += self.poll_interval + + return f"Video generation timed out after {self.max_wait_time} seconds" + + except Exception as e: + logger.error(f"Failed to generate video: {e}") + return f"Error: {e}" + + def generate_video( + self, + agent: Agent, + prompt: str, + loop: bool = False, + aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", + keyframes: Optional[Dict[str, Dict[str, str]]] = None, + ) -> str: + """Use this function to generate a video given a prompt.""" + if not self.api_key: + return "Please set the LUMAAI_API_KEY" + + try: + generation_params: Dict[str, Any] = { "prompt": prompt, "loop": loop, "aspect_ratio": aspect_ratio, } - if keyframes: + + if keyframes is not None: generation_params["keyframes"] = keyframes - logger.debug(f"Generating video with params: {generation_params}") - generation = self.client.generations.create(**generation_params) + generation = self.client.generations.create(**generation_params) # type: ignore if not self.wait_for_completion: - agent.add_video(json.dumps({"id": generation.id})) - return f"Video generation started with ID: {generation.id}" + if generation and generation.id: + agent.add_video(json.dumps({"id": generation.id})) + return f"Video generation started with ID: {generation.id}" + return "Failed to start video generation: No generation ID received" # Poll for completion - completed = False seconds_waited = 0 - while not completed and seconds_waited < self.max_wait_time: - generation = self.client.generations.get(id=generation.id) + while seconds_waited < self.max_wait_time: + if not generation or not generation.id: + return "Failed to get generation ID" + + generation = self.client.generations.get(generation.id) - if generation.state == "completed": - completed = True + if generation.state == "completed" and generation.assets: video_url = generation.assets.video - agent.add_video(json.dumps({ - "id": generation.id, - "url": video_url, - "state": "completed" - })) - return f"Video generated successfully: {video_url}" + if video_url: + agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -95,8 +167,7 @@ def generate_video( time.sleep(self.poll_interval) seconds_waited += self.poll_interval - if not completed: - return f"Video generation timed out after {self.max_wait_time} seconds" + return f"Video generation timed out after {self.max_wait_time} seconds" except Exception as e: logger.error(f"Failed to generate video: {e}") From 74bde7fce40a8df8d86011c189d96c325bd01bc3 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 11:29:55 +0200 Subject: [PATCH 26/54] Fix style --- phi/agent/agent.py | 4 ++-- phi/model/content.py | 2 ++ phi/run/response.py | 2 +- phi/tools/dalle.py | 9 +++------ phi/tools/models_labs.py | 6 +----- phi/tools/replicate.py | 11 ++++++----- 6 files changed, 15 insertions(+), 19 deletions(-) diff --git a/phi/agent/agent.py b/phi/agent/agent.py index b45a03518..fb0158695 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -633,13 +633,13 @@ def from_agent_session(self, session: AgentSession): if "images" in session.agent_data: images_from_db = session.agent_data.get("images") if self.images is not None and isinstance(self.images, list): - self.images.extend([Image.model_validate(img) for img in images_from_db]) + self.images.extend([Image.model_validate(img) for img in self.images]) else: self.images = images_from_db if "videos" in session.agent_data: videos_from_db = session.agent_data.get("videos") if self.videos is not None and isinstance(self.videos, list): - self.videos.extend([Video.model_validate(vid) for vid in videos_from_db]) + self.videos.extend([Video.model_validate(vid) for vid in self.videos]) else: self.videos = videos_from_db diff --git a/phi/model/content.py b/phi/model/content.py index f0ef06bd2..65a50650e 100644 --- a/phi/model/content.py +++ b/phi/model/content.py @@ -2,6 +2,7 @@ from pydantic import BaseModel + class Video(BaseModel): id: str url: str @@ -9,6 +10,7 @@ class Video(BaseModel): revised_prompt: Optional[str] = None eta: Optional[str] = None + class Image(BaseModel): id: str url: str diff --git a/phi/run/response.py b/phi/run/response.py index 809992855..fe2ab05f3 100644 --- a/phi/run/response.py +++ b/phi/run/response.py @@ -1,6 +1,6 @@ from time import time from enum import Enum -from typing import Optional, Any, Dict, List, Union +from typing import Optional, Any, Dict, List from pydantic import BaseModel, ConfigDict, Field diff --git a/phi/tools/dalle.py b/phi/tools/dalle.py index cf5f08f2b..52258419c 100644 --- a/phi/tools/dalle.py +++ b/phi/tools/dalle.py @@ -83,12 +83,9 @@ def create_image(self, agent: Agent, prompt: str) -> str: # Update the run response with the image URLs for img in response.data: - agent.add_image(Image( - id=str(uuid4()), - url=img.url, - original_prompt=prompt, - revised_prompt=img.revised_prompt - )) + agent.add_image( + Image(id=str(uuid4()), url=img.url, original_prompt=prompt, revised_prompt=img.revised_prompt) + ) return "Image has been generated successfully and will be displayed below" except Exception as e: logger.error(f"Failed to generate image: {e}") diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index e18afca4b..ab8945d12 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -97,11 +97,7 @@ def generate_video(self, agent: Agent, prompt: str) -> str: ) # Update the run response with the video URLs - agent.add_video(Video( - id=video_id, - url=video_url, - eta=eta - )) + agent.add_video(Video(id=video_id, url=video_url, eta=eta)) if self.wait_for_completion and isinstance(eta, int): video_ready = False diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index ae8041cfb..ad2fdb79d 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -1,4 +1,3 @@ -import json from os import getenv from uuid import uuid4 @@ -40,9 +39,11 @@ def generate_media(self, agent: Agent, prompt: str) -> str: output: FileOutput = replicate.run(ref=self.model, input={"prompt": prompt}) # Update the run response with the video URLs - agent.add_video(Video( - id=str(uuid4()), - url=output.url, - )) + agent.add_video( + Video( + id=str(uuid4()), + url=output.url, + ) + ) return f"Media generated successfully at {output.url}" From 4f16362bc98a3f0e7c56a63b58eea75b772fd24d Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 15:39:43 +0530 Subject: [PATCH 27/54] fix: improvements --- cookbook/agents/42_generate_fal_video.py | 2 +- phi/tools/fal_tools.py | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/42_generate_fal_video.py index 8076508da..7cc1e96e2 100644 --- a/cookbook/agents/42_generate_fal_video.py +++ b/cookbook/agents/42_generate_fal_video.py @@ -8,7 +8,7 @@ tools=[FalTools()], description="You are an AI agent that can generate videos using the Fal API.", instructions=[ - "When the user asks you to create a video, use the `run` tool to create the video.", + "When the user asks you to create a video, use the `generate_media` tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", "Also pass the type of model of the tool, it can be either `image` or `video`.", diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 2495b5838..5244f506a 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -8,8 +8,10 @@ from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger +from phi.model.content import Video, Image from enum import Enum -import json +from uuid import uuid4 + try: import fal_client # type: ignore @@ -38,7 +40,7 @@ def __init__( if not self.api_key: logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") self.seen_logs: set[str] = set() - self.register(self.run) + self.register(self.generate_media) def on_queue_update(self, update): if isinstance(update, fal_client.InProgress) and update.logs: @@ -48,7 +50,9 @@ def on_queue_update(self, update): logger.info(message) self.seen_logs.add(message) - def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Optional[ModelType] = None) -> str: + def generate_media( + self, agent: Agent, prompt: str, model: Optional[str] = None, type: Optional[ModelType] = None + ) -> str: """ Use this function to run a model with a given prompt. @@ -70,17 +74,15 @@ def run(self, agent: Agent, prompt: str, model: Optional[str] = None, type: Opti if type == ModelType.VIDEO: video_url = result.get("video", {}).get("url", "") data.append({"url": video_url}) - result["data"] = data - agent.add_video(json.dumps(result)) - return f"Video URL: {video_url}" + agent.add_video(Video(id=str(uuid4()), url=video_url)) + return f"Media generated successfully at {video_url}" elif type == ModelType.IMAGE: image_url = result.get("image", {}).get("url", "") data.append({"url": image_url}) - result["data"] = data - agent.add_image(json.dumps(result)) - return f"Image URL: {image_url}" + agent.add_image(Image(id=str(uuid4()), url=image_url)) + return f"Media generated successfully at {image_url}" else: - return str(result) + raise Exception("Model not supported") except Exception as e: logger.error(f"Failed to run model: {e}") return f"Error: {e}" From a8b3c6180046432e88984d9b6cc8aa3734854407 Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 15:40:30 +0530 Subject: [PATCH 28/54] fix: rename file --- .../agents/{42_generate_fal_video.py => 44_generate_fal_video.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cookbook/agents/{42_generate_fal_video.py => 44_generate_fal_video.py} (100%) diff --git a/cookbook/agents/42_generate_fal_video.py b/cookbook/agents/44_generate_fal_video.py similarity index 100% rename from cookbook/agents/42_generate_fal_video.py rename to cookbook/agents/44_generate_fal_video.py From 935b680ca2a3f0ab90cea86adb786f1771e3b4b4 Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 15:41:16 +0530 Subject: [PATCH 29/54] fix: instruction update --- cookbook/playground/multimodal_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index fec7d28b9..ce5df836c 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -59,7 +59,7 @@ tools=[FalTools()], description="You are an AI agent that can generate videos using the Fal API.", instructions=[ - "When the user asks you to create a video, use the `run` tool to create the video.", + "When the user asks you to create a video, use the `generate_media` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", "Use `fal-ai/hunyuan-video` model by default.", "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", From e85e388c2330f0399b894cb4eb277166522452d8 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 16:17:22 +0530 Subject: [PATCH 30/54] updated according to comments/review --- cookbook/tools/lumalabs_tool.py | 20 ++++++++++---------- phi/tools/lumalab.py | 22 +++++++++------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py index bfa9f6043..43fe5c6f2 100644 --- a/cookbook/tools/lumalabs_tool.py +++ b/cookbook/tools/lumalabs_tool.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLab +from phi.tools.lumalab import LumaLabToolkit """Create an agent specialized for Luma AI video generation""" @@ -8,7 +8,7 @@ name="Luma Video Agent", agent_id="luma-video-agent", llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLab()], # Using the LumaLab tool we created + tools=[LumaLabToolkit()], # Using the LumaLab tool we created markdown=True, debug_mode=True, show_tool_calls=True, @@ -20,7 +20,7 @@ " - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None", "2. Image-to-Video Generation:", " - Use the image_to_video function when starting from one or two images", - " - Required parameters: prompt, image_url", + " - Required parameters: prompt, start_image_url", " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", " - The image URLs must be publicly accessible", "After generating any video:", @@ -36,11 +36,11 @@ ), ) -# luma_agent.run("Generate a video of a car in a city") +luma_agent.run("Generate a video of a car in a sky") # luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg") -luma_agent.run(""" -Create a transition video between these two images: -Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 -End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 -Make it a smooth, natural movement -""") +# luma_agent.run(""" +# Create a transition video between these two images: +# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380 +# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380 +# Make it a smooth, natural movement +# """) diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 6336d2460..4caaf6743 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,11 +1,11 @@ import time -import json from os import getenv from typing import Optional, Dict, Any, Literal, TypedDict from phi.agent import Agent from phi.tools import Toolkit from phi.utils.log import logger +from phi.model.content import Video try: from lumaai import LumaAI # type: ignore @@ -22,7 +22,7 @@ class KeyframeImage(TypedDict): Keyframes = Dict[str, KeyframeImage] -class LumaLab(Toolkit): +class LumaLabToolkit(Toolkit): def __init__( self, api_key: Optional[str] = None, @@ -48,7 +48,7 @@ def image_to_video( self, agent: Agent, prompt: str, - image_url: str, + start_image_url: str, end_image_url: Optional[str] = None, loop: bool = False, aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9", @@ -58,7 +58,7 @@ def image_to_video( Args: agent: The agent instance prompt: Text description of the desired video - image_url: URL of the starting image + start_image_url: URL of the starting image end_image_url: Optional URL of the ending image loop: Whether the video should loop aspect_ratio: Aspect ratio of the output video @@ -66,12 +66,10 @@ def image_to_video( Returns: str: Status message or error """ - if not self.api_key: - return "Please set the LUMAAI_API_KEY" try: # Construct keyframes - keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": image_url}} + keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": start_image_url}} # Add end image if provided if end_image_url: @@ -87,7 +85,7 @@ def image_to_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(json.dumps({"id": generation.id})) + agent.add_video(Video(id=generation.id)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -102,7 +100,7 @@ def image_to_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + agent.add_video(Video(id=generation.id, url=video_url, eta="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -126,8 +124,6 @@ def generate_video( keyframes: Optional[Dict[str, Dict[str, str]]] = None, ) -> str: """Use this function to generate a video given a prompt.""" - if not self.api_key: - return "Please set the LUMAAI_API_KEY" try: generation_params: Dict[str, Any] = { @@ -143,7 +139,7 @@ def generate_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(json.dumps({"id": generation.id})) + agent.add_video(Video(id=generation.id)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -158,7 +154,7 @@ def generate_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(json.dumps({"id": generation.id, "url": video_url, "state": "completed"})) + agent.add_video(Video(id=generation.id, url=video_url, state="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" From 47f59f1ce2db4228b0dd2b4345fd951505928fb2 Mon Sep 17 00:00:00 2001 From: ayush0054 Date: Tue, 10 Dec 2024 16:24:08 +0530 Subject: [PATCH 31/54] formatting --- phi/tools/lumalab.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 4caaf6743..32723007d 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -85,7 +85,7 @@ def image_to_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(Video(id=generation.id)) + agent.add_video(Video(id=generation.id, url=None)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" @@ -139,7 +139,7 @@ def generate_video( if not self.wait_for_completion: if generation and generation.id: - agent.add_video(Video(id=generation.id)) + agent.add_video(Video(id=generation.id, url=None)) return f"Video generation started with ID: {generation.id}" return "Failed to start video generation: No generation ID received" From c0b018c557cdf185a3664db7f33a753762b7d4d4 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 14:09:49 +0200 Subject: [PATCH 32/54] Fix typo --- cookbook/agents/43_generate_replicate_video.py | 4 ++-- phi/tools/replicate.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py index d64faf531..d1de16478 100644 --- a/cookbook/agents/43_generate_replicate_video.py +++ b/cookbook/agents/43_generate_replicate_video.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.model.openai import OpenAIChat -from phi.tools.replicate import ReplicateToolKit +from phi.tools.replicate import ReplicateToolkit """Create an agent specialized for Replicate AI content generation""" @@ -8,7 +8,7 @@ name="Video Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ - ReplicateToolKit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") + ReplicateToolkit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") ], description="You are an AI agent that can generate images and videos using the Replicate API.", instructions=[ diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index ad2fdb79d..5ddea7d8e 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -13,7 +13,7 @@ raise ImportError("`replicate` not installed. Please install using `pip install replicate`.") -class ReplicateToolKit(Toolkit): +class ReplicateToolkit(Toolkit): def __init__( self, model: str = "minimax/video-01", From 5740cf8d11b0f6deba31aee6a379fb43ec2bc1cf Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 17:53:45 +0530 Subject: [PATCH 33/54] fix: modal labs type mismatch --- phi/tools/models_labs.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index b438d8e94..00ffc49a6 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -93,19 +93,11 @@ def generate_video(self, agent: Agent, prompt: str) -> str: logger.info(f"Video will be ready in {eta} seconds") logger.info(f"Video URLs: {video_url_links}") - video_data = [] logger.debug(f"Result: {result}") for video_url in video_url_links: - video_data.append( - { - "eta": eta, - "video_id": video_id, - "url": video_url, - } - ) # Update the run response with the video URLs - agent.add_video(Video(id=video_id, url=video_url, eta=eta)) + agent.add_video(Video(id=str(video_id), url=video_url, eta=str(eta))) if self.wait_for_completion and isinstance(eta, int): video_ready = False From dff1e02559da7da97f9da315c3b45c7d144c0332 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 14:54:02 +0200 Subject: [PATCH 34/54] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fa7685340..a829aa4ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "phidata" -version = "2.6.5" +version = "2.7.0" description = "Build multi-modal Agents with memory, knowledge and tools." requires-python = ">=3.7" readme = "README.md" From eccea9a405f0b3ea192796b3dc9837355dd6823a Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 15:06:21 +0200 Subject: [PATCH 35/54] Add image cookbook --- .../agents/43_generate_replicate_video.py | 2 +- .../agents/44_generate_replicate_image.py | 24 ++++++++++ phi/tools/replicate.py | 44 ++++++++++++++----- 3 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 cookbook/agents/44_generate_replicate_image.py diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py index d1de16478..7c8e92324 100644 --- a/cookbook/agents/43_generate_replicate_video.py +++ b/cookbook/agents/43_generate_replicate_video.py @@ -10,7 +10,7 @@ tools=[ ReplicateToolkit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") ], - description="You are an AI agent that can generate images and videos using the Replicate API.", + description="You are an AI agent that can generate videos using the Replicate API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", "Return the URL as raw to the user.", diff --git a/cookbook/agents/44_generate_replicate_image.py b/cookbook/agents/44_generate_replicate_image.py new file mode 100644 index 000000000..b5bc991da --- /dev/null +++ b/cookbook/agents/44_generate_replicate_image.py @@ -0,0 +1,24 @@ +from phi.agent import Agent +from phi.model.openai import OpenAIChat +from phi.tools.replicate import ReplicateToolkit + +"""Create an agent specialized for Replicate AI content generation""" + +video_agent = Agent( + name="Image Generator Agent", + model=OpenAIChat(id="gpt-4o"), + tools=[ + ReplicateToolkit(model="luma/photon-flash") + ], + description="You are an AI agent that can generate images using the Replicate API.", + instructions=[ + "When the user asks you to create an image, use the `generate_media` tool to create the image.", + "Return the URL as raw to the user.", + "Don't convert image URL to markdown or anything else.", + ], + markdown=True, + debug_mode=True, + show_tool_calls=True, +) + +video_agent.print_response("Generate an image of a horse in the dessert.") diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index 5ddea7d8e..74d6b85bf 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -1,8 +1,10 @@ +import os from os import getenv +from urllib.parse import urlparse from uuid import uuid4 from phi.agent import Agent -from phi.model.content import Video +from phi.model.content import Video, Image from phi.tools import Toolkit from phi.utils.log import logger @@ -17,8 +19,6 @@ class ReplicateToolkit(Toolkit): def __init__( self, model: str = "minimax/video-01", - wait_for_completion: bool = True, - max_wait_time: int = 300, # 5 minutes ): super().__init__(name="replicate_toolkit") self.api_key = getenv("REPLICATE_API_TOKEN") @@ -38,12 +38,36 @@ def generate_media(self, agent: Agent, prompt: str) -> str: """ output: FileOutput = replicate.run(ref=self.model, input={"prompt": prompt}) - # Update the run response with the video URLs - agent.add_video( - Video( - id=str(uuid4()), - url=output.url, + # Parse the URL to extract the file extension + parsed_url = urlparse(output.url) + path = parsed_url.path + _, ext = os.path.splitext(path) + ext = ext.lower() + + # Define supported extensions + image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'} + video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.flv', '.wmv', '.webm'} + + media_id = str(uuid4()) + + if ext in image_extensions: + agent.add_image( + Image( + id=media_id, + url=output.url, + ) + ) + media_type = "image" + elif ext in video_extensions: + agent.add_video( + Video( + id=media_id, + url=output.url, + ) ) - ) + media_type = "video" + else: + logger.error(f"Unsupported media type with extension '{ext}' for URL: {output.url}") + return f"Unsupported media type with extension '{ext}'." - return f"Media generated successfully at {output.url}" + return f"{media_type.capitalize()} generated successfully at {output.url}" From 2443c273f2dbba9027ca77a78259ad676dbfaac8 Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 18:40:18 +0530 Subject: [PATCH 36/54] fix: send gif in image --- phi/tools/models_labs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index 00ffc49a6..4c801d1bf 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -4,7 +4,7 @@ from typing import Optional from phi.agent import Agent -from phi.model.content import Video +from phi.model.content import Video, Image from phi.tools import Toolkit from phi.utils.log import logger from enum import Enum @@ -96,8 +96,10 @@ def generate_video(self, agent: Agent, prompt: str) -> str: logger.debug(f"Result: {result}") for video_url in video_url_links: - # Update the run response with the video URLs - agent.add_video(Video(id=str(video_id), url=video_url, eta=str(eta))) + if self.file_type == FileType.MP4: + agent.add_video(Video(id=str(video_id), url=video_url, eta=str(eta))) + elif self.file_type == FileType.GIF: + agent.add_image(Image(id=str(video_id), url=video_url, eta=str(eta))) if self.wait_for_completion and isinstance(eta, int): video_ready = False From a643f2120ee06e34a8e50779a9f9f533fd9324ee Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 15:58:35 +0200 Subject: [PATCH 37/54] Update --- phi/tools/replicate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index 74d6b85bf..4d8c8517f 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -25,7 +25,6 @@ def __init__( if not self.api_key: logger.error("REPLICATE_API_TOKEN not set. Please set the REPLICATE_API_TOKEN environment variable.") self.model = model - self.register(self.generate_media) def generate_media(self, agent: Agent, prompt: str) -> str: From 312895bbedd58d147b7c94bbd6e38a6d24a3ac22 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 16:11:41 +0200 Subject: [PATCH 38/54] Fix FAL interface --- cookbook/agents/44_generate_fal_video.py | 5 ++- cookbook/playground/multimodal_agent.py | 3 +- phi/tools/fal_tools.py | 46 ++++++++++++++---------- phi/tools/models_labs.py | 6 ++-- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/cookbook/agents/44_generate_fal_video.py b/cookbook/agents/44_generate_fal_video.py index 7cc1e96e2..8ed8139b4 100644 --- a/cookbook/agents/44_generate_fal_video.py +++ b/cookbook/agents/44_generate_fal_video.py @@ -5,17 +5,16 @@ fal_agent = Agent( name="Fal Video Generator Agent", model=OpenAIChat(id="gpt-4o"), - tools=[FalTools()], + tools=[FalTools("fal-ai/hunyuan-video")], description="You are an AI agent that can generate videos using the Fal API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", "Return the URL as raw to the user.", "Don't convert video URL to markdown or anything else.", - "Also pass the type of model of the tool, it can be either `image` or `video`.", ], markdown=True, debug_mode=True, show_tool_calls=True, ) -fal_agent.print_response("Generate video of ballon in the ocean") +fal_agent.print_response("Generate video of balloon in the ocean") diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index ce5df836c..f58a3d6b5 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -56,12 +56,11 @@ name="Fal Video Generator Agent", agent_id="fal_agent", model=OpenAIChat(id="gpt-4o"), - tools=[FalTools()], + tools=[FalTools("fal-ai/hunyuan-video")], description="You are an AI agent that can generate videos using the Fal API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", - "Use `fal-ai/hunyuan-video` model by default.", "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", "Don't send video url in markdown format.", ], diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 5244f506a..3c9afa94a 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -30,13 +30,11 @@ def __init__( self, api_key: Optional[str] = None, model: str = "fal-ai/hunyuan-video", - type: ModelType = ModelType.VIDEO, ): super().__init__(name="fal") self.api_key = api_key or getenv("FAL_API_KEY") self.model = model - self.type = type if not self.api_key: logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") self.seen_logs: set[str] = set() @@ -51,38 +49,50 @@ def on_queue_update(self, update): self.seen_logs.add(message) def generate_media( - self, agent: Agent, prompt: str, model: Optional[str] = None, type: Optional[ModelType] = None + self, agent: Agent, prompt: str ) -> str: """ Use this function to run a model with a given prompt. Args: prompt (str): A text description of the task. - model (str): The model to use. - type (ModelType): The type of the model to use. It can be either `image` or `video` or `text`. Returns: str: Return the result of the model. """ try: - data = [] + result = fal_client.subscribe( - model or self.model, + self.model, arguments={"prompt": prompt}, with_logs=True, on_queue_update=self.on_queue_update, ) - if type == ModelType.VIDEO: - video_url = result.get("video", {}).get("url", "") - data.append({"url": video_url}) - agent.add_video(Video(id=str(uuid4()), url=video_url)) - return f"Media generated successfully at {video_url}" - elif type == ModelType.IMAGE: - image_url = result.get("image", {}).get("url", "") - data.append({"url": image_url}) - agent.add_image(Image(id=str(uuid4()), url=image_url)) - return f"Media generated successfully at {image_url}" + + media_id = str(uuid4()) + + if "image" in result: + url = result.get("image", {}).get("url", "") + agent.add_image( + Image( + id=media_id, + url=url, + ) + ) + media_type = "image" + elif "video" in result: + url = result.get("video", {}).get("url", "") + agent.add_video( + Video( + id=media_id, + url=url, + ) + ) + media_type = "video" else: - raise Exception("Model not supported") + logger.error(f"Unsupported type in result: {result}") + return f"Unsupported type in result: {result}" + + return f"{media_type.capitalize()} generated successfully at {url}" except Exception as e: logger.error(f"Failed to run model: {e}") return f"Error: {e}" diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index 00ffc49a6..17ac8330e 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -2,6 +2,7 @@ import json from os import getenv from typing import Optional +from uuid import uuid4 from phi.agent import Agent from phi.model.content import Video @@ -89,15 +90,16 @@ def generate_video(self, agent: Agent, prompt: str) -> str: eta = result["eta"] video_url_links = result["future_links"] - video_id = result["id"] logger.info(f"Video will be ready in {eta} seconds") logger.info(f"Video URLs: {video_url_links}") + video_id = str(uuid4()) + logger.debug(f"Result: {result}") for video_url in video_url_links: # Update the run response with the video URLs - agent.add_video(Video(id=str(video_id), url=video_url, eta=str(eta))) + agent.add_video(Video(id=video_id, url=video_url, eta=str(eta))) if self.wait_for_completion and isinstance(eta, int): video_ready = False From 398e2272d5ba36f7871a213a92f0c0988b6d80ae Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 16:41:03 +0200 Subject: [PATCH 39/54] Fix FAL_KEY --- ..._generate_fal_video.py => 45_generate_fal_video.py} | 0 cookbook/playground/multimodal_agent.py | 6 +++--- phi/tools/fal_tools.py | 10 ++-------- 3 files changed, 5 insertions(+), 11 deletions(-) rename cookbook/agents/{44_generate_fal_video.py => 45_generate_fal_video.py} (100%) diff --git a/cookbook/agents/44_generate_fal_video.py b/cookbook/agents/45_generate_fal_video.py similarity index 100% rename from cookbook/agents/44_generate_fal_video.py rename to cookbook/agents/45_generate_fal_video.py diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index f58a3d6b5..73583363f 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -17,7 +17,7 @@ image_agent_storage_file: str = "tmp/image_agent.db" image_agent = Agent( - name="Image Agent", + name="DALL-E Image Agent", agent_id="image_agent", model=OpenAIChat(id="gpt-4o"), tools=[Dalle()], @@ -35,7 +35,7 @@ ) video_agent = Agent( - name="Video Agent", + name="ModelsLab Video Agent", agent_id="video_agent", model=OpenAIChat(id="gpt-4o"), tools=[ModelsLabs(wait_for_completion=True)], @@ -53,7 +53,7 @@ ) fal_agent = Agent( - name="Fal Video Generator Agent", + name="Fal Video Agent", agent_id="fal_agent", model=OpenAIChat(id="gpt-4o"), tools=[FalTools("fal-ai/hunyuan-video")], diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 3c9afa94a..542956ec3 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -19,12 +19,6 @@ raise ImportError("`fal_client` not installed. Please install using `pip install fal-client`") -class ModelType(str, Enum): - IMAGE = "image" - VIDEO = "video" - TEXT = "text" - - class FalTools(Toolkit): def __init__( self, @@ -33,10 +27,10 @@ def __init__( ): super().__init__(name="fal") - self.api_key = api_key or getenv("FAL_API_KEY") + self.api_key = api_key or getenv("FAL_KEY") self.model = model if not self.api_key: - logger.error("FAL_API_KEY not set. Please set the FAL_API_KEY environment variable.") + logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.") self.seen_logs: set[str] = set() self.register(self.generate_media) From 5e3d3e202ad07cac869e14a8f9f1981d3499f55d Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 17:09:00 +0200 Subject: [PATCH 40/54] Add modellabs gif to playground app --- cookbook/agents/15_generate_video.py | 2 +- cookbook/playground/multimodal_agent.py | 33 ++++++++++++++++++++----- phi/model/response.py | 5 ++++ phi/tools/models_labs.py | 7 +----- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/cookbook/agents/15_generate_video.py b/cookbook/agents/15_generate_video.py index 14fb66e9f..d20e3cc55 100644 --- a/cookbook/agents/15_generate_video.py +++ b/cookbook/agents/15_generate_video.py @@ -7,7 +7,7 @@ tools=[ModelsLabs()], description="You are an AI agent that can generate videos using the ModelsLabs API.", instructions=[ - "When the user asks you to create a video, use the `create_video` tool to create the video.", + "When the user asks you to create a video, use the `generate_media` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", ], diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index 73583363f..2db6fffba 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -10,6 +10,7 @@ from phi.model.openai import OpenAIChat from phi.tools.dalle import Dalle from phi.tools.models_labs import ModelsLabs +from phi.model.response import FileType from phi.playground import Playground, serve_playground_app from phi.storage.agent.sqlite import SqlAgentStorage from phi.tools.fal_tools import FalTools @@ -34,14 +35,32 @@ storage=SqlAgentStorage(table_name="image_agent", db_file="tmp/image_agent.db"), ) -video_agent = Agent( +ml_gif_agent = Agent( + name="ModelsLab GIF Agent", + agent_id="ml_gif_agent", + model=OpenAIChat(id="gpt-4o"), + tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.GIF)], + description="You are an AI agent that can generate gifs using the ModelsLabs API.", + instructions=[ + "When the user asks you to create an image, use the `generate_media` tool to create the image.", + "The image will be displayed in the UI automatically below your response, so you don't need to show the image URL in your response.", + "Politely and courteously let the user know that the gif has been generated and will be displayed below as soon as its ready.", + ], + markdown=True, + debug_mode=True, + add_history_to_messages=True, + add_datetime_to_instructions=True, + storage=SqlAgentStorage(table_name="ml_gif_agent", db_file="tmp/ml_gif_agent.db"), +) + +ml_video_agent = Agent( name="ModelsLab Video Agent", - agent_id="video_agent", + agent_id="ml_video_agent", model=OpenAIChat(id="gpt-4o"), - tools=[ModelsLabs(wait_for_completion=True)], + tools=[ModelsLabs(wait_for_completion=True, file_type=FileType.MP4)], description="You are an AI agent that can generate videos using the ModelsLabs API.", instructions=[ - "When the user asks you to create a video, use the `create_video` tool to create the video.", + "When the user asks you to create a video, use the `generate_media` tool to create the video.", "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", ], @@ -49,7 +68,7 @@ debug_mode=True, add_history_to_messages=True, add_datetime_to_instructions=True, - storage=SqlAgentStorage(table_name="video_agent", db_file="tmp/video_agent.db"), + storage=SqlAgentStorage(table_name="ml_video_agent", db_file="tmp/ml_video_agent.db"), ) fal_agent = Agent( @@ -71,7 +90,9 @@ storage=SqlAgentStorage(table_name="fal_agent", db_file="tmp/fal_agent.db"), ) -app = Playground(agents=[image_agent, video_agent, fal_agent]).get_app() + + +app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app() if __name__ == "__main__": serve_playground_app("multimodal_agent:app", reload=True) diff --git a/phi/model/response.py b/phi/model/response.py index 9fccf3df6..619c96073 100644 --- a/phi/model/response.py +++ b/phi/model/response.py @@ -23,3 +23,8 @@ class ModelResponse: tool_call: Optional[Dict[str, Any]] = None event: str = ModelResponseEvent.assistant_response.value created_at: int = int(time()) + + +class FileType(str, Enum): + MP4 = "mp4" + GIF = "gif" diff --git a/phi/tools/models_labs.py b/phi/tools/models_labs.py index a89082137..c32a58ac8 100644 --- a/phi/tools/models_labs.py +++ b/phi/tools/models_labs.py @@ -6,9 +6,9 @@ from phi.agent import Agent from phi.model.content import Video, Image +from phi.model.response import FileType from phi.tools import Toolkit from phi.utils.log import logger -from enum import Enum try: import requests @@ -16,11 +16,6 @@ raise ImportError("`requests` not installed. Please install using `pip install requests`") -class FileType(str, Enum): - MP4 = "mp4" - GIF = "gif" - - class ModelsLabs(Toolkit): def __init__( self, From 7ddc56cdcd392331ecb535ebc4fee52c92b693b7 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 17:43:42 +0200 Subject: [PATCH 41/54] Update name of replicate tools --- cookbook/agents/43_generate_replicate_video.py | 4 ++-- cookbook/agents/44_generate_replicate_image.py | 4 ++-- phi/tools/replicate.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cookbook/agents/43_generate_replicate_video.py b/cookbook/agents/43_generate_replicate_video.py index 7c8e92324..f855abf23 100644 --- a/cookbook/agents/43_generate_replicate_video.py +++ b/cookbook/agents/43_generate_replicate_video.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.model.openai import OpenAIChat -from phi.tools.replicate import ReplicateToolkit +from phi.tools.replicate import ReplicateTools """Create an agent specialized for Replicate AI content generation""" @@ -8,7 +8,7 @@ name="Video Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ - ReplicateToolkit(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") + ReplicateTools(model="tencent/hunyuan-video:847dfa8b01e739637fc76f480ede0c1d76408e1d694b830b5dfb8e547bf98405") ], description="You are an AI agent that can generate videos using the Replicate API.", instructions=[ diff --git a/cookbook/agents/44_generate_replicate_image.py b/cookbook/agents/44_generate_replicate_image.py index b5bc991da..148b8b020 100644 --- a/cookbook/agents/44_generate_replicate_image.py +++ b/cookbook/agents/44_generate_replicate_image.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.model.openai import OpenAIChat -from phi.tools.replicate import ReplicateToolkit +from phi.tools.replicate import ReplicateTools """Create an agent specialized for Replicate AI content generation""" @@ -8,7 +8,7 @@ name="Image Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ - ReplicateToolkit(model="luma/photon-flash") + ReplicateTools(model="luma/photon-flash") ], description="You are an AI agent that can generate images using the Replicate API.", instructions=[ diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index 4d8c8517f..1588b8b9e 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -15,7 +15,7 @@ raise ImportError("`replicate` not installed. Please install using `pip install replicate`.") -class ReplicateToolkit(Toolkit): +class ReplicateTools(Toolkit): def __init__( self, model: str = "minimax/video-01", From a5c4681f629a5229be453f3845048ca62e40263a Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 17:44:48 +0200 Subject: [PATCH 42/54] Fix style --- cookbook/agents/44_generate_replicate_image.py | 4 +--- cookbook/playground/multimodal_agent.py | 1 - phi/tools/fal_tools.py | 6 +----- phi/tools/replicate.py | 4 ++-- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/cookbook/agents/44_generate_replicate_image.py b/cookbook/agents/44_generate_replicate_image.py index 148b8b020..d99477957 100644 --- a/cookbook/agents/44_generate_replicate_image.py +++ b/cookbook/agents/44_generate_replicate_image.py @@ -7,9 +7,7 @@ video_agent = Agent( name="Image Generator Agent", model=OpenAIChat(id="gpt-4o"), - tools=[ - ReplicateTools(model="luma/photon-flash") - ], + tools=[ReplicateTools(model="luma/photon-flash")], description="You are an AI agent that can generate images using the Replicate API.", instructions=[ "When the user asks you to create an image, use the `generate_media` tool to create the image.", diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index 2db6fffba..52eba7c75 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -91,7 +91,6 @@ ) - app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app() if __name__ == "__main__": diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py index 542956ec3..e51eb0926 100644 --- a/phi/tools/fal_tools.py +++ b/phi/tools/fal_tools.py @@ -9,7 +9,6 @@ from phi.tools import Toolkit from phi.utils.log import logger from phi.model.content import Video, Image -from enum import Enum from uuid import uuid4 @@ -42,9 +41,7 @@ def on_queue_update(self, update): logger.info(message) self.seen_logs.add(message) - def generate_media( - self, agent: Agent, prompt: str - ) -> str: + def generate_media(self, agent: Agent, prompt: str) -> str: """ Use this function to run a model with a given prompt. @@ -54,7 +51,6 @@ def generate_media( str: Return the result of the model. """ try: - result = fal_client.subscribe( self.model, arguments={"prompt": prompt}, diff --git a/phi/tools/replicate.py b/phi/tools/replicate.py index 1588b8b9e..7d5fb3e16 100644 --- a/phi/tools/replicate.py +++ b/phi/tools/replicate.py @@ -44,8 +44,8 @@ def generate_media(self, agent: Agent, prompt: str) -> str: ext = ext.lower() # Define supported extensions - image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'} - video_extensions = {'.mp4', '.mov', '.avi', '.mkv', '.flv', '.wmv', '.webm'} + image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"} + video_extensions = {".mp4", ".mov", ".avi", ".mkv", ".flv", ".wmv", ".webm"} media_id = str(uuid4()) From 0d8c2b402f8f644539f5afdd7d169a1fc0a9dbd0 Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 21:35:11 +0530 Subject: [PATCH 43/54] fix: remove duplicate --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9b8c3c94..a829aa4ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,6 @@ exclude = ["phienv*", "aienv*"] check_untyped_defs = true no_implicit_optional = true warn_unused_configs = true -ignore_missing_imports = true plugins = ["pydantic.mypy"] exclude = ["phienv*", "aienv*", "scratch*", "wip*", "tmp*", "cookbook/assistants/examples/*", "phi/assistant/openai/*"] From 0e98701b58b3fcccfcb8f34f3b20a6d0b3786d62 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 18:17:34 +0200 Subject: [PATCH 44/54] Fix gemini reference --- phi/llm/google/gemini.py | 4 ++-- phi/llm/vertexai/gemini.py | 4 ++-- phi/model/google/gemini.py | 4 ++-- phi/model/vertexai/gemini.py | 4 ++-- pyproject.toml | 1 - 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/phi/llm/google/gemini.py b/phi/llm/google/gemini.py index f4aeb134a..00d7626d9 100644 --- a/phi/llm/google/gemini.py +++ b/phi/llm/google/gemini.py @@ -132,10 +132,10 @@ def api_kwargs(self) -> Dict[str, Any]: return kwargs def invoke(self, messages: List[Message]): - return self.client.generate_media(contents=self.conform_messages_to_gemini(messages)) + return self.client.generate_content(contents=self.conform_messages_to_gemini(messages)) def invoke_stream(self, messages: List[Message]): - yield from self.client.generate_media( + yield from self.client.generate_content( contents=self.conform_messages_to_gemini(messages), stream=True, ) diff --git a/phi/llm/vertexai/gemini.py b/phi/llm/vertexai/gemini.py index aa952872e..9f7cfee7b 100644 --- a/phi/llm/vertexai/gemini.py +++ b/phi/llm/vertexai/gemini.py @@ -130,10 +130,10 @@ def convert_messages_to_contents(self, messages: List[Message]) -> List[Any]: return _contents def invoke(self, messages: List[Message]) -> GenerationResponse: - return self.client.generate_media(contents=self.convert_messages_to_contents(messages)) + return self.client.generate_content(contents=self.convert_messages_to_contents(messages)) def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]: - yield from self.client.generate_media( + yield from self.client.generate_content( contents=self.convert_messages_to_contents(messages), stream=True, ) diff --git a/phi/model/google/gemini.py b/phi/model/google/gemini.py index fa808ca1f..891f87962 100644 --- a/phi/model/google/gemini.py +++ b/phi/model/google/gemini.py @@ -267,7 +267,7 @@ def invoke(self, messages: List[Message]): Returns: GenerateContentResponse: The response from the model. """ - return self.get_client().generate_media(contents=self._format_messages(messages)) + return self.get_client().generate_content(contents=self._format_messages(messages)) def invoke_stream(self, messages: List[Message]): """ @@ -279,7 +279,7 @@ def invoke_stream(self, messages: List[Message]): Returns: Iterator[GenerateContentResponse]: The response from the model as a stream. """ - yield from self.get_client().generate_media( + yield from self.get_client().generate_content( contents=self._format_messages(messages), stream=True, ) diff --git a/phi/model/vertexai/gemini.py b/phi/model/vertexai/gemini.py index 268065d48..b416e1cc5 100644 --- a/phi/model/vertexai/gemini.py +++ b/phi/model/vertexai/gemini.py @@ -258,7 +258,7 @@ def invoke(self, messages: List[Message]) -> GenerationResponse: Returns: GenerationResponse object containing the response content """ - return self.get_client().generate_media(contents=self._format_messages(messages)) + return self.get_client().generate_content(contents=self._format_messages(messages)) def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse]: """ @@ -270,7 +270,7 @@ def invoke_stream(self, messages: List[Message]) -> Iterator[GenerationResponse] Returns: Iterator[GenerationResponse] object containing the response content """ - yield from self.get_client().generate_media( + yield from self.get_client().generate_content( contents=self._format_messages(messages), stream=True, ) diff --git a/pyproject.toml b/pyproject.toml index b9b8c3c94..168f0c05b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,4 +169,3 @@ module = [ "youtube_transcript_api.*", "chonkie.*", ] -ignore_missing_imports = true From 8620e579b82b7652a69ecb5c2d012dae912843c9 Mon Sep 17 00:00:00 2001 From: anuragts Date: Tue, 10 Dec 2024 21:51:04 +0530 Subject: [PATCH 45/54] fix: remove correct --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 38e742bbc..168f0c05b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,7 @@ exclude = ["phienv*", "aienv*"] check_untyped_defs = true no_implicit_optional = true warn_unused_configs = true +ignore_missing_imports = true plugins = ["pydantic.mypy"] exclude = ["phienv*", "aienv*", "scratch*", "wip*", "tmp*", "cookbook/assistants/examples/*", "phi/assistant/openai/*"] From 2ce1fe6d4c13f679160e15767a81cbeba77a3720 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 18:29:52 +0200 Subject: [PATCH 46/54] Fix mypy --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 38e742bbc..d3dde82f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,6 +145,7 @@ module = [ "pytz.*", "qdrant_client.*", "rapidocr_onnxruntime.*", + "replicate.*", "requests.*", "sentence_transformers.*", "serpapi.*", @@ -168,3 +169,4 @@ module = [ "youtube_transcript_api.*", "chonkie.*", ] +ignore_missing_imports = true From 795ee52caab25acae9cd8b7ca3499dfd3a1cf738 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Tue, 10 Dec 2024 19:05:28 +0200 Subject: [PATCH 47/54] Update lumalabs to work with new interface --- cookbook/tools/lumalabs_tool.py | 11 +++++------ phi/tools/lumalab.py | 20 +++++++++----------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/cookbook/tools/lumalabs_tool.py b/cookbook/tools/lumalabs_tool.py index 43fe5c6f2..8d87d31f1 100644 --- a/cookbook/tools/lumalabs_tool.py +++ b/cookbook/tools/lumalabs_tool.py @@ -1,6 +1,6 @@ from phi.agent import Agent from phi.llm.openai import OpenAIChat -from phi.tools.lumalab import LumaLabToolkit +from phi.tools.lumalab import LumaLabTools """Create an agent specialized for Luma AI video generation""" @@ -8,7 +8,7 @@ name="Luma Video Agent", agent_id="luma-video-agent", llm=OpenAIChat(model="gpt-4o"), - tools=[LumaLabToolkit()], # Using the LumaLab tool we created + tools=[LumaLabTools()], # Using the LumaLab tool we created markdown=True, debug_mode=True, show_tool_calls=True, @@ -23,11 +23,10 @@ " - Required parameters: prompt, start_image_url", " - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'", " - The image URLs must be publicly accessible", - "After generating any video:", - "- Display the video URL in markdown format", - "- If generation is async (wait_for_completion=False), inform about the generation ID", - "- Clearly communicate any errors that occur", "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.", + "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", + "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", + "After generating any video, if generation is async (wait_for_completion=False), inform about the generation ID", ], system_message=( "Use generate_video for text-to-video requests and image_to_video for image-based " diff --git a/phi/tools/lumalab.py b/phi/tools/lumalab.py index 32723007d..bebb2b652 100644 --- a/phi/tools/lumalab.py +++ b/phi/tools/lumalab.py @@ -1,4 +1,5 @@ import time +import uuid from os import getenv from typing import Optional, Dict, Any, Literal, TypedDict @@ -22,7 +23,7 @@ class KeyframeImage(TypedDict): Keyframes = Dict[str, KeyframeImage] -class LumaLabToolkit(Toolkit): +class LumaLabTools(Toolkit): def __init__( self, api_key: Optional[str] = None, @@ -83,11 +84,10 @@ def image_to_video( keyframes=keyframes, # type: ignore ) + video_id = str(uuid.uuid4()) + if not self.wait_for_completion: - if generation and generation.id: - agent.add_video(Video(id=generation.id, url=None)) - return f"Video generation started with ID: {generation.id}" - return "Failed to start video generation: No generation ID received" + return "Async generation unsupported" # Poll for completion seconds_waited = 0 @@ -100,7 +100,7 @@ def image_to_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(Video(id=generation.id, url=video_url, eta="completed")) + agent.add_video(Video(id=video_id, url=video_url, eta="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" @@ -137,11 +137,9 @@ def generate_video( generation = self.client.generations.create(**generation_params) # type: ignore + video_id = str(uuid.uuid4()) if not self.wait_for_completion: - if generation and generation.id: - agent.add_video(Video(id=generation.id, url=None)) - return f"Video generation started with ID: {generation.id}" - return "Failed to start video generation: No generation ID received" + return "Async generation unsupported" # Poll for completion seconds_waited = 0 @@ -154,7 +152,7 @@ def generate_video( if generation.state == "completed" and generation.assets: video_url = generation.assets.video if video_url: - agent.add_video(Video(id=generation.id, url=video_url, state="completed")) + agent.add_video(Video(id=video_id, url=video_url, state="completed")) return f"Video generated successfully: {video_url}" elif generation.state == "failed": return f"Generation failed: {generation.failure_reason}" From d280dfa2e0dd9491bc90b9d68cb220bcee59ea3f Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Wed, 11 Dec 2024 12:37:55 +0200 Subject: [PATCH 48/54] Improve instructions --- cookbook/playground/multimodal_agent.py | 24 ++++++++++-------------- phi/agent/agent.py | 1 - phi/llm/openai/chat.py | 2 +- phi/model/openai/chat.py | 2 +- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index 52eba7c75..104177972 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -14,6 +14,7 @@ from phi.playground import Playground, serve_playground_app from phi.storage.agent.sqlite import SqlAgentStorage from phi.tools.fal_tools import FalTools +from pydantic import BaseModel, Field image_agent_storage_file: str = "tmp/image_agent.db" @@ -25,14 +26,13 @@ description="You are an AI agent that can generate images using DALL-E.", instructions=[ "When the user asks you to create an image, use the `create_image` tool to create the image.", - "The image will be displayed in the UI automatically below your response, so you don't need to show the image URL in your response.", - "Politely and courteously let the user know that the image has been generated and will be displayed below as soon as its ready.", + "Don't provide the URL of the image in the response. Only describe what image was generated." ], markdown=True, debug_mode=True, add_history_to_messages=True, add_datetime_to_instructions=True, - storage=SqlAgentStorage(table_name="image_agent", db_file="tmp/image_agent.db"), + storage=SqlAgentStorage(table_name="image_agent", db_file=image_agent_storage_file), ) ml_gif_agent = Agent( @@ -43,14 +43,13 @@ description="You are an AI agent that can generate gifs using the ModelsLabs API.", instructions=[ "When the user asks you to create an image, use the `generate_media` tool to create the image.", - "The image will be displayed in the UI automatically below your response, so you don't need to show the image URL in your response.", - "Politely and courteously let the user know that the gif has been generated and will be displayed below as soon as its ready.", + "Don't provide the URL of the image in the response. Only describe what image was generated." ], markdown=True, debug_mode=True, add_history_to_messages=True, add_datetime_to_instructions=True, - storage=SqlAgentStorage(table_name="ml_gif_agent", db_file="tmp/ml_gif_agent.db"), + storage=SqlAgentStorage(table_name="ml_gif_agent", db_file=image_agent_storage_file), ) ml_video_agent = Agent( @@ -61,14 +60,13 @@ description="You are an AI agent that can generate videos using the ModelsLabs API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", - "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", - "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", + "Don't provide the URL of the video in the response. Only describe what video was generated." ], markdown=True, debug_mode=True, add_history_to_messages=True, add_datetime_to_instructions=True, - storage=SqlAgentStorage(table_name="ml_video_agent", db_file="tmp/ml_video_agent.db"), + storage=SqlAgentStorage(table_name="ml_video_agent", db_file=image_agent_storage_file), ) fal_agent = Agent( @@ -79,19 +77,17 @@ description="You are an AI agent that can generate videos using the Fal API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", - "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.", - "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.", - "Don't send video url in markdown format.", + "Don't provide the URL of the video in the response. Only describe what video was generated." ], markdown=True, debug_mode=True, add_history_to_messages=True, add_datetime_to_instructions=True, - storage=SqlAgentStorage(table_name="fal_agent", db_file="tmp/fal_agent.db"), + storage=SqlAgentStorage(table_name="fal_agent", db_file=image_agent_storage_file), ) -app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app() +app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent]).get_app(use_async=False) if __name__ == "__main__": serve_playground_app("multimodal_agent:app", reload=True) diff --git a/phi/agent/agent.py b/phi/agent/agent.py index fb0158695..c551a63cc 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -589,7 +589,6 @@ def get_session_data(self) -> Dict[str, Any]: def get_agent_session(self) -> AgentSession: """Get an AgentSession object, which can be saved to the database""" - return AgentSession( session_id=self.session_id, agent_id=self.agent_id, diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py index 037e548d4..60b3fe2e3 100644 --- a/phi/llm/openai/chat.py +++ b/phi/llm/openai/chat.py @@ -181,7 +181,7 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty: _dict["presence_penalty"] = self.presence_penalty if self.response_format: - _dict["response_format"] = self.response_format + _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) if self.seed is not None: _dict["seed"] = self.seed if self.stop: diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py index cb5277597..66dbf6242 100644 --- a/phi/model/openai/chat.py +++ b/phi/model/openai/chat.py @@ -255,7 +255,7 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty is not None: model_dict["presence_penalty"] = self.presence_penalty if self.response_format is not None: - model_dict["response_format"] = self.response_format + model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) if self.seed is not None: model_dict["seed"] = self.seed if self.stop is not None: From 6a60da113b4fa5818ef72300e52fab82b9104281 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Wed, 11 Dec 2024 12:41:41 +0200 Subject: [PATCH 49/54] Fix typo --- cookbook/agents/44_generate_replicate_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cookbook/agents/44_generate_replicate_image.py b/cookbook/agents/44_generate_replicate_image.py index d99477957..ebf7af0bd 100644 --- a/cookbook/agents/44_generate_replicate_image.py +++ b/cookbook/agents/44_generate_replicate_image.py @@ -4,7 +4,7 @@ """Create an agent specialized for Replicate AI content generation""" -video_agent = Agent( +image_agent = Agent( name="Image Generator Agent", model=OpenAIChat(id="gpt-4o"), tools=[ReplicateTools(model="luma/photon-flash")], @@ -19,4 +19,4 @@ show_tool_calls=True, ) -video_agent.print_response("Generate an image of a horse in the dessert.") +image_agent.print_response("Generate an image of a horse in the dessert.") From d10bc9cc9166248ff910a48ab7e233c7be8a6a52 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Wed, 11 Dec 2024 12:42:51 +0200 Subject: [PATCH 50/54] Fix style --- cookbook/playground/multimodal_agent.py | 9 ++++----- phi/llm/openai/chat.py | 4 +++- phi/model/openai/chat.py | 4 +++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py index 104177972..25f040568 100644 --- a/cookbook/playground/multimodal_agent.py +++ b/cookbook/playground/multimodal_agent.py @@ -14,7 +14,6 @@ from phi.playground import Playground, serve_playground_app from phi.storage.agent.sqlite import SqlAgentStorage from phi.tools.fal_tools import FalTools -from pydantic import BaseModel, Field image_agent_storage_file: str = "tmp/image_agent.db" @@ -26,7 +25,7 @@ description="You are an AI agent that can generate images using DALL-E.", instructions=[ "When the user asks you to create an image, use the `create_image` tool to create the image.", - "Don't provide the URL of the image in the response. Only describe what image was generated." + "Don't provide the URL of the image in the response. Only describe what image was generated.", ], markdown=True, debug_mode=True, @@ -43,7 +42,7 @@ description="You are an AI agent that can generate gifs using the ModelsLabs API.", instructions=[ "When the user asks you to create an image, use the `generate_media` tool to create the image.", - "Don't provide the URL of the image in the response. Only describe what image was generated." + "Don't provide the URL of the image in the response. Only describe what image was generated.", ], markdown=True, debug_mode=True, @@ -60,7 +59,7 @@ description="You are an AI agent that can generate videos using the ModelsLabs API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", - "Don't provide the URL of the video in the response. Only describe what video was generated." + "Don't provide the URL of the video in the response. Only describe what video was generated.", ], markdown=True, debug_mode=True, @@ -77,7 +76,7 @@ description="You are an AI agent that can generate videos using the Fal API.", instructions=[ "When the user asks you to create a video, use the `generate_media` tool to create the video.", - "Don't provide the URL of the video in the response. Only describe what video was generated." + "Don't provide the URL of the video in the response. Only describe what video was generated.", ], markdown=True, debug_mode=True, diff --git a/phi/llm/openai/chat.py b/phi/llm/openai/chat.py index 60b3fe2e3..666313522 100644 --- a/phi/llm/openai/chat.py +++ b/phi/llm/openai/chat.py @@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty: _dict["presence_penalty"] = self.presence_penalty if self.response_format: - _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + _dict["response_format"] = ( + self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + ) if self.seed is not None: _dict["seed"] = self.seed if self.stop: diff --git a/phi/model/openai/chat.py b/phi/model/openai/chat.py index 66dbf6242..ef177512c 100644 --- a/phi/model/openai/chat.py +++ b/phi/model/openai/chat.py @@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]: if self.presence_penalty is not None: model_dict["presence_penalty"] = self.presence_penalty if self.response_format is not None: - model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + model_dict["response_format"] = ( + self.response_format if isinstance(self.response_format, dict) else str(self.response_format) + ) if self.seed is not None: model_dict["seed"] = self.seed if self.stop is not None: From 09dd849239a9c689a7ce7a10336534783ed73e21 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Wed, 11 Dec 2024 15:04:40 +0200 Subject: [PATCH 51/54] Update --- cookbook/assistants/llms/vertexai/samples/multimodal.py | 2 +- cookbook/assistants/llms/vertexai/samples/text_stream.py | 2 +- phi/agent/agent.py | 4 ++-- pyproject.toml | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cookbook/assistants/llms/vertexai/samples/multimodal.py b/cookbook/assistants/llms/vertexai/samples/multimodal.py index 92114d2a0..7b3f67914 100644 --- a/cookbook/assistants/llms/vertexai/samples/multimodal.py +++ b/cookbook/assistants/llms/vertexai/samples/multimodal.py @@ -11,7 +11,7 @@ def multimodal_example(project: Optional[str], location: Optional[str]) -> str: # Load the model multimodal_model = GenerativeModel("gemini-1.0-pro-vision") # Query the model - response = multimodal_model.generate_media( + response = multimodal_model.generate_content( [ # Add an example image Part.from_uri("gs://generativeai-downloads/images/scones.jpg", mime_type="image/jpeg"), diff --git a/cookbook/assistants/llms/vertexai/samples/text_stream.py b/cookbook/assistants/llms/vertexai/samples/text_stream.py index fcc1d5bfb..70fd96e96 100644 --- a/cookbook/assistants/llms/vertexai/samples/text_stream.py +++ b/cookbook/assistants/llms/vertexai/samples/text_stream.py @@ -11,7 +11,7 @@ def generate(project: Optional[str], location: Optional[str]) -> None: # Load the model model = GenerativeModel("gemini-1.0-pro-vision") # Query the model - responses: Iterable[GenerationResponse] = model.generate_media("Who are you?", stream=True) + responses: Iterable[GenerationResponse] = model.generate_content("Who are you?", stream=True) # Process the response for response in responses: print(response.text, end="") diff --git a/phi/agent/agent.py b/phi/agent/agent.py index c551a63cc..0acae861b 100644 --- a/phi/agent/agent.py +++ b/phi/agent/agent.py @@ -574,9 +574,9 @@ def get_agent_data(self) -> Dict[str, Any]: if self.model is not None: agent_data["model"] = self.model.to_dict() if self.images is not None: - agent_data["images"] = [img.model_dump() for img in self.images] + agent_data["images"] = [img if isinstance(img, dict) else img.model_dump() for img in self.images] if self.videos is not None: - agent_data["videos"] = [vid.model_dump() for vid in self.videos] + agent_data["videos"] = [vid if isinstance(vid, dict) else vid.model_dump() for vid in self.videos] return agent_data def get_session_data(self) -> Dict[str, Any]: diff --git a/pyproject.toml b/pyproject.toml index 28da0961d..d3dde82f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,7 +89,6 @@ exclude = ["phienv*", "aienv*"] check_untyped_defs = true no_implicit_optional = true warn_unused_configs = true -ignore_missing_imports = true plugins = ["pydantic.mypy"] exclude = ["phienv*", "aienv*", "scratch*", "wip*", "tmp*", "cookbook/assistants/examples/*", "phi/assistant/openai/*"] From f2d0d71a0a5ab6fd43b29126099eecd020bfc09f Mon Sep 17 00:00:00 2001 From: Unnati Date: Thu, 12 Dec 2024 15:59:49 +0530 Subject: [PATCH 52/54] use-case-example-recipe-creator (#1511) * use-case-example-recipe-creator * fixed linting * fixed linting * added changes * directory name change * fixes * resolved comments * resolved comments * fixes * fixes * fix prompts and instructions --------- Co-authored-by: Manthan Gupta --- .../examples/agents/01_ai_recipe_creator.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 cookbook/examples/agents/01_ai_recipe_creator.py diff --git a/cookbook/examples/agents/01_ai_recipe_creator.py b/cookbook/examples/agents/01_ai_recipe_creator.py new file mode 100644 index 000000000..65c8cb856 --- /dev/null +++ b/cookbook/examples/agents/01_ai_recipe_creator.py @@ -0,0 +1,33 @@ +from phi.agent import Agent +from phi.knowledge.pdf import PDFUrlKnowledgeBase +from phi.vectordb.pgvector import PgVector +from phi.tools.exa import ExaTools + +db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai" + +knowledge_base = PDFUrlKnowledgeBase( + urls=[ + "https://www.poshantracker.in/pdf/Awareness/MilletsRecipeBook2023_Low%20Res_V5.pdf", + "https://www.cardiff.ac.uk/__data/assets/pdf_file/0003/123681/Recipe-Book.pdf", + ], + vector_db=PgVector(table_name="recipes", db_url=db_url), +) +knowledge_base.load(recreate=False) + +recipe_agent = Agent( + name="RecipeGenie", + knowledge_base=knowledge_base, + search_knowledge=True, + tools=[ExaTools()], + markdown=True, + instructions=[ + "Search for recipes based on the ingredients and time available from the knowledge base.", + "Include the exact calories, preparation time, cooking instructions, and highlight allergens for the recommended recipes.", + "Always search exa for recipe links or tips related to the recipes apart from knowledge base.", + "Provide a list of recipes that match the user's requirements and preferences.", + ], +) + +recipe_agent.print_response( + "I have potatoes, tomatoes, onions, garlic, ginger, and chicken. Suggest me a quick recipe for dinner", stream=True +) From 6d72f5ed9c208392c6eae534e207722977cd1388 Mon Sep 17 00:00:00 2001 From: Saajan Saini <121461922+saajann@users.noreply.github.com> Date: Thu, 12 Dec 2024 11:31:33 +0100 Subject: [PATCH 53/54] Update PR template (#1538) improved Description section for clarity --- .github/pull_request_template.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 3414bc3aa..4974c2e3a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,10 +1,12 @@ ## Description **Please include:** -- A summary of the changes and the related issue -- Relevant motivation and context -- Any dependencies or environment changes required -- If this modifies AI/ML components, include model performance metrics + +- **Summary of changes**: Clearly describe the key changes in this PR and their purpose. +- **Related issues**: Mention if this PR fixes or is connected to any issues (e.g., "Fixes #123" or "Relates to #456"). +- **Motivation and context**: Explain the reason for the changes and the problem they solve. +- **Environment or dependencies**: Specify any changes in dependencies or environment configurations required for this update. +- **Impact on AI/ML components**: (If applicable) Describe changes to AI/ML models and include performance metrics (e.g., accuracy, F1-score). Fixes # (issue) @@ -30,4 +32,4 @@ Please check the options that are relevant: ## Additional Notes -Include any deployment notes, performance implications, or other relevant information: \ No newline at end of file +Include any deployment notes, performance implications, or other relevant information: From 08fc4076fadf9fb7bb09d9028b3e8864fe80dd79 Mon Sep 17 00:00:00 2001 From: Dirk Brand Date: Thu, 12 Dec 2024 12:34:32 +0200 Subject: [PATCH 54/54] Pull in main --- cookbook/examples/agents/03_itinerary_planner.py | 2 +- cookbook/vectordb/qdrant_db.py | 9 +++------ phi/workspace/settings.py | 6 +++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/cookbook/examples/agents/03_itinerary_planner.py b/cookbook/examples/agents/03_itinerary_planner.py index b6564f330..50c94c850 100644 --- a/cookbook/examples/agents/03_itinerary_planner.py +++ b/cookbook/examples/agents/03_itinerary_planner.py @@ -14,7 +14,7 @@ "Ensure that the gathered data is accurate and tailored to the user's preferences, such as destination, group size, and budget constraints.", "Create a clear and concise itinerary that includes: detailed day-by-day travel plan, suggested transportation and accommodation options, activity recommendations (e.g., sightseeing, dining, events), an estimated cost breakdown (covering transportation, accommodation, food, and activities).", "If a particular website or travel option is unavailable, provide alternatives from other trusted sources.", - "Do not include direct links to external websites or booking platforms in the response." + "Do not include direct links to external websites or booking platforms in the response.", ], ) diff --git a/cookbook/vectordb/qdrant_db.py b/cookbook/vectordb/qdrant_db.py index 10d2ee8eb..35b4f542a 100644 --- a/cookbook/vectordb/qdrant_db.py +++ b/cookbook/vectordb/qdrant_db.py @@ -1,4 +1,4 @@ -# pip install qdrant-client +# pip install qdrant-client from phi.vectordb.qdrant import Qdrant from phi.agent import Agent from phi.knowledge.pdf import PDFUrlKnowledgeBase @@ -13,10 +13,7 @@ """ COLLECTION_NAME = "thai-recipes" -vector_db = Qdrant( - collection=COLLECTION_NAME, - url="http://localhost:6333" -) +vector_db = Qdrant(collection=COLLECTION_NAME, url="http://localhost:6333") knowledge_base = PDFUrlKnowledgeBase( urls=["https://phi-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"], @@ -27,4 +24,4 @@ # Create and use the agent agent = Agent(knowledge_base=knowledge_base, use_tools=True, show_tool_calls=True) -agent.print_response("List down the ingredients to make Massaman Gai", markdown=True) \ No newline at end of file +agent.print_response("List down the ingredients to make Massaman Gai", markdown=True) diff --git a/phi/workspace/settings.py b/phi/workspace/settings.py index a8a37dc65..b7a0845f8 100644 --- a/phi/workspace/settings.py +++ b/phi/workspace/settings.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Optional, List, Dict -from pydantic import field_validator, ValidationInfo +from pydantic import field_validator, ValidationInfo, Field from pydantic_settings import BaseSettings, SettingsConfigDict from phi.api.schemas.workspace import WorkspaceSchema @@ -117,9 +117,9 @@ class WorkspaceSettings(BaseSettings): aws_az4: Optional[str] = None aws_az5: Optional[str] = None # Public subnets. 1 in each AZ. - public_subnets: List[str] = [] + public_subnets: List[str] = Field(default_factory=list) # Private subnets. 1 in each AZ. - private_subnets: List[str] = [] + private_subnets: List[str] = Field(default_factory=list) # Subnet IDs. 1 in each AZ. # Derived from public and private subnets if not provided. subnet_ids: Optional[List[str]] = None