From 79e57639c76e6a7cfd4f89c21c99341789ec098e Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 10:44:36 +0100 Subject: [PATCH 01/40] Record log entries into context --- skyvern/forge/sdk/forge_log.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/skyvern/forge/sdk/forge_log.py b/skyvern/forge/sdk/forge_log.py index 612780900..3b37883ac 100644 --- a/skyvern/forge/sdk/forge_log.py +++ b/skyvern/forge/sdk/forge_log.py @@ -2,7 +2,6 @@ import structlog from structlog.typing import EventDict - from skyvern.config import settings from skyvern.forge.sdk.core import skyvern_context @@ -54,6 +53,21 @@ def add_kv_pairs_to_msg(logger: logging.Logger, method_name: str, event_dict: Ev return event_dict +def skyvern_logs_processor(logger: logging.Logger, method_name: str, event_dict: EventDict) -> EventDict: + """ + A custom processor to add skyvern logs to the context + """ + if method_name not in ["info", "warning", "error", "critical", "exception"]: + return event_dict + + context = skyvern_context.current() + if context: + log_entry = dict(event_dict) + context.log.append(log_entry) + + return event_dict + + def setup_logger() -> None: """ Setup the logger with the specified format @@ -88,7 +102,7 @@ def setup_logger() -> None: structlog.processors.format_exc_info, ] + additional_processors - + [renderer], + + [skyvern_logs_processor, renderer], ) uvicorn_error = logging.getLogger("uvicorn.error") uvicorn_error.disabled = True From c3cbf4bc458f1d856b5a359bbaaaa50524ac56c6 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 10:45:59 +0100 Subject: [PATCH 02/40] Add log field to context --- skyvern/forge/sdk/core/skyvern_context.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skyvern/forge/sdk/core/skyvern_context.py b/skyvern/forge/sdk/core/skyvern_context.py index 11a615fc8..52a10bf72 100644 --- a/skyvern/forge/sdk/core/skyvern_context.py +++ b/skyvern/forge/sdk/core/skyvern_context.py @@ -11,6 +11,7 @@ class SkyvernContext: workflow_run_id: str | None = None max_steps_override: int | None = None totp_codes: dict[str, str | None] = field(default_factory=dict) + log: list[dict] = field(default_factory=list) def __repr__(self) -> str: return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})" From f3debf5eceea14c132b65ac0827677e6d2ff5f6d Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 10:47:26 +0100 Subject: [PATCH 03/40] Add SKYVERN_LOG artifact type --- skyvern/forge/sdk/artifact/models.py | 2 ++ skyvern/forge/sdk/artifact/storage/base.py | 1 + 2 files changed, 3 insertions(+) diff --git a/skyvern/forge/sdk/artifact/models.py b/skyvern/forge/sdk/artifact/models.py index 261788e55..fbda70bac 100644 --- a/skyvern/forge/sdk/artifact/models.py +++ b/skyvern/forge/sdk/artifact/models.py @@ -10,6 +10,8 @@ class ArtifactType(StrEnum): RECORDING = "recording" BROWSER_CONSOLE_LOG = "browser_console_log" + SKYVERN_LOG = "skyvern_log" + # DEPRECATED. pls use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL SCREENSHOT = "screenshot" diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py index 811c5bf17..f1c19ac67 100644 --- a/skyvern/forge/sdk/artifact/storage/base.py +++ b/skyvern/forge/sdk/artifact/storage/base.py @@ -10,6 +10,7 @@ ArtifactType.SCREENSHOT_LLM: "png", ArtifactType.SCREENSHOT_ACTION: "png", ArtifactType.SCREENSHOT_FINAL: "png", + ArtifactType.SKYVERN_LOG: "json", ArtifactType.LLM_PROMPT: "txt", ArtifactType.LLM_REQUEST: "json", ArtifactType.LLM_RESPONSE: "json", From c1e7ff604e5e9709b8efef99bcabf8430ce3c32f Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 11:09:57 +0100 Subject: [PATCH 04/40] Add skyvern log tab on the frontend --- skyvern-frontend/src/api/types.ts | 1 + .../src/routes/tasks/detail/StepArtifacts.tsx | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/skyvern-frontend/src/api/types.ts b/skyvern-frontend/src/api/types.ts index 526995b32..4cd2cfb13 100644 --- a/skyvern-frontend/src/api/types.ts +++ b/skyvern-frontend/src/api/types.ts @@ -10,6 +10,7 @@ export const ArtifactType = { LLMPrompt: "llm_prompt", LLMRequest: "llm_request", HTMLScrape: "html_scrape", + SkyvernLog: "skyvern_log", } as const; export type ArtifactType = (typeof ArtifactType)[keyof typeof ArtifactType]; diff --git a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx index 815154013..f14c1423c 100644 --- a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx @@ -79,6 +79,10 @@ function StepArtifacts({ id, stepProps }: Props) { (artifact) => artifact.artifact_type === ArtifactType.HTMLScrape, ); + const skyvernLog = artifacts?.filter( + (artifact) => artifact.artifact_type === ArtifactType.SkyvernLog, + ); + return ( Action List HTML (Raw) LLM Request (Raw) + Skyvern Log
@@ -209,6 +214,9 @@ function StepArtifacts({ id, stepProps }: Props) { {llmRequest ? : null} + + {skyvernLog ? : null} + ); } From b1f14c6c8a7a5864a26e3b2343629cee2fa63c03 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 11:19:52 +0100 Subject: [PATCH 05/40] Add SkyvernLogEncoder to handle non-serializable objects --- skyvern/forge/skyvern_log_encoder.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 skyvern/forge/skyvern_log_encoder.py diff --git a/skyvern/forge/skyvern_log_encoder.py b/skyvern/forge/skyvern_log_encoder.py new file mode 100644 index 000000000..03db2ab9b --- /dev/null +++ b/skyvern/forge/skyvern_log_encoder.py @@ -0,0 +1,22 @@ +import json +from typing import Any + +class SkyvernLogEncoder(json.JSONEncoder): + """Custom JSON encoder for Skyvern logs that handles non-serializable objects""" + def default(self, obj: Any) -> Any: + if hasattr(obj, 'model_dump'): + return obj.model_dump() + + if hasattr(obj, '__dataclass_fields__'): + return {k: getattr(obj, k) for k in obj.__dataclass_fields__} + + if hasattr(obj, '__dict__'): + return { + 'type': obj.__class__.__name__, + 'attributes': {k: v for k, v in obj.__dict__.items() if not k.startswith('_')} + } + # Handle other non-serializable objects + try: + return str(obj) + except Exception: + return f"" From b2cb9c477721a3ded22eabed9e0af0a2fe86fc0f Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 11:23:31 +0100 Subject: [PATCH 06/40] Record logs to an artifact --- skyvern/forge/agent.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index dad74f9b3..0e0066551 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -72,6 +72,7 @@ from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame +from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder LOG = structlog.get_logger() @@ -1773,6 +1774,23 @@ async def update_step( step_id=step.step_id, diff=update_comparison, ) + + try: + log = skyvern_context.current().log + log_json = json.dumps(log, cls=SkyvernLogEncoder, indent=2) + await app.ARTIFACT_MANAGER.create_artifact( + step=step, + artifact_type=ArtifactType.SKYVERN_LOG, + data=log_json.encode(), + ) + except Exception: + LOG.error( + "Failed to record skyvern log after action", + task_id=step.task_id, + step_id=step.step_id, + exc_info=True, + ) + return await app.DATABASE.update_step( task_id=step.task_id, step_id=step.step_id, From eb6fc3db5bbdbaa67f8b39a4d849f69ab6cab8dd Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 11:55:48 +0100 Subject: [PATCH 07/40] Record only logs corresponding to a step --- skyvern/forge/agent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 0e0066551..cb1f8c492 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1777,7 +1777,8 @@ async def update_step( try: log = skyvern_context.current().log - log_json = json.dumps(log, cls=SkyvernLogEncoder, indent=2) + current_step_log = [entry for entry in log if entry.get("step_id", "") == step.step_id] + log_json = json.dumps(current_step_log, cls=SkyvernLogEncoder, indent=2) await app.ARTIFACT_MANAGER.create_artifact( step=step, artifact_type=ArtifactType.SKYVERN_LOG, From 57cd772e9f2e0953d3d41326c8e91396ee8977f8 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 6 Dec 2024 12:03:31 +0100 Subject: [PATCH 08/40] Recover empty line --- skyvern/forge/sdk/forge_log.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skyvern/forge/sdk/forge_log.py b/skyvern/forge/sdk/forge_log.py index 3b37883ac..a6862cc6c 100644 --- a/skyvern/forge/sdk/forge_log.py +++ b/skyvern/forge/sdk/forge_log.py @@ -2,6 +2,7 @@ import structlog from structlog.typing import EventDict + from skyvern.config import settings from skyvern.forge.sdk.core import skyvern_context From 419e12becb5e4fa358f4e498762c5a778263bcbc Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Mon, 9 Dec 2024 12:57:36 +0100 Subject: [PATCH 09/40] Add text log encoder --- skyvern-frontend/src/api/types.ts | 1 + .../src/routes/tasks/detail/StepArtifacts.tsx | 2 +- skyvern/forge/agent.py | 14 ++- skyvern/forge/sdk/artifact/models.py | 1 + skyvern/forge/sdk/artifact/storage/base.py | 3 +- skyvern/forge/skyvern_json_encoder.py | 22 ++++ skyvern/forge/skyvern_log_encoder.py | 107 ++++++++++++++---- 7 files changed, 124 insertions(+), 26 deletions(-) create mode 100644 skyvern/forge/skyvern_json_encoder.py diff --git a/skyvern-frontend/src/api/types.ts b/skyvern-frontend/src/api/types.ts index 4cd2cfb13..509c305e4 100644 --- a/skyvern-frontend/src/api/types.ts +++ b/skyvern-frontend/src/api/types.ts @@ -11,6 +11,7 @@ export const ArtifactType = { LLMRequest: "llm_request", HTMLScrape: "html_scrape", SkyvernLog: "skyvern_log", + SkyvernLogRaw: "skyvern_log_raw", } as const; export type ArtifactType = (typeof ArtifactType)[keyof typeof ArtifactType]; diff --git a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx index f14c1423c..1a0d85084 100644 --- a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx @@ -215,7 +215,7 @@ function StepArtifacts({ id, stepProps }: Props) { {llmRequest ? : null} - {skyvernLog ? : null} + {skyvernLog ? : null} ); diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index cb1f8c492..643bf7773 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -72,6 +72,7 @@ from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame +from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder LOG = structlog.get_logger() @@ -930,7 +931,6 @@ async def agent_step( complete_action.task_id = task.task_id complete_action.step_id = step.step_id complete_action.step_order = step.order - complete_action.action_order = len(detailed_agent_step_output.actions_and_results) complete_results = await ActionHandler.handle_action( scraped_page, task, step, working_page, complete_action ) @@ -1778,12 +1778,20 @@ async def update_step( try: log = skyvern_context.current().log current_step_log = [entry for entry in log if entry.get("step_id", "") == step.step_id] - log_json = json.dumps(current_step_log, cls=SkyvernLogEncoder, indent=2) + + log_json = json.dumps(current_step_log, cls=SkyvernJSONLogEncoder, indent=2) await app.ARTIFACT_MANAGER.create_artifact( step=step, - artifact_type=ArtifactType.SKYVERN_LOG, + artifact_type=ArtifactType.SKYVERN_LOG_RAW, data=log_json.encode(), ) + + formatted_log = SkyvernLogEncoder.encode(current_step_log) + await app.ARTIFACT_MANAGER.create_artifact( + step=step, + artifact_type=ArtifactType.SKYVERN_LOG, + data=formatted_log.encode(), + ) except Exception: LOG.error( "Failed to record skyvern log after action", diff --git a/skyvern/forge/sdk/artifact/models.py b/skyvern/forge/sdk/artifact/models.py index fbda70bac..2ca075626 100644 --- a/skyvern/forge/sdk/artifact/models.py +++ b/skyvern/forge/sdk/artifact/models.py @@ -11,6 +11,7 @@ class ArtifactType(StrEnum): BROWSER_CONSOLE_LOG = "browser_console_log" SKYVERN_LOG = "skyvern_log" + SKYVERN_LOG_RAW = "skyvern_log_raw" # DEPRECATED. pls use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL SCREENSHOT = "screenshot" diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py index f1c19ac67..5c68420d0 100644 --- a/skyvern/forge/sdk/artifact/storage/base.py +++ b/skyvern/forge/sdk/artifact/storage/base.py @@ -10,7 +10,8 @@ ArtifactType.SCREENSHOT_LLM: "png", ArtifactType.SCREENSHOT_ACTION: "png", ArtifactType.SCREENSHOT_FINAL: "png", - ArtifactType.SKYVERN_LOG: "json", + ArtifactType.SKYVERN_LOG: "log", + ArtifactType.SKYVERN_LOG_RAW: "json", ArtifactType.LLM_PROMPT: "txt", ArtifactType.LLM_REQUEST: "json", ArtifactType.LLM_RESPONSE: "json", diff --git a/skyvern/forge/skyvern_json_encoder.py b/skyvern/forge/skyvern_json_encoder.py new file mode 100644 index 000000000..1e365e9f0 --- /dev/null +++ b/skyvern/forge/skyvern_json_encoder.py @@ -0,0 +1,22 @@ +import json +from typing import Any + +class SkyvernJSONLogEncoder(json.JSONEncoder): + """Custom JSON encoder for Skyvern logs that handles non-serializable objects""" + def default(self, obj: Any) -> Any: + if hasattr(obj, 'model_dump'): + return obj.model_dump() + + if hasattr(obj, '__dataclass_fields__'): + return {k: getattr(obj, k) for k in obj.__dataclass_fields__} + + if hasattr(obj, '__dict__'): + return { + 'type': obj.__class__.__name__, + 'attributes': {k: v for k, v in obj.__dict__.items() if not k.startswith('_')} + } + # Handle other non-serializable objects + try: + return str(obj) + except Exception: + return f"" diff --git a/skyvern/forge/skyvern_log_encoder.py b/skyvern/forge/skyvern_log_encoder.py index 03db2ab9b..9e17d55d9 100644 --- a/skyvern/forge/skyvern_log_encoder.py +++ b/skyvern/forge/skyvern_log_encoder.py @@ -1,22 +1,87 @@ +from typing import Any, Dict, List +from datetime import datetime import json -from typing import Any - -class SkyvernLogEncoder(json.JSONEncoder): - """Custom JSON encoder for Skyvern logs that handles non-serializable objects""" - def default(self, obj: Any) -> Any: - if hasattr(obj, 'model_dump'): - return obj.model_dump() - - if hasattr(obj, '__dataclass_fields__'): - return {k: getattr(obj, k) for k in obj.__dataclass_fields__} - - if hasattr(obj, '__dict__'): - return { - 'type': obj.__class__.__name__, - 'attributes': {k: v for k, v in obj.__dict__.items() if not k.startswith('_')} - } - # Handle other non-serializable objects - try: - return str(obj) - except Exception: - return f"" + +from structlog.dev import ConsoleRenderer + +LOG = structlog.get_logger() + +class SkyvernLogEncoder: + """Encodes Skyvern logs from JSON format to human-readable string format""" + + def __init__(self): + self.renderer = ConsoleRenderer( + pad_event=30, + colors=False, + ) + + @staticmethod + def _format_value(value: Any) -> str: + """Format complex values into readable strings.""" + if isinstance(value, (dict, list)): + return json.dumps(value, sort_keys=True) + return str(value) + + @staticmethod + def _parse_json_entry(entry: Dict[str, Any]) -> Dict[str, Any]: + """Convert a JSON log entry into our standard format.""" + event = entry.get('message', entry.get('event', '')) + + clean_entry = { + 'timestamp': entry.get('timestamp', datetime.utcnow().isoformat() + "Z"), + 'level': entry.get('level', 'info').lower(), + 'event': event + } + + for key, value in entry.items(): + if key not in ('timestamp', 'level', 'event', 'message'): + clean_entry[key] = SkyvernLogEncoder._format_value(value) + + return clean_entry + + @classmethod + def encode(cls, log_entries: List[Dict[str, Any]]) -> str: + """ + Encode log entries into formatted string output using structlog's ConsoleRenderer. + + Args: + log_entries: List of log entry dictionaries + + Returns: + Formatted string with one log entry per line + """ + encoder = cls() + formatted_lines = [] + + for entry in log_entries: + try: + if isinstance(entry, str): + try: + entry = json.loads(entry) + except json.JSONDecodeError: + entry = {'event': entry, 'level': 'info'} + + parsed_entry = cls._parse_json_entry(entry) + + formatted_line = encoder.renderer(None, None, parsed_entry) + formatted_lines.append(formatted_line) + + except Exception as e: + LOG.error( + "Failed to format log entry", + entry=entry, + error=str(e), + exc_info=True + ) + # Add error line to output + error_timestamp = datetime.utcnow().isoformat() + "Z" + error_entry = { + 'timestamp': error_timestamp, + 'level': 'error', + 'event': 'Failed to format log entry', + 'entry': str(entry), + 'error': str(e) + } + formatted_lines.append(encoder.renderer(None, None, error_entry)) + + return "\n".join(formatted_lines) From ddbc3402f401b4fd867b17729f5a4a8267faa5a6 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 06:00:16 -0800 Subject: [PATCH 10/40] re-enable upload block (#1324) --- .../editor/panels/WorkflowNodeLibraryPanel.tsx | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx index 6de504a43..81d12c7b0 100644 --- a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx +++ b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx @@ -11,6 +11,7 @@ import { PlusIcon, StopwatchIcon, UpdateIcon, + UploadIcon, } from "@radix-ui/react-icons"; import { WorkflowBlockNode } from "../nodes"; import { AddNodeProps } from "../FlowRenderer"; @@ -93,12 +94,12 @@ const nodeLibraryItems: Array<{ // title: "Download Block", // description: "Downloads a file from S3", // }, - // { - // nodeType: "upload", - // icon: , - // title: "Upload Block", - // description: "Uploads a file to S3", - // }, + { + nodeType: "upload", + icon: , + title: "Upload Block", + description: "Uploads a file to S3", + }, { nodeType: "fileDownload", icon: , From f31332609a325e34dffc037cbb14c39ffe7a660a Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Thu, 5 Dec 2024 22:29:24 +0800 Subject: [PATCH 11/40] remove no latest screenshot error log (#1325) --- skyvern/forge/agent.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 643bf7773..9811775e9 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1640,12 +1640,6 @@ async def build_task_response( latest_action_screenshot_urls = await app.ARTIFACT_MANAGER.get_share_links( latest_action_screenshot_artifacts ) - else: - LOG.error( - "Failed to get latest action screenshots", - task_id=task.task_id, - task_status=task.status, - ) if task.organization_id: try: From 9b3565142087efc31eeca6cac14e16900859acab Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 08:41:26 -0800 Subject: [PATCH 12/40] Put a guard in workflow save error detail (#1326) --- skyvern-frontend/src/routes/workflows/editor/FlowRenderer.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyvern-frontend/src/routes/workflows/editor/FlowRenderer.tsx b/skyvern-frontend/src/routes/workflows/editor/FlowRenderer.tsx index 1cf5d5735..8597d71af 100644 --- a/skyvern-frontend/src/routes/workflows/editor/FlowRenderer.tsx +++ b/skyvern-frontend/src/routes/workflows/editor/FlowRenderer.tsx @@ -252,7 +252,7 @@ function FlowRenderer({ setHasChanges(false); }, onError: (error: AxiosError) => { - const detail = (error.response?.data as { detail?: string }).detail; + const detail = (error.response?.data as { detail?: string })?.detail; toast({ title: "Error", description: detail ? detail : error.message, From 2d90fedc53125e2d79f69c8759cb32652595b60b Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 6 Dec 2024 00:53:57 +0800 Subject: [PATCH 13/40] urlencode download suffix (#1327) --- skyvern/forge/sdk/workflow/models/block.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index d2f0a8ca0..98ded18cb 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -14,6 +14,7 @@ from enum import StrEnum from pathlib import Path from typing import Annotated, Any, Literal, Union +from urllib.parse import quote import filetype import structlog @@ -245,6 +246,8 @@ def format_potential_template_parameters(self, workflow_run_context: WorkflowRun self.download_suffix = self.format_block_parameter_template_from_workflow_run_context( self.download_suffix, workflow_run_context ) + # encode the suffix to prevent invalid path style + self.download_suffix = quote(string=self.download_suffix, safe="") if self.navigation_goal: self.navigation_goal = self.format_block_parameter_template_from_workflow_run_context( From be0e817b4f7857b9201a761429f9c68ac5279bd8 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 6 Dec 2024 02:25:13 +0800 Subject: [PATCH 14/40] wait for downloads to be done (#1328) --- skyvern/exceptions.py | 5 ++ skyvern/forge/agent.py | 9 +++ skyvern/webeye/actions/handler.py | 121 ++++++++++++++++++++++-------- 3 files changed, 104 insertions(+), 31 deletions(-) diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 622bc0989..6a0634d80 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -250,6 +250,11 @@ def __init__(self, max_size: int) -> None: super().__init__(f"Download file size exceeded the maximum allowed size of {max_size} MB.") +class NoFileDownloadTriggered(SkyvernException): + def __init__(self, element_id: str) -> None: + super().__init__(f"Clicking on element doesn't trigger the file download. element_id={element_id}") + + class BitwardenBaseError(SkyvernException): def __init__(self, message: str) -> None: super().__init__(f"Bitwarden error: {message}") diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 9811775e9..f931edab9 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -333,6 +333,15 @@ async def execute_step( files_to_rename = list(set(list_files_after) - set(list_files_before)) for file in files_to_rename: file_extension = Path(file).suffix + if file_extension == ".crdownload": + LOG.warning( + "Detecting incompleted download file, skip the rename", + file=file, + task_id=task.task_id, + workflow_run_id=task.workflow_run_id, + ) + continue + random_file_id = "".join(random.choices(string.ascii_uppercase + string.digits, k=4)) random_file_name = f"download-{datetime.now().strftime('%Y%m%d%H%M%S%f')}-{random_file_id}" if task_block.download_suffix: diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 6f8f333c6..3961bda21 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -5,6 +5,7 @@ import urllib.parse import uuid from datetime import datetime, timedelta, timezone +from pathlib import Path from typing import Any, Awaitable, Callable, List import pyotp @@ -13,7 +14,7 @@ from pydantic import BaseModel from skyvern.config import settings -from skyvern.constants import REPO_ROOT_DIR, SKYVERN_ID_ATTR +from skyvern.constants import BROWSER_DOWNLOAD_TIMEOUT, REPO_ROOT_DIR, SKYVERN_ID_ATTR from skyvern.exceptions import ( EmptySelect, ErrEmptyTweakValue, @@ -34,6 +35,7 @@ NoAutoCompleteOptionMeetCondition, NoAvailableOptionFoundForCustomSelection, NoElementMatchedForTargetOption, + NoFileDownloadTriggered, NoIncrementalElementFoundForAutoCompletion, NoIncrementalElementFoundForCustomSelection, NoSuitableAutoCompleteOption, @@ -42,11 +44,7 @@ ) from skyvern.forge import app from skyvern.forge.prompts import prompt_engine -from skyvern.forge.sdk.api.files import ( - download_file, - get_number_of_files_in_directory, - get_path_for_workflow_download_directory, -) +from skyvern.forge.sdk.api.files import download_file, get_download_dir, list_files_in_directory from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_post from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType @@ -337,16 +335,6 @@ async def handle_click_action( task: Task, step: Step, ) -> list[ActionResult]: - num_downloaded_files_before = 0 - download_dir = None - if task.workflow_run_id: - download_dir = get_path_for_workflow_download_directory(task.workflow_run_id) - num_downloaded_files_before = get_number_of_files_in_directory(download_dir) - LOG.info( - "Number of files in download directory before click", - num_downloaded_files_before=num_downloaded_files_before, - download_dir=download_dir, - ) dom = DomUtil(scraped_page=scraped_page, page=page) skyvern_element = await dom.get_skyvern_element_by_id(action.element_id) await asyncio.sleep(0.3) @@ -363,7 +351,7 @@ async def handle_click_action( return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))] if action.download: - results = await handle_click_to_download_file_action(action, page, scraped_page, task) + results = await handle_click_to_download_file_action(action, page, scraped_page, task, step) else: results = await chain_click( task, @@ -374,18 +362,6 @@ async def handle_click_action( timeout=settings.BROWSER_ACTION_TIMEOUT_MS, ) - if results and task.workflow_run_id and download_dir: - LOG.info("Sleeping for 5 seconds to let the download finish") - await asyncio.sleep(5) - num_downloaded_files_after = get_number_of_files_in_directory(download_dir) - LOG.info( - "Number of files in download directory after click", - num_downloaded_files_after=num_downloaded_files_after, - download_dir=download_dir, - ) - if num_downloaded_files_after > num_downloaded_files_before: - results[-1].download_triggered = True - return results @@ -394,19 +370,102 @@ async def handle_click_to_download_file_action( page: Page, scraped_page: ScrapedPage, task: Task, + step: Step, ) -> list[ActionResult]: dom = DomUtil(scraped_page=scraped_page, page=page) skyvern_element = await dom.get_skyvern_element_by_id(action.element_id) locator = skyvern_element.locator + download_dir = Path(get_download_dir(workflow_run_id=task.workflow_run_id, task_id=task.task_id)) + list_files_before = list_files_in_directory(download_dir) + LOG.info( + "Number of files in download directory before click", + num_downloaded_files_before=len(list_files_before), + download_dir=download_dir, + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + try: await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS) await page.wait_for_load_state(timeout=settings.BROWSER_LOADING_TIMEOUT_MS) except Exception as e: - LOG.exception("ClickAction with download failed", action=action, exc_info=True) + LOG.exception( + "ClickAction with download failed", + exc_info=True, + action=action, + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) return [ActionFailure(e, download_triggered=False)] - return [ActionSuccess()] + # wait 5s to start downloading + LOG.info( + "Sleep for 5s to let download finish", + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + await asyncio.sleep(5) + list_files_after = list_files_in_directory(download_dir) + LOG.info( + "Number of files in download directory after click", + num_downloaded_files_after=len(list_files_after), + download_dir=download_dir, + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + + if len(list_files_after) <= len(list_files_before): + LOG.warning( + "No file to download after click", + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + return [ActionFailure(exception=NoFileDownloadTriggered(action.element_id))] + + # check if there's any file is still downloading + downloading_files: list[Path] = [] + for file in list_files_after: + path = Path(file) + if path.suffix == ".crdownload": + downloading_files.append(path) + + if len(downloading_files) == 0: + return [ActionSuccess(download_triggered=True)] + + LOG.info( + "File downloading hasn't completed, wait for a while", + downloading_files=downloading_files, + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + try: + async with asyncio.timeout(BROWSER_DOWNLOAD_TIMEOUT): + while len(downloading_files) > 0: + new_downloading_files: list[Path] = [] + for path in downloading_files: + if not path.exists(): + continue + new_downloading_files.append(path) + downloading_files = new_downloading_files + await asyncio.sleep(1) + + except asyncio.TimeoutError: + LOG.warning( + "There're several long-time downloading files, these files might be broken", + downloading_files=downloading_files, + task_id=task.task_id, + step_id=step.step_id, + workflow_run_id=task.workflow_run_id, + ) + + return [ActionSuccess(download_triggered=True)] async def handle_input_text_action( From 5ee8c3b8eae71e180733ae3ae3b4c801d03bea06 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 11:56:09 -0800 Subject: [PATCH 15/40] Skyvern Forms UI (#1330) --- skyvern-frontend/src/api/types.ts | 21 +-- .../src/components/NavLinkGroup.tsx | 87 ++++++++++ .../src/components/StatusFilterDropdown.tsx | 57 +++++++ .../src/components/icons/BagIcon.tsx | 26 +++ .../src/components/icons/GovernmentIcon.tsx | 26 +++ .../src/components/icons/ReceiptIcon.tsx | 26 +++ .../src/components/icons/ToolIcon.tsx | 26 +++ skyvern-frontend/src/router.tsx | 40 ++--- skyvern-frontend/src/routes/root/SideNav.tsx | 104 ++++-------- .../src/routes/root/SidebarContent.tsx | 2 +- .../routes/tasks/create/CreateNewTaskForm.tsx | 3 +- .../src/routes/tasks/create/PromptBox.tsx | 20 ++- .../routes/tasks/create/retry/RetryTask.tsx | 12 +- .../src/routes/tasks/detail/TaskDetails.tsx | 2 +- .../routes/tasks/detail/TaskParameters.tsx | 4 +- .../src/routes/tasks/list/TaskHistory.tsx | 155 ++++++++++++------ .../src/routes/tasks/list/TaskList.tsx | 59 ------- .../src/routes/tasks/list/TasksPage.tsx | 43 +++++ 18 files changed, 479 insertions(+), 234 deletions(-) create mode 100644 skyvern-frontend/src/components/NavLinkGroup.tsx create mode 100644 skyvern-frontend/src/components/StatusFilterDropdown.tsx create mode 100644 skyvern-frontend/src/components/icons/BagIcon.tsx create mode 100644 skyvern-frontend/src/components/icons/GovernmentIcon.tsx create mode 100644 skyvern-frontend/src/components/icons/ReceiptIcon.tsx create mode 100644 skyvern-frontend/src/components/icons/ToolIcon.tsx delete mode 100644 skyvern-frontend/src/routes/tasks/list/TaskList.tsx create mode 100644 skyvern-frontend/src/routes/tasks/list/TasksPage.tsx diff --git a/skyvern-frontend/src/api/types.ts b/skyvern-frontend/src/api/types.ts index 509c305e4..4238649e9 100644 --- a/skyvern-frontend/src/api/types.ts +++ b/skyvern-frontend/src/api/types.ts @@ -95,17 +95,18 @@ export type TaskApiResponse = { }; export type CreateTaskRequest = { - title: string | null; + title?: string | null; url: string; - webhook_callback_url: string | null; - navigation_goal: string | null; - data_extraction_goal: string | null; - navigation_payload: Record | string | null; - extracted_information_schema: Record | string | null; - error_code_mapping: Record | null; - proxy_location: ProxyLocation | null; - totp_verification_url: string | null; - totp_identifier: string | null; + webhook_callback_url?: string | null; + navigation_goal?: string | null; + data_extraction_goal?: string | null; + navigation_payload?: Record | string | null; + extracted_information_schema?: Record | string | null; + error_code_mapping?: Record | null; + proxy_location?: ProxyLocation | null; + totp_verification_url?: string | null; + totp_identifier?: string | null; + application?: string | null; }; export type User = { diff --git a/skyvern-frontend/src/components/NavLinkGroup.tsx b/skyvern-frontend/src/components/NavLinkGroup.tsx new file mode 100644 index 000000000..f65401b63 --- /dev/null +++ b/skyvern-frontend/src/components/NavLinkGroup.tsx @@ -0,0 +1,87 @@ +import { useSidebarStore } from "@/store/SidebarStore"; +import { cn } from "@/util/utils"; +import { NavLink, useMatches } from "react-router-dom"; +import { Badge } from "./ui/badge"; + +type Props = { + title: string; + links: Array<{ + label: string; + to: string; + disabled?: boolean; + icon?: React.ReactNode; + }>; +}; + +function NavLinkGroup({ title, links }: Props) { + const { collapsed } = useSidebarStore(); + const matches = useMatches(); + const groupIsActive = matches.some((match) => { + const inputs = links.map((link) => link.to); + return inputs.includes(match.pathname); + }); + + return ( +
+
+
+ {title} +
+
+
+ {links.map((link) => { + return ( + { + return cn( + "block rounded-lg py-2 pl-3 text-slate-400 hover:bg-muted hover:text-primary", + { + "bg-muted": isActive, + }, + { + "text-primary": groupIsActive, + "px-3": collapsed, + }, + ); + }} + > +
+
+ {link.icon} + {!collapsed && link.label} +
+ {!collapsed && link.disabled && ( + + Training + + )} +
+
+ ); + })} +
+
+ ); +} + +export { NavLinkGroup }; diff --git a/skyvern-frontend/src/components/StatusFilterDropdown.tsx b/skyvern-frontend/src/components/StatusFilterDropdown.tsx new file mode 100644 index 000000000..86a1649e7 --- /dev/null +++ b/skyvern-frontend/src/components/StatusFilterDropdown.tsx @@ -0,0 +1,57 @@ +import { ChevronDownIcon } from "@radix-ui/react-icons"; +import { Button } from "./ui/button"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuTrigger, +} from "./ui/dropdown-menu"; +import { Checkbox } from "./ui/checkbox"; +import { Status } from "@/api/types"; + +type Item = { + label: string; + value: Status; +}; + +type Props = { + options: Array; + values: Array; + onChange: (values: Array) => void; +}; + +function StatusFilterDropdown({ options, values, onChange }: Props) { + return ( + + + + + + {options.map((item) => { + return ( +
+ { + if (checked) { + onChange([...values, item.value]); + } else { + onChange(values.filter((value) => value !== item.value)); + } + }} + /> + +
+ ); + })} +
+
+ ); +} + +export { StatusFilterDropdown }; diff --git a/skyvern-frontend/src/components/icons/BagIcon.tsx b/skyvern-frontend/src/components/icons/BagIcon.tsx new file mode 100644 index 000000000..a519dcb6e --- /dev/null +++ b/skyvern-frontend/src/components/icons/BagIcon.tsx @@ -0,0 +1,26 @@ +type Props = { + className?: string; +}; + +function BagIcon({ className }: Props) { + return ( + + + + ); +} + +export { BagIcon }; diff --git a/skyvern-frontend/src/components/icons/GovernmentIcon.tsx b/skyvern-frontend/src/components/icons/GovernmentIcon.tsx new file mode 100644 index 000000000..9cf0517fd --- /dev/null +++ b/skyvern-frontend/src/components/icons/GovernmentIcon.tsx @@ -0,0 +1,26 @@ +type Props = { + className?: string; +}; + +function GovernmentIcon({ className }: Props) { + return ( + + + + ); +} + +export { GovernmentIcon }; diff --git a/skyvern-frontend/src/components/icons/ReceiptIcon.tsx b/skyvern-frontend/src/components/icons/ReceiptIcon.tsx new file mode 100644 index 000000000..ba08d1c71 --- /dev/null +++ b/skyvern-frontend/src/components/icons/ReceiptIcon.tsx @@ -0,0 +1,26 @@ +type Props = { + className?: string; +}; + +function ReceiptIcon({ className }: Props) { + return ( + + + + ); +} + +export { ReceiptIcon }; diff --git a/skyvern-frontend/src/components/icons/ToolIcon.tsx b/skyvern-frontend/src/components/icons/ToolIcon.tsx new file mode 100644 index 000000000..5f6b5a016 --- /dev/null +++ b/skyvern-frontend/src/components/icons/ToolIcon.tsx @@ -0,0 +1,26 @@ +type Props = { + className?: string; +}; + +function ToolIcon({ className }: Props) { + return ( + + + + ); +} + +export { ToolIcon }; diff --git a/skyvern-frontend/src/router.tsx b/skyvern-frontend/src/router.tsx index 7e03ce3b1..b426edbfd 100644 --- a/skyvern-frontend/src/router.tsx +++ b/skyvern-frontend/src/router.tsx @@ -4,15 +4,13 @@ import { Settings } from "./routes/settings/Settings"; import { SettingsPageLayout } from "./routes/settings/SettingsPageLayout"; import { TasksPageLayout } from "./routes/tasks/TasksPageLayout"; import { CreateNewTaskFormPage } from "./routes/tasks/create/CreateNewTaskFormPage"; -import { CreateNewTaskLayout } from "./routes/tasks/create/CreateNewTaskLayout"; -import { TaskTemplates } from "./routes/tasks/create/TaskTemplates"; import { RetryTask } from "./routes/tasks/create/retry/RetryTask"; import { StepArtifactsLayout } from "./routes/tasks/detail/StepArtifactsLayout"; import { TaskActions } from "./routes/tasks/detail/TaskActions"; import { TaskDetails } from "./routes/tasks/detail/TaskDetails"; import { TaskParameters } from "./routes/tasks/detail/TaskParameters"; import { TaskRecording } from "./routes/tasks/detail/TaskRecording"; -import { TaskList } from "./routes/tasks/list/TaskList"; +import { TasksPage } from "./routes/tasks/list/TasksPage"; import { WorkflowPage } from "./routes/workflows/WorkflowPage"; import { WorkflowRun } from "./routes/workflows/WorkflowRun"; import { WorkflowRunParameters } from "./routes/workflows/WorkflowRunParameters"; @@ -27,7 +25,7 @@ const router = createBrowserRouter([ children: [ { index: true, - element: , + element: , }, { path: "tasks", @@ -35,7 +33,21 @@ const router = createBrowserRouter([ children: [ { index: true, - element: , + element: , + }, + { + path: "create", + element: , + children: [ + { + path: ":template", + element: , + }, + { + path: "retry/:taskId", + element: , + }, + ], }, { path: ":taskId", @@ -65,24 +77,6 @@ const router = createBrowserRouter([ }, ], }, - { - path: "create", - element: , - children: [ - { - index: true, - element: , - }, - { - path: ":template", - element: , - }, - { - path: "retry/:taskId", - element: , - }, - ], - }, { path: "workflows", element: , diff --git a/skyvern-frontend/src/routes/root/SideNav.tsx b/skyvern-frontend/src/routes/root/SideNav.tsx index 97f8d0f51..9a6be3482 100644 --- a/skyvern-frontend/src/routes/root/SideNav.tsx +++ b/skyvern-frontend/src/routes/root/SideNav.tsx @@ -1,75 +1,43 @@ +import { RobotIcon } from "@/components/icons/RobotIcon"; +import { NavLinkGroup } from "@/components/NavLinkGroup"; +import { useSidebarStore } from "@/store/SidebarStore"; import { cn } from "@/util/utils"; -import { - GearIcon, - LightningBoltIcon, - ListBulletIcon, - PlusCircledIcon, -} from "@radix-ui/react-icons"; -import { NavLink } from "react-router-dom"; +import { GearIcon, LightningBoltIcon } from "@radix-ui/react-icons"; -type Props = { - collapsed: boolean; -}; +function SideNav() { + const { collapsed } = useSidebarStore(); -function SideNav({ collapsed }: Props) { return ( -
- +
>([ diff --git a/skyvern-frontend/src/routes/tasks/create/PromptBox.tsx b/skyvern-frontend/src/routes/tasks/create/PromptBox.tsx index e4c7ad87e..f3f8913e2 100644 --- a/skyvern-frontend/src/routes/tasks/create/PromptBox.tsx +++ b/skyvern-frontend/src/routes/tasks/create/PromptBox.tsx @@ -1,7 +1,6 @@ import { getClient } from "@/api/AxiosClient"; import { TaskGenerationApiResponse } from "@/api/types"; import img from "@/assets/promptBoxBg.png"; -import { BookIcon } from "@/components/icons/BookIcon"; import { CartIcon } from "@/components/icons/CartIcon"; import { GraphIcon } from "@/components/icons/GraphIcon"; import { InboxIcon } from "@/components/icons/InboxIcon"; @@ -16,6 +15,7 @@ import { GearIcon, PaperPlaneIcon, Pencil1Icon, + PlusIcon, ReloadIcon, } from "@radix-ui/react-icons"; import { useMutation, useQueryClient } from "@tanstack/react-query"; @@ -97,11 +97,6 @@ const exampleCases = [ label: "Search for AAPL on Google Finance", icon: , }, - { - key: "NYTBestseller", - label: "Get the top NYT bestseller", - icon: , - }, { key: "topRankedFootballTeam", label: "Get the top ranked football team", @@ -196,7 +191,7 @@ function PromptBox() { const taskGenerationResponse = await getTaskFromPromptMutation.mutateAsync(prompt); await saveTaskMutation.mutateAsync(taskGenerationResponse); - navigate("/create/from-prompt", { + navigate("/tasks/create/from-prompt", { state: { data: taskGenerationResponse, }, @@ -209,13 +204,22 @@ function PromptBox() {
+
{ + navigate("/tasks/create/blank"); + }} + > + + Build Your Own +
{exampleCases.map((example) => { return (
{ - navigate(`/create/${example.key}`); + navigate(`/tasks/create/${example.key}`); }} >
{example.icon}
diff --git a/skyvern-frontend/src/routes/tasks/create/retry/RetryTask.tsx b/skyvern-frontend/src/routes/tasks/create/retry/RetryTask.tsx index 4c11c4c80..0b5d0a572 100644 --- a/skyvern-frontend/src/routes/tasks/create/retry/RetryTask.tsx +++ b/skyvern-frontend/src/routes/tasks/create/retry/RetryTask.tsx @@ -22,12 +22,12 @@ function RetryTask() {
diff --git a/skyvern-frontend/src/routes/tasks/detail/TaskDetails.tsx b/skyvern-frontend/src/routes/tasks/detail/TaskDetails.tsx index 8541c86f1..148b154f1 100644 --- a/skyvern-frontend/src/routes/tasks/detail/TaskDetails.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/TaskDetails.tsx @@ -242,7 +242,7 @@ function TaskDetails() { )} {taskHasTerminalState && ( - - - - Running Tasks - Tasks that are currently running - - -
- -
-
-
- - - Queued Tasks - Tasks that are waiting to run - - - - - - - - Task History - Tasks you have run previously - - - - - -
- ); -} - -export { TaskList }; diff --git a/skyvern-frontend/src/routes/tasks/list/TasksPage.tsx b/skyvern-frontend/src/routes/tasks/list/TasksPage.tsx new file mode 100644 index 000000000..0326a6474 --- /dev/null +++ b/skyvern-frontend/src/routes/tasks/list/TasksPage.tsx @@ -0,0 +1,43 @@ +import { TaskHistory } from "./TaskHistory"; +import { PromptBox } from "../create/PromptBox"; +import { useState } from "react"; +import { cn } from "@/util/utils"; +import { SavedTasks } from "../create/SavedTasks"; + +function TasksPage() { + const [view, setView] = useState<"history" | "myTasks">("history"); + + return ( +
+ +
+
setView("history")} + > + Run History +
+
setView("myTasks")} + > + My Tasks +
+
+ {view === "history" && } + {view === "myTasks" && } +
+ ); +} + +export { TasksPage }; From f8a6b58cf9a0bb655380ec2d5fd78f8abdafa28d Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 12:09:42 -0800 Subject: [PATCH 16/40] Fix a navigation bug with saved tasks (#1331) --- skyvern-frontend/src/routes/tasks/create/SavedTaskCard.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyvern-frontend/src/routes/tasks/create/SavedTaskCard.tsx b/skyvern-frontend/src/routes/tasks/create/SavedTaskCard.tsx index 0679a7bef..a9d828325 100644 --- a/skyvern-frontend/src/routes/tasks/create/SavedTaskCard.tsx +++ b/skyvern-frontend/src/routes/tasks/create/SavedTaskCard.tsx @@ -156,7 +156,7 @@ function SavedTaskCard({ workflowId, title, url, description }: Props) { }, )} onClick={() => { - navigate(workflowId); + navigate(`/tasks/create/${workflowId}`); }} > {description} From 8bf863c4cb9c38b85694cce68512554fa38ac09f Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 17:14:05 -0800 Subject: [PATCH 17/40] workflow run block (#1332) --- ...254717601_introduce_workflow_run_blocks.py | 64 +++++++++++++++++++ skyvern/forge/sdk/db/id.py | 6 ++ skyvern/forge/sdk/db/models.py | 22 +++++++ 3 files changed, 92 insertions(+) create mode 100644 alembic/versions/2024_12_06_0113-de0254717601_introduce_workflow_run_blocks.py diff --git a/alembic/versions/2024_12_06_0113-de0254717601_introduce_workflow_run_blocks.py b/alembic/versions/2024_12_06_0113-de0254717601_introduce_workflow_run_blocks.py new file mode 100644 index 000000000..ec730ec98 --- /dev/null +++ b/alembic/versions/2024_12_06_0113-de0254717601_introduce_workflow_run_blocks.py @@ -0,0 +1,64 @@ +"""Introduce workflow_run_blocks + +Revision ID: de0254717601 +Revises: db41106b9f1a +Create Date: 2024-12-06 01:13:07.932965+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "de0254717601" +down_revision: Union[str, None] = "db41106b9f1a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "workflow_run_blocks", + sa.Column("workflow_run_block_id", sa.String(), nullable=False), + sa.Column("workflow_run_id", sa.String(), nullable=False), + sa.Column("parent_workflow_run_block_id", sa.String(), nullable=True), + sa.Column("organization_id", sa.String(), nullable=True), + sa.Column("task_id", sa.String(), nullable=True), + sa.Column("label", sa.String(), nullable=True), + sa.Column("block_type", sa.String(), nullable=False), + sa.Column("status", sa.String(), nullable=False), + sa.Column("output", sa.JSON(), nullable=True), + sa.Column("continue_on_failure", sa.Boolean(), nullable=False), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.Column("modified_at", sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint( + ["organization_id"], + ["organizations.organization_id"], + ), + sa.ForeignKeyConstraint( + ["parent_workflow_run_block_id"], + ["workflow_run_blocks.workflow_run_block_id"], + ), + sa.ForeignKeyConstraint( + ["task_id"], + ["tasks.task_id"], + ), + sa.ForeignKeyConstraint( + ["workflow_run_id"], + ["workflow_runs.workflow_run_id"], + ), + sa.PrimaryKeyConstraint("workflow_run_block_id"), + ) + op.create_index("wfrb_org_wfr_index", "workflow_run_blocks", ["organization_id", "workflow_run_id"], unique=False) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index("wfrb_org_wfr_index", table_name="workflow_run_blocks") + op.drop_table("workflow_run_blocks") + # ### end Alembic commands ### diff --git a/skyvern/forge/sdk/db/id.py b/skyvern/forge/sdk/db/id.py index 36efab2a1..da992cf53 100644 --- a/skyvern/forge/sdk/db/id.py +++ b/skyvern/forge/sdk/db/id.py @@ -36,6 +36,7 @@ WORKFLOW_PREFIX = "w" WORKFLOW_PERMANENT_ID_PREFIX = "wpid" WORKFLOW_RUN_PREFIX = "wr" +WORKFLOW_RUN_BLOCK_PREFIX = "wrb" WORKFLOW_PARAMETER_PREFIX = "wp" AWS_SECRET_PARAMETER_PREFIX = "asp" OUTPUT_PARAMETER_PREFIX = "op" @@ -55,6 +56,11 @@ def generate_workflow_permanent_id() -> str: return f"{WORKFLOW_PERMANENT_ID_PREFIX}_{int_id}" +def generate_workflow_run_block_id() -> str: + int_id = generate_id() + return f"{WORKFLOW_RUN_BLOCK_PREFIX}_{int_id}" + + def generate_workflow_run_id() -> str: int_id = generate_id() return f"{WORKFLOW_RUN_PREFIX}_{int_id}" diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 1c1d3b68f..6c61db955 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -35,6 +35,7 @@ generate_workflow_id, generate_workflow_parameter_id, generate_workflow_permanent_id, + generate_workflow_run_block_id, generate_workflow_run_id, ) from skyvern.forge.sdk.schemas.tasks import ProxyLocation @@ -473,3 +474,24 @@ class ActionModel(Base): created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False) modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) + + +class WorkflowRunBlockModel(Base): + __tablename__ = "workflow_run_blocks" + __table_args__ = (Index("wfrb_org_wfr_index", "organization_id", "workflow_run_id"),) + + workflow_run_block_id = Column(String, primary_key=True, default=generate_workflow_run_block_id) + workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=False) + parent_workflow_run_block_id = Column( + String, ForeignKey("workflow_run_blocks.workflow_run_block_id"), nullable=True + ) + organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True) + task_id = Column(String, ForeignKey("tasks.task_id"), nullable=True) + label = Column(String, nullable=True) + block_type = Column(String, nullable=False) + status = Column(String, nullable=False) + output = Column(JSON, nullable=True) + continue_on_failure = Column(Boolean, nullable=False, default=False) + + created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False) + modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) From 5d32d53892c398aa63bc9f6adf35df55dcb7419f Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 6 Dec 2024 11:35:32 +0800 Subject: [PATCH 18/40] forloop metadata variables (#1334) --- skyvern/forge/sdk/workflow/context_manager.py | 15 ++++++++++ skyvern/forge/sdk/workflow/models/block.py | 29 ++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/skyvern/forge/sdk/workflow/context_manager.py b/skyvern/forge/sdk/workflow/context_manager.py index 7b0eabab9..5c0429724 100644 --- a/skyvern/forge/sdk/workflow/context_manager.py +++ b/skyvern/forge/sdk/workflow/context_manager.py @@ -25,6 +25,9 @@ LOG = structlog.get_logger() +BlockMetadata = dict[str, str | int | float | bool | dict | list] + + class WorkflowRunContext: parameters: dict[str, PARAMETER_TYPE] values: dict[str, Any] @@ -36,9 +39,12 @@ def __init__( workflow_output_parameters: list[OutputParameter], context_parameters: list[ContextParameter], ) -> None: + # key is label name + self.blocks_metadata: dict[str, BlockMetadata] = {} self.parameters = {} self.values = {} self.secrets = {} + for parameter, run_parameter in workflow_parameter_tuples: if parameter.key in self.parameters: prev_value = self.parameters[parameter.key] @@ -81,6 +87,15 @@ def has_value(self, key: str) -> bool: def set_value(self, key: str, value: Any) -> None: self.values[key] = value + def update_block_metadata(self, label: str, metadata: BlockMetadata) -> None: + if label in self.blocks_metadata: + self.blocks_metadata[label].update(metadata) + return + self.blocks_metadata[label] = metadata + + def get_block_metadata(self, label: str) -> BlockMetadata: + return self.blocks_metadata.get(label, BlockMetadata()) + def get_original_secret_value_or_none(self, secret_id_or_value: Any) -> Any: """ Get the original secret value from the secrets dict. If the secret id is not found, return None. diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 98ded18cb..4b011fb6a 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -45,7 +45,7 @@ from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory from skyvern.forge.sdk.db.enums import TaskType from skyvern.forge.sdk.schemas.tasks import Task, TaskOutput, TaskStatus -from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext +from skyvern.forge.sdk.workflow.context_manager import BlockMetadata, WorkflowRunContext from skyvern.forge.sdk.workflow.exceptions import ( InvalidEmailClientConfiguration, InvalidFileType, @@ -140,6 +140,17 @@ def build_block_result( status=status, ) + def format_block_parameter_template_from_workflow_run_context( + self, potential_template: str, workflow_run_context: WorkflowRunContext + ) -> str: + if not potential_template: + return potential_template + template = Template(potential_template) + + template_data = workflow_run_context.values.copy() + template_data[self.label] = workflow_run_context.get_block_metadata(self.label) + return template.render(template_data) + @classmethod def get_subclasses(cls) -> tuple[type["Block"], ...]: return tuple(cls.__subclasses__()) @@ -152,15 +163,6 @@ def get_workflow_run_context(workflow_run_id: str) -> WorkflowRunContext: def get_async_aws_client() -> AsyncAWSClient: return app.WORKFLOW_CONTEXT_MANAGER.aws_client - @staticmethod - def format_block_parameter_template_from_workflow_run_context( - potential_template: str, workflow_run_context: WorkflowRunContext - ) -> str: - if not potential_template: - return potential_template - template = Template(potential_template) - return template.render(workflow_run_context.values) - @abc.abstractmethod async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult: pass @@ -659,8 +661,15 @@ async def execute_loop_helper( context_parameters_with_value = self.get_loop_block_context_parameters(workflow_run_id, loop_over_value) for context_parameter in context_parameters_with_value: workflow_run_context.set_value(context_parameter.key, context_parameter.value) + each_loop_output_values: list[dict[str, Any]] = [] for block_idx, loop_block in enumerate(self.loop_blocks): + metadata: BlockMetadata = { + "current_index": loop_idx, + "current_value": loop_over_value, + } + workflow_run_context.update_block_metadata(loop_block.label, metadata) + original_loop_block = loop_block loop_block = loop_block.copy() current_block = loop_block From 3c37a4aae88e6c1634d7b0f604328466ba6dc53a Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 6 Dec 2024 11:56:12 +0800 Subject: [PATCH 19/40] auto prepend scheme to url (#1335) --- skyvern/exceptions.py | 6 ++--- skyvern/forge/agent.py | 10 ++++--- skyvern/forge/sdk/core/validators.py | 21 +++++++++++++++ skyvern/forge/sdk/schemas/tasks.py | 26 +++++++++++++------ skyvern/forge/sdk/workflow/models/workflow.py | 25 +++++++++++++++++- 5 files changed, 72 insertions(+), 16 deletions(-) diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 6a0634d80..96e7f8000 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -504,11 +504,9 @@ def __init__(self, message: str) -> None: super().__init__(message) -class InvalidUrl(SkyvernHTTPException): +class InvalidUrl(SkyvernException): def __init__(self, url: str) -> None: - super().__init__( - f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls.", status_code=status.HTTP_400_BAD_REQUEST - ) + super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.") class BlockedHost(SkyvernHTTPException): diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index f931edab9..2dd628fac 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -48,7 +48,7 @@ from skyvern.forge.sdk.artifact.models import ArtifactType from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.security import generate_skyvern_signature -from skyvern.forge.sdk.core.validators import validate_url +from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url from skyvern.forge.sdk.db.enums import TaskType from skyvern.forge.sdk.models import Organization, Step, StepStatus from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus @@ -141,7 +141,11 @@ async def create_task_and_step_from_block( task_url = working_page.url - task_url = validate_url(task_url) + task_url = prepend_scheme_and_validate_url(task_url) + totp_verification_url = task_block.totp_verification_url + if totp_verification_url: + totp_verification_url = prepend_scheme_and_validate_url(totp_verification_url) + task = await app.DATABASE.create_task( url=task_url, task_type=task_block.task_type, @@ -149,7 +153,7 @@ async def create_task_and_step_from_block( terminate_criterion=task_block.terminate_criterion, title=task_block.title or task_block.label, webhook_callback_url=None, - totp_verification_url=task_block.totp_verification_url, + totp_verification_url=totp_verification_url, totp_identifier=task_block.totp_identifier, navigation_goal=task_block.navigation_goal, data_extraction_goal=task_block.data_extraction_goal, diff --git a/skyvern/forge/sdk/core/validators.py b/skyvern/forge/sdk/core/validators.py index 54f47210d..7da7c9903 100644 --- a/skyvern/forge/sdk/core/validators.py +++ b/skyvern/forge/sdk/core/validators.py @@ -1,4 +1,5 @@ import ipaddress +from urllib.parse import urlparse from pydantic import HttpUrl, ValidationError, parse_obj_as @@ -6,6 +7,26 @@ from skyvern.exceptions import InvalidUrl +def prepend_scheme_and_validate_url(url: str) -> str: + if not url: + return url + + parsed_url = urlparse(url=url) + if parsed_url.scheme and parsed_url.scheme not in ["http", "https"]: + raise InvalidUrl(url=url) + + # if url doesn't contain any scheme, we prepend `https` to it by default + if not parsed_url.scheme: + url = f"https://{url}" + + try: + HttpUrl(url) + except ValidationError: + raise InvalidUrl(url=url) + + return url + + def validate_url(url: str) -> str: try: if url: diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py index 17654658a..a71e21695 100644 --- a/skyvern/forge/sdk/schemas/tasks.py +++ b/skyvern/forge/sdk/schemas/tasks.py @@ -4,10 +4,11 @@ from enum import StrEnum from typing import Any +from fastapi import status from pydantic import BaseModel, Field, HttpUrl, field_validator -from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, TaskAlreadyCanceled -from skyvern.forge.sdk.core.validators import is_blocked_host +from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, SkyvernHTTPException, TaskAlreadyCanceled +from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url from skyvern.forge.sdk.db.enums import TaskType @@ -99,28 +100,37 @@ class TaskBase(BaseModel): class TaskRequest(TaskBase): - url: HttpUrl = Field( + url: str = Field( ..., description="Starting URL for the task.", examples=["https://www.geico.com"], ) - webhook_callback_url: HttpUrl | None = Field( + webhook_callback_url: str | None = Field( default=None, description="The URL to call when the task is completed.", examples=["https://my-webhook.com"], ) - totp_verification_url: HttpUrl | None = None + totp_verification_url: str | None = None @field_validator("url", "webhook_callback_url", "totp_verification_url") @classmethod - def validate_urls(cls, v: HttpUrl | None) -> HttpUrl | None: - if not v or not v.host: + def validate_urls(cls, url: str | None) -> str | None: + if url is None: + return None + + try: + url = prepend_scheme_and_validate_url(url=url) + v = HttpUrl(url=url) + except Exception as e: + raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST) + + if not v.host: return None host = v.host blocked = is_blocked_host(host) if blocked: raise BlockedHost(host=host) - return v + return str(v) class TaskStatus(StrEnum): diff --git a/skyvern/forge/sdk/workflow/models/workflow.py b/skyvern/forge/sdk/workflow/models/workflow.py index d8d1dc0ac..bf5633900 100644 --- a/skyvern/forge/sdk/workflow/models/workflow.py +++ b/skyvern/forge/sdk/workflow/models/workflow.py @@ -2,8 +2,11 @@ from enum import StrEnum from typing import Any, List -from pydantic import BaseModel +from fastapi import status +from pydantic import BaseModel, HttpUrl, field_validator +from skyvern.exceptions import BlockedHost, SkyvernHTTPException +from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url from skyvern.forge.sdk.schemas.tasks import ProxyLocation from skyvern.forge.sdk.workflow.exceptions import WorkflowDefinitionHasDuplicateBlockLabels from skyvern.forge.sdk.workflow.models.block import BlockTypeVar @@ -17,6 +20,26 @@ class WorkflowRequestBody(BaseModel): totp_verification_url: str | None = None totp_identifier: str | None = None + @field_validator("webhook_callback_url", "totp_verification_url") + @classmethod + def validate_urls(cls, url: str | None) -> str | None: + if url is None: + return None + + try: + url = prepend_scheme_and_validate_url(url=url) + v = HttpUrl(url=url) + except Exception as e: + raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST) + + if not v.host: + return None + host = v.host + blocked = is_blocked_host(host) + if blocked: + raise BlockedHost(host=host) + return str(v) + class RunWorkflowResponse(BaseModel): workflow_id: str From 1aa11461101df37d51f4bbedc2418bd68dd358fc Mon Sep 17 00:00:00 2001 From: Nick Fisher Date: Fri, 6 Dec 2024 12:28:54 +0800 Subject: [PATCH 20/40] rename GEMINI_FLUSH->GEMINI_FLASH (#1333) --- README.md | 4 ++-- skyvern/forge/sdk/api/llm/config_registry.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 17c6a8750..8a1b1167e 100644 --- a/README.md +++ b/README.md @@ -285,8 +285,8 @@ More extensive documentation can be found on our [documentation website](https:/ | `ENABLE_AZURE` | Register Azure OpenAI models | Boolean | `true`, `false` | | `ENABLE_BEDROCK` | Register AWS Bedrock models. To use AWS Bedrock, you need to make sure your [AWS configurations](https://github.com/boto/boto3?tab=readme-ov-file#using-boto3) are set up correctly first. | Boolean | `true`, `false` | | `ENABLE_GEMINI` | Register Gemini models| Boolean | `true`, `false` | -| `LLM_KEY` | The name of the model you want to use | String | Currently supported llm keys: `OPENAI_GPT4_TURBO`, `OPENAI_GPT4V`, `OPENAI_GPT4O`, `OPENAI_GPT4O_MINI`, `ANTHROPIC_CLAUDE3`, `ANTHROPIC_CLAUDE3_OPUS`, `ANTHROPIC_CLAUDE3_SONNET`, `ANTHROPIC_CLAUDE3_HAIKU`, `ANTHROPIC_CLAUDE3.5_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_OPUS`, `BEDROCK_ANTHROPIC_CLAUDE3_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_HAIKU`, `BEDROCK_ANTHROPIC_CLAUDE3.5_SONNET`, `AZURE_OPENAI`, `GEMINI_PRO`, `GEMINI_FLUSH`, `BEDROCK_AMAZON_NOVA_PRO`, `BEDROCK_AMAZON_NOVA_LITE`| -| `SECONDARY_LLM_KEY` | The name of the model for mini agents skyvern runs with | String | Currently supported llm keys: `OPENAI_GPT4_TURBO`, `OPENAI_GPT4V`, `OPENAI_GPT4O`, `OPENAI_GPT4O_MINI`, `ANTHROPIC_CLAUDE3`, `ANTHROPIC_CLAUDE3_OPUS`, `ANTHROPIC_CLAUDE3_SONNET`, `ANTHROPIC_CLAUDE3_HAIKU`, `ANTHROPIC_CLAUDE3.5_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_OPUS`, `BEDROCK_ANTHROPIC_CLAUDE3_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_HAIKU`, `BEDROCK_ANTHROPIC_CLAUDE3.5_SONNET`, `AZURE_OPENAI`, `GEMINI_PRO`, `GEMINI_FLUSH`| +| `LLM_KEY` | The name of the model you want to use | String | Currently supported llm keys: `OPENAI_GPT4_TURBO`, `OPENAI_GPT4V`, `OPENAI_GPT4O`, `OPENAI_GPT4O_MINI`, `ANTHROPIC_CLAUDE3`, `ANTHROPIC_CLAUDE3_OPUS`, `ANTHROPIC_CLAUDE3_SONNET`, `ANTHROPIC_CLAUDE3_HAIKU`, `ANTHROPIC_CLAUDE3.5_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_OPUS`, `BEDROCK_ANTHROPIC_CLAUDE3_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_HAIKU`, `BEDROCK_ANTHROPIC_CLAUDE3.5_SONNET`, `AZURE_OPENAI`, `GEMINI_PRO`, `GEMINI_FLASH`, `BEDROCK_AMAZON_NOVA_PRO`, `BEDROCK_AMAZON_NOVA_LITE`| +| `SECONDARY_LLM_KEY` | The name of the model for mini agents skyvern runs with | String | Currently supported llm keys: `OPENAI_GPT4_TURBO`, `OPENAI_GPT4V`, `OPENAI_GPT4O`, `OPENAI_GPT4O_MINI`, `ANTHROPIC_CLAUDE3`, `ANTHROPIC_CLAUDE3_OPUS`, `ANTHROPIC_CLAUDE3_SONNET`, `ANTHROPIC_CLAUDE3_HAIKU`, `ANTHROPIC_CLAUDE3.5_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_OPUS`, `BEDROCK_ANTHROPIC_CLAUDE3_SONNET`, `BEDROCK_ANTHROPIC_CLAUDE3_HAIKU`, `BEDROCK_ANTHROPIC_CLAUDE3.5_SONNET`, `AZURE_OPENAI`, `GEMINI_PRO`, `GEMINI_FLASH`| | `OPENAI_API_KEY` | OpenAI API Key | String | `sk-1234567890` | | `OPENAI_API_BASE` | OpenAI API Base, optional | String | `https://openai.api.base` | | `OPENAI_ORGANIZATION` | OpenAI Organization ID, optional | String | `your-org-id` | diff --git a/skyvern/forge/sdk/api/llm/config_registry.py b/skyvern/forge/sdk/api/llm/config_registry.py index 966744480..46f68696c 100644 --- a/skyvern/forge/sdk/api/llm/config_registry.py +++ b/skyvern/forge/sdk/api/llm/config_registry.py @@ -278,7 +278,7 @@ def get_config(cls, llm_key: str) -> LLMRouterConfig | LLMConfig: ), ) LLMConfigRegistry.register_config( - "GEMINI_FLUSH", + "GEMINI_FLASH", LLMConfig( "gemini/gemini-1.5-flash", ["GEMINI_API_KEY"], From f3bb2fe35bb9804fab717c623f3eb3c804ace483 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Fri, 6 Dec 2024 12:53:35 +0800 Subject: [PATCH 21/40] bump navigation max retry to 5 (#1336) --- skyvern/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyvern/constants.py b/skyvern/constants.py index 5bbe74558..54319b388 100644 --- a/skyvern/constants.py +++ b/skyvern/constants.py @@ -14,7 +14,7 @@ DOWNLOAD_FILE_PREFIX = "downloads" SAVE_DOWNLOADED_FILES_TIMEOUT = 180 GET_DOWNLOADED_FILES_TIMEOUT = 30 -NAVIGATION_MAX_RETRY_TIME = 3 +NAVIGATION_MAX_RETRY_TIME = 5 # reserved fields for navigation payload SPECIAL_FIELD_VERIFICATION_CODE = "verification_code" From 6606fbacb03ef9f0843e708276fbaaf0a6b05c0f Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 23:16:41 -0800 Subject: [PATCH 22/40] add workflow_run_id column to artifacts + ObserverCruise and ObserverThought (#1298) --- ...96d9557da_introduce_observercruise_and_.py | 91 +++++++++++++++++++ skyvern/forge/sdk/db/id.py | 12 +++ skyvern/forge/sdk/db/models.py | 31 ++++++- 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py diff --git a/alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py b/alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py new file mode 100644 index 000000000..53ea9a038 --- /dev/null +++ b/alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py @@ -0,0 +1,91 @@ +"""Introduce ObserverCruise and ObserverThought. Add workflow_run_block_id to artifacts + +Revision ID: 3c196d9557da +Revises: de0254717601 +Create Date: 2024-12-06 05:40:10.408358+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "3c196d9557da" +down_revision: Union[str, None] = "de0254717601" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "observer_cruises", + sa.Column("observer_cruise_id", sa.String(), nullable=False), + sa.Column("status", sa.String(), nullable=False), + sa.Column("organization_id", sa.String(), nullable=True), + sa.Column("workflow_run_id", sa.String(), nullable=True), + sa.Column("workflow_id", sa.String(), nullable=True), + sa.ForeignKeyConstraint( + ["organization_id"], + ["organizations.organization_id"], + ), + sa.ForeignKeyConstraint( + ["workflow_id"], + ["workflows.workflow_id"], + ), + sa.ForeignKeyConstraint( + ["workflow_run_id"], + ["workflow_runs.workflow_run_id"], + ), + sa.PrimaryKeyConstraint("observer_cruise_id"), + ) + op.create_table( + "observer_thoughts", + sa.Column("observer_thought_id", sa.String(), nullable=False), + sa.Column("organization_id", sa.String(), nullable=True), + sa.Column("observer_cruise_id", sa.String(), nullable=False), + sa.Column("workflow_run_id", sa.String(), nullable=True), + sa.Column("workflow_id", sa.String(), nullable=True), + sa.Column("thought", sa.String(), nullable=True), + sa.Column("answer", sa.String(), nullable=True), + sa.ForeignKeyConstraint( + ["observer_cruise_id"], + ["observer_cruises.observer_cruise_id"], + ), + sa.ForeignKeyConstraint( + ["organization_id"], + ["organizations.organization_id"], + ), + sa.ForeignKeyConstraint( + ["workflow_id"], + ["workflows.workflow_id"], + ), + sa.ForeignKeyConstraint( + ["workflow_run_id"], + ["workflow_runs.workflow_run_id"], + ), + sa.PrimaryKeyConstraint("observer_thought_id"), + ) + op.add_column("artifacts", sa.Column("workflow_run_id", sa.String(), nullable=True)) + op.add_column("artifacts", sa.Column("workflow_run_block_id", sa.String(), nullable=True)) + op.create_index("org_workflow_run_index", "artifacts", ["organization_id", "workflow_run_id"], unique=False) + op.create_foreign_key(None, "artifacts", "workflow_runs", ["workflow_run_id"], ["workflow_run_id"]) + op.create_foreign_key( + None, "artifacts", "workflow_run_blocks", ["workflow_run_block_id"], ["workflow_run_block_id"] + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint(None, "artifacts", type_="foreignkey") + op.drop_constraint(None, "artifacts", type_="foreignkey") + op.drop_index("org_workflow_run_index", table_name="artifacts") + op.drop_column("artifacts", "workflow_run_block_id") + op.drop_column("artifacts", "workflow_run_id") + op.drop_table("observer_thoughts") + op.drop_table("observer_cruises") + # ### end Alembic commands ### diff --git a/skyvern/forge/sdk/db/id.py b/skyvern/forge/sdk/db/id.py index da992cf53..a27242ab4 100644 --- a/skyvern/forge/sdk/db/id.py +++ b/skyvern/forge/sdk/db/id.py @@ -44,6 +44,8 @@ BITWARDEN_SENSITIVE_INFORMATION_PARAMETER_PREFIX = "bsi" BITWARDEN_CREDIT_CARD_DATA_PARAMETER_PREFIX = "bccd" TASK_GENERATION_PREFIX = "tg" +OBSERVER_CRUISE_ID = "oc" +OBSERVER_THOUGHT_ID = "ot" def generate_workflow_id() -> str: @@ -141,6 +143,16 @@ def generate_action_id() -> str: return f"a_{int_id}" +def generate_observer_cruise_id() -> str: + int_id = generate_id() + return f"{OBSERVER_CRUISE_ID}_{int_id}" + + +def generate_observer_thought_id() -> str: + int_id = generate_id() + return f"{OBSERVER_THOUGHT_ID}_{int_id}" + + def generate_id() -> int: """ generate a 64-bit int ID diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 6c61db955..4dd2b2ef4 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -25,6 +25,8 @@ generate_bitwarden_credit_card_data_parameter_id, generate_bitwarden_login_credential_parameter_id, generate_bitwarden_sensitive_information_parameter_id, + generate_observer_cruise_id, + generate_observer_thought_id, generate_org_id, generate_organization_auth_token_id, generate_output_parameter_id, @@ -157,10 +159,15 @@ class OrganizationAuthTokenModel(Base): class ArtifactModel(Base): __tablename__ = "artifacts" - __table_args__ = (Index("org_task_step_index", "organization_id", "task_id", "step_id"),) + __table_args__ = ( + Index("org_task_step_index", "organization_id", "task_id", "step_id"), + Index("org_workflow_run_index", "organization_id", "workflow_run_id"), + ) artifact_id = Column(String, primary_key=True, index=True, default=generate_artifact_id) organization_id = Column(String, ForeignKey("organizations.organization_id")) + workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id")) + workflow_run_block_id = Column(String, ForeignKey("workflow_run_blocks.workflow_run_block_id")) task_id = Column(String, ForeignKey("tasks.task_id")) step_id = Column(String, ForeignKey("steps.step_id"), index=True) artifact_type = Column(String) @@ -495,3 +502,25 @@ class WorkflowRunBlockModel(Base): created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False) modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) + + +class ObserverCruise(Base): + __tablename__ = "observer_cruises" + + observer_cruise_id = Column(String, primary_key=True, default=generate_observer_cruise_id) + status = Column(String, nullable=False, default="created") + organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True) + workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=True) + workflow_id = Column(String, ForeignKey("workflows.workflow_id"), nullable=True) + + +class ObserverThought(Base): + __tablename__ = "observer_thoughts" + + observer_thought_id = Column(String, primary_key=True, default=generate_observer_thought_id) + organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True) + observer_cruise_id = Column(String, ForeignKey("observer_cruises.observer_cruise_id"), nullable=False) + workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=True) + workflow_id = Column(String, ForeignKey("workflows.workflow_id"), nullable=True) + thought = Column(String, nullable=True) + answer = Column(String, nullable=True) From aabc0fc3caa9fc847ea8a4355ec6b4ea2d076c89 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Thu, 5 Dec 2024 23:28:49 -0800 Subject: [PATCH 23/40] add observer cruise id to artifacts table (#1337) --- ...6217e9d9_introduce_observercruise_and_.py} | 32 ++++++++++++++----- skyvern/forge/sdk/db/models.py | 1 + 2 files changed, 25 insertions(+), 8 deletions(-) rename alembic/versions/{2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py => 2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py} (73%) diff --git a/alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py b/alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py similarity index 73% rename from alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py rename to alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py index 53ea9a038..f9221dda4 100644 --- a/alembic/versions/2024_12_06_0540-3c196d9557da_introduce_observercruise_and_.py +++ b/alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py @@ -1,8 +1,8 @@ -"""Introduce ObserverCruise and ObserverThought. Add workflow_run_block_id to artifacts +"""Introduce ObserverCruise and ObserverThought. Add workflow_run_block_id and observer_cruise_id to artifacts -Revision ID: 3c196d9557da +Revision ID: 5e266217e9d9 Revises: de0254717601 -Create Date: 2024-12-06 05:40:10.408358+00:00 +Create Date: 2024-12-06 07:25:50.080125+00:00 """ @@ -13,7 +13,7 @@ from alembic import op # revision identifiers, used by Alembic. -revision: str = "3c196d9557da" +revision: str = "5e266217e9d9" down_revision: Union[str, None] = "de0254717601" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -71,19 +71,35 @@ def upgrade() -> None: ) op.add_column("artifacts", sa.Column("workflow_run_id", sa.String(), nullable=True)) op.add_column("artifacts", sa.Column("workflow_run_block_id", sa.String(), nullable=True)) + op.add_column("artifacts", sa.Column("observer_cruise_id", sa.String(), nullable=True)) op.create_index("org_workflow_run_index", "artifacts", ["organization_id", "workflow_run_id"], unique=False) - op.create_foreign_key(None, "artifacts", "workflow_runs", ["workflow_run_id"], ["workflow_run_id"]) op.create_foreign_key( - None, "artifacts", "workflow_run_blocks", ["workflow_run_block_id"], ["workflow_run_block_id"] + "artifacts_workflow_run_block_id_fkey", + "artifacts", + "workflow_run_blocks", + ["workflow_run_block_id"], + ["workflow_run_block_id"], + ) + op.create_foreign_key( + "artifacts_observer_cruise_id_fkey", + "artifacts", + "observer_cruises", + ["observer_cruise_id"], + ["observer_cruise_id"], + ) + op.create_foreign_key( + "artifacts_workflow_run_id_fkey", "artifacts", "workflow_runs", ["workflow_run_id"], ["workflow_run_id"] ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(None, "artifacts", type_="foreignkey") - op.drop_constraint(None, "artifacts", type_="foreignkey") + op.drop_constraint("artifacts_workflow_run_block_id_fkey", "artifacts", type_="foreignkey") + op.drop_constraint("artifacts_observer_cruise_id_fkey", "artifacts", type_="foreignkey") + op.drop_constraint("artifacts_workflow_run_id_fkey", "artifacts", type_="foreignkey") op.drop_index("org_workflow_run_index", table_name="artifacts") + op.drop_column("artifacts", "observer_cruise_id") op.drop_column("artifacts", "workflow_run_block_id") op.drop_column("artifacts", "workflow_run_id") op.drop_table("observer_thoughts") diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 4dd2b2ef4..565ed5d68 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -168,6 +168,7 @@ class ArtifactModel(Base): organization_id = Column(String, ForeignKey("organizations.organization_id")) workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id")) workflow_run_block_id = Column(String, ForeignKey("workflow_run_blocks.workflow_run_block_id")) + observer_cruise_id = Column(String, ForeignKey("observer_cruises.observer_cruise_id")) task_id = Column(String, ForeignKey("tasks.task_id")) step_id = Column(String, ForeignKey("steps.step_id"), index=True) artifact_type = Column(String) From b83c64878ee82a2ce53ff45012d03879907a277b Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Fri, 6 Dec 2024 00:58:30 -0800 Subject: [PATCH 24/40] ObserverThought reproduce migration script (#1338) --- ...852-4d51ed4719d5_introduce_observercruise_and_.py} | 11 ++++++++--- skyvern/forge/sdk/db/models.py | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) rename alembic/versions/{2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py => 2024_12_06_0852-4d51ed4719d5_introduce_observercruise_and_.py} (92%) diff --git a/alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py b/alembic/versions/2024_12_06_0852-4d51ed4719d5_introduce_observercruise_and_.py similarity index 92% rename from alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py rename to alembic/versions/2024_12_06_0852-4d51ed4719d5_introduce_observercruise_and_.py index f9221dda4..7fbab699a 100644 --- a/alembic/versions/2024_12_06_0725-5e266217e9d9_introduce_observercruise_and_.py +++ b/alembic/versions/2024_12_06_0852-4d51ed4719d5_introduce_observercruise_and_.py @@ -1,8 +1,8 @@ """Introduce ObserverCruise and ObserverThought. Add workflow_run_block_id and observer_cruise_id to artifacts -Revision ID: 5e266217e9d9 +Revision ID: 4d51ed4719d5 Revises: de0254717601 -Create Date: 2024-12-06 07:25:50.080125+00:00 +Create Date: 2024-12-06 08:52:52.111448+00:00 """ @@ -13,7 +13,7 @@ from alembic import op # revision identifiers, used by Alembic. -revision: str = "5e266217e9d9" +revision: str = "4d51ed4719d5" down_revision: Union[str, None] = "de0254717601" branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -48,6 +48,7 @@ def upgrade() -> None: sa.Column("organization_id", sa.String(), nullable=True), sa.Column("observer_cruise_id", sa.String(), nullable=False), sa.Column("workflow_run_id", sa.String(), nullable=True), + sa.Column("workflow_run_block_id", sa.String(), nullable=True), sa.Column("workflow_id", sa.String(), nullable=True), sa.Column("thought", sa.String(), nullable=True), sa.Column("answer", sa.String(), nullable=True), @@ -63,6 +64,10 @@ def upgrade() -> None: ["workflow_id"], ["workflows.workflow_id"], ), + sa.ForeignKeyConstraint( + ["workflow_run_block_id"], + ["workflow_run_blocks.workflow_run_block_id"], + ), sa.ForeignKeyConstraint( ["workflow_run_id"], ["workflow_runs.workflow_run_id"], diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 565ed5d68..c3523d524 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -522,6 +522,7 @@ class ObserverThought(Base): organization_id = Column(String, ForeignKey("organizations.organization_id"), nullable=True) observer_cruise_id = Column(String, ForeignKey("observer_cruises.observer_cruise_id"), nullable=False) workflow_run_id = Column(String, ForeignKey("workflow_runs.workflow_run_id"), nullable=True) + workflow_run_block_id = Column(String, ForeignKey("workflow_run_blocks.workflow_run_block_id"), nullable=True) workflow_id = Column(String, ForeignKey("workflows.workflow_id"), nullable=True) thought = Column(String, nullable=True) answer = Column(String, nullable=True) From 40e9dd368eab86b7520f51b25ec6ea9b4863cc19 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Wed, 11 Dec 2024 08:27:42 +0100 Subject: [PATCH 25/40] Import structlog --- skyvern/forge/skyvern_log_encoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skyvern/forge/skyvern_log_encoder.py b/skyvern/forge/skyvern_log_encoder.py index 9e17d55d9..7a48c9d4d 100644 --- a/skyvern/forge/skyvern_log_encoder.py +++ b/skyvern/forge/skyvern_log_encoder.py @@ -3,6 +3,7 @@ import json from structlog.dev import ConsoleRenderer +import structlog LOG = structlog.get_logger() From 5c8488d2a0fb4d80f93bc26d3a141e3f8d2a7eeb Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Thu, 12 Dec 2024 12:51:10 +0100 Subject: [PATCH 26/40] Define build_log_uri method --- skyvern/forge/sdk/artifact/models.py | 6 ++++++ skyvern/forge/sdk/artifact/storage/base.py | 6 +++++- skyvern/forge/sdk/artifact/storage/local.py | 6 +++++- skyvern/forge/sdk/artifact/storage/s3.py | 6 +++++- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/skyvern/forge/sdk/artifact/models.py b/skyvern/forge/sdk/artifact/models.py index 2ca075626..d855213ff 100644 --- a/skyvern/forge/sdk/artifact/models.py +++ b/skyvern/forge/sdk/artifact/models.py @@ -89,3 +89,9 @@ def serialize_datetime_to_isoformat(self, value: datetime) -> str: ) signed_url: str | None = None organization_id: str | None = None + + +class LogEntityType(StrEnum): + STEP = "step" + TASK = "task" + WORKFLOW = "workflow" diff --git a/skyvern/forge/sdk/artifact/storage/base.py b/skyvern/forge/sdk/artifact/storage/base.py index 5c68420d0..540df50c8 100644 --- a/skyvern/forge/sdk/artifact/storage/base.py +++ b/skyvern/forge/sdk/artifact/storage/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType +from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType from skyvern.forge.sdk.models import Step # TODO: This should be a part of the ArtifactType model @@ -35,6 +35,10 @@ class BaseStorage(ABC): def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str: pass + @abstractmethod + def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str: + pass + @abstractmethod async def store_artifact(self, artifact: Artifact, data: bytes) -> None: pass diff --git a/skyvern/forge/sdk/artifact/storage/local.py b/skyvern/forge/sdk/artifact/storage/local.py index 0fdcef69a..f8def19b0 100644 --- a/skyvern/forge/sdk/artifact/storage/local.py +++ b/skyvern/forge/sdk/artifact/storage/local.py @@ -8,7 +8,7 @@ from skyvern.config import settings from skyvern.forge.sdk.api.files import get_download_dir, get_skyvern_temp_dir -from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType +from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.models import Step @@ -23,6 +23,10 @@ def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) - file_ext = FILE_EXTENTSION_MAP[artifact_type] return f"file://{self.artifact_path}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}" + def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str: + file_ext = FILE_EXTENTSION_MAP[artifact_type] + return f"file://{self.artifact_path}/logs/{log_entity_type}/{log_entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}" + async def store_artifact(self, artifact: Artifact, data: bytes) -> None: file_path = None try: diff --git a/skyvern/forge/sdk/artifact/storage/s3.py b/skyvern/forge/sdk/artifact/storage/s3.py index 17b587f08..d359f0014 100644 --- a/skyvern/forge/sdk/artifact/storage/s3.py +++ b/skyvern/forge/sdk/artifact/storage/s3.py @@ -12,7 +12,7 @@ make_temp_directory, unzip_files, ) -from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType +from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage from skyvern.forge.sdk.models import Step @@ -26,6 +26,10 @@ def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) - file_ext = FILE_EXTENTSION_MAP[artifact_type] return f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}" + def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str: + file_ext = FILE_EXTENTSION_MAP[artifact_type] + return f"s3://{self.bucket}/{settings.ENV}/logs/{log_entity_type}/{log_entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}" + async def store_artifact(self, artifact: Artifact, data: bytes) -> None: await self.async_client.upload_file(artifact.uri, data) From 4742fc498d9899e871f6db5c3516c19536a2615d Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 13:53:27 +0100 Subject: [PATCH 27/40] Extract save logs functions --- skyvern/forge/agent.py | 29 +------ skyvern/forge/sdk/artifact/manager.py | 32 ++++++- skyvern/forge/sdk/artifact/models.py | 3 +- skyvern/forge/sdk/log_artifacts.py | 117 ++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 27 deletions(-) create mode 100644 skyvern/forge/sdk/log_artifacts.py diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index c3e53a6ee..c1828e1ab 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -45,7 +45,7 @@ from skyvern.forge.async_operations import AgentPhase, AsyncOperationPool from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.api.files import get_path_for_workflow_download_directory, list_files_in_directory, rename_file -from skyvern.forge.sdk.artifact.models import ArtifactType +from skyvern.forge.sdk.artifact.models import ArtifactType, LogEntityType from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url @@ -76,6 +76,8 @@ from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder +from skyvern.forge.sdk.log_artifacts import save_step_logs + LOG = structlog.get_logger() @@ -1785,30 +1787,7 @@ async def update_step( diff=update_comparison, ) - try: - log = skyvern_context.current().log - current_step_log = [entry for entry in log if entry.get("step_id", "") == step.step_id] - - log_json = json.dumps(current_step_log, cls=SkyvernJSONLogEncoder, indent=2) - await app.ARTIFACT_MANAGER.create_artifact( - step=step, - artifact_type=ArtifactType.SKYVERN_LOG_RAW, - data=log_json.encode(), - ) - - formatted_log = SkyvernLogEncoder.encode(current_step_log) - await app.ARTIFACT_MANAGER.create_artifact( - step=step, - artifact_type=ArtifactType.SKYVERN_LOG, - data=formatted_log.encode(), - ) - except Exception: - LOG.error( - "Failed to record skyvern log after action", - task_id=step.task_id, - step_id=step.step_id, - exc_info=True, - ) + await save_step_logs(step.step_id) return await app.DATABASE.update_step( task_id=step.task_id, diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index bbe2af54c..bd776ed1e 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -6,7 +6,7 @@ import structlog from skyvern.forge import app -from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType +from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType from skyvern.forge.sdk.db.id import generate_artifact_id from skyvern.forge.sdk.models import Step from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought @@ -82,6 +82,36 @@ async def create_artifact( path=path, ) + async def create_log_artifact( + self, + log_entity_type: LogEntityType, + log_entity_id: str, + artifact_type: ArtifactType, + step_id: str | None = None, + task_id: str | None = None, + workflow_run_id: str | None = None, + workflow_run_block_id: str | None = None, + organization_id: str | None = None, + data: bytes | None = None, + path: str | None = None, + ) -> str: + artifact_id = generate_artifact_id() + uri = app.STORAGE.build_log_uri(log_entity_type, log_entity_id, artifact_type) + print("[create_log_artifact] uri", uri) + return await self._create_artifact( + aio_task_primary_key=log_entity_id, + artifact_id=artifact_id, + artifact_type=artifact_type, + uri=uri, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + data=data, + path=path, + ) + async def create_observer_thought_artifact( self, observer_thought: ObserverThought, diff --git a/skyvern/forge/sdk/artifact/models.py b/skyvern/forge/sdk/artifact/models.py index 8ba9810ca..631d7557e 100644 --- a/skyvern/forge/sdk/artifact/models.py +++ b/skyvern/forge/sdk/artifact/models.py @@ -78,4 +78,5 @@ def serialize_datetime_to_isoformat(self, value: datetime) -> str: class LogEntityType(StrEnum): STEP = "step" TASK = "task" - WORKFLOW = "workflow" + WORKFLOW_RUN = "workflow_run" + WORKFLOW_RUN_BLOCK = "workflow_run_block" diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py new file mode 100644 index 000000000..97c1e08dc --- /dev/null +++ b/skyvern/forge/sdk/log_artifacts.py @@ -0,0 +1,117 @@ +import json +import structlog + +from skyvern.forge import app +from skyvern.forge.sdk.core import skyvern_context +from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder +from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder +from skyvern.forge.sdk.artifact.models import ArtifactType, LogEntityType + +LOG = structlog.get_logger() + +async def save_step_logs(step_id: str) -> None: + log = skyvern_context.current().log + organization_id = skyvern_context.current().organization_id + + current_step_log = [entry for entry in log if entry.get("step_id", "") == step_id] + + await _save_log_artifacts( + log=current_step_log, + log_entity_type=LogEntityType.STEP, + log_entity_id=step_id, + organization_id=organization_id, + ) + + +async def save_task_logs(task_id: str) -> None: + log = skyvern_context.current().log + organization_id = skyvern_context.current().organization_id + + current_task_log = [entry for entry in log if entry.get("task_id", "") == task_id] + + await _save_log_artifacts( + log=current_task_log, + log_entity_type=LogEntityType.TASK, + log_entity_id=task_id, + organization_id=organization_id, + ) + + +async def save_workflow_run_logs(workflow_run_id: str) -> None: + log = skyvern_context.current().log + organization_id = skyvern_context.current().organization_id + + current_workflow_run_log = [entry for entry in log if entry.get("workflow_run_id", "") == workflow_run_id] + + await _save_log_artifacts( + log=current_workflow_run_log, + log_entity_type=LogEntityType.WORKFLOW_RUN, + log_entity_id=workflow_run_id, + organization_id=organization_id, + workflow_run_id=workflow_run_id, + ) + + +async def save_workflow_run_block_logs(workflow_run_block_id: str) -> None: + log = skyvern_context.current().log + organization_id = skyvern_context.current().organization_id + current_workflow_run_block_log = [entry for entry in log if entry.get("workflow_run_block_id", "") == workflow_run_block_id] + + await _save_log_artifacts( + log=current_workflow_run_block_log, + log_entity_type=LogEntityType.WORKFLOW_RUN_BLOCK, + log_entity_id=workflow_run_block_id, + organization_id=organization_id, + workflow_run_block_id=workflow_run_block_id, + ) + + +async def _save_log_artifacts( + log: list[dict], + log_entity_type: LogEntityType, + log_entity_id: str, + organization_id: str, + step_id: str | None = None, + task_id: str | None = None, + workflow_run_id: str | None = None, + workflow_run_block_id: str | None = None, +) -> None: + try: + log_json = json.dumps(log, cls=SkyvernJSONLogEncoder, indent=2) + await app.ARTIFACT_MANAGER.create_log_artifact( + organization_id=organization_id, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + log_entity_type=log_entity_type, + log_entity_id=log_entity_id, + artifact_type=ArtifactType.SKYVERN_LOG_RAW, + data=log_json.encode(), + ) + + formatted_log = SkyvernLogEncoder.encode(log) + await app.ARTIFACT_MANAGER.create_log_artifact( + organization_id=organization_id, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + log_entity_type=log_entity_type, + log_entity_id=log_entity_id, + artifact_type=ArtifactType.SKYVERN_LOG, + data=formatted_log.encode(), + ) + except Exception as e: + LOG.error( + "Failed to save log artifacts", + log_entity_type=log_entity_type, + log_entity_id=log_entity_id, + organization_id=organization_id, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + exc_info=True, + ) + From 974b37ec6bfc984cd10546b8bb1e75a2cda46bbb Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 14:22:48 +0100 Subject: [PATCH 28/40] Pass step_id to log artifact methods --- skyvern/forge/agent.py | 2 -- skyvern/forge/sdk/artifact/manager.py | 1 - skyvern/forge/sdk/log_artifacts.py | 2 ++ skyvern/forge/skyvern_json_encoder.py | 40 ++++++++++++++++++++++++--- skyvern/forge/skyvern_log_encoder.py | 10 +++---- 5 files changed, 42 insertions(+), 13 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index c1828e1ab..fe8720483 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -73,8 +73,6 @@ from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame -from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder -from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder from skyvern.forge.sdk.log_artifacts import save_step_logs diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index bd776ed1e..fdbc4b912 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -97,7 +97,6 @@ async def create_log_artifact( ) -> str: artifact_id = generate_artifact_id() uri = app.STORAGE.build_log_uri(log_entity_type, log_entity_id, artifact_type) - print("[create_log_artifact] uri", uri) return await self._create_artifact( aio_task_primary_key=log_entity_id, artifact_id=artifact_id, diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py index 97c1e08dc..35483fd45 100644 --- a/skyvern/forge/sdk/log_artifacts.py +++ b/skyvern/forge/sdk/log_artifacts.py @@ -20,6 +20,7 @@ async def save_step_logs(step_id: str) -> None: log_entity_type=LogEntityType.STEP, log_entity_id=step_id, organization_id=organization_id, + step_id=step_id, ) @@ -34,6 +35,7 @@ async def save_task_logs(task_id: str) -> None: log_entity_type=LogEntityType.TASK, log_entity_id=task_id, organization_id=organization_id, + task_id=task_id, ) diff --git a/skyvern/forge/skyvern_json_encoder.py b/skyvern/forge/skyvern_json_encoder.py index 1e365e9f0..4103aee1c 100644 --- a/skyvern/forge/skyvern_json_encoder.py +++ b/skyvern/forge/skyvern_json_encoder.py @@ -5,18 +5,50 @@ class SkyvernJSONLogEncoder(json.JSONEncoder): """Custom JSON encoder for Skyvern logs that handles non-serializable objects""" def default(self, obj: Any) -> Any: if hasattr(obj, 'model_dump'): - return obj.model_dump() + return self._encode_value(obj.model_dump()) if hasattr(obj, '__dataclass_fields__'): - return {k: getattr(obj, k) for k in obj.__dataclass_fields__} + return self._encode_value({k: getattr(obj, k) for k in obj.__dataclass_fields__}) + + if hasattr(obj, 'to_dict'): + return self._encode_value(obj.to_dict()) + + if hasattr(obj, 'asdict'): + return self._encode_value(obj.asdict()) if hasattr(obj, '__dict__'): return { 'type': obj.__class__.__name__, - 'attributes': {k: v for k, v in obj.__dict__.items() if not k.startswith('_')} + 'attributes': { + k: self._encode_value(v) + for k, v in obj.__dict__.items() + if not k.startswith('_') and not callable(v) + } } - # Handle other non-serializable objects + try: return str(obj) except Exception: return f"" + + def _encode_value(self, value: Any) -> Any: + """Helper method to encode nested values recursively""" + if isinstance(value, (str, int, float, bool, type(None))): + return value + + if isinstance(value, (list, tuple)): + return [self._encode_value(item) for item in value] + + if isinstance(value, dict): + return { + self._encode_value(k): self._encode_value(v) + for k, v in value.items() + } + + # For any other type, try to encode it using our custom logic + return self.default(value) + + @classmethod + def dumps(cls, obj: Any, **kwargs) -> str: + """Helper method to properly encode objects to JSON string""" + return json.dumps(obj, cls=cls, **kwargs) diff --git a/skyvern/forge/skyvern_log_encoder.py b/skyvern/forge/skyvern_log_encoder.py index 7a48c9d4d..d90bb177d 100644 --- a/skyvern/forge/skyvern_log_encoder.py +++ b/skyvern/forge/skyvern_log_encoder.py @@ -5,6 +5,7 @@ from structlog.dev import ConsoleRenderer import structlog +from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder LOG = structlog.get_logger() class SkyvernLogEncoder: @@ -16,12 +17,9 @@ def __init__(self): colors=False, ) - @staticmethod - def _format_value(value: Any) -> str: - """Format complex values into readable strings.""" - if isinstance(value, (dict, list)): - return json.dumps(value, sort_keys=True) - return str(value) + @classmethod + def _format_value(cls, value): + return SkyvernJSONLogEncoder.dumps(value, sort_keys=True) @staticmethod def _parse_json_entry(entry: Dict[str, Any]) -> Dict[str, Any]: From d48f792d0a6696571665c3eadb64e6a06ca71d14 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 14:28:51 +0100 Subject: [PATCH 29/40] Define get_artifact_by_entity_id --- skyvern/forge/sdk/db/client.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index dc474fc25..b9400aa24 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -808,6 +808,49 @@ async def get_artifact_by_id( LOG.exception("UnexpectedError") raise + async def get_artifact_by_entity_id( + self, + artifact_type: ArtifactType, + task_id: str | None = None, + step_id: str | None = None, + workflow_run_id: str | None = None, + workflow_run_block_id: str | None = None, + observer_thought_id: str | None = None, + observer_cruise_id: str | None = None, + organization_id: str | None = None, + ) -> Artifact | None: + try: + async with self.Session() as session: + query = select(ArtifactModel).filter_by(artifact_type=artifact_type) + + if task_id is not None: + query = query.filter_by(task_id=task_id) + if step_id is not None: + query = query.filter_by(step_id=step_id) + if workflow_run_id is not None: + query = query.filter_by(workflow_run_id=workflow_run_id) + if workflow_run_block_id is not None: + query = query.filter_by(workflow_run_block_id=workflow_run_block_id) + if observer_thought_id is not None: + query = query.filter_by(observer_thought_id=observer_thought_id) + if observer_cruise_id is not None: + query = query.filter_by(observer_cruise_id=observer_cruise_id) + if organization_id is not None: + query = query.filter_by(organization_id=organization_id) + + query = query.order_by(ArtifactModel.created_at.desc()) + artifact = (await session.scalars(query)).first() + + if artifact: + return convert_to_artifact(artifact, self.debug_enabled) + return None + except SQLAlchemyError: + LOG.error("SQLAlchemyError", exc_info=True) + raise + except Exception: + LOG.error("UnexpectedError", exc_info=True) + raise + async def get_artifact( self, task_id: str, From f57c7b6c31b8d478a43590a12f3d9317d6c6ea7f Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 14:58:07 +0100 Subject: [PATCH 30/40] Reuse log artifacts when saving --- skyvern/forge/sdk/artifact/manager.py | 28 +++++----- skyvern/forge/sdk/artifact/models.py | 4 ++ skyvern/forge/sdk/log_artifacts.py | 75 ++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 25 deletions(-) diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index fdbc4b912..1cbcabc27 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -13,6 +13,14 @@ LOG = structlog.get_logger(__name__) +PRIMARY_KEY = Literal[ + "task_id", + "observer_thought_id", + "observer_cruise_id", + "step_id", + "workflow_run_id", + "workflow_run_block_id", +] class ArtifactManager: # task_id -> list of aio_tasks for uploading artifacts @@ -203,7 +211,7 @@ async def update_artifact_data( artifact_id: str | None, organization_id: str | None, data: bytes, - primary_key: Literal["task_id", "observer_thought_id", "observer_cruise_id"] = "task_id", + primary_key: PRIMARY_KEY = "task_id", ) -> None: if not artifact_id or not organization_id: return None @@ -212,18 +220,12 @@ async def update_artifact_data( return # Fire and forget aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data)) - if primary_key == "task_id": - if not artifact.task_id: - raise ValueError("Task ID is required to update artifact data.") - self.upload_aiotasks_map[artifact.task_id].append(aio_task) - elif primary_key == "observer_thought_id": - if not artifact.observer_thought_id: - raise ValueError("Observer Thought ID is required to update artifact data.") - self.upload_aiotasks_map[artifact.observer_thought_id].append(aio_task) - elif primary_key == "observer_cruise_id": - if not artifact.observer_cruise_id: - raise ValueError("Observer Cruise ID is required to update artifact data.") - self.upload_aiotasks_map[artifact.observer_cruise_id].append(aio_task) + + print("---",primary_key, artifact[primary_key]) + if not artifact[primary_key]: + raise ValueError(f"{primary_key} is required to update artifact data.") + self.upload_aiotasks_map[artifact[primary_key]].append(aio_task) + async def retrieve_artifact(self, artifact: Artifact) -> bytes | None: return await app.STORAGE.retrieve_artifact(artifact) diff --git a/skyvern/forge/sdk/artifact/models.py b/skyvern/forge/sdk/artifact/models.py index 631d7557e..307c902bf 100644 --- a/skyvern/forge/sdk/artifact/models.py +++ b/skyvern/forge/sdk/artifact/models.py @@ -2,6 +2,7 @@ from datetime import datetime from enum import StrEnum +from typing import Any from pydantic import BaseModel, Field, field_serializer @@ -74,6 +75,9 @@ def serialize_datetime_to_isoformat(self, value: datetime) -> str: signed_url: str | None = None organization_id: str | None = None + def __getitem__(self, key: str) -> Any: + return getattr(self, key) + class LogEntityType(StrEnum): STEP = "step" diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py index 35483fd45..7e16cc0c9 100644 --- a/skyvern/forge/sdk/log_artifacts.py +++ b/skyvern/forge/sdk/log_artifacts.py @@ -9,6 +9,18 @@ LOG = structlog.get_logger() +def primary_key_from_log_entity_type(log_entity_type: LogEntityType) -> str: + if log_entity_type == LogEntityType.STEP: + return "step_id" + elif log_entity_type == LogEntityType.TASK: + return "task_id" + elif log_entity_type == LogEntityType.WORKFLOW_RUN: + return "workflow_run_id" + elif log_entity_type == LogEntityType.WORKFLOW_RUN_BLOCK: + return "workflow_run_block_id" + else: + raise ValueError(f"Invalid log entity type: {log_entity_type}") + async def save_step_logs(step_id: str) -> None: log = skyvern_context.current().log organization_id = skyvern_context.current().organization_id @@ -80,30 +92,69 @@ async def _save_log_artifacts( ) -> None: try: log_json = json.dumps(log, cls=SkyvernJSONLogEncoder, indent=2) - await app.ARTIFACT_MANAGER.create_log_artifact( - organization_id=organization_id, + + log_artifact = await app.DATABASE.get_artifact_by_entity_id( + artifact_type=ArtifactType.SKYVERN_LOG_RAW, step_id=step_id, task_id=task_id, workflow_run_id=workflow_run_id, workflow_run_block_id=workflow_run_block_id, - log_entity_type=log_entity_type, - log_entity_id=log_entity_id, - artifact_type=ArtifactType.SKYVERN_LOG_RAW, - data=log_json.encode(), + organization_id=organization_id, ) + + if log_artifact: + print("log_artifact", log_artifact) + await app.ARTIFACT_MANAGER.update_artifact_data( + artifact_id=log_artifact.artifact_id, + organization_id=organization_id, + data=log_json.encode(), + primary_key=primary_key_from_log_entity_type(log_entity_type), + ) + else: + print("log_artifact not found, creating new one") + await app.ARTIFACT_MANAGER.create_log_artifact( + organization_id=organization_id, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + log_entity_type=log_entity_type, + log_entity_id=log_entity_id, + artifact_type=ArtifactType.SKYVERN_LOG_RAW, + data=log_json.encode(), + ) + formatted_log = SkyvernLogEncoder.encode(log) - await app.ARTIFACT_MANAGER.create_log_artifact( - organization_id=organization_id, + + formatted_log_artifact = await app.DATABASE.get_artifact_by_entity_id( + artifact_type=ArtifactType.SKYVERN_LOG, step_id=step_id, task_id=task_id, workflow_run_id=workflow_run_id, workflow_run_block_id=workflow_run_block_id, - log_entity_type=log_entity_type, - log_entity_id=log_entity_id, - artifact_type=ArtifactType.SKYVERN_LOG, - data=formatted_log.encode(), + organization_id=organization_id, ) + + if formatted_log_artifact: + await app.ARTIFACT_MANAGER.update_artifact_data( + artifact_id=formatted_log_artifact.artifact_id, + organization_id=organization_id, + data=formatted_log.encode(), + primary_key=primary_key_from_log_entity_type(log_entity_type), + ) + else: + await app.ARTIFACT_MANAGER.create_log_artifact( + organization_id=organization_id, + step_id=step_id, + task_id=task_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + log_entity_type=log_entity_type, + log_entity_id=log_entity_id, + artifact_type=ArtifactType.SKYVERN_LOG, + data=formatted_log.encode(), + ) except Exception as e: LOG.error( "Failed to save log artifacts", From c8274c3cc3b03d8f772eac30f80071fac586c962 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 15:28:20 +0100 Subject: [PATCH 31/40] Introduce get artifacts by entity id handler --- skyvern/forge/sdk/db/client.py | 44 ++++++++++--- skyvern/forge/sdk/routes/agent_protocol.py | 75 ++++++++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index b9400aa24..4039e8e39 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -808,9 +808,9 @@ async def get_artifact_by_id( LOG.exception("UnexpectedError") raise - async def get_artifact_by_entity_id( + async def get_artifacts_by_entity_id( self, - artifact_type: ArtifactType, + artifact_type: ArtifactType | None = None, task_id: str | None = None, step_id: str | None = None, workflow_run_id: str | None = None, @@ -818,11 +818,13 @@ async def get_artifact_by_entity_id( observer_thought_id: str | None = None, observer_cruise_id: str | None = None, organization_id: str | None = None, - ) -> Artifact | None: + ) -> list[Artifact]: try: async with self.Session() as session: - query = select(ArtifactModel).filter_by(artifact_type=artifact_type) + query = select(ArtifactModel) + if artifact_type is not None: + query = query.filter_by(artifact_type=artifact_type) if task_id is not None: query = query.filter_by(task_id=task_id) if step_id is not None: @@ -839,11 +841,12 @@ async def get_artifact_by_entity_id( query = query.filter_by(organization_id=organization_id) query = query.order_by(ArtifactModel.created_at.desc()) - artifact = (await session.scalars(query)).first() - - if artifact: - return convert_to_artifact(artifact, self.debug_enabled) - return None + if artifacts := ( + await session.scalars(query) + ).all(): + return [convert_to_artifact(artifact, self.debug_enabled) for artifact in artifacts] + else: + return [] except SQLAlchemyError: LOG.error("SQLAlchemyError", exc_info=True) raise @@ -851,6 +854,29 @@ async def get_artifact_by_entity_id( LOG.error("UnexpectedError", exc_info=True) raise + async def get_artifact_by_entity_id( + self, + artifact_type: ArtifactType, + task_id: str | None = None, + step_id: str | None = None, + workflow_run_id: str | None = None, + workflow_run_block_id: str | None = None, + observer_thought_id: str | None = None, + observer_cruise_id: str | None = None, + organization_id: str | None = None, + ) -> Artifact | None: + artifacts = await self.get_artifacts_by_entity_id( + artifact_type=artifact_type, + task_id=task_id, + step_id=step_id, + workflow_run_id=workflow_run_id, + workflow_run_block_id=workflow_run_block_id, + observer_thought_id=observer_thought_id, + observer_cruise_id=observer_cruise_id, + organization_id=organization_id, + ) + return artifacts[0] if artifacts else None + async def get_artifact( self, task_id: str, diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index a5981274c..f569f5e85 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -2,6 +2,7 @@ import hashlib import uuid from typing import Annotated, Any +from enum import Enum import structlog import yaml @@ -454,6 +455,80 @@ async def get_agent_task_steps( return ORJSONResponse([step.model_dump(exclude_none=True) for step in steps]) +class EntityType(str, Enum): + STEP = "step" + TASK = "task" + WORKFLOW_RUN = "workflow_run" + WORKFLOW_RUN_BLOCK = "workflow_run_block" + +entity_type_to_param = { + EntityType.STEP: "step_id", + EntityType.TASK: "task_id", + EntityType.WORKFLOW_RUN: "workflow_run_id", + EntityType.WORKFLOW_RUN_BLOCK: "workflow_run_block_id", +} + +@base_router.get( + "/{entity_type}/{entity_id}/artifacts", + tags=["agent"], + response_model=list[Artifact], +) +@base_router.get( + "/{entity_type}/{entity_id}/artifacts/", + tags=["agent"], + response_model=list[Artifact], + include_in_schema=False, +) +async def get_agent_entity_artifacts( + entity_type: EntityType, + entity_id: str, + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> Response: + """ + Get all artifacts for an entity (step, task, workflow_run). + + Args: + entity_type: Type of entity to fetch artifacts for + entity_id: ID of the entity + current_org: Current organization from auth + + Returns: + List of artifacts for the entity + + Raises: + HTTPException: If entity is not supported + """ + + if entity_type not in entity_type_to_param: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid entity_type: {entity_type}", + ) + + analytics.capture("skyvern-oss-agent-entity-artifacts-get") + + params = { + "organization_id": current_org.organization_id, + entity_type_to_param[entity_type]: entity_id, + } + + artifacts = await app.DATABASE.get_artifacts_by_entity_id(**params) + + if settings.ENV != "local" or settings.GENERATE_PRESIGNED_URLS: + signed_urls = await app.ARTIFACT_MANAGER.get_share_links(artifacts) + if signed_urls: + for i, artifact in enumerate(artifacts): + artifact.signed_url = signed_urls[i] + else: + LOG.warning( + "Failed to get signed urls for artifacts", + entity_type=entity_type, + entity_id=entity_id, + ) + + return ORJSONResponse([artifact.model_dump() for artifact in artifacts]) + + @base_router.get( "/tasks/{task_id}/steps/{step_id}/artifacts", tags=["agent"], From 3ff49cbeaf0ddb528b4e15ba0036bc01e0ea3405 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 15:30:00 +0100 Subject: [PATCH 32/40] Get step artifacts using entity id handler --- .../src/routes/tasks/detail/ScrollableActionList.tsx | 6 ++---- skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx b/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx index dc66828e2..e4bd3f83a 100644 --- a/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/ScrollableActionList.tsx @@ -11,7 +11,6 @@ import { } from "@radix-ui/react-icons"; import { useQueryClient } from "@tanstack/react-query"; import { ReactNode, useRef } from "react"; -import { useParams } from "react-router-dom"; import { ActionTypePill } from "./ActionTypePill"; type Props = { @@ -33,7 +32,6 @@ function ScrollableActionList({ showStreamOption, taskDetails, }: Props) { - const { taskId } = useParams(); const queryClient = useQueryClient(); const credentialGetter = useCredentialGetter(); const refs = useRef>( @@ -65,11 +63,11 @@ function ScrollableActionList({ onClick={() => onActiveIndexChange(i)} onMouseEnter={() => { queryClient.prefetchQuery({ - queryKey: ["task", taskId, "steps", action.stepId, "artifacts"], + queryKey: ["step", action.stepId, "artifacts"], queryFn: async () => { const client = await getClient(credentialGetter); return client - .get(`/tasks/${taskId}/steps/${action.stepId}/artifacts`) + .get(`/step/${action.stepId}/artifacts`) .then((response) => response.data); }, }); diff --git a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx index 1a0d85084..b09be32ba 100644 --- a/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx +++ b/skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx @@ -7,7 +7,7 @@ import { import { StatusBadge } from "@/components/StatusBadge"; import { Label } from "@/components/ui/label"; import { useQuery } from "@tanstack/react-query"; -import { useParams, useSearchParams } from "react-router-dom"; +import { useSearchParams } from "react-router-dom"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { ZoomableImage } from "@/components/ZoomableImage"; import { Skeleton } from "@/components/ui/skeleton"; @@ -25,7 +25,6 @@ type Props = { function StepArtifacts({ id, stepProps }: Props) { const [searchParams, setSearchParams] = useSearchParams(); const artifact = searchParams.get("artifact") ?? "info"; - const { taskId } = useParams(); const credentialGetter = useCredentialGetter(); const { data: artifacts, @@ -33,11 +32,11 @@ function StepArtifacts({ id, stepProps }: Props) { isError, error, } = useQuery>({ - queryKey: ["task", taskId, "steps", id, "artifacts"], + queryKey: ["step", id, "artifacts"], queryFn: async () => { const client = await getClient(credentialGetter); return client - .get(`/tasks/${taskId}/steps/${id}/artifacts`) + .get(`/step/${id}/artifacts`) .then((response) => response.data); }, }); From c3ce7d36c93a607fd3733a51fd53a3cb5575ab51 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 15:38:36 +0100 Subject: [PATCH 33/40] Remove logs --- skyvern/forge/sdk/artifact/manager.py | 1 - skyvern/forge/sdk/log_artifacts.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index 1cbcabc27..95fa0ad4b 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -221,7 +221,6 @@ async def update_artifact_data( # Fire and forget aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data)) - print("---",primary_key, artifact[primary_key]) if not artifact[primary_key]: raise ValueError(f"{primary_key} is required to update artifact data.") self.upload_aiotasks_map[artifact[primary_key]].append(aio_task) diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py index 7e16cc0c9..660e13ae4 100644 --- a/skyvern/forge/sdk/log_artifacts.py +++ b/skyvern/forge/sdk/log_artifacts.py @@ -104,7 +104,6 @@ async def _save_log_artifacts( if log_artifact: - print("log_artifact", log_artifact) await app.ARTIFACT_MANAGER.update_artifact_data( artifact_id=log_artifact.artifact_id, organization_id=organization_id, @@ -112,7 +111,6 @@ async def _save_log_artifacts( primary_key=primary_key_from_log_entity_type(log_entity_type), ) else: - print("log_artifact not found, creating new one") await app.ARTIFACT_MANAGER.create_log_artifact( organization_id=organization_id, step_id=step_id, From 72b3f15ef708bbd097f63ee81aa7502ea3f27a32 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 15:46:03 +0100 Subject: [PATCH 34/40] Record workflow run logs --- skyvern/forge/sdk/artifact/manager.py | 1 + skyvern/forge/sdk/db/client.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index 95fa0ad4b..17b12d576 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -105,6 +105,7 @@ async def create_log_artifact( ) -> str: artifact_id = generate_artifact_id() uri = app.STORAGE.build_log_uri(log_entity_type, log_entity_id, artifact_type) + print("--- [create_log_artifact]", log_entity_type, log_entity_id, artifact_id, uri) return await self._create_artifact( aio_task_primary_key=log_entity_id, artifact_id=artifact_id, diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index 4039e8e39..6243729c4 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -75,6 +75,7 @@ ) from skyvern.webeye.actions.actions import Action from skyvern.webeye.actions.models import AgentStepOutput +from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs LOG = structlog.get_logger() @@ -1252,6 +1253,7 @@ async def update_workflow_run( workflow_run.failure_reason = failure_reason await session.commit() await session.refresh(workflow_run) + await save_workflow_run_logs(workflow_run_id) return convert_to_workflow_run(workflow_run) LOG.error( "WorkflowRun not found, nothing to update", From aac781feaf7a31acedaabe8360d01a37229c4fd4 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 16:49:11 +0100 Subject: [PATCH 35/40] Save task logs --- skyvern/forge/agent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index fe8720483..d3eab0334 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -45,7 +45,7 @@ from skyvern.forge.async_operations import AgentPhase, AsyncOperationPool from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.api.files import get_path_for_workflow_download_directory, list_files_in_directory, rename_file -from skyvern.forge.sdk.artifact.models import ArtifactType, LogEntityType +from skyvern.forge.sdk.artifact.models import ArtifactType from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url @@ -74,7 +74,7 @@ from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame -from skyvern.forge.sdk.log_artifacts import save_step_logs +from skyvern.forge.sdk.log_artifacts import save_step_logs, save_task_logs LOG = structlog.get_logger() @@ -1819,6 +1819,7 @@ async def update_task( for key, value in updates.items() if getattr(task, key) != value } + await save_task_logs(task.task_id) LOG.info("Updating task in db", task_id=task.task_id, diff=update_comparison) return await app.DATABASE.update_task( task.task_id, From a86d81dcfbee2cfbd8f44c5e841995b09a9e0c09 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Fri, 13 Dec 2024 16:58:00 +0100 Subject: [PATCH 36/40] Remove print --- skyvern/forge/sdk/artifact/manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index 17b12d576..95fa0ad4b 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -105,7 +105,6 @@ async def create_log_artifact( ) -> str: artifact_id = generate_artifact_id() uri = app.STORAGE.build_log_uri(log_entity_type, log_entity_id, artifact_type) - print("--- [create_log_artifact]", log_entity_type, log_entity_id, artifact_id, uri) return await self._create_artifact( aio_task_primary_key=log_entity_id, artifact_id=artifact_id, From 727dd7ec6c246d2667af07c99d421d15751ec741 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Mon, 16 Dec 2024 14:10:43 +0100 Subject: [PATCH 37/40] Revert change to InvalidUrl type --- skyvern/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 96e7f8000..522209b75 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -504,7 +504,7 @@ def __init__(self, message: str) -> None: super().__init__(message) -class InvalidUrl(SkyvernException): +class InvalidUrl(SkyvernHTTPException): def __init__(self, url: str) -> None: super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.") From 23122812c2c110986993ea1e29b5cbe0cb7a2863 Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Mon, 16 Dec 2024 14:12:46 +0100 Subject: [PATCH 38/40] Add complete_action.action_order back --- skyvern/forge/agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 8a50127dd..0b84d71b2 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -945,6 +945,7 @@ async def agent_step( complete_action.task_id = task.task_id complete_action.step_id = step.step_id complete_action.step_order = step.order + complete_action.action_order = len(detailed_agent_step_output.actions_and_results) complete_results = await ActionHandler.handle_action( scraped_page, task, step, working_page, complete_action ) From d66e67b42605e53c8546f94da4aa0472b1cf443e Mon Sep 17 00:00:00 2001 From: Maksim Ivanov Date: Mon, 16 Dec 2024 14:58:36 +0100 Subject: [PATCH 39/40] Add with_skyvern_context decorator --- skyvern/forge/sdk/log_artifacts.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py index 660e13ae4..ccbd1d744 100644 --- a/skyvern/forge/sdk/log_artifacts.py +++ b/skyvern/forge/sdk/log_artifacts.py @@ -1,6 +1,9 @@ import json import structlog +import functools +from typing import Callable + from skyvern.forge import app from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder @@ -9,6 +12,21 @@ LOG = structlog.get_logger() +def with_skyvern_context(func: Callable): + """ + Decorator to ensure the presence of a Skyvern context for a function. + If no context is available, the function will not execute. + """ + @functools.wraps(func) + async def wrapper(*args, **kwargs): + context = skyvern_context.current() + if not context: + LOG.warning("No Skyvern context found, skipping function execution", func=func.__name__) + return + return await func(*args, **kwargs) + + return wrapper + def primary_key_from_log_entity_type(log_entity_type: LogEntityType) -> str: if log_entity_type == LogEntityType.STEP: return "step_id" @@ -21,6 +39,7 @@ def primary_key_from_log_entity_type(log_entity_type: LogEntityType) -> str: else: raise ValueError(f"Invalid log entity type: {log_entity_type}") +@with_skyvern_context async def save_step_logs(step_id: str) -> None: log = skyvern_context.current().log organization_id = skyvern_context.current().organization_id @@ -36,6 +55,7 @@ async def save_step_logs(step_id: str) -> None: ) +@with_skyvern_context async def save_task_logs(task_id: str) -> None: log = skyvern_context.current().log organization_id = skyvern_context.current().organization_id @@ -51,6 +71,7 @@ async def save_task_logs(task_id: str) -> None: ) +@with_skyvern_context async def save_workflow_run_logs(workflow_run_id: str) -> None: log = skyvern_context.current().log organization_id = skyvern_context.current().organization_id @@ -66,6 +87,7 @@ async def save_workflow_run_logs(workflow_run_id: str) -> None: ) +@with_skyvern_context async def save_workflow_run_block_logs(workflow_run_block_id: str) -> None: log = skyvern_context.current().log organization_id = skyvern_context.current().organization_id From 63829fa06e0f390e58aff1052f2d8cb01fefdc52 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Mon, 16 Dec 2024 20:00:28 -0800 Subject: [PATCH 40/40] address typing fix --- skyvern/forge/agent.py | 3 +- skyvern/forge/sdk/artifact/manager.py | 14 +----- skyvern/forge/sdk/db/client.py | 6 +-- skyvern/forge/sdk/log_artifacts.py | 55 ++++++++-------------- skyvern/forge/sdk/routes/agent_protocol.py | 12 +++-- skyvern/forge/skyvern_json_encoder.py | 37 +++++++-------- skyvern/forge/skyvern_log_encoder.py | 45 +++++++++--------- 7 files changed, 71 insertions(+), 101 deletions(-) diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 0b84d71b2..707c060cd 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -50,6 +50,7 @@ from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url from skyvern.forge.sdk.db.enums import TaskType +from skyvern.forge.sdk.log_artifacts import save_step_logs, save_task_logs from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.schemas.organizations import Organization from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus @@ -74,8 +75,6 @@ from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website from skyvern.webeye.utils.page import SkyvernFrame -from skyvern.forge.sdk.log_artifacts import save_step_logs, save_task_logs - LOG = structlog.get_logger() diff --git a/skyvern/forge/sdk/artifact/manager.py b/skyvern/forge/sdk/artifact/manager.py index 95fa0ad4b..68ed10e56 100644 --- a/skyvern/forge/sdk/artifact/manager.py +++ b/skyvern/forge/sdk/artifact/manager.py @@ -1,7 +1,6 @@ import asyncio import time from collections import defaultdict -from typing import Literal import structlog @@ -13,14 +12,6 @@ LOG = structlog.get_logger(__name__) -PRIMARY_KEY = Literal[ - "task_id", - "observer_thought_id", - "observer_cruise_id", - "step_id", - "workflow_run_id", - "workflow_run_block_id", -] class ArtifactManager: # task_id -> list of aio_tasks for uploading artifacts @@ -117,7 +108,7 @@ async def create_log_artifact( organization_id=organization_id, data=data, path=path, - ) + ) async def create_observer_thought_artifact( self, @@ -211,7 +202,7 @@ async def update_artifact_data( artifact_id: str | None, organization_id: str | None, data: bytes, - primary_key: PRIMARY_KEY = "task_id", + primary_key: str = "task_id", ) -> None: if not artifact_id or not organization_id: return None @@ -225,7 +216,6 @@ async def update_artifact_data( raise ValueError(f"{primary_key} is required to update artifact data.") self.upload_aiotasks_map[artifact[primary_key]].append(aio_task) - async def retrieve_artifact(self, artifact: Artifact) -> bytes | None: return await app.STORAGE.retrieve_artifact(artifact) diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index 5f1ec2dd4..b8c2c5a6e 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -51,6 +51,7 @@ convert_to_workflow_run_output_parameter, convert_to_workflow_run_parameter, ) +from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs from skyvern.forge.sdk.models import Step, StepStatus from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverCruiseStatus, ObserverThought from skyvern.forge.sdk.schemas.organizations import Organization, OrganizationAuthToken @@ -75,7 +76,6 @@ ) from skyvern.webeye.actions.actions import Action from skyvern.webeye.actions.models import AgentStepOutput -from skyvern.forge.sdk.log_artifacts import save_workflow_run_logs LOG = structlog.get_logger() @@ -842,9 +842,7 @@ async def get_artifacts_by_entity_id( query = query.filter_by(organization_id=organization_id) query = query.order_by(ArtifactModel.created_at.desc()) - if artifacts := ( - await session.scalars(query) - ).all(): + if artifacts := (await session.scalars(query)).all(): return [convert_to_artifact(artifact, self.debug_enabled) for artifact in artifacts] else: return [] diff --git a/skyvern/forge/sdk/log_artifacts.py b/skyvern/forge/sdk/log_artifacts.py index ccbd1d744..1d555abdc 100644 --- a/skyvern/forge/sdk/log_artifacts.py +++ b/skyvern/forge/sdk/log_artifacts.py @@ -1,31 +1,15 @@ import json -import structlog -import functools -from typing import Callable +import structlog from skyvern.forge import app +from skyvern.forge.sdk.artifact.models import ArtifactType, LogEntityType from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder from skyvern.forge.skyvern_log_encoder import SkyvernLogEncoder -from skyvern.forge.sdk.artifact.models import ArtifactType, LogEntityType LOG = structlog.get_logger() -def with_skyvern_context(func: Callable): - """ - Decorator to ensure the presence of a Skyvern context for a function. - If no context is available, the function will not execute. - """ - @functools.wraps(func) - async def wrapper(*args, **kwargs): - context = skyvern_context.current() - if not context: - LOG.warning("No Skyvern context found, skipping function execution", func=func.__name__) - return - return await func(*args, **kwargs) - - return wrapper def primary_key_from_log_entity_type(log_entity_type: LogEntityType) -> str: if log_entity_type == LogEntityType.STEP: @@ -39,10 +23,11 @@ def primary_key_from_log_entity_type(log_entity_type: LogEntityType) -> str: else: raise ValueError(f"Invalid log entity type: {log_entity_type}") -@with_skyvern_context + async def save_step_logs(step_id: str) -> None: - log = skyvern_context.current().log - organization_id = skyvern_context.current().organization_id + context = skyvern_context.ensure_context() + log = context.log + organization_id = context.organization_id current_step_log = [entry for entry in log if entry.get("step_id", "") == step_id] @@ -55,10 +40,10 @@ async def save_step_logs(step_id: str) -> None: ) -@with_skyvern_context async def save_task_logs(task_id: str) -> None: - log = skyvern_context.current().log - organization_id = skyvern_context.current().organization_id + context = skyvern_context.ensure_context() + log = context.log + organization_id = context.organization_id current_task_log = [entry for entry in log if entry.get("task_id", "") == task_id] @@ -71,10 +56,10 @@ async def save_task_logs(task_id: str) -> None: ) -@with_skyvern_context async def save_workflow_run_logs(workflow_run_id: str) -> None: - log = skyvern_context.current().log - organization_id = skyvern_context.current().organization_id + context = skyvern_context.ensure_context() + log = context.log + organization_id = context.organization_id current_workflow_run_log = [entry for entry in log if entry.get("workflow_run_id", "") == workflow_run_id] @@ -87,11 +72,13 @@ async def save_workflow_run_logs(workflow_run_id: str) -> None: ) -@with_skyvern_context async def save_workflow_run_block_logs(workflow_run_block_id: str) -> None: - log = skyvern_context.current().log - organization_id = skyvern_context.current().organization_id - current_workflow_run_block_log = [entry for entry in log if entry.get("workflow_run_block_id", "") == workflow_run_block_id] + context = skyvern_context.ensure_context() + log = context.log + organization_id = context.organization_id + current_workflow_run_block_log = [ + entry for entry in log if entry.get("workflow_run_block_id", "") == workflow_run_block_id + ] await _save_log_artifacts( log=current_workflow_run_block_log, @@ -106,7 +93,7 @@ async def _save_log_artifacts( log: list[dict], log_entity_type: LogEntityType, log_entity_id: str, - organization_id: str, + organization_id: str | None, step_id: str | None = None, task_id: str | None = None, workflow_run_id: str | None = None, @@ -124,7 +111,6 @@ async def _save_log_artifacts( organization_id=organization_id, ) - if log_artifact: await app.ARTIFACT_MANAGER.update_artifact_data( artifact_id=log_artifact.artifact_id, @@ -175,7 +161,7 @@ async def _save_log_artifacts( artifact_type=ArtifactType.SKYVERN_LOG, data=formatted_log.encode(), ) - except Exception as e: + except Exception: LOG.error( "Failed to save log artifacts", log_entity_type=log_entity_type, @@ -187,4 +173,3 @@ async def _save_log_artifacts( workflow_run_block_id=workflow_run_block_id, exc_info=True, ) - diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 1f185062f..e5ea80577 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -1,8 +1,8 @@ import datetime import hashlib import uuid -from typing import Annotated, Any from enum import Enum +from typing import Annotated, Any import structlog import yaml @@ -474,6 +474,7 @@ class EntityType(str, Enum): WORKFLOW_RUN = "workflow_run" WORKFLOW_RUN_BLOCK = "workflow_run_block" + entity_type_to_param = { EntityType.STEP: "step_id", EntityType.TASK: "task_id", @@ -481,6 +482,7 @@ class EntityType(str, Enum): EntityType.WORKFLOW_RUN_BLOCK: "workflow_run_block_id", } + @base_router.get( "/{entity_type}/{entity_id}/artifacts", tags=["agent"], @@ -499,15 +501,15 @@ async def get_agent_entity_artifacts( ) -> Response: """ Get all artifacts for an entity (step, task, workflow_run). - + Args: entity_type: Type of entity to fetch artifacts for entity_id: ID of the entity current_org: Current organization from auth - + Returns: List of artifacts for the entity - + Raises: HTTPException: If entity is not supported """ @@ -525,7 +527,7 @@ async def get_agent_entity_artifacts( entity_type_to_param[entity_type]: entity_id, } - artifacts = await app.DATABASE.get_artifacts_by_entity_id(**params) + artifacts = await app.DATABASE.get_artifacts_by_entity_id(**params) # type: ignore if settings.ENV != "local" or settings.GENERATE_PRESIGNED_URLS: signed_urls = await app.ARTIFACT_MANAGER.get_share_links(artifacts) diff --git a/skyvern/forge/skyvern_json_encoder.py b/skyvern/forge/skyvern_json_encoder.py index 4103aee1c..206334acb 100644 --- a/skyvern/forge/skyvern_json_encoder.py +++ b/skyvern/forge/skyvern_json_encoder.py @@ -1,29 +1,31 @@ import json from typing import Any + class SkyvernJSONLogEncoder(json.JSONEncoder): """Custom JSON encoder for Skyvern logs that handles non-serializable objects""" + def default(self, obj: Any) -> Any: - if hasattr(obj, 'model_dump'): + if hasattr(obj, "model_dump"): return self._encode_value(obj.model_dump()) - if hasattr(obj, '__dataclass_fields__'): + if hasattr(obj, "__dataclass_fields__"): return self._encode_value({k: getattr(obj, k) for k in obj.__dataclass_fields__}) - if hasattr(obj, 'to_dict'): + if hasattr(obj, "to_dict"): return self._encode_value(obj.to_dict()) - if hasattr(obj, 'asdict'): + if hasattr(obj, "asdict"): return self._encode_value(obj.asdict()) - if hasattr(obj, '__dict__'): + if hasattr(obj, "__dict__"): return { - 'type': obj.__class__.__name__, - 'attributes': { - k: self._encode_value(v) - for k, v in obj.__dict__.items() - if not k.startswith('_') and not callable(v) - } + "type": obj.__class__.__name__, + "attributes": { + k: self._encode_value(v) + for k, v in obj.__dict__.items() + if not k.startswith("_") and not callable(v) + }, } try: @@ -35,20 +37,17 @@ def _encode_value(self, value: Any) -> Any: """Helper method to encode nested values recursively""" if isinstance(value, (str, int, float, bool, type(None))): return value - + if isinstance(value, (list, tuple)): return [self._encode_value(item) for item in value] - + if isinstance(value, dict): - return { - self._encode_value(k): self._encode_value(v) - for k, v in value.items() - } - + return {self._encode_value(k): self._encode_value(v) for k, v in value.items()} + # For any other type, try to encode it using our custom logic return self.default(value) @classmethod - def dumps(cls, obj: Any, **kwargs) -> str: + def dumps(cls, obj: Any, **kwargs: Any) -> str: """Helper method to properly encode objects to JSON string""" return json.dumps(obj, cls=cls, **kwargs) diff --git a/skyvern/forge/skyvern_log_encoder.py b/skyvern/forge/skyvern_log_encoder.py index d90bb177d..9b8dd23e9 100644 --- a/skyvern/forge/skyvern_log_encoder.py +++ b/skyvern/forge/skyvern_log_encoder.py @@ -1,45 +1,47 @@ -from typing import Any, Dict, List -from datetime import datetime import json +from datetime import datetime +from typing import Any -from structlog.dev import ConsoleRenderer import structlog +from structlog.dev import ConsoleRenderer from skyvern.forge.skyvern_json_encoder import SkyvernJSONLogEncoder + LOG = structlog.get_logger() + class SkyvernLogEncoder: """Encodes Skyvern logs from JSON format to human-readable string format""" - def __init__(self): + def __init__(self) -> None: self.renderer = ConsoleRenderer( pad_event=30, colors=False, ) @classmethod - def _format_value(cls, value): + def _format_value(cls, value: Any) -> str: return SkyvernJSONLogEncoder.dumps(value, sort_keys=True) @staticmethod - def _parse_json_entry(entry: Dict[str, Any]) -> Dict[str, Any]: + def _parse_json_entry(entry: dict[str, Any]) -> dict[str, Any]: """Convert a JSON log entry into our standard format.""" - event = entry.get('message', entry.get('event', '')) + event = entry.get("message", entry.get("event", "")) clean_entry = { - 'timestamp': entry.get('timestamp', datetime.utcnow().isoformat() + "Z"), - 'level': entry.get('level', 'info').lower(), - 'event': event + "timestamp": entry.get("timestamp", datetime.utcnow().isoformat() + "Z"), + "level": entry.get("level", "info").lower(), + "event": event, } for key, value in entry.items(): - if key not in ('timestamp', 'level', 'event', 'message'): + if key not in ("timestamp", "level", "event", "message"): clean_entry[key] = SkyvernLogEncoder._format_value(value) return clean_entry @classmethod - def encode(cls, log_entries: List[Dict[str, Any]]) -> str: + def encode(cls, log_entries: list[dict[str, Any]]) -> str: """ Encode log entries into formatted string output using structlog's ConsoleRenderer. @@ -58,7 +60,7 @@ def encode(cls, log_entries: List[Dict[str, Any]]) -> str: try: entry = json.loads(entry) except json.JSONDecodeError: - entry = {'event': entry, 'level': 'info'} + entry = {"event": entry, "level": "info"} parsed_entry = cls._parse_json_entry(entry) @@ -66,20 +68,15 @@ def encode(cls, log_entries: List[Dict[str, Any]]) -> str: formatted_lines.append(formatted_line) except Exception as e: - LOG.error( - "Failed to format log entry", - entry=entry, - error=str(e), - exc_info=True - ) + LOG.error("Failed to format log entry", entry=entry, error=str(e), exc_info=True) # Add error line to output error_timestamp = datetime.utcnow().isoformat() + "Z" error_entry = { - 'timestamp': error_timestamp, - 'level': 'error', - 'event': 'Failed to format log entry', - 'entry': str(entry), - 'error': str(e) + "timestamp": error_timestamp, + "level": "error", + "event": "Failed to format log entry", + "entry": str(entry), + "error": str(e), } formatted_lines.append(encoder.renderer(None, None, error_entry))