Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record logs into step artifacts #1339

Merged
merged 46 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
79e5763
Record log entries into context
satansdeer Dec 6, 2024
c3cbf4b
Add log field to context
satansdeer Dec 6, 2024
f3debf5
Add SKYVERN_LOG artifact type
satansdeer Dec 6, 2024
c1e7ff6
Add skyvern log tab on the frontend
satansdeer Dec 6, 2024
b1f14c6
Add SkyvernLogEncoder to handle non-serializable objects
satansdeer Dec 6, 2024
b2cb9c4
Record logs to an artifact
satansdeer Dec 6, 2024
eb6fc3d
Record only logs corresponding to a step
satansdeer Dec 6, 2024
57cd772
Recover empty line
satansdeer Dec 6, 2024
419e12b
Add text log encoder
satansdeer Dec 9, 2024
ddbc340
re-enable upload block (#1324)
wintonzheng Dec 5, 2024
f313326
remove no latest screenshot error log (#1325)
LawyZheng Dec 5, 2024
9b35651
Put a guard in workflow save error detail (#1326)
wintonzheng Dec 5, 2024
2d90fed
urlencode download suffix (#1327)
LawyZheng Dec 5, 2024
be0e817
wait for downloads to be done (#1328)
LawyZheng Dec 5, 2024
5ee8c3b
Skyvern Forms UI (#1330)
wintonzheng Dec 5, 2024
f8a6b58
Fix a navigation bug with saved tasks (#1331)
wintonzheng Dec 5, 2024
8bf863c
workflow run block (#1332)
wintonzheng Dec 6, 2024
5d32d53
forloop metadata variables (#1334)
LawyZheng Dec 6, 2024
3c37a4a
auto prepend scheme to url (#1335)
LawyZheng Dec 6, 2024
1aa1146
rename GEMINI_FLUSH->GEMINI_FLASH (#1333)
nmfisher Dec 6, 2024
f3bb2fe
bump navigation max retry to 5 (#1336)
LawyZheng Dec 6, 2024
6606fba
add workflow_run_id column to artifacts + ObserverCruise and Observer…
wintonzheng Dec 6, 2024
aabc0fc
add observer cruise id to artifacts table (#1337)
wintonzheng Dec 6, 2024
b83c648
ObserverThought reproduce migration script (#1338)
wintonzheng Dec 6, 2024
40e9dd3
Import structlog
satansdeer Dec 11, 2024
5c8488d
Define build_log_uri method
satansdeer Dec 12, 2024
127b8a6
Merge main
satansdeer Dec 12, 2024
4742fc4
Extract save logs functions
satansdeer Dec 13, 2024
974b37e
Pass step_id to log artifact methods
satansdeer Dec 13, 2024
d48f792
Define get_artifact_by_entity_id
satansdeer Dec 13, 2024
f57c7b6
Reuse log artifacts when saving
satansdeer Dec 13, 2024
c8274c3
Introduce get artifacts by entity id handler
satansdeer Dec 13, 2024
3ff49cb
Get step artifacts using entity id handler
satansdeer Dec 13, 2024
c3ce7d3
Remove logs
satansdeer Dec 13, 2024
72b3f15
Record workflow run logs
satansdeer Dec 13, 2024
aac781f
Save task logs
satansdeer Dec 13, 2024
ff86b52
Merge branch 'main' into maksimi/record-logs
satansdeer Dec 13, 2024
a86d81d
Remove print
satansdeer Dec 13, 2024
0f0633f
Merge main
satansdeer Dec 16, 2024
727dd7e
Revert change to InvalidUrl type
satansdeer Dec 16, 2024
2312281
Add complete_action.action_order back
satansdeer Dec 16, 2024
d66e67b
Add with_skyvern_context decorator
satansdeer Dec 16, 2024
2cf5f69
Merge branch 'main' into shu/maksimi/record-logs-new
wintonzheng Dec 17, 2024
63829fa
address typing
wintonzheng Dec 17, 2024
9eda7ca
Add ENABLE_LOG_ARTIFACTS env variable; Disable log artifacts by default
satansdeer Dec 17, 2024
631f898
Pre-commit check
satansdeer Dec 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions skyvern-frontend/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export const ArtifactType = {
LLMPrompt: "llm_prompt",
LLMRequest: "llm_request",
HTMLScrape: "html_scrape",
SkyvernLog: "skyvern_log",
SkyvernLogRaw: "skyvern_log_raw",
} as const;

export type ArtifactType = (typeof ArtifactType)[keyof typeof ArtifactType];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import {
} from "@radix-ui/react-icons";
import { useQueryClient } from "@tanstack/react-query";
import { ReactNode, useRef } from "react";
import { useParams } from "react-router-dom";
import { ActionTypePill } from "./ActionTypePill";

type Props = {
Expand All @@ -33,7 +32,6 @@ function ScrollableActionList({
showStreamOption,
taskDetails,
}: Props) {
const { taskId } = useParams();
const queryClient = useQueryClient();
const credentialGetter = useCredentialGetter();
const refs = useRef<Array<HTMLDivElement | null>>(
Expand Down Expand Up @@ -65,11 +63,11 @@ function ScrollableActionList({
onClick={() => onActiveIndexChange(i)}
onMouseEnter={() => {
queryClient.prefetchQuery({
queryKey: ["task", taskId, "steps", action.stepId, "artifacts"],
queryKey: ["step", action.stepId, "artifacts"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@msalihaltun can you review?

queryFn: async () => {
const client = await getClient(credentialGetter);
return client
.get(`/tasks/${taskId}/steps/${action.stepId}/artifacts`)
.get(`/step/${action.stepId}/artifacts`)
.then((response) => response.data);
},
});
Expand Down
15 changes: 11 additions & 4 deletions skyvern-frontend/src/routes/tasks/detail/StepArtifacts.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
import { StatusBadge } from "@/components/StatusBadge";
import { Label } from "@/components/ui/label";
import { useQuery } from "@tanstack/react-query";
import { useParams, useSearchParams } from "react-router-dom";
import { useSearchParams } from "react-router-dom";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { ZoomableImage } from "@/components/ZoomableImage";
import { Skeleton } from "@/components/ui/skeleton";
Expand All @@ -25,19 +25,18 @@ type Props = {
function StepArtifacts({ id, stepProps }: Props) {
const [searchParams, setSearchParams] = useSearchParams();
const artifact = searchParams.get("artifact") ?? "info";
const { taskId } = useParams();
const credentialGetter = useCredentialGetter();
const {
data: artifacts,
isFetching,
isError,
error,
} = useQuery<Array<ArtifactApiResponse>>({
queryKey: ["task", taskId, "steps", id, "artifacts"],
queryKey: ["step", id, "artifacts"],
queryFn: async () => {
const client = await getClient(credentialGetter);
return client
.get(`/tasks/${taskId}/steps/${id}/artifacts`)
.get(`/step/${id}/artifacts`)
.then((response) => response.data);
},
});
Expand Down Expand Up @@ -79,6 +78,10 @@ function StepArtifacts({ id, stepProps }: Props) {
(artifact) => artifact.artifact_type === ArtifactType.HTMLScrape,
);

const skyvernLog = artifacts?.filter(
(artifact) => artifact.artifact_type === ArtifactType.SkyvernLog,
);

return (
<Tabs
value={artifact}
Expand Down Expand Up @@ -108,6 +111,7 @@ function StepArtifacts({ id, stepProps }: Props) {
<TabsTrigger value="llm_response_parsed">Action List</TabsTrigger>
<TabsTrigger value="html_raw">HTML (Raw)</TabsTrigger>
<TabsTrigger value="llm_request">LLM Request (Raw)</TabsTrigger>
<TabsTrigger value="skyvern_log">Skyvern Log</TabsTrigger>
</TabsList>
<TabsContent value="info">
<div className="flex flex-col gap-6 p-4">
Expand Down Expand Up @@ -209,6 +213,9 @@ function StepArtifacts({ id, stepProps }: Props) {
<TabsContent value="llm_request">
{llmRequest ? <Artifact type="json" artifacts={llmRequest} /> : null}
</TabsContent>
<TabsContent value="skyvern_log">
{skyvernLog ? <Artifact type="text" artifacts={skyvernLog} /> : null}
</TabsContent>
</Tabs>
);
}
Expand Down
2 changes: 1 addition & 1 deletion skyvern/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ def __init__(self, message: str) -> None:
super().__init__(message)


class InvalidUrl(SkyvernHTTPException):
class InvalidUrl(SkyvernException):
satansdeer marked this conversation as resolved.
Show resolved Hide resolved
def __init__(self, url: str) -> None:
super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.")

Expand Down
7 changes: 6 additions & 1 deletion skyvern/forge/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
from skyvern.webeye.scraper.scraper import ElementTreeFormat, ScrapedPage, scrape_website
from skyvern.webeye.utils.page import SkyvernFrame

from skyvern.forge.sdk.log_artifacts import save_step_logs, save_task_logs

LOG = structlog.get_logger()


Expand Down Expand Up @@ -943,7 +945,6 @@ async def agent_step(
complete_action.task_id = task.task_id
complete_action.step_id = step.step_id
complete_action.step_order = step.order
complete_action.action_order = len(detailed_agent_step_output.actions_and_results)
satansdeer marked this conversation as resolved.
Show resolved Hide resolved
complete_results = await ActionHandler.handle_action(
scraped_page, task, step, working_page, complete_action
)
Expand Down Expand Up @@ -1783,6 +1784,9 @@ async def update_step(
step_id=step.step_id,
diff=update_comparison,
)

await save_step_logs(step.step_id)

return await app.DATABASE.update_step(
task_id=step.task_id,
step_id=step.step_id,
Expand Down Expand Up @@ -1815,6 +1819,7 @@ async def update_task(
for key, value in updates.items()
if getattr(task, key) != value
}
await save_task_logs(task.task_id)
LOG.info("Updating task in db", task_id=task.task_id, diff=update_comparison)
return await app.DATABASE.update_task(
task.task_id,
Expand Down
58 changes: 44 additions & 14 deletions skyvern/forge/sdk/artifact/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@
import structlog

from skyvern.forge import app
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.db.id import generate_artifact_id
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought

LOG = structlog.get_logger(__name__)

PRIMARY_KEY = Literal[
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LawyZheng we're introducing new primary keys for artifacts

Now i think we need to ensure those aio tasks for new primary keys (workflow_run_id, step_id) will be awaited at cleanup time.

"task_id",
"observer_thought_id",
"observer_cruise_id",
"step_id",
"workflow_run_id",
"workflow_run_block_id",
]

class ArtifactManager:
# task_id -> list of aio_tasks for uploading artifacts
Expand Down Expand Up @@ -82,6 +90,35 @@ async def create_artifact(
path=path,
)

async def create_log_artifact(
self,
log_entity_type: LogEntityType,
log_entity_id: str,
artifact_type: ArtifactType,
step_id: str | None = None,
task_id: str | None = None,
workflow_run_id: str | None = None,
workflow_run_block_id: str | None = None,
organization_id: str | None = None,
data: bytes | None = None,
path: str | None = None,
) -> str:
artifact_id = generate_artifact_id()
uri = app.STORAGE.build_log_uri(log_entity_type, log_entity_id, artifact_type)
return await self._create_artifact(
aio_task_primary_key=log_entity_id,
artifact_id=artifact_id,
artifact_type=artifact_type,
uri=uri,
step_id=step_id,
task_id=task_id,
workflow_run_id=workflow_run_id,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
data=data,
path=path,
)

async def create_observer_thought_artifact(
self,
observer_thought: ObserverThought,
Expand Down Expand Up @@ -174,7 +211,7 @@ async def update_artifact_data(
artifact_id: str | None,
organization_id: str | None,
data: bytes,
primary_key: Literal["task_id", "observer_thought_id", "observer_cruise_id"] = "task_id",
primary_key: PRIMARY_KEY = "task_id",
) -> None:
if not artifact_id or not organization_id:
return None
Expand All @@ -183,18 +220,11 @@ async def update_artifact_data(
return
# Fire and forget
aio_task = asyncio.create_task(app.STORAGE.store_artifact(artifact, data))
if primary_key == "task_id":
if not artifact.task_id:
raise ValueError("Task ID is required to update artifact data.")
self.upload_aiotasks_map[artifact.task_id].append(aio_task)
elif primary_key == "observer_thought_id":
if not artifact.observer_thought_id:
raise ValueError("Observer Thought ID is required to update artifact data.")
self.upload_aiotasks_map[artifact.observer_thought_id].append(aio_task)
elif primary_key == "observer_cruise_id":
if not artifact.observer_cruise_id:
raise ValueError("Observer Cruise ID is required to update artifact data.")
self.upload_aiotasks_map[artifact.observer_cruise_id].append(aio_task)

if not artifact[primary_key]:
raise ValueError(f"{primary_key} is required to update artifact data.")
self.upload_aiotasks_map[artifact[primary_key]].append(aio_task)


async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
return await app.STORAGE.retrieve_artifact(artifact)
Expand Down
14 changes: 14 additions & 0 deletions skyvern/forge/sdk/artifact/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from datetime import datetime
from enum import StrEnum
from typing import Any

from pydantic import BaseModel, Field, field_serializer

Expand All @@ -10,6 +11,9 @@ class ArtifactType(StrEnum):
RECORDING = "recording"
BROWSER_CONSOLE_LOG = "browser_console_log"

SKYVERN_LOG = "skyvern_log"
SKYVERN_LOG_RAW = "skyvern_log_raw"

# DEPRECATED. pls use SCREENSHOT_LLM, SCREENSHOT_ACTION or SCREENSHOT_FINAL
SCREENSHOT = "screenshot"

Expand Down Expand Up @@ -70,3 +74,13 @@ def serialize_datetime_to_isoformat(self, value: datetime) -> str:
observer_thought_id: str | None = None
signed_url: str | None = None
organization_id: str | None = None

def __getitem__(self, key: str) -> Any:
return getattr(self, key)


class LogEntityType(StrEnum):
STEP = "step"
TASK = "task"
WORKFLOW_RUN = "workflow_run"
WORKFLOW_RUN_BLOCK = "workflow_run_block"
8 changes: 7 additions & 1 deletion skyvern/forge/sdk/artifact/storage/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod

from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought

Expand All @@ -11,6 +11,8 @@
ArtifactType.SCREENSHOT_LLM: "png",
ArtifactType.SCREENSHOT_ACTION: "png",
ArtifactType.SCREENSHOT_FINAL: "png",
ArtifactType.SKYVERN_LOG: "log",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Step log..

Task log..

Workflow log...

Copy link
Contributor

@wintonzheng wintonzheng Dec 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think adding STEP_LOG, TASK_LOG, WORKFLOW_LOG helps.

It's better to use step_id, task_id and workflow_run_id columns to filter artifacts. Also, step log and task log are not mutually exclusive. TaskLogs should be the super set of StepLogs. WorkflowRunLogs should be the super set of TaskLogs. We don't have to create multiple sets of log artifacts that have duplicated data.

Copy link
Contributor

@suchintan suchintan Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if I agree with this -- why not have multiple artifact types that may or may not include duplicate data? It makes the implementation + writing of data + reading of data dead simple.

Why is it better to use step_id / task_id / workflow_run_id columns to filter artifacts w/ the same name + type?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly in this case we'll store workflow and task level logs in the step artifact folders

Current implementation of bulid uri always assumes step:

def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        return f"file://{self.artifact_path}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

We could define a build uri method for logs that would take entity type and id

So that the uri would look like this:

return f"file://{self.artifact_path}/{entity_type}_{entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}"

This way we would be able to fetch logs on the frontend using a similar query as for the other artifacts

We would be able to use artifact type for all the levels

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@suchintan no concern of data duplication. you're wright, having more artifact_types makes the implementation easier.

ArtifactType.SKYVERN_LOG_RAW: "json",
ArtifactType.LLM_PROMPT: "txt",
ArtifactType.LLM_REQUEST: "json",
ArtifactType.LLM_RESPONSE: "json",
Expand All @@ -34,6 +36,10 @@ class BaseStorage(ABC):
def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
pass

@abstractmethod
def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str:
pass

@abstractmethod
def build_observer_thought_uri(
self, artifact_id: str, observer_thought: ObserverThought, artifact_type: ArtifactType
Expand Down
6 changes: 5 additions & 1 deletion skyvern/forge/sdk/artifact/storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from skyvern.config import settings
from skyvern.forge.sdk.api.files import get_download_dir, get_skyvern_temp_dir
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought
Expand All @@ -24,6 +24,10 @@ def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -
file_ext = FILE_EXTENTSION_MAP[artifact_type]
return f"file://{self.artifact_path}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str:
file_ext = FILE_EXTENTSION_MAP[artifact_type]
return f"file://{self.artifact_path}/logs/{log_entity_type}/{log_entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}"

def build_observer_thought_uri(
self, artifact_id: str, observer_thought: ObserverThought, artifact_type: ArtifactType
) -> str:
Expand Down
6 changes: 5 additions & 1 deletion skyvern/forge/sdk/artifact/storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
make_temp_directory,
unzip_files,
)
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType, LogEntityType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.observers import ObserverCruise, ObserverThought
Expand All @@ -27,6 +27,10 @@ def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -
file_ext = FILE_EXTENTSION_MAP[artifact_type]
return f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/{step.order:02d}_{step.retry_index}_{step.step_id}/{datetime.utcnow().isoformat()}_{artifact_id}_{artifact_type}.{file_ext}"

def build_log_uri(self, log_entity_type: LogEntityType, log_entity_id: str, artifact_type: ArtifactType) -> str:
file_ext = FILE_EXTENTSION_MAP[artifact_type]
return f"s3://{self.bucket}/{settings.ENV}/logs/{log_entity_type}/{log_entity_id}/{datetime.utcnow().isoformat()}_{artifact_type}.{file_ext}"

def build_observer_thought_uri(
self, artifact_id: str, observer_thought: ObserverThought, artifact_type: ArtifactType
) -> str:
Expand Down
1 change: 1 addition & 0 deletions skyvern/forge/sdk/core/skyvern_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class SkyvernContext:
max_steps_override: int | None = None
tz_info: ZoneInfo | None = None
totp_codes: dict[str, str | None] = field(default_factory=dict)
log: list[dict] = field(default_factory=list)

def __repr__(self) -> str:
return f"SkyvernContext(request_id={self.request_id}, organization_id={self.organization_id}, task_id={self.task_id}, workflow_id={self.workflow_id}, workflow_run_id={self.workflow_run_id}, max_steps_override={self.max_steps_override})"
Expand Down
Loading