Skip to content

Commit

Permalink
support aws s3 storage (#720)
Browse files Browse the repository at this point in the history
  • Loading branch information
wintonzheng authored Aug 29, 2024
1 parent 11811e7 commit ffc4b35
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docs/running-tasks/visualizing-results.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ The following endpoint can be used to retrieve artifacts for a specific step:
```
GET https://api.skyvern.com/api/v1/tasks/{task_id}/steps/{step_id}/artifacts
```

## Artifacts configurations
By default, Skyvern stores artifacts (video recordings, screenshots, LLM requests and responses, HTML, and Skyvern-parsed HTML elements) locally in the `/artifacts` folder under the Skyvern repository.
You can also have Skyvern upload artifacts to your S3 buckets. To do this, first set up these environment variables:
- `AWS_DEFAULT_REGION`: `us-east-1`, `us-west-1`, ...
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
- `SKYVERN_STORAGE_TYPE`: set it to `s3`. The default is `local`.

Make sure these S3 buckets are created: `skyvern-artifacts` and `skyvern-screenshots`. These are the default bucket names Skyvern uses. To customize the bucket names, change these two environment variables: `AWS_S3_BUCKET_ARTIFACTS` and `AWS_S3_BUCKET_SCREENSHOTS`.
5 changes: 5 additions & 0 deletions skyvern/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class Settings(BaseSettings):
# Artifact storage settings
ARTIFACT_STORAGE_PATH: str = f"{SKYVERN_DIR}/artifacts"
GENERATE_PRESIGNED_URLS: bool = False
AWS_S3_BUCKET_ARTIFACTS: str = "skyvern-artifacts"
AWS_S3_BUCKET_SCREENSHOTS: str = "skyvern-screenshots"

# Supported storage types: local, s3
SKYVERN_STORAGE_TYPE: str = "local"

# S3 bucket settings
AWS_REGION: str = "us-east-1"
Expand Down
3 changes: 3 additions & 0 deletions skyvern/forge/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
from skyvern.forge.sdk.artifact.manager import ArtifactManager
from skyvern.forge.sdk.artifact.storage.factory import StorageFactory
from skyvern.forge.sdk.artifact.storage.s3 import S3Storage
from skyvern.forge.sdk.cache.factory import CacheFactory
from skyvern.forge.sdk.db.client import AgentDB
from skyvern.forge.sdk.experimentation.providers import BaseExperimentationProvider, NoOpExperimentationProvider
Expand All @@ -22,6 +23,8 @@
SettingsManager.get_settings().DATABASE_STRING,
debug_enabled=SettingsManager.get_settings().DEBUG_MODE,
)
if SettingsManager.get_settings().SKYVERN_STORAGE_TYPE == "s3":
StorageFactory.set_storage(S3Storage())
STORAGE = StorageFactory.get_storage()
CACHE = CacheFactory.get_cache()
ARTIFACT_MANAGER = ArtifactManager()
Expand Down
42 changes: 42 additions & 0 deletions skyvern/forge/sdk/artifact/storage/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from datetime import datetime, timezone

from skyvern.config import settings
from skyvern.forge.sdk.api.aws import AsyncAWSClient
from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType
from skyvern.forge.sdk.artifact.storage.base import FILE_EXTENTSION_MAP, BaseStorage
from skyvern.forge.sdk.models import Step


class S3Storage(BaseStorage):
    """Storage backend that delegates every read and write to ``AsyncAWSClient``.

    Artifact URIs built by :meth:`build_uri` use the ``s3://`` scheme and point
    into ``self.bucket``. Streaming files are addressed in the bucket named by
    ``settings.AWS_S3_BUCKET_SCREENSHOTS``.
    """

    def __init__(self, bucket: str | None = None) -> None:
        """Create the AWS client and choose the bucket used for artifact URIs.

        Args:
            bucket: Bucket name to embed in artifact URIs. When omitted (or
                empty), ``settings.AWS_S3_BUCKET_ARTIFACTS`` is used.
        """
        self.async_client = AsyncAWSClient()
        self.bucket = bucket or settings.AWS_S3_BUCKET_ARTIFACTS

    def build_uri(self, artifact_id: str, step: Step, artifact_type: ArtifactType) -> str:
        """Build the ``s3://`` URI under which an artifact of ``step`` is stored.

        The URI layout is
        ``s3://<bucket>/<ENV>/<task_id>/<order>_<retry_index>_<step_id>/<timestamp>_<artifact_id>_<artifact_type>.<ext>``
        where ``order`` is zero-padded to two digits and ``ext`` is looked up in
        ``FILE_EXTENTSION_MAP``.

        Args:
            artifact_id: Identifier embedded in the object name.
            step: Step the artifact belongs to; its ``task_id``, ``order``,
                ``retry_index`` and ``step_id`` are embedded in the path.
            artifact_type: Artifact kind; selects the file extension.

        Returns:
            The full ``s3://`` URI as a string.

        Raises:
            KeyError: If ``artifact_type`` has no entry in ``FILE_EXTENTSION_MAP``.
        """
        file_ext = FILE_EXTENTSION_MAP[artifact_type]
        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # timestamp and drop tzinfo so isoformat() yields the same naive-UTC
        # text (no "+00:00" suffix) that earlier URIs contain.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        step_dir = f"{step.order:02d}_{step.retry_index}_{step.step_id}"
        object_name = f"{timestamp}_{artifact_id}_{artifact_type}.{file_ext}"
        return f"s3://{self.bucket}/{settings.ENV}/{step.task_id}/{step_dir}/{object_name}"

    async def store_artifact(self, artifact: Artifact, data: bytes) -> None:
        """Upload ``data`` to the location given by ``artifact.uri``."""
        await self.async_client.upload_file(artifact.uri, data)

    async def retrieve_artifact(self, artifact: Artifact) -> bytes | None:
        """Download and return the content stored at ``artifact.uri``."""
        return await self.async_client.download_file(artifact.uri)

    async def get_share_link(self, artifact: Artifact) -> str | None:
        """Return a presigned URL for one artifact.

        Returns:
            The first URL returned by the client, or ``None`` when the client
            returns nothing (``None`` or an empty list).
        """
        share_urls = await self.async_client.create_presigned_urls([artifact.uri])
        return share_urls[0] if share_urls else None

    async def get_share_links(self, artifacts: list[Artifact]) -> list[str] | None:
        """Return presigned URLs for ``artifacts``, requested in a single client call."""
        return await self.async_client.create_presigned_urls([artifact.uri for artifact in artifacts])

    async def store_artifact_from_path(self, artifact: Artifact, path: str) -> None:
        """Upload the local file at ``path`` to the location given by ``artifact.uri``."""
        await self.async_client.upload_file_from_path(artifact.uri, path)

    async def save_streaming_file(self, organization_id: str, file_name: str) -> None:
        """Upload an organization's local streaming file to the screenshots bucket.

        The local file is read from
        ``<STREAMING_FILE_BASE_PATH>/<organization_id>/<file_name>`` and uploaded
        to ``s3://<AWS_S3_BUCKET_SCREENSHOTS>/<ENV>/<organization_id>/<file_name>``.
        """
        from_path = f"{settings.STREAMING_FILE_BASE_PATH}/{organization_id}/{file_name}"
        to_path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        await self.async_client.upload_file_from_path(to_path, from_path)

    async def get_streaming_file(self, organization_id: str, file_name: str, use_default: bool = True) -> bytes | None:
        """Download an organization's streaming file from the screenshots bucket.

        Args:
            organization_id: Organization whose file is requested.
            file_name: Name of the streaming file.
            use_default: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            Whatever the client's ``download_file`` returns for the object; the
            call is made with ``log_exception=False``.
        """
        path = f"s3://{settings.AWS_S3_BUCKET_SCREENSHOTS}/{settings.ENV}/{organization_id}/{file_name}"
        return await self.async_client.download_file(path, log_exception=False)

0 comments on commit ffc4b35

Please sign in to comment.