Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic Archive management #70

Merged
merged 24 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
4cf8991
use ArchiveStatus in Archive.status instead of str
rgaudin Jun 25, 2024
35458a7
replacing -dev containers in dev with reload compose
rgaudin Jun 25, 2024
dfa1475
Typo fix
rgaudin Aug 21, 2024
2963269
Fixed logging ; allowing DEBUG calls (enabled for reload compose)
rgaudin Aug 21, 2024
fd57682
Fixed zimfarm call syntax
rgaudin Aug 21, 2024
00a4cc1
Initial Archive management in UI
rgaudin Aug 21, 2024
d4447b9
return requested_on and completed_on (used in UI)
rgaudin Aug 21, 2024
cf5f22c
record file's sizes sum as archive size on request (until it is compl…
rgaudin Aug 21, 2024
c67ad0a
Better default
rgaudin Aug 21, 2024
6843816
Archives list ; improved metadata edition
rgaudin Aug 21, 2024
b2b3d73
better fallback for DOWNLOAD_URL
rgaudin Aug 22, 2024
9f550a7
zf callback url is to target this backend, not ZF
rgaudin Aug 22, 2024
012a258
Added main-logo support
rgaudin Aug 22, 2024
7137f35
prioritize our tasks on the farm (for now)
rgaudin Aug 23, 2024
31e4e34
fixed JSON formatting
rgaudin Aug 23, 2024
366695d
fixed hook URL
rgaudin Aug 23, 2024
97defb6
allow hook to be called without a user's session
rgaudin Aug 23, 2024
0263bea
simplify ZF hook payload to whats useful
rgaudin Aug 23, 2024
706ed68
Fixed hook and email
rgaudin Aug 23, 2024
b22d7b5
order meta same as in edit
rgaudin Aug 23, 2024
9f23b58
fixed backend lint/check
rgaudin Aug 23, 2024
cfeef65
formatted frontend
rgaudin Aug 23, 2024
0d72189
fixed linting
rgaudin Aug 23, 2024
3de4f75
requesting an archive now requires a payload with email (set or not)
rgaudin Aug 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions backend/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import humanfriendly
from rq import Retry

logging.basicConfig()


def determine_mandatory_environment_variables():
for variable in ("POSTGRES_URI", "S3_URL_WITH_CREDENTIALS", "PRIVATE_SALT"):
Expand Down Expand Up @@ -59,14 +61,16 @@ class BackendConf:

# Deployment
public_url: str = os.getenv("PUBLIC_URL") or "http://localhost"
# /!\ this must match the region/bucket on s3 credentials
download_url: str = (
os.getenv("DOWNLOAD_URL")
or "https://s3.us-west-1.wasabisys.com/org-kiwix-zimit/zim"
or "https://s3.eu-west-2.wasabisys.com/org-kiwix-nautilus"
)
allowed_origins = os.getenv(
"ALLOWED_ORIGINS",
"http://localhost",
).split("|")
debug: bool = bool(os.getenv("DEBUG") or "")

# Zimfarm (3rd party API creating ZIMs and calling back with feedback)
zimfarm_api_url: str = (
Expand All @@ -80,10 +84,16 @@ class BackendConf:
zimfarm_task_cpu: int = int(os.getenv("ZIMFARM_TASK_CPU") or "3")
zimfarm_task_memory: int = 0
zimfarm_task_disk: int = 0
zimfarm_callback_base_url = os.getenv("ZIMFARM_CALLBACK_BASE_URL", "")
zimfarm_callback_base_url = (
os.getenv("ZIMFARM_CALLBACK_BASE_URL") or "https://api.nautilus.openzim.org/v1"
)
zimfarm_callback_token = os.getenv("ZIMFARM_CALLBACK_TOKEN", uuid.uuid4().hex)
zimfarm_task_worker: str = os.getenv("ZIMFARM_TASK_WORKDER") or "-"
zimfarm_task_worker: str = os.getenv("ZIMFARM_TASK_WORKER") or "-"
zimfarm_request_timeout_sec: int = 10
zim_download_url: str = (
os.getenv("ZIM_DOWNLOAD_URL")
or "https://s3.us-west-1.wasabisys.com/org-kiwix-zimit"
)

# Mailgun (3rd party API to send emails)
mailgun_api_url: str = os.getenv("MAILGUN_API_URL") or ""
Expand All @@ -95,6 +105,7 @@ class BackendConf:

def __post_init__(self):
self.logger = logging.getLogger(Path(__file__).parent.name)
self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)
self.transient_storage_path.mkdir(exist_ok=True)
self.job_retry = Retry(max=self.s3_max_tries, interval=int(self.s3_retry_wait))

Expand All @@ -119,9 +130,6 @@ def __post_init__(self):
os.getenv("ZIMFARM_TASK_DISK") or "200MiB"
)

if not self.zimfarm_callback_base_url:
self.zimfarm_callback_base_url = f"{self.zimfarm_api_url}/requests/hook"


constants = BackendConf()
logger = constants.logger
20 changes: 18 additions & 2 deletions backend/api/database/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime
from enum import Enum
from typing import Any, ClassVar, TypeVar
from uuid import UUID

Expand Down Expand Up @@ -36,6 +37,7 @@ class ArchiveConfig(BaseModel):
tags: list[str]
illustration: str
filename: str
main_logo: str | None = None

@classmethod
def init_with(cls: type[T], filename: str, **kwargs) -> T:
Expand All @@ -49,7 +51,8 @@ def init_with(cls: type[T], filename: str, **kwargs) -> T:
def is_ready(self) -> bool:
try:
for key in self.model_fields.keys():
validate_required_values(key.title(), getattr(self, key, ""))
if key != "main_logo":
validate_required_values(key.title(), getattr(self, key, ""))
validate_title("Title", self.title)
validate_description("Description", self.description)
validate_language("Language", self.languages)
Expand All @@ -60,6 +63,18 @@ def is_ready(self) -> bool:
return True


class ArchiveStatus(str, Enum):
    """Lifecycle states of an Archive.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (e.g. ``ArchiveStatus.READY == "READY"``).
    """

    # Present in the database but not requested yet: still modifiable.
    PENDING = "PENDING"
    # Requested from the ZimFarm: the user cannot modify it anymore and
    # the ZimFarm callback is awaited.
    REQUESTED = "REQUESTED"
    # The ZimFarm task succeeded: a download_url and a filesize are now set.
    READY = "READY"
    # The ZimFarm task failed: the archive cannot be downloaded.
    FAILED = "FAILED"


class ArchiveConfigType(types.TypeDecorator):
cache_ok = True
impl = JSONB
Expand Down Expand Up @@ -89,6 +104,7 @@ class Base(MappedAsDataclass, DeclarativeBase):
# timezone below)
type_annotation_map: ClassVar = {
ArchiveConfig: ArchiveConfigType,
ArchiveStatus: String,
dict[str, Any]: JSONB, # transform Python Dict[str, Any] into PostgreSQL JSONB
list[dict[str, Any]]: JSONB,
datetime: DateTime(
Expand Down Expand Up @@ -207,7 +223,7 @@ class Archive(Base):
completed_on: Mapped[datetime | None]
download_url: Mapped[str | None]
collection_json_path: Mapped[str | None]
status: Mapped[str]
status: Mapped[ArchiveStatus]
zimfarm_task_id: Mapped[UUID | None]
email: Mapped[str | None]
config: Mapped[ArchiveConfig]
5 changes: 3 additions & 2 deletions backend/api/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from api.database.models import Archive

jinja_env = Environment(
loader=FileSystemLoader("templates"),
loader=FileSystemLoader(Path(__file__).parent.joinpath("templates")),
autoescape=select_autoescape(["html", "txt"]),
)
jinja_env.filters["short_id"] = lambda value: str(value)[:5]
Expand Down Expand Up @@ -69,7 +69,8 @@ def get_context(task: dict[str, Any], archive: Archive):
"""Jinja context dict for email notifications"""
return {
"base_url": constants.public_url,
"download_url": constants.download_url,
"zim_download_url": constants.zim_download_url,
"task": task,
"file": next(iter(task["files"].values())) if task.get("files") else None,
"archive": archive,
}
12 changes: 12 additions & 0 deletions backend/api/routes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,15 @@
if not project:
raise HTTPException(HTTPStatus.NOT_FOUND, f"Project not found: {project_id}")
return project


async def userless_validated_project(
    project_id: UUID,
    session: Session = Depends(gen_session),
) -> Project:
    """Depends()-able lookup of the Project identified by `project_id`.

    The query filters on the project id only; no user takes part in it.

    Args:
        project_id: identifier of the requested Project.
        session: database session, injected from `gen_session`.

    Returns:
        The matching Project.

    Raises:
        HTTPException: NOT_FOUND when no Project matches `project_id`.
    """
    # NOTE(review): Codecov flagged this lookup as not covered by tests.
    found = session.execute(select(Project).filter_by(id=project_id)).scalar()
    if found:
        return found
    raise HTTPException(HTTPStatus.NOT_FOUND, f"Project not found: {project_id}")

Check warning on line 67 in backend/api/routes/__init__.py

View check run for this annotation

Codecov / codecov/patch

backend/api/routes/__init__.py#L66-L67

Added lines #L66 - L67 were not covered by tests
Loading
Loading