diff --git a/components/renku_data_services/crc/models.py b/components/renku_data_services/crc/models.py index 330a3db74..c5eea3417 100644 --- a/components/renku_data_services/crc/models.py +++ b/components/renku_data_services/crc/models.py @@ -92,6 +92,7 @@ class ResourceClass(ResourcesCompareMixin): matching: Optional[bool] = None node_affinities: list[NodeAffinity] = field(default_factory=list) tolerations: list[str] = field(default_factory=list) + quota: str | None = None def __post_init__(self) -> None: if len(self.name) > 40: @@ -109,6 +110,7 @@ def from_dict(cls, data: dict) -> "ResourceClass": """Create the model from a plain dictionary.""" node_affinities: list[NodeAffinity] = [] tolerations: list[str] = [] + quota: str | None = None if data.get("node_affinities"): node_affinities = [ NodeAffinity.from_dict(affinity) if isinstance(affinity, dict) else affinity @@ -116,7 +118,12 @@ def from_dict(cls, data: dict) -> "ResourceClass": ] if isinstance(data.get("tolerations"), list): tolerations = [toleration for toleration in data["tolerations"]] - return cls(**{**data, "tolerations": tolerations, "node_affinities": node_affinities}) + if data_quota := data.get("quota"): + if isinstance(data_quota, str): + quota = data_quota + elif isinstance(data_quota, Quota): + quota = data_quota.id + return cls(**{**data, "tolerations": tolerations, "node_affinities": node_affinities, "quota": quota}) def is_quota_valid(self, quota: "Quota") -> bool: """Determine if a quota is compatible with the resource class.""" diff --git a/components/renku_data_services/crc/orm.py b/components/renku_data_services/crc/orm.py index 08a2ebf9d..3ec19ea35 100644 --- a/components/renku_data_services/crc/orm.py +++ b/components/renku_data_services/crc/orm.py @@ -75,7 +75,9 @@ class ResourceClassORM(BaseORM): resource_pool_id: Mapped[Optional[int]] = mapped_column( ForeignKey("resource_pools.id", ondelete="CASCADE"), default=None, index=True ) - resource_pool: Mapped[Optional["ResourcePoolORM"]] = relationship(back_populates="classes", default=None) + resource_pool: Mapped[Optional["ResourcePoolORM"]] = relationship( + back_populates="classes", default=None, lazy="joined" + ) id: Mapped[int] = mapped_column(Integer, Identity(always=True), primary_key=True, default=None, init=False) tolerations: Mapped[list["TolerationORM"]] = relationship( back_populates="resource_class", @@ -127,6 +129,7 @@ def dump(self, matching_criteria: models.ResourceClass | None = None) -> models. node_affinities=[affinity.dump() for affinity in self.node_affinities], tolerations=[toleration.key for toleration in self.tolerations], matching=matching, + quota=self.resource_pool.quota if self.resource_pool else None, ) diff --git a/components/renku_data_services/notebooks/blueprints.py b/components/renku_data_services/notebooks/blueprints.py index cd6551dba..77bdc0669 100644 --- a/components/renku_data_services/notebooks/blueprints.py +++ b/components/renku_data_services/notebooks/blueprints.py @@ -291,9 +291,18 @@ async def _handler( default_resource_class = await self.rp_repo.get_default_resource_class() if default_resource_class.id is None: raise errors.ProgrammingError(message="The default resource class has to have an ID", quiet=True) - resource_class_id = body.resource_class_id or default_resource_class.id + resource_class_id: int + quota: str | None = None + if body.resource_class_id is None: + resource_class = await self.rp_repo.get_default_resource_class() + # TODO: Add types for saved and unsaved resource class + resource_class_id = cast(int, resource_class.id) + else: + resource_class = await self.rp_repo.get_resource_class(user, resource_class_id) + # TODO: Add types for saved and unsaved resource class + resource_class_id = cast(int, resource_class.id) + quota = resource_class.quota await self.nb_config.crc_validator.validate_class_storage(user, resource_class_id, body.disk_storage) - resource_class = await self.rp_repo.get_resource_class(user, resource_class_id) work_dir_fallback = PurePosixPath("/home/jovyan") work_dir = environment.working_directory or image_workdir or work_dir_fallback storage_mount_fallback = work_dir / "work" @@ -411,6 +420,7 @@ async def _handler( ) base_server_url = self.nb_config.sessions.ingress.base_url(server_name) + base_server_https_url = self.nb_config.sessions.ingress.base_url(server_name, force_https=True) base_server_path = self.nb_config.sessions.ingress.base_path(server_name) ui_path: str = ( f"{base_server_path.rstrip("/")}/{environment.default_url.lstrip("/")}" @@ -426,9 +436,11 @@ async def _handler( "cpu": str(round(resource_class.cpu * 1000)) + "m", "memory": f"{resource_class.memory}Gi", } + limits: dict[str, str | int] = {} if resource_class.gpu > 0: gpu_name = GpuKind.NVIDIA.value + "/gpu" requests[gpu_name] = resource_class.gpu + limits[gpu_name] = resource_class.gpu tolerations = [ Toleration.model_validate(toleration) for toleration in self.nb_config.sessions.tolerations ] + tolerations_from_resource_class(resource_class) @@ -444,6 +456,7 @@ async def _handler( codeRepositories=[], hibernated=False, reconcileStrategy=ReconcileStrategy.whenFailedOrHibernated, + priorityClassName=quota, session=Session( image=image, urlPath=ui_path, @@ -458,7 +471,7 @@ async def _handler( workingDir=work_dir.as_posix(), runAsUser=environment.uid, runAsGroup=environment.gid, - resources=Resources(requests=requests), + resources=Resources(requests=requests, limits=limits if len(limits) > 0 else None), extraVolumeMounts=[], command=environment.command, args=environment.args, @@ -521,7 +534,8 @@ async def _handler( "oidc_issuer_url": self.nb_config.sessions.oidc.issuer_url, "session_cookie_minimal": True, "skip_provider_button": True, - "redirect_url": urljoin(base_server_url + "/", "oauth2/callback"), + # NOTE: If the redirect url is not HTTPS then some or identity providers will fail. + "redirect_url": urljoin(base_server_https_url + "/", "oauth2/callback"), "cookie_path": base_server_path, "proxy_prefix": parsed_proxy_url.path, "authenticated_emails_file": "/authorized_emails", diff --git a/components/renku_data_services/notebooks/config/dynamic.py b/components/renku_data_services/notebooks/config/dynamic.py index 81719dde9..f5129c219 100644 --- a/components/renku_data_services/notebooks/config/dynamic.py +++ b/components/renku_data_services/notebooks/config/dynamic.py @@ -261,8 +261,10 @@ def from_env(cls) -> Self: def base_path(self, server_name: str) -> str: return f"/sessions/{server_name}" - def base_url(self, server_name: str) -> str: + def base_url(self, server_name: str, force_https: bool = False) -> str: scheme = "https" if self.tls_secret else "http" + if force_https: + scheme = "https" return urlunparse((scheme, self.host, self.base_path(server_name), None, None, None)) diff --git a/components/renku_data_services/notebooks/cr_amalthea_session.py b/components/renku_data_services/notebooks/cr_amalthea_session.py index bc8606cfa..67b265f6d 100644 --- a/components/renku_data_services/notebooks/cr_amalthea_session.py +++ b/components/renku_data_services/notebooks/cr_amalthea_session.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: -# timestamp: 2024-11-13T08:52:55+00:00 +# timestamp: 2024-11-29T10:46:16+00:00 from __future__ import annotations @@ -3072,6 +3072,19 @@ class EnvItem2(BaseCRD): ) +class Type3(Enum): + none = "none" + tcp = "tcp" + http = "http" + + +class ReadinessProbe2(BaseCRD): + model_config = ConfigDict( + extra="allow", + ) + type: Type3 = Field(default="tcp", description="The type of readiness probe") + + class Resources3(BaseCRD): model_config = ConfigDict( extra="allow", @@ -3125,6 +3138,9 @@ class Session(BaseCRD): gt=0, lt=65535, ) + readinessProbe: ReadinessProbe2 = Field( + default={}, description="The readiness probe to use on the session container" + ) resources: Optional[Resources3] = Field( default=None, description="Resource requirements and limits in the same format as a Pod in Kubernetes", @@ -3221,6 +3237,10 @@ class Spec(BaseCRD): default=None, description="Selector which must match a node's labels for the pod to be scheduled on that node.\nPassed right through to the Statefulset used for the session.", ) + priorityClassName: Optional[str] = Field( + default=None, + description="The name of the priority class assigned to the session Pod.", + ) reconcileStrategy: ReconcileStrategy = Field( default="always", description="Indicates how Amalthea should reconcile the child resources for a session. This can be problematic because\nnewer versions of Amalthea may include new versions of the sidecars or other changes not reflected\nin the AmaltheaSession CRD, so simply updating Amalthea could cause existing sessions to restart\nbecause the sidecars will have a newer image or for other reasons because the code changed.\nHibernating the session and deleting it will always work as expected regardless of the strategy.\nThe status of the session and all hibernation or auto-cleanup functionality will always work as expected.\nA few values are possible:\n- never: Amalthea will never update any of the child resources and will ignore any changes to the CR\n- always: This is the expected method of operation for an operator, changes to the spec are always reconciled\n- whenHibernatedOrFailed: To avoid interrupting a running session, reconciliation of the child components\n are only done when the session has a Failed or Hibernated status", diff --git a/test/utils.py b/test/utils.py index cd3536cee..4407959f9 100644 --- a/test/utils.py +++ b/test/utils.py @@ -327,11 +327,15 @@ def remove_id_from_rc(rc: rp_models.ResourceClass) -> rp_models.ResourceClass: return rp_models.ResourceClass.from_dict(kwargs) +def remove_quota_from_rc(rc: rp_models.ResourceClass) -> rp_models.ResourceClass: + return rc.update(quota=None) + + def remove_id_from_rp(rp: rp_models.ResourcePool) -> rp_models.ResourcePool: quota = rp.quota if isinstance(quota, rp_models.Quota): quota = remove_id_from_quota(quota) - classes = [remove_id_from_rc(rc) for rc in rp.classes] + classes = [remove_quota_from_rc(remove_id_from_rc(rc)) for rc in rp.classes] return rp_models.ResourcePool( name=rp.name, id=None,