API: refactor and fix get_result(wait=True)

The previous `GET /dispatches/{dispatch_id}` endpoint was trying to do too much. Its responsibilities are now separated into two endpoints:

* `GET /dispatches`: bulk query dispatch summaries (including status) with options to filter by `dispatch_id`, sort chronologically, and also limit the output to status only.
* `GET /dispatches/{dispatch_id}`: download manifest

To achieve the desired behavior of `get_result(id, wait=True)`, the client

1. Polls the dispatch status by querying the first endpoint.
2. Downloads the manifest after the dispatch has reached a final status.

The server no longer returns 503 errors when the dispatch is not yet "ready". A 503 status code is not entirely accurate here because it is intended to convey temporary service unavailability resulting from server overload or rate limiting. However, the fact that the workflow is still running does not indicate any fault of the server.

These changes will allow `get_result(dispatch_id, wait=True)` to wait as long as required instead of erroring out after some time.

Supporting improvements:

DAL: Add sorting and pagination to Controller

DAL: improve bulk get when retrieving only some columns

Directly select the specified columns instead of retrieving the whole ORM entities and deferring column loading using load_only
AgnostiqHQ · Jun 14, 2024 · dd00785 · dd00785
1 parent 215d8d3
commit dd00785
Show file tree

Hide file tree

Showing 14 changed files with 321 additions and 409 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Improved handling of Covalent version mismatches between client and
   executor environments
+- `get_result(wait=True)` will wait as long as needed
 
 ### Removed
 

diff --git a/covalent/_dispatcher_plugins/local.py b/covalent/_dispatcher_plugins/local.py
@@ -129,8 +129,6 @@ def dispatch(
             Wrapper function which takes the inputs of the workflow as arguments
         """
 
-        multistage = get_config("sdk.multistage_dispatch") == "true"
-
         # Extract triggers here
         if "triggers" in orig_lattice.metadata:
             triggers_data = orig_lattice.metadata.pop("triggers")
@@ -155,14 +153,7 @@ def wrapper(*args, **kwargs) -> str:
                 The dispatch id of the workflow.
             """
 
-            if multistage:
-                dispatch_id = LocalDispatcher.register(orig_lattice, dispatcher_addr)(
-                    *args, **kwargs
-                )
-            else:
-                dispatch_id = LocalDispatcher.submit(orig_lattice, dispatcher_addr)(
-                    *args, **kwargs
-                )
+            dispatch_id = LocalDispatcher.register(orig_lattice, dispatcher_addr)(*args, **kwargs)
 
             if triggers_data:
                 LocalDispatcher.register_triggers(triggers_data, dispatch_id)
@@ -237,61 +228,6 @@ def wrapper(*args, **kwargs) -> str:
 
         return wrapper
 
-    @staticmethod
-    def submit(
-        orig_lattice: Lattice,
-        dispatcher_addr: str = None,
-    ) -> Callable:
-        """
-        Wrapping the dispatching functionality to allow input passing
-        and server address specification.
-
-        Afterwards, send the lattice to the dispatcher server and return
-        the assigned dispatch id.
-
-        Args:
-            orig_lattice: The lattice/workflow to send to the dispatcher server.
-            dispatcher_addr: The address of the dispatcher server.  If None then then defaults to the address set in Covalent's config.
-
-        Returns:
-            Wrapper function which takes the inputs of the workflow as arguments
-        """
-
-        if dispatcher_addr is None:
-            dispatcher_addr = format_server_url()
-
-        @wraps(orig_lattice)
-        def wrapper(*args, **kwargs) -> str:
-            """
-            Send the lattice to the dispatcher server and return
-            the assigned dispatch id.
-
-            Args:
-                *args: The inputs of the workflow.
-                **kwargs: The keyword arguments of the workflow.
-
-            Returns:
-                The dispatch id of the workflow.
-            """
-
-            if not isinstance(orig_lattice, Lattice):
-                message = f"Dispatcher expected a Lattice, received {type(orig_lattice)} instead."
-                app_log.error(message)
-                raise TypeError(message)
-
-            lattice = deepcopy(orig_lattice)
-
-            lattice.build_graph(*args, **kwargs)
-
-            # Serialize the transport graph to JSON
-            json_lattice = lattice.serialize_to_json()
-            endpoint = "/api/v2/dispatches/submit"
-            r = APIClient(dispatcher_addr).post(endpoint, data=json_lattice)
-            r.raise_for_status()
-            return r.content.decode("utf-8").strip().replace('"', "")
-
-        return wrapper
-
     @staticmethod
     def start(
         dispatch_id: str,

diff --git a/covalent/_results_manager/results_manager.py b/covalent/_results_manager/results_manager.py
@@ -19,12 +19,11 @@
 
 import contextlib
 import os
+import time
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List, Optional
 
 from furl import furl
-from requests.adapters import HTTPAdapter
-from urllib3.util import Retry
 
 from .._api.apiclient import CovalentAPIClient
 from .._serialize.common import load_asset
@@ -40,9 +39,9 @@
 from .._shared_files.exceptions import MissingLatticeRecordError
 from .._shared_files.schemas.asset import AssetSchema
 from .._shared_files.schemas.result import ResultSchema
+from .._shared_files.util_classes import RESULT_STATUS, Status
 from .._shared_files.utils import copy_file_locally, format_server_url
 from .result import Result
-from .wait import EXTREME
 
 app_log = logger.app_log
 log_stack_info = logger.log_stack_info
@@ -139,12 +138,20 @@ def cancel(dispatch_id: str, task_ids: List[int] = None, dispatcher_addr: str =
 # Multi-part
 
 
+def _query_dispatch_status(dispatch_id: str, api_client: CovalentAPIClient):
+    endpoint = "/api/v2/dispatches"
+    resp = api_client.get(endpoint, params={"dispatch_id": dispatch_id, "status_only": True})
+    resp.raise_for_status()
+    dispatches = resp.json()["dispatches"]
+    if len(dispatches) == 0:
+        raise MissingLatticeRecordError
+
+    return dispatches[0]["status"]
+
+
 def _get_result_export_from_dispatcher(
-    dispatch_id: str,
-    wait: bool = False,
-    status_only: bool = False,
-    dispatcher_addr: str = None,
-) -> Dict:
+    dispatch_id: str, api_client: CovalentAPIClient
+) -> ResultSchema:
     """
     Internal function to get the results of a dispatch from the server without checking if it is ready to read.
 
@@ -161,24 +168,13 @@ def _get_result_export_from_dispatcher(
         MissingLatticeRecordError: If the result is not found.
     """
 
-    if dispatcher_addr is None:
-        dispatcher_addr = format_server_url()
-
-    retries = int(EXTREME) if wait else 5
-
-    adapter = HTTPAdapter(max_retries=Retry(total=retries, backoff_factor=1))
-    api_client = CovalentAPIClient(dispatcher_addr, adapter=adapter, auto_raise=False)
-
     endpoint = f"/api/v2/dispatches/{dispatch_id}"
-    response = api_client.get(
-        endpoint,
-        params={"wait": wait, "status_only": status_only},
-    )
+    response = api_client.get(endpoint)
     if response.status_code == 404:
         raise MissingLatticeRecordError
     response.raise_for_status()
     export = response.json()
-    return export
+    return ResultSchema.model_validate(export)
 
 
 # Function to download default assets
@@ -346,11 +342,17 @@ def from_dispatch_id(
         wait: bool = False,
         dispatcher_addr: str = None,
     ) -> "ResultManager":
-        export = _get_result_export_from_dispatcher(
-            dispatch_id, wait, status_only=False, dispatcher_addr=dispatcher_addr
-        )
+        if dispatcher_addr is None:
+            dispatcher_addr = format_server_url()
 
-        manifest = ResultSchema.model_validate(export["result_export"])
+        api_client = CovalentAPIClient(dispatcher_addr)
+        if wait:
+            status = Status(_query_dispatch_status(dispatch_id, api_client))
+            while not RESULT_STATUS.is_terminal(status):
+                time.sleep(1)
+                status = Status(_query_dispatch_status(dispatch_id, api_client))
+
+        manifest = _get_result_export_from_dispatcher(dispatch_id, api_client)
 
         # sort the nodes
         manifest.lattice.transport_graph.nodes.sort(key=lambda x: x.id)
@@ -408,14 +410,15 @@ def _get_result_multistage(
 
     """
 
+    if dispatcher_addr is None:
+        dispatcher_addr = format_server_url()
+
+    api_client = CovalentAPIClient(dispatcher_addr)
     try:
         if status_only:
-            return _get_result_export_from_dispatcher(
-                dispatch_id=dispatch_id,
-                wait=wait,
-                status_only=status_only,
-                dispatcher_addr=dispatcher_addr,
-            )
+            status = _query_dispatch_status(dispatch_id, api_client)
+            return {"id": dispatch_id, "status": status}
+
         rm = get_result_manager(dispatch_id, results_dir, wait, dispatcher_addr)
         _get_default_assets(rm)
 
@@ -496,23 +499,14 @@ def get_result(
         The Result object from the Covalent server
 
     """
-    max_attempts = int(os.getenv("COVALENT_GET_RESULT_RETRIES", 10))
-    num_attempts = 0
-    while num_attempts < max_attempts:
-        try:
-            return _get_result_multistage(
-                dispatch_id=dispatch_id,
-                wait=wait,
-                dispatcher_addr=dispatcher_addr,
-                status_only=status_only,
-                results_dir=results_dir,
-                workflow_output=workflow_output,
-                intermediate_outputs=intermediate_outputs,
-                sublattice_results=sublattice_results,
-                qelectron_db=qelectron_db,
-            )
-
-        except RecursionError as re:
-            app_log.error(re)
-            num_attempts += 1
-    raise RuntimeError("Timed out waiting for result. Please retry or check dispatch.")
+    return _get_result_multistage(
+        dispatch_id=dispatch_id,
+        wait=wait,
+        dispatcher_addr=dispatcher_addr,
+        status_only=status_only,
+        results_dir=results_dir,
+        workflow_output=workflow_output,
+        intermediate_outputs=intermediate_outputs,
+        sublattice_results=sublattice_results,
+        qelectron_db=qelectron_db,
+    )
diff --git a/covalent/_shared_files/defaults.py b/covalent/_shared_files/defaults.py
@@ -67,9 +67,6 @@ def get_default_sdk_config():
             + "/covalent/dispatches"
         ),
         "task_packing": "true" if os.environ.get("COVALENT_ENABLE_TASK_PACKING") else "false",
-        "multistage_dispatch": (
-            "false" if os.environ.get("COVALENT_DISABLE_MULTISTAGE_DISPATCH") == "1" else "true"
-        ),
         "results_dir": os.environ.get(
             "COVALENT_RESULTS_DIR"
         )  # COVALENT_RESULTS_DIR is where the client downloads workflow artifacts during get_result() which is different from COVALENT_DATA_DIR

diff --git a/covalent/triggers/base.py b/covalent/triggers/base.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 
 
-import asyncio
-import json
 from abc import abstractmethod
 
 import requests
@@ -108,17 +106,12 @@ def _get_status(self) -> Status:
         """
 
         if self.use_internal_funcs:
-            from covalent_dispatcher._service.app import export_result
+            from covalent_dispatcher._service.app import get_dispatches_bulk
 
-            response = asyncio.run_coroutine_threadsafe(
-                export_result(self.lattice_dispatch_id, status_only=True),
-                self.event_loop,
-            ).result()
-
-            if isinstance(response, dict):
-                return response["status"]
-
-            return json.loads(response.body.decode()).get("status")
+            response = get_dispatches_bulk(
+                dispatch_id=[self.lattice_dispatch_id], status_only=True
+            )
+            return response.dispatches[0].status
 
         from .. import get_result
 

diff --git a/covalent_dispatcher/_dal/controller.py b/covalent_dispatcher/_dal/controller.py
@@ -17,10 +17,12 @@
 
 from __future__ import annotations
 
-from typing import Generic, Type, TypeVar
+from typing import Generic, List, Optional, Sequence, Type, TypeVar, Union
 
 from sqlalchemy import select, update
-from sqlalchemy.orm import Session, load_only
+from sqlalchemy.engine import Row
+from sqlalchemy.orm import Session
+from sqlalchemy.sql.expression import Select, desc
 
 from .._db import models
 
@@ -50,11 +52,16 @@ def get(
         cls,
         session: Session,
         *,
+        stmt: Optional[Select] = None,
         fields: list,
         equality_filters: dict,
         membership_filters: dict,
         for_update: bool = False,
-    ):
+        sort_fields: List[str] = [],
+        reverse: bool = True,
+        offset: int = 0,
+        max_items: Optional[int] = None,
+    ) -> Union[Sequence[Row], Sequence[T]]:
         """Bulk ORM-enabled SELECT.
 
         Args:
@@ -64,19 +71,40 @@ def get(
             membership_filters: Dict{field_name: value_list}
             for_update: Whether to lock the selected rows
 
+        Returns:
+            A list of SQLAlchemy Rows or whole ORM entities depending
+        on whether only a subset of fields is specified.
+
         """
-        stmt = select(cls.model)
+        if stmt is None:
+            if len(fields) > 0:
+                entities = [getattr(cls.model, attr) for attr in fields]
+                stmt = select(*entities)
+            else:
+                stmt = select(cls.model)
+
         for attr, val in equality_filters.items():
             stmt = stmt.where(getattr(cls.model, attr) == val)
         for attr, vals in membership_filters.items():
             stmt = stmt.where(getattr(cls.model, attr).in_(vals))
-        if len(fields) > 0:
-            attrs = [getattr(cls.model, f) for f in fields]
-            stmt = stmt.options(load_only(*attrs))
         if for_update:
             stmt = stmt.with_for_update()
-
-        return session.scalars(stmt).all()
+        for attr in sort_fields:
+            if reverse:
+                stmt = stmt.order_by(desc(getattr(cls.model, attr)))
+            else:
+                stmt = stmt.order_by(getattr(cls.model, attr))
+
+        stmt = stmt.offset(offset)
+        if max_items:
+            stmt = stmt.limit(max_items)
+
+        if len(fields) == 0:
+            # Return whole ORM entities
+            return session.scalars(stmt).all()
+        else:
+            # Return a named tuple containing the selected cols
+            return session.execute(stmt).all()
 
     @classmethod
     def get_by_primary_key(