adding ability for APIRequest to retry and save to a file

langflow-ai · Nov 19, 2024 · a5d5082 · a5d5082
1 parent 6133fed
commit a5d5082
Show file tree

Hide file tree

Showing 2 changed files with 139 additions and 8 deletions.
diff --git a/src/backend/base/langflow/components/data/api_request.py b/src/backend/base/langflow/components/data/api_request.py
@@ -1,14 +1,21 @@
 import asyncio
 import json
+import mimetypes
+import re
+import tempfile
+from datetime import datetime
+from zoneinfo import ZoneInfo
+from pathlib import Path
 from typing import Any
 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
 
 import httpx
+import validators
 from loguru import logger
 
 from langflow.base.curl.parse import parse_context
 from langflow.custom import Component
-from langflow.io import DataInput, DropdownInput, IntInput, MessageTextInput, NestedDictInput, Output
+from langflow.io import BoolInput, DataInput, DropdownInput, IntInput, MessageTextInput, NestedDictInput, Output
 from langflow.schema import Data
 from langflow.schema.dotdict import dotdict
 
@@ -73,6 +80,20 @@ class APIRequestComponent(Component):
             value=5,
             info="The timeout to use for the request.",
         ),
+        BoolInput(
+            name="follow_redirects",
+            display_name="Follow Redirects",
+            value=True,
+            info="Whether to follow http redirects.",
+            advanced=True,
+        ),
+        BoolInput(
+            name="save_to_file",
+            display_name="Save to File",
+            value=False,
+            info="Save the API response to a temporary file",
+            advanced=True,
+        ),
     ]
 
     outputs = [
@@ -113,6 +134,9 @@ async def make_request(
         headers: dict | None = None,
         body: dict | None = None,
         timeout: int = 5,
+        *, 
+        follow_redirects: bool = True,
+        save_to_file: bool = False,
     ) -> Data:
         method = method.upper()
         if method not in {"GET", "POST", "PATCH", "PUT", "DELETE"}:
@@ -129,20 +153,58 @@ async def make_request(
                 raise ValueError(msg) from e
 
         data = body or None
+        redirection_history = []
 
         try:
-            response = await client.request(method, url, headers=headers, json=data, timeout=timeout)
-            try:
-                result = response.json()
-            except Exception:  # noqa: BLE001
-                logger.opt(exception=True).debug("Error decoding JSON response")
-                result = response.text
+            response = await client.request(
+                method,
+                url,
+                headers=headers,
+                json=data,
+                timeout=timeout,
+                follow_redirects=follow_redirects,
+            )
+
+            redirection_history = [
+                {"url": str(redirect.url), "status_code": redirect.status_code} for redirect in response.history
+            ]
+
+            if response.is_redirect:
+                redirection_history.append({"url": str(response.url), "status_code": response.status_code})
+
+            is_binary, file_path = self._response_info(response, with_file_path=save_to_file)
+
+            if save_to_file:
+                mode = "wb" if is_binary else "w"
+                encoding = response.encoding if mode == "w" else None
+                with file_path.open(mode, encoding=encoding) as f:
+                    f.write(response.content if is_binary else response.text)
+
+                return Data(
+                    data={
+                        "source": url,
+                        "headers": headers,
+                        "status_code": response.status_code,
+                        "file_path": str(file_path),
+                        **({"redirection_history": redirection_history} if redirection_history else {}),
+                    },
+                )
+            # Populate result when not saving to a file
+            if is_binary:
+                result = response.content
+            else:
+                try:
+                    result = response.json()
+                except Exception:  # noqa: BLE001
+                    logger.opt(exception=True).debug("Error decoding JSON response")
+                    result = response.text
             return Data(
                 data={
                     "source": url,
                     "headers": headers,
                     "status_code": response.status_code,
                     "result": result,
+                    **({"redirection_history": redirection_history} if redirection_history else {}),
                 },
             )
         except httpx.TimeoutException:
@@ -162,6 +224,7 @@ async def make_request(
                     "headers": headers,
                     "status_code": 500,
                     "error": str(exc),
+                    **({"redirection_history": redirection_history} if redirection_history else {}),
                 },
             )
 
@@ -179,6 +242,13 @@ async def make_requests(self) -> list[Data]:
         headers = self.headers or {}
         body = self.body or {}
         timeout = self.timeout
+        follow_redirects = self.follow_redirects
+        save_to_file = self.save_to_file
+
+        invalid_urls = [url for url in urls if not validators.url(url)]
+        if invalid_urls:
+            msg = f"Invalid URLs provided: {invalid_urls}"
+            raise ValueError(msg)
 
         if isinstance(self.query_params, str):
             query_params = dict(parse_qsl(self.query_params))
@@ -201,9 +271,69 @@ async def make_requests(self) -> list[Data]:
         async with httpx.AsyncClient() as client:
             results = await asyncio.gather(
                 *[
-                    self.make_request(client, method, u, headers, rec, timeout)
+                    self.make_request(client, method, u, headers, rec, timeout, follow_redirects=follow_redirects, save_to_file=save_to_file)
                     for u, rec in zip(urls, bodies, strict=True)
                 ]
             )
         self.status = results
         return results
+
+    def _response_info(self, response: httpx.Response, *, with_file_path: bool = False) -> tuple[bool, Path | None]:
+        """Determine the file path and whether the response content is binary.
+
+        Args:
+            response (Response): The HTTP response object.
+            with_file_path (bool): Whether to save the response content to a file.
+
+        Returns:
+            Tuple[bool, Path | None]:
+                A tuple containing a boolean indicating if the content is binary and the full file path (if applicable).
+        """
+        # Determine if the content is binary
+        content_type = response.headers.get("Content-Type", "")
+        is_binary = "application/octet-stream" in content_type or "application/binary" in content_type
+
+        if not with_file_path:
+            return is_binary, None
+
+        # Step 1: Set up a subdirectory for the component in the OS temp directory
+        component_temp_dir = Path(tempfile.gettempdir()) / self.__class__.__name__
+        component_temp_dir.mkdir(parents=True, exist_ok=True)
+
+        # Step 2: Extract filename from Content-Disposition
+        filename = None
+        if "Content-Disposition" in response.headers:
+            content_disposition = response.headers["Content-Disposition"]
+            filename_match = re.search(r'filename="(.+?)"', content_disposition)
+            if not filename_match:  # Try to match RFC 5987 style
+                filename_match = re.search(r"filename\*=(?:UTF-8'')?(.+)", content_disposition)
+            if filename_match:
+                extracted_filename = filename_match.group(1)
+                # Ensure the filename is unique
+                if (component_temp_dir / extracted_filename).exists():
+                    timestamp = datetime.now(ZoneInfo("UTC")).strftime("%Y%m%d%H%M%S%f")
+                    filename = f"{timestamp}-{extracted_filename}"
+                else:
+                    filename = extracted_filename
+
+        # Step 3: Infer file extension or use part of the request URL if no filename
+        if not filename:
+            # Extract the last segment of the URL path
+            url_path = urlparse(response.request.url).path
+            base_name = Path(url_path).name  # Get the last segment of the path
+            if not base_name:  # If the path ends with a slash or is empty
+                base_name = "response"
+
+            # Infer file extension
+            extension = mimetypes.guess_extension(content_type.split(";")[0]) if content_type else None
+            if not extension:
+                extension = ".bin" if is_binary else ".txt"  # Default extensions
+
+            # Combine the base name with timestamp and extension
+            timestamp = datetime.now(ZoneInfo("UTC")).strftime("%Y%m%d%H%M%S%f")
+            filename = f"{timestamp}-{base_name}{extension}"
+
+        # Step 4: Define the full file path
+        file_path = component_temp_dir / filename
+
+        return is_binary, file_path
diff --git a/src/backend/base/pyproject.toml b/src/backend/base/pyproject.toml
@@ -162,6 +162,7 @@ dependencies = [
     "fastapi-pagination>=0.12.29",
     "defusedxml>=0.7.1",
     "pypdf~=5.1.0",
+    "validators>=0.34.0",
 ]
 
 [project.urls]