# Provenance: patch 392da375cb9d2809b523df0f4d29628162a303c9 by supercoder-dev
# (Fri, 5 Jul 2024 13:38:08 +0530), "Add markdown() method to Response class".
# The patch inserts this method into curl_cffi/requests/models.py, after the
# end of aclose() and before the comment that introduces __repr__().
def markdown(self, *, ignore_links: bool = True) -> str:
    """Convert the response body from HTML to Markdown text.

    ``self.content`` is parsed with BeautifulSoup (``html.parser`` backend),
    serialized back to a string, and passed to ``html2text`` for conversion.
    Both packages are imported inside the method, so they are only needed
    when this method is actually called.

    Args:
        ignore_links: Value assigned to ``HTML2Text.ignore_links``. The
            default of ``True`` reproduces the previously hard-coded setting.

    Returns:
        The string produced by ``HTML2Text.handle``.

    Raises:
        ImportError: If ``bs4`` or ``html2text`` cannot be imported. The
            underlying import failure is attached as ``__cause__``.
    """
    try:
        from bs4 import BeautifulSoup
    except ImportError as err:
        # Chain explicitly so the traceback reads as "caused by" rather than
        # as a second, unrelated error raised during handling.
        raise ImportError(
            "BeautifulSoup is required for markdown extraction. "
            "Install it using 'pip install beautifulsoup4'."
        ) from err

    try:
        import html2text
    except ImportError as err:
        raise ImportError(
            "html2text is required for markdown extraction. "
            "Install it using 'pip install html2text'."
        ) from err

    soup = BeautifulSoup(self.content, "html.parser")
    text_maker = html2text.HTML2Text()
    text_maker.ignore_links = ignore_links
    return text_maker.handle(str(soup))