adds commit classification method to llm_service
lauraschauer committed Jul 11, 2024
1 parent 5646e02 commit aded174
Showing 3 changed files with 56 additions and 4 deletions.
54 changes: 53 additions & 1 deletion prospector/llm/llm_service.py
@@ -3,9 +3,11 @@
 import validators
 from langchain_core.language_models.llms import LLM
 from langchain_core.output_parsers import StrOutputParser
+from requests import HTTPError
 
 from llm.instantiation import create_model_instance
-from llm.prompts import prompt_best_guess
+from llm.prompts.classify_commit import zero_shot as cc_zero_shot
+from llm.prompts.get_repository_url import prompt_best_guess
 from log.logger import logger
 from util.config_parser import LLMServiceConfig
 from util.singleton import Singleton
@@ -74,3 +76,53 @@ def get_repository_url(self, advisory_description, advisory_references) -> str:
raise RuntimeError(f"Prompt-model chain could not be invoked: {e}")

return url

+    def classify_commit(
+        self, diff: str, repository_name: str, commit_message: str
+    ) -> bool:
+        """Ask an LLM whether a commit is security relevant or not. The response is
+        parsed into a boolean.
+
+        Args:
+            diff (str): The commit diff to classify
+            repository_name (str): The name of the repository the commit belongs to
+            commit_message (str): The commit's message
+
+        Returns:
+            True if the commit is deemed security relevant, False if not.
+
+        Raises:
+            RuntimeError if the model invocation fails or the response is not valid.
+        """
+        try:
+            chain = cc_zero_shot | self.model | StrOutputParser()
+
+            is_relevant = chain.invoke(
+                {
+                    "diff": diff,
+                    "repository_name": repository_name,
+                    "commit_message": commit_message,
+                }
+            )
+            logger.info(f"LLM returned is_relevant={is_relevant}")
+
+        except HTTPError as e:
+            # if the diff is too big, a 400 error is returned -> silently ignore by returning False for this commit
+            status_code = e.response.status_code
+            if status_code == 400:
+                return False
+            raise RuntimeError(f"Prompt-model chain could not be invoked: {e}")
+        except Exception as e:
+            raise RuntimeError(f"Prompt-model chain could not be invoked: {e}")
+
+        if is_relevant in [
+            "True",
+            "ANSWER:True",
+            "```ANSWER:True```",
+        ]:
+            return True
+        elif is_relevant in [
+            "False",
+            "ANSWER:False",
+            "```ANSWER:False```",
+        ]:
+            return False
+        else:
+            raise RuntimeError(f"The model returned an invalid response: {is_relevant}")
6 changes: 3 additions & 3 deletions prospector/llm/prompts/classify_commit.py
@@ -2,8 +2,8 @@

 zero_shot = PromptTemplate.from_template(
     """Is the following commit security relevant or not?
-Please provide the output as a boolean value: ```ANSWER:```
-If it is security relevant just answer ```ANSWER:True``` otherwise answer ```ANSWER:False```.
+Please provide the output as a boolean value, either True or False.
+If it is security relevant just answer True otherwise answer False. Do not return anything else.
 To provide you with some context, the name of the repository is: {repository_name}, and the
 commit message is: {commit_message}.
@@ -12,5 +12,5 @@
 {diff}\n
-```ANSWER: ```\n"""
+Your answer:\n"""
 )
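
As a quick check of what the revised template renders, it can be formatted directly; the sample values below are invented:

from llm.prompts.classify_commit import zero_shot

# Fill the template's three placeholders and print the exact prompt string
# that would be sent to the model.
print(
    zero_shot.format(
        repository_name="example/repo",
        commit_message="sanitize user-supplied paths",
        diff="@@ -1,3 +1,3 @@ ...",
    )
)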
File renamed without changes.
