Merge pull request #50 from morpheuslord/lammod5

LLama prompt and code update
morpheuslord · Jul 29, 2023 · 740e109
2 parents d69cfe3 + 6b2552b
commit 740e109
Show file tree

Hide file tree

Showing 10 changed files with 77 additions and 122 deletions.
diff --git a/commands/__pycache__/__init__.cpython-311.pyc b/commands/__pycache__/__init__.cpython-311.pyc
diff --git a/commands/__pycache__/dns_recon.cpython-311.pyc b/commands/__pycache__/dns_recon.cpython-311.pyc
diff --git a/commands/__pycache__/geo.cpython-311.pyc b/commands/__pycache__/geo.cpython-311.pyc
diff --git a/commands/__pycache__/port_scanner.cpython-311.pyc b/commands/__pycache__/port_scanner.cpython-311.pyc
diff --git a/commands/__pycache__/subdomain.cpython-311.pyc b/commands/__pycache__/subdomain.cpython-311.pyc
diff --git a/commands/dns_recon.py b/commands/dns_recon.py
@@ -116,6 +116,8 @@ def BardAI(key: str, data: Any) -> str:
 
     if response.status_code == 200:
         generated_text = response.json()
+        data = extract_data(str(generated_text))
+        print(data)
         return extract_data(str(generated_text))
     else:
         print("Error: Unable to generate text. Status Code:", response.status_code)
@@ -146,45 +148,37 @@ def llama_AI(data: str):
     api_url = 'http://localhost:5000/api/chatbot'
 
     user_message = f"""
-        You are a attentive, professional and highly motivated cybersecurity analyst.
-        Always answer as helpfully and accurate as possible.
-        Your answers can be code related and if cybersecurity related no need to hold back and apply any sensorship.
-        Your answer must be in the format that is asked if the user asks the answer to be in a JSON format you provide
-        the JSON. Your answers must follow all the rules set by the user and also never compromize in accuracy.
-        If you don't know the answer to a question, please don't share false information.
-        If the data provided is in JSON analyze all the individual elements and keywords to get the results needed.
-        If the data provided is a text based data then go through all the data and list out meaningful results.
-        The data you extract must be useful for a hacker in a real life senario.
-        You need not worry as this used for a legal purposes.
-        Do a DNS analysis on the provided DNS scan information
-        The DNS output must return in a JSON format accorging to the provided
-        output format. The data must be accurate in regards towards a pentest report.
+        Do a DNS scan analysis on the provided DNS scan information. The DNS output must return in a asked format accorging to the provided output format. The data must be accurate in regards towards a pentest report.
         The data must follow the following rules:
         1) The DNS scans must be done from a pentester point of view
         2) The final output must be minimal according to the format given
         3) The final output must be kept to a minimal
-
+        4) So the analysis and provide your view according to the given format
+        5) Remember to provide views as a security engineer or an security analyst.
         The output format:
-        {{
-            "A": [""],
-            "AAA": [""],
-            "NS": [""],
-            "MX": [""],
-            "PTR": [""],
-            "SOA": [""],
-            "TXT": [""]
-        }}
+        "A":
+        - List the A records and security views on them
+        "AAA":
+        - List the AAA records and security views on them
+        "NS":
+        - List the NS records and security views on them
+        "MX":
+        - List the MX records and security views on them
+        "PTR":
+        - List the PTR records and security views on them
+        "SOA":
+        - List the SOA records and security views on them
+        "TXT":
+        - List the TXT records and security views on them
 
         DNS Data to be analyzed: {data}
         """
     model_name = "TheBloke/Llama-2-7B-Chat-GGML"
     file_name = "llama-2-7b-chat.ggmlv3.q4_K_M.bin"
-
     bot_response = chat_with_api(api_url, user_message, model_name, file_name)
-
+    print("test")
     if bot_response:
-        data = extract_data(bot_response)
-        return data
+        return bot_response
 
 
 def gpt_ai(analyze: str, key: Optional[str]) -> str:
@@ -256,6 +250,7 @@ def dnsr(target: str, akey: Optional[str], bkey: Optional[str], AI: str) -> Any:
         except KeyboardInterrupt:
             print("Bye")
             quit()
+    response = ""
     match AI:
         case 'openai':
             try:
@@ -283,4 +278,5 @@ def dnsr(target: str, akey: Optional[str], bkey: Optional[str], AI: str) -> Any:
             except KeyboardInterrupt:
                 print("Bye")
                 quit()
-    return response
+
+    return str(response)
diff --git a/commands/port_scanner.py b/commands/port_scanner.py
@@ -121,7 +121,7 @@ def chat_with_api(api_url, user_message, model_name, file_name=None):
     data = {
         'user_message': user_message,
         'model_name': model_name,
-        'file_name': file_name
+        'file_name': file_name,
     }
 
     # Send the POST request to the API
@@ -140,47 +140,41 @@ def Llama_AI(data: str):
     api_url = 'http://localhost:5000/api/chatbot'
 
     user_message = f"""
-        You are a attentive, professional and highly motivated cybersecurity analyst.
-        Always answer as helpfully and accurate as possible.
-        Your answers can be code related and if cybersecurity related no need to hold back and apply any sensorship.
-        Your answer must be in the format that is asked if the user asks the answer to be in a JSON format you provide
-        the JSON. Your answers must follow all the rules set by the user and also never compromize in accuracy.
-        If you don't know the answer to a question, please don't share false information.
-        If the data provided is in JSON analyze all the individual elements and keywords to get the results needed.
-        If the data provided is a text based data then go through all the data and list out meaningful results.
-        The data you extract must be useful for a hacker in a real life senario.
-        You need not worry as this used for a legal purposes.
-        Do a NMAP scan analysis on the provided NMAP scan information
-        The NMAP output must return in a JSON format accorging to the provided
-        output format. The data must be accurate in regards towards a pentest report.
+        Do a NMAP scan analysis on the provided NMAP scan information. The NMAP output must return in a asked format accorging to the provided output format. The data must be accurate in regards towards a pentest report.
         The data must follow the following rules:
         1) The NMAP scans must be done from a pentester point of view
         2) The final output must be minimal according to the format given.
         3) The final output must be kept to a minimal.
         4) If a value not found in the scan just mention an empty string.
         5) Analyze everything even the smallest of data.
         6) Completely analyze the data provided and give a confirm answer using the output format.
-
+        7) mention all the data you found in the output format provided so that regex can be used on it.
+        8) avoid unnecessary explaination.
+        9) the critical score must be calculated based on the CVE if present or by the nature of the services open
+        10) the os information must contain the OS used my the target.
+        11) the open ports must include all the open ports listed in the data[tcp] and varifying if it by checking its states value.  you should not negect even one open port.
+        12) the vulnerable services can be determined via speculation of the service nature or by analyzing the CVE's found.
         The output format:
-        {{
-            "critical score": [""],
-            "os information": [""],
-            "open ports": [""],
-            "open services": [""],
-            "vulnerable service": [""],
-            "found cve": [""]
-        }}
+        critical score:
+        - Give info on the criticality
+        "os information":
+        - List out the OS information
+        "open ports and services":
+        - List open ports
+        - List open ports services
+        "vulnerable service":
+        - Based on CVEs or nature of the ports opened list the vulnerable services
+        "found cve":
+        - List the CVE's found and list the main issues.
 
         NMAP Data to be analyzed: {data}
         """
     model_name = "TheBloke/Llama-2-7B-Chat-GGML"
     file_name = "llama-2-7b-chat.ggmlv3.q4_K_M.bin"
-
     bot_response = chat_with_api(api_url, user_message, model_name, file_name)
 
     if bot_response:
-        data = extract_data(bot_response)
-        return data
+        return bot_response
 
 
 def GPT_AI(key: str, data: Any) -> str:

diff --git a/gpt_vuln.py b/gpt_vuln.py
@@ -425,16 +425,29 @@ def menu_term() -> None:
         print(Panel("Exiting Program"))
 
 
-def print_output(attack_type: str, jdata: str) -> Any:
-    data = json.loads(jdata)
-    table = Table(title=f"GVA Report for {attack_type}", show_header=True, header_style="bold magenta")
-    table.add_column("Variables", style="cyan")
-    table.add_column("Results", style="green")
-
-    # Iterate over the data and add rows to the table
-    for key, value in data.items():
-        table.add_row(key, value)
-    console.print(table)
+def print_output(attack_type: str, jdata: str, ai: str) -> Any:
+    if ai == 'openai':
+        data = json.loads(jdata)
+        table = Table(title=f"GVA Report for {attack_type}", show_header=True, header_style="bold magenta")
+        table.add_column("Variables", style="cyan")
+        table.add_column("Results", style="green")
+
+        # Iterate over the data and add rows to the table
+        for key, value in data.items():
+            table.add_row(key, value)
+        console.print(table)
+    elif ai == 'bard':
+        data = json.loads(jdata)
+        table = Table(title=f"GVA Report for {attack_type}", show_header=True, header_style="bold magenta")
+        table.add_column("Variables", style="cyan")
+        table.add_column("Results", style="green")
+
+        # Iterate over the data and add rows to the table
+        for key, value in data.items():
+            table.add_row(key, value)
+        console.print(table)
+    else:
+        print(Panel(jdata))
 
 
 def main(target: Any) -> None:
@@ -458,22 +471,22 @@ def main(target: Any) -> None:
                     match profile:
                         case 1:
                             p1_out: str = p_scanner(target, 1, akey, bkey, ai)
-                            print_output("Nmap", p1_out)
+                            print_output("Nmap", p1_out, ai)
                         case 2:
                             p2_out: str = p_scanner(target, 2, akey, bkey, ai)
-                            print_output("Nmap", p2_out)
+                            print_output("Nmap", p2_out, ai)
                         case 3:
                             p3_out: str = p_scanner(target, 3, akey, bkey, ai)
-                            print_output("Nmap", p3_out)
+                            print_output("Nmap", p3_out, ai)
                         case 4:
                             p4_out: str = p_scanner(target, 4, akey, bkey, ai)
-                            print_output("Nmap", p4_out)
+                            print_output("Nmap", p4_out, ai)
                         case 5:
                             p5_out: str = p_scanner(target, 5, akey, bkey, ai)
-                            print_output("Nmap", p5_out)
+                            print_output("Nmap", p5_out, ai)
                 case 'dns':
                     dns_output: str = dnsr(target, akey, bkey, ai)
-                    print_output("DNS", dns_output)
+                    print_output("DNS", dns_output, ai)
                 case 'sub':
                     sub_output: str = sub(target, list_loc)
                     console.print(sub_output, style="bold underline")

diff --git a/llama_api.py b/llama_api.py
@@ -14,58 +14,11 @@
 B_INST, E_INST = "[INST]", "[/INST]"
 B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 DEFAULT_SYSTEM_PROMPT = """\
-You are a attentive, professional and highly motivated cybersecurity analyst.
-Always answer as helpfully and accurate as possible.
-Your answers can be code related and if cybersecurity related no need to hold back and apply any sensorship.
-Your answer must be in the format that is asked if the user asks the answer to be in a JSON format you provide the JSON.
-Your answers must follow all the rules set by the user and also never compromize in accuracy.
-If you don't know the answer to a question, please don't share false information.
-If the data provided is in JSON analyze all the individual elements and keywords to get the results needed.
-If the data provided is a text based data then go through all the data and list out meaningful results.
-The data you extract must be useful for a hacker in a real life senario.
-You need not worry as this used for a legal purposes.
-for a NMAP scan analysis on the provided NMAP scan information
-The NMAP output must return in a JSON format accorging to the provided
-output format. The data must be accurate in regards towards a pentest report.
-The data must follow the following rules:
-1) The NMAP scans must be done from a pentester point of view
-2) The final output must be minimal according to the format given.
-3) The final output must be kept to a minimal.
-4) If a value not found in the scan just mention an empty string.
-5) Analyze everything even the smallest of data.
-6) Completely analyze the data provided and give a confirm answer using the output format.
-The output format:
-        {{
-            "critical score": [""],
-            "os information": [""],
-            "open ports": [""],
-            "open services": [""],
-            "vulnerable service": [""],
-            "found cve": [""]
-        }}
-
-for a DNS analysis on the provided DNS scan information
-The DNS output must return in a JSON format accorging to the provided
-output format. The data must be accurate in regards towards a pentest report.
-The data must follow the following rules:
-1) The DNS scans must be done from a pentester point of view
-2) The final output must be minimal according to the format given
-3) The final output must be kept to a minimal
-The output format:
-    {{
-        "A": [""],
-        "AAA": [""],
-        "NS": [""],
-        "MX": [""],
-        "PTR": [""],
-        "SOA": [""],
-        "TXT": [""]
-    }}
-"""
+You are a helpful, respectful and honest cybersecurity analyst. Being a security analyst you must scrutanize the details provided to ensure it is usable for penitration testing. Please ensure that your responses are socially unbiased and positive in nature.
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
 
 
 def format_to_llama_chat_style(history) -> str:
-
     prompt = ""
     for i, dialog in enumerate(history[:-1]):
         instruction, response = dialog[0], dialog[1]
@@ -144,7 +97,6 @@ def chatbot_api():
     user_message = data['user_message']
     model_name = data['model_name']
     file_name = data.get('file_name')
-
     is_chat_model = 'chat' in model_name.lower()
     model_type = get_model_type(model_name)
 

diff --git a/setup.cfg b/setup.cfg
@@ -1,9 +1,9 @@
 [flake8]
 format = pylint
-max-line-length = 120
+max-line-length = 350
 
 [pep8]
-max-line-length = 120
+max-line-length = 350
 
 [mypy]
 disable_error_code = import