diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..bc1a7db --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,35 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "RunServiceOnce", + "type": "python", + "request": "launch", + "module": "assemblyline_v4_service.dev.run_service_once", + "cwd": "${workspaceFolder}", + "args": [ + "-d", + "badlist.badlist.Badlist", + "/path/to/sample" + ], + "justMyCode": false, + }, + { + "name": "RunUpdater", + "type": "python", + "request": "launch", + "module": "badlist.update_server", + "cwd": "${workspaceFolder}", + "env": { + "UPDATER_DIR": "${workspaceFolder}/updates", + "SERVICE_PATH": "badlist.badlist.Badlist", + "AL_SERVICE_NAME": "Badlist", + "UI_SERVER": "https://nginx/" + }, + "justMyCode": false, + }, + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 49d23d0..53d8bc1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,6 @@ { "editor.codeActionsOnSave": { - "source.organizeImports": true, + "source.organizeImports": true }, "editor.formatOnSave": true, "editor.rulers": [ @@ -8,31 +8,29 @@ ], "editor.tabSize": 4, "editor.wordWrap": "wordWrapColumn", - "editor.wordWrapColumn": 120, + "editor.wordWrapColumn": 180, "files.insertFinalNewline": true, "files.trimFinalNewlines": true, "files.trimTrailingWhitespace": true, "isort.args": [ "-l", "120", - "--profile=black", + "--profile=black" // "--src=${workspaceFolder}" ], - "python.formatting.autopep8Args": [ - "--max-line-length", - "120", - "--experimental" - ], - "python.formatting.provider": "autopep8", - "python.formatting.blackArgs": [ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "black-formatter.args": [ "--line-length=120" ], - 
"python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.flake8Args": [ + "flake8.args": [ "--max-line-length=120", //Added the ignore of E203 for now : https://github.com/PyCQA/pycodestyle/issues/373 "--ignore=E203,W503" ], - "python.linting.pylintEnabled": false, + "autopep8.args": [ + "--max-line-length=120", + ], + "python.testing.pytestEnabled": true, } diff --git a/Dockerfile b/Dockerfile index e395e38..b2e88c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch -ENV SERVICE_PATH badlist.Badlist +ENV SERVICE_PATH badlist.badlist.Badlist # Copy Badlist service code WORKDIR /opt/al_service diff --git a/badlist.py b/badlist/badlist.py similarity index 100% rename from badlist.py rename to badlist/badlist.py diff --git a/badlist/update_server.py b/badlist/update_server.py new file mode 100644 index 0000000..0a15eed --- /dev/null +++ b/badlist/update_server.py @@ -0,0 +1,314 @@ +import csv +import json +import os +import re +from copy import deepcopy +from queue import Queue +from typing import List, Set + +from assemblyline.odm.base import ( + DOMAIN_ONLY_REGEX, + FULL_URI, + IP_ONLY_REGEX, + MD5_REGEX, + SHA1_REGEX, + SHA256_REGEX, + SSDEEP_REGEX, + TLSH_REGEX, +) +from assemblyline_v4_service.updater.updater import ServiceUpdater + +BLOCKLIST_UPDATE_BATCH = int(os.environ.get("BLOCKLIST_UPDATE_BATCH", "1000")) + +IOC_CHECK = { + "ip": re.compile(IP_ONLY_REGEX).match, + "domain": re.compile(DOMAIN_ONLY_REGEX).match, + "uri": re.compile(FULL_URI).match, + "sha256": re.compile(SHA256_REGEX).match, + "sha1": re.compile(SHA1_REGEX).match, + "md5": re.compile(MD5_REGEX).match, + "ssdeep": re.compile(SSDEEP_REGEX).match, + "tlsh": re.compile(TLSH_REGEX).match, + "malware_family": lambda x: True, +} + + +NETWORK_IOC_TYPES = ["ip", "domain", "uri"] +FILEHASH_TYPES = ["sha256", "sha1", "md5", "ssdeep", "tlsh"] + + +class SetEncoder(json.JSONEncoder): + def default(self, 
o): + if isinstance(o, set): + return list(o) + return json.JSONEncoder.default(self, o)
+
+
+class BadlistUpdateServer(ServiceUpdater):
+    """Updater that converts external threat feeds into badlist entries.
+
+    Every update source has an entry under the service's ``updater`` config whose ``type`` is one of:
+
+    * ``blocklist``: CSV/JSON feed of IOCs that are pushed to the badlist,
+    * ``malware_family_list``: flat list of accepted malware family names,
+    * ``attribution_list``: flat list of accepted actor names.
+
+    The two name lists are used to validate the family/actor columns of blocklist sources.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Normalized names accepted when validating the family/actor values found in blocklists
+        self.malware_families: Set[str] = set()
+        self.attributions: Set[str] = set()
+        # Names of the sources that still have to be fetched
+        self.update_queue = Queue()
+
+    def do_local_update(self):
+        """No-op: this updater performs no local update step."""
+        ...
+
+    # A sanity check to make sure we do in fact have things to send to services
+    def _inventory_check(self) -> bool:
+        """Queue every source whose data is missing and report whether the inventory is usable.
+
+        Attribution and malware family sources are queued while the matching in-memory set is empty.
+        Blocklist sources are queued (and an update is triggered) when the badlist holds no item for them.
+
+        :return: ``True``
+        """
+        # NOTE(review): ``success`` starts as True, so the "at least one blocklist" check below can never
+        # change the result. Confirm whether it was meant to start as False before relying on the return.
+        success = True
+
+        def names_of_type(source_type: str) -> List[str]:
+            # Names of the configured update sources whose updater config declares the given type
+            return [
+                _s.name
+                for _s in self._service.update_config.sources
+                if self._service.config["updater"][_s.name]["type"] == source_type
+            ]
+
+        if not self.attributions:
+            # Queue an update for any sources that contribute to attributions list
+            for name in names_of_type("attribution_list"):
+                self.update_queue.put(name)
+
+        if not self.malware_families:
+            # Queue an update for any sources that contribute to the malware families list
+            for name in names_of_type("malware_family_list"):
+                self.update_queue.put(name)
+
+        blocklist_sources = set(names_of_type("blocklist"))
+
+        # The name is quoted because source names may contain spaces (e.g. "threatview.io IP Blocklist"),
+        # which would otherwise split the query into several terms.
+        missing_blocklists = {
+            s
+            for s in blocklist_sources
+            if self.datastore.badlist.search(f'sources.name:"{s}"', rows=0)["total"] == 0
+        }
+
+        if missing_blocklists != blocklist_sources:
+            # We have at least one blocklist source to work with for the time being
+            success = True
+
+        # Trigger an update for the blocklists that are missing
+        if missing_blocklists:
+            for source in missing_blocklists:
+                self.update_queue.put(source)
+            self.trigger_update()
+
+        return success
+
+    def import_update(self, files_sha256, al_client, source_name, default_classification):
+        """Import the files fetched for ``source_name`` as described by that source's ``updater`` config.
+
+        :param files_sha256: iterable of 2-tuples whose first element is a file path (the second is ignored)
+        :param al_client: client whose ``badlist.add_update_many`` receives the batches of badlist items
+        :param source_name: key into ``config["updater"]``; also recorded as the source name of every item
+        :param default_classification: classification set on every item and on its source entry
+        :raises KeyError: if ``source_name`` has no ``updater`` config or the config lacks ``type``/``format``
+        """
+        source_cfg = self._service.config["updater"][source_name]
+        blocklist_batch = []
+
+        def column(row: List[str], index) -> str:
+            # Value of a CSV column, or "" when the column is not configured or the row is too short.
+            # ``index is None`` (not truthiness) is the "not configured" test so that column 0 is usable.
+            if index is None:
+                return ""
+            try:
+                return row[index]
+            except IndexError:
+                return ""
+
+        def sanitize_data(data: str, data_type: str, validate=True) -> List[str]:
+            # Normalize a (possibly comma-separated) family/actor value into a list of upper-case names.
+            # With ``validate`` only names present in the matching known-names set are kept.
+            if not data:
+                return []
+
+            # Normalize data (parsing based off Malpedia API output)
+            data = data.split(".", 1)[-1]
+            data = data.replace("-", "").replace("_", "").replace("#", "").replace('"', "").upper()
+            names = data.split(",")
+
+            if not validate:
+                return names
+
+            if data_type == "malware_family":
+                return [name for name in names if name in self.malware_families]
+            if data_type == "attribution":
+                return [name for name in names if name in self.attributions]
+            # Unknown type: nothing can be validated, so nothing is kept
+            return []
+
+        def update_blocklist(
+            ioc_type: str,
+            ioc_value: str,
+            malware_family: List[str],
+            attribution: List[str],
+            references: List[str],
+            bl_type: str,
+        ):
+            # Build the badlist item(s) for one IOC, add them to the batch and flush the batch when full.
+
+            # Feeds may leave the reference empty or null: keep only well-formed URIs
+            references = [r for r in references if isinstance(r, str) and re.match(FULL_URI, r)]
+            badlist_items = []
+
+            # Normalize IOC values for when performing lookups
+            ioc_value = ioc_value.lower()
+
+            # Build item for badlist
+            badlist_item_base = {
+                "classification": default_classification,
+                "sources": [
+                    {
+                        "classification": default_classification,
+                        "name": source_name,
+                        "reason": ["IOC was reported by source as malicious"] + references,
+                        "type": "external",
+                    }
+                ],
+            }
+
+            if bl_type == "tag":
+                if ioc_type in NETWORK_IOC_TYPES:
+                    # Tag applies to both static and dynamic
+                    for network_type in ["static", "dynamic"]:
+                        badlist_item = deepcopy(badlist_item_base)
+                        badlist_item.update(
+                            {
+                                "type": "tag",
+                                "tag": {"type": f"network.{network_type}.{ioc_type}", "value": ioc_value},
+                            }
+                        )
+                        badlist_items.append(badlist_item)
+            elif bl_type == "file":
+                # Set hash information
+                badlist_item = deepcopy(badlist_item_base)
+                badlist_item.update(
+                    {
+                        "type": "file",
+                        "hashes": {ioc_type: ioc_value},
+                    }
+                )
+                badlist_items.append(badlist_item)
+
+            for bl_item in badlist_items:
+                # See if there's any attribution details we can add to the item before adding to the list.
+                # The configured defaults are copied so that one IOC's details never leak into the service
+                # config or into the items built for other IOCs.
+                attr = deepcopy(source_cfg.get("default_attribution") or {})
+                if malware_family:
+                    attr["family"] = list(set(malware_family))
+
+                if attribution:
+                    attr["actor"] = list(set(attribution))
+
+                bl_item["attribution"] = attr
+
+            blocklist_batch.extend(badlist_items)
+            if len(blocklist_batch) > BLOCKLIST_UPDATE_BATCH:
+                al_client.badlist.add_update_many(blocklist_batch)
+                blocklist_batch.clear()
+
+        if source_cfg["type"] == "blocklist":
+            # This source is meant to contribute to the blocklist
+            ignore_terms = source_cfg.get("ignore_terms", [])
+            if source_cfg["format"] == "csv":
+                start_index = source_cfg.get("start") or 0
+                reference_col = source_cfg.get("reference")
+                family_col = source_cfg.get("malware_family")
+                attribution_col = source_cfg.get("attribution")
+                # Only the IOC types this source actually maps to a column
+                ioc_columns = [
+                    (ioc_type, source_cfg[ioc_type])
+                    for ioc_type in NETWORK_IOC_TYPES + FILEHASH_TYPES
+                    if source_cfg.get(ioc_type) is not None
+                ]
+
+                for file, _ in files_sha256:
+                    with open(file, "r") as fp:
+                        # Stream the rows instead of loading the whole feed just to skip its header
+                        for row_number, row in enumerate(csv.reader(fp, delimiter=",")):
+                            if row_number < start_index or not row:
+                                # Header row, or no data in row: skip
+                                continue
+                            row = [r.strip(' "') for r in row]
+                            joined_row = ",".join(row)
+                            if any(t in joined_row for t in ignore_terms) or joined_row.startswith("#"):
+                                # Skip row
+                                continue
+
+                            reference = column(row, reference_col)
+                            references = [reference] if reference else []
+                            # Get malware family
+                            malware_family = sanitize_data(column(row, family_col), data_type="malware_family")
+                            # Get attribution
+                            attribution = sanitize_data(column(row, attribution_col), data_type="attribution")
+
+                            # Iterate over all IOC types
+                            for ioc_type, ioc_col in ioc_columns:
+                                ioc_value = column(row, ioc_col)
+
+                                if ioc_type == "ip":
+                                    # Ensure port information is not included
+                                    ioc_value = ioc_value.split(":", 1)[0]
+
+                                # If there are multiple IOC types in the same column, verify the IOC type
+                                if not IOC_CHECK[ioc_type](ioc_value):
+                                    continue
+                                update_blocklist(
+                                    ioc_type,
+                                    ioc_value,
+                                    malware_family,
+                                    attribution,
+                                    references,
+                                    bl_type="tag" if ioc_type in NETWORK_IOC_TYPES else "file",
+                                )
+
+            elif source_cfg["format"] == "json":
+                for file, _ in files_sha256:
+                    with open(file, "r") as fp:
+                        blocklist_data = json.load(fp)
+                    if not isinstance(blocklist_data, list):
+                        # Only a top-level list of records is supported
+                        continue
+                    for data in blocklist_data:
+                        json_dump = json.dumps(data)
+                        if any(t in json_dump for t in ignore_terms):
+                            # Skip block
+                            continue
+                        references = (
+                            [] if not source_cfg.get("reference") else [data.get(source_cfg.get("reference"))]
+                        )
+                        malware_family = sanitize_data(
+                            data.get(source_cfg.get("malware_family")), data_type="malware_family"
+                        )
+
+                        # Get attribution
+                        attribution = sanitize_data(data.get(source_cfg.get("attribution")), data_type="attribution")
+
+                        for ioc_type in NETWORK_IOC_TYPES + FILEHASH_TYPES:
+                            ioc_value = data.get(source_cfg.get(ioc_type))
+                            if ioc_value:
+                                update_blocklist(
+                                    ioc_type,
+                                    ioc_value,
+                                    malware_family,
+                                    attribution,
+                                    references,
+                                    bl_type="tag" if ioc_type in NETWORK_IOC_TYPES else "file",
+                                )
+            # Send whatever is left over from the last, partially filled batch
+            if blocklist_batch:
+                al_client.badlist.add_update_many(blocklist_batch)
+
+        elif source_cfg["type"] == "malware_family_list":
+            # This source is meant to contribute to the list of valid malware families
+            if source_cfg["format"] == "list":
+                # Expect a flat list containing a series of malware family names
+                for file, _ in files_sha256:
+                    # Add normalized family names to list
+                    with open(file, "r") as fp:
+                        for malware_family in json.load(fp):
+                            self.malware_families = self.malware_families.union(
+                                sanitize_data(malware_family, data_type="malware_family", validate=False)
+                            )
+        elif source_cfg["type"] == "attribution_list":
+            # This source is meant to contribute to the list of valid attribution names
+            if source_cfg["format"] == "list":
+                # Expect a flat list containing a series of attribution names
+                for file, _ in files_sha256:
+                    # Add normalized attribution names to list. Each name is normalized on its own: joining
+                    # the list first would make the "strip up to the first dot" step of the normalization
+                    # discard every name that precedes the first dot found anywhere in the list.
+                    with open(file, "r") as fp:
+                        for attribution_name in json.load(fp):
+                            self.attributions = self.attributions.union(
+                                sanitize_data(attribution_name, data_type="attribution", validate=False)
+                            )
+
+
+if __name__ == "__main__":
+    with BadlistUpdateServer() as server:
+        server.serve_forever()
diff --git a/service_manifest.yml b/service_manifest.yml index
fc00070..5bc0a07 100644 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -20,19 +20,6 @@ privileged: true enabled: true -config: - cache_timeout_seconds: 1800 - lookup_md5: false - lookup_sha1: false - lookup_sha256: true - lookup_ssdeep: false - lookup_tlsh: true - -docker_config: - image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG - cpu_cores: 0.4 - ram_mb: 256 - heuristics: - heur_id: 1 name: Badlisted File @@ -49,3 +36,143 @@ heuristics: score: 500 filetype: "*" description: This file is similar to a file found in the list of know bad files + +config: + cache_timeout_seconds: 1800 + lookup_md5: false + lookup_sha1: false + lookup_sha256: true + lookup_ssdeep: false + lookup_tlsh: true + lookup_ip: false + lookup_domain: false + lookup_url: false + updater: + phishunt: + type: blocklist + format: csv + uri: 0 + urlhaus: + type: blocklist + format: csv + uri: 2 + malware_family: 6 + reference: 7 + start: 9 + blackbook: + type: blocklist + format: csv + domain: 0 + malware_family: 1 + start: 1 + feodotracker: + type: blocklist + format: json + ip: ip_address + domain: hostname + malware_family: malware + threatfox: + type: blocklist + format: csv + start: 9 + ip: 2 + domain: 2 + uri: 2 + malware_family: 5 + reference: 10 + malpedia: + type: malware_family_list + format: list + malpedia_attribution: + type: attribution_list + format: list + "threatview.io C2 Hunt Feed": + type: blocklist + format: csv + start: 3 + ip: 0 + domain: 2 + reference: 5 + default_attribution: + family: ["COBALT STRIKE"] + "threatview.io IP Blocklist": + type: blocklist + format: csv + ip: 0 + "threatview.io Domain Blocklist": + type: blocklist + format: csv + domain: 0 + "threatview.io MD5 Hash Blocklist": + type: blocklist + format: csv + md5: 0 + "threatview.io URL Blocklist": + type: blocklist + format: csv + uri: 0 + "threatview.io SHA Hash Blocklist": + type: blocklist + format: csv + sha1: 0 + + +docker_config: + image: 
${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG + cpu_cores: 0.4 + ram_mb: 256 + +dependencies: + updates: + container: + allow_internet_access: true + command: ["python", "-m", "badlist.update_server"] + image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG + ports: ["5003"] + ram_mb: 4096 + run_as_core: True + +update_config: + generates_signatures: false + sources: + - name: malpedia + uri: https://malpedia.caad.fkie.fraunhofer.de/api/list/families?format=json + pattern: .*\/families + - name: malpedia_attribution + uri: https://malpedia.caad.fkie.fraunhofer.de/api/list/actors?format=json + pattern: .*\/actors + - name: phishunt + uri: https://phishunt.io/feed.txt + pattern: .*\/feed\.txt + - name: urlhaus + uri: https://urlhaus.abuse.ch/downloads/csv + pattern: .*\/csv\.txt$ + - name: blackbook + uri: https://github.com/stamparm/blackbook.git + pattern: .*\.csv$ + - name: feodotracker + uri: https://feodotracker.abuse.ch/downloads/ipblocklist.json + pattern: .*ipblocklist\.json + - name: threatfox + uri: https://threatfox.abuse.ch/export/csv/full + pattern: .*\.csv + - name: "threatview.io C2 Hunt Feed" + uri: https://threatview.io/Downloads/High-Confidence-CobaltStrike-C2%20-Feeds.txt + pattern: .*\.txt + - name: "threatview.io IP Blocklist" + uri: https://threatview.io/Downloads/IP-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io Domain Blocklist" + uri: https://threatview.io/Downloads/DOMAIN-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io MD5 Hash Blocklist" + uri: https://threatview.io/Downloads/MD5-HASH-ALL.txt + pattern: .*\.txt + - name: "threatview.io URL Blocklist" + uri: https://threatview.io/Downloads/URL-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io SHA Hash Blocklist" + uri: https://threatview.io/Downloads/SHA-HASH-FEED.txt + pattern: .*\.txt + update_interval_seconds: 900 # Every 15 minutes + wait_for_update: false