From 10b0ca168f4764b1121b6f316fdc041ee825fac4 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Thu, 23 Nov 2023 19:07:35 +0000 Subject: [PATCH 1/5] Take on updater logic from NetRep --- .vscode/settings.json | 26 ++-- Dockerfile | 2 +- badlist.py => badlist/badlist.py | 0 badlist/update_server.py | 240 +++++++++++++++++++++++++++++++ service_manifest.yml | 105 ++++++++++++-- 5 files changed, 345 insertions(+), 28 deletions(-) rename badlist.py => badlist/badlist.py (100%) create mode 100644 badlist/update_server.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 49d23d0..53d8bc1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,6 @@ { "editor.codeActionsOnSave": { - "source.organizeImports": true, + "source.organizeImports": true }, "editor.formatOnSave": true, "editor.rulers": [ @@ -8,31 +8,29 @@ ], "editor.tabSize": 4, "editor.wordWrap": "wordWrapColumn", - "editor.wordWrapColumn": 120, + "editor.wordWrapColumn": 180, "files.insertFinalNewline": true, "files.trimFinalNewlines": true, "files.trimTrailingWhitespace": true, "isort.args": [ "-l", "120", - "--profile=black", + "--profile=black" // "--src=${workspaceFolder}" ], - "python.formatting.autopep8Args": [ - "--max-line-length", - "120", - "--experimental" - ], - "python.formatting.provider": "autopep8", - "python.formatting.blackArgs": [ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "black-formatter.args": [ "--line-length=120" ], - "python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.flake8Args": [ + "flake8.args": [ "--max-line-length=120", //Added the ignore of E203 for now : https://github.com/PyCQA/pycodestyle/issues/373 "--ignore=E203,W503" ], - "python.linting.pylintEnabled": false, + "autopep8.args": [ + "--max-line-length=120", + ], + "python.testing.pytestEnabled": true, } diff --git a/Dockerfile b/Dockerfile index e395e38..b2e88c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ ARG branch=latest FROM cccs/assemblyline-v4-service-base:$branch -ENV SERVICE_PATH badlist.Badlist +ENV SERVICE_PATH badlist.badlist.Badlist # Copy Badlist service code WORKDIR /opt/al_service diff --git a/badlist.py b/badlist/badlist.py similarity index 100% rename from badlist.py rename to badlist/badlist.py diff --git a/badlist/update_server.py b/badlist/update_server.py new file mode 100644 index 0000000..009b0f4 --- /dev/null +++ b/badlist/update_server.py @@ -0,0 +1,240 @@ +import csv +import json +import os +import re +from typing import List, Set + +from assemblyline.odm.base import DOMAIN_ONLY_REGEX, FULL_URI, IP_ONLY_REGEX +from assemblyline.odm.models.badlist import Badlist as BadlistItem +from assemblyline_v4_service.updater.updater import ServiceUpdater + +IOC_CHECK = { + "ip": re.compile(IP_ONLY_REGEX).match, + "domain": re.compile(DOMAIN_ONLY_REGEX).match, + "uri": re.compile(FULL_URI).match, + "malware_family": lambda x: True, +} + + +NETWORK_IOC_TYPES = ["ip", "domain", "uri"] + + +class SetEncoder(json.JSONEncoder): + def default(self, o): + if isinstance(o, set): + return list(o) + return json.JSONEncoder.default(self, o) + + +class BadlistUpdateServer(ServiceUpdater): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.malware_families: Set[str] = set() + self.attributions: Set[str] = set() + + # A sanity check to make sure we do in fact have things to send to services + def _inventory_check(self) -> bool: + success = True + + def _trigger_update(source): + self._current_source = source + self.set_source_update_time(0) + self.trigger_update() + + if not self.attributions: + # Trigger an update for any sources that contribute to attributions list + [ + _trigger_update(_s.name) + for _s in self._service.update_config.sources + if self._service.config["updater"][_s.name]["type"] == "attribution_list" + ] + + if not self.malware_families: + # Trigger an update for any sources that contribute to the malware families list + [ + _trigger_update(_s.name) + for _s in self._service.update_config.sources + if self._service.config["updater"][_s.name]["type"] == "malware_family_list" + ] + + blocklist_sources = set( + [ + _s.name + for _s in self._service.update_config.sources + if self._service.config["updater"][_s.name]["type"] == "blocklist" + ] + ) + missing_blocklists = blocklist_sources - set(os.listdir(self._update_dir)) + + if missing_blocklists != blocklist_sources: + # We have at least one blocklist source to work with for the time being + success = True + + # Trigger an update for the blocklists that are missing + [_trigger_update(source) for source in missing_blocklists] + + return success + + def import_update(self, files_sha256, al_client, source_name, default_classification): + def sanitize_data(data: str, type: str, validate=True) -> List[str]: + if not data: + return [] + + # Normalize data (parsing based off Malpedia API output) + data = data.split(".", 1)[-1] + data = data.replace("-", "").replace("_", "").replace("#", "").replace('"', "").lower() + data = data.split(",") if "," in data else [data] + + if not validate: + return data + + if type == "malware_family": + return [d for d in data if d in self.malware_families] + elif type == "attribution": + return [d for d in data if d in self.attributions] + + def update_blocklist( + ioc_type: str, ioc_value: str, malware_family: List[str], attribution: List[str], references: List[str] + ): + references = [r for r in references if re.match(FULL_URI, r)] + + # Normalize IOC values for when performing lookups + ioc_value = ioc_value.lower() + + # Build item for badlist + badlist_item = { + "type": "tag", + "tag": {"type": f"network.static.{ioc_type}", "value": ioc_value}, + "sources": [ + { + "classification": default_classification, + "name": source_name, + "reason": ["IOC was reported by source as malicious"] + references, + "type": "external", + } + ], + } + + # See if there's any attribution details we can add to the item before adding to the list + attr = {} + if malware_family: + attr["family"] = malware_family + + if attribution: + attr["actor"] = attribution + + if attr: + badlist_item["attribution"] = attr + + al_client.badlist.add_update(badlist_item) + + source_cfg = self._service.config["updater"][source_name] + + if source_cfg["type"] == "blocklist": + # This source is meant to contribute to the blocklist + ignore_terms = source_cfg.get("ignore_terms", []) + if source_cfg["format"] == "csv": + start_index = source_cfg.get("start", 0) + for file, _ in files_sha256: + with open(file, "r") as fp: + for row in list(csv.reader(fp, delimiter=","))[start_index:]: + row = [r.strip(' "') for r in row] + joined_row = ",".join(row) + if any(t in joined_row for t in ignore_terms) or joined_row.startswith("#"): + # Skip row + continue + + references = [] if not source_cfg.get("reference") else [row[source_cfg["reference"]]] + # Get malware family + malware_family = ( + sanitize_data(row[source_cfg["malware_family"]], type="malware_family") + if source_cfg.get("malware_family") + else [] + ) + + # Get attribution + attribution = ( + sanitize_data(row[source_cfg["attribution"]], type="attribution") + if source_cfg.get("attribution") + else [] + ) + + # Iterate over all IOC types + for ioc_type in NETWORK_IOC_TYPES: + if source_cfg.get(ioc_type) is None: + continue + ioc_value = row[source_cfg[ioc_type]] + + if ioc_type == "ip": + # Ensure port information is not included + ioc_value = ioc_value.split(":", 1)[0] + + # If there are multiple IOC types in the same column, verify the IOC type + if not IOC_CHECK[ioc_type](ioc_value): + continue + update_blocklist(ioc_type, ioc_value, malware_family, attribution, references) + + elif source_cfg["format"] == "json": + for file, _ in files_sha256: + with open(file, "r") as fp: + blocklist_data = json.load(fp) + if isinstance(blocklist_data, list): + for data in blocklist_data: + json_dump = json.dumps(data) + if any(t in json_dump for t in ignore_terms): + # Skip block + continue + references = ( + [] if not source_cfg.get("reference") else [data.get(source_cfg.get("reference"))] + ) + malware_family = sanitize_data( + data.get(source_cfg.get("malware_family")), type="malware_family" + ) + + # Get attribution + attribution = sanitize_data(data.get(source_cfg.get("attribution")), type="attribution") + + for ioc_type in NETWORK_IOC_TYPES: + ioc_value = data.get(source_cfg.get(ioc_type)) + if ioc_value: + update_blocklist(ioc_type, ioc_value, malware_family, attribution, references) + + elif source_cfg["type"] == "malware_family_list": + # This source is meant to contributes to the list of valid malware families + if source_cfg["format"] == "list": + # Expect a flat list containing a series of malware family names + for file, _ in files_sha256: + # Add normalized family names to list + with open(file, "r") as fp: + for malware_family in json.load(fp): + self.malware_families = self.malware_families.union( + set( + sanitize_data( + malware_family, + type="malware_family", + validate=False, + ) + ) + ) + elif source_cfg["type"] == "attribution_list": + # This source is meant to contributes to the list of valid attribution names + if source_cfg["format"] == "list": + # Expect a flat list containing a series of attribution names + for file, _ in files_sha256: + # Add normalized family names to list + with open(file, "r") as fp: + # Let's assume no sanitization is required and just merge the set of names + self.attributions = self.attributions.union( + set( + sanitize_data( + ",".join(json.load(fp)), + type="attribution", + validate=False, + ) + ) + ) + + +if __name__ == "__main__": + with BadlistUpdateServer() as server: + server.serve_forever() diff --git a/service_manifest.yml b/service_manifest.yml index 909327e..50e17ad 100644 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -19,19 +19,6 @@ disable_cache: false enabled: true -config: - cache_timeout_seconds: 1800 - lookup_md5: false - lookup_sha1: false - lookup_sha256: true - lookup_ssdeep: false - lookup_tlsh: true - -docker_config: - image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG - cpu_cores: 0.4 - ram_mb: 256 - heuristics: - heur_id: 1 name: Badlisted File @@ -48,3 +35,95 @@ heuristics: score: 500 filetype: "*" description: This file is similar to a file found in the list of know bad files + +config: + cache_timeout_seconds: 1800 + lookup_md5: false + lookup_sha1: false + lookup_sha256: true + lookup_ssdeep: false + lookup_tlsh: true + lookup_ip: false + lookup_domain: false + lookup_url: false + updater: + phishunt: + type: blocklist + format: csv + uri: 0 + urlhaus: + type: blocklist + format: csv + uri: 2 + malware_family: 6 + reference: 7 + start: 9 + blackbook: + type: blocklist + format: csv + domain: 0 + malware_family: 1 + start: 1 + feodotracker: + type: blocklist + format: json + ip: ip_address + domain: hostname + malware_family: malware + threatfox: + type: blocklist + format: csv + start: 9 + ip: 2 + domain: 2 + uri: 2 + malware_family: 5 + reference: 10 + malpedia: + type: malware_family_list + format: list + malpedia_attribution: + type: attribution_list + format: list + +docker_config: + image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG + cpu_cores: 0.4 + ram_mb: 256 + +dependencies: + updates: + container: + allow_internet_access: true + command: ["python", "-m", "badlist.update_server"] + image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG + ports: ["5003"] + ram_mb: 4096 + run_as_core: True + +update_config: + generates_signatures: false + sources: + - name: malpedia + uri: https://malpedia.caad.fkie.fraunhofer.de/api/list/families?format=json + pattern: .*\/families + - name: malpedia_attribution + uri: https://malpedia.caad.fkie.fraunhofer.de/api/list/actors?format=json + pattern: .*\/actors + - name: phishunt + uri: https://phishunt.io/feed.txt + pattern: .*\/feed\.txt + - name: urlhaus + uri: https://urlhaus.abuse.ch/downloads/csv + pattern: .*\/csv\.txt$ + - name: blackbook + uri: https://github.com/stamparm/blackbook.git + pattern: .*\.csv$ + - name: feodotracker + uri: https://feodotracker.abuse.ch/downloads/ipblocklist.json + pattern: .*ipblocklist\.json + - name: threatfox + uri: https://threatfox.abuse.ch/export/csv/full + pattern: .*\.csv + update_interval_seconds: 900 # Every 15 minutes + wait_for_update: false From d1641935ebfe0480efd71fcd518dc3d6269ca8f9 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Fri, 24 Nov 2023 16:05:08 +0000 Subject: [PATCH 2/5] Refer to badlist index to look for missing sources to auto-trigger source updates --- badlist/update_server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/badlist/update_server.py b/badlist/update_server.py index 009b0f4..ad68ef2 100644 --- a/badlist/update_server.py +++ b/badlist/update_server.py @@ -64,7 +64,10 @@ def _trigger_update(source): if self._service.config["updater"][_s.name]["type"] == "blocklist" ] ) - missing_blocklists = blocklist_sources - set(os.listdir(self._update_dir)) + + missing_blocklists = { + s for s in blocklist_sources if self.datastore.badlist.search(f"sources.name:{s}", rows=0)["total"] == 0 + } if missing_blocklists != blocklist_sources: # We have at least one blocklist source to work with for the time being From 6f4c822e6ecae3014dc59c01be2b8d40b0f5f680 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Fri, 24 Nov 2023 16:07:31 +0000 Subject: [PATCH 3/5] Ensure no duplicate attributions are being saved --- badlist/update_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/badlist/update_server.py b/badlist/update_server.py index ad68ef2..d04af1b 100644 --- a/badlist/update_server.py +++ b/badlist/update_server.py @@ -121,10 +121,10 @@ def update_blocklist( # See if there's any attribution details we can add to the item before adding to the list attr = {} if malware_family: - attr["family"] = malware_family + attr["family"] = list(set(malware_family)) if attribution: - attr["actor"] = attribution + attr["actor"] = list(set(attribution)) if attr: badlist_item["attribution"] = attr From 319186e2b4ef6b65660fc2fcf8aeccb1de7b3403 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Mon, 18 Dec 2023 18:48:59 +0000 Subject: [PATCH 4/5] Add support for file-based blocklisting --- .vscode/launch.json | 35 +++++++++++++ badlist/update_server.py | 109 ++++++++++++++++++++++++++++++--------- service_manifest.yml | 48 +++++++++++++++++ 3 files changed, 169 insertions(+), 23 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..bc1a7db --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,35 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "RunServiceOnce", + "type": "python", + "request": "launch", + "module": "assemblyline_v4_service.dev.run_service_once", + "cwd": "${workspaceFolder}", + "args": [ + "-d", + "badlist.badlist.Badlist", + "/path/to/sample" + ], + "justMyCode": false, + }, + { + "name": "RunUpdater", + "type": "python", + "request": "launch", + "module": "badlist.update_server", + "cwd": "${workspaceFolder}", + "env": { + "UPDATER_DIR": "${workspaceFolder}/updates", + "SERVICE_PATH": "badlist.badlist.Badlist", + "AL_SERVICE_NAME": "Badlist", + "UI_SERVER": "https://nginx/" + }, + "justMyCode": false, + }, + ] +} diff --git a/badlist/update_server.py b/badlist/update_server.py index d04af1b..f3413e2 100644 --- a/badlist/update_server.py +++ b/badlist/update_server.py @@ -1,22 +1,36 @@ import csv import json -import os import re +from copy import deepcopy from typing import List, Set -from assemblyline.odm.base import DOMAIN_ONLY_REGEX, FULL_URI, IP_ONLY_REGEX -from assemblyline.odm.models.badlist import Badlist as BadlistItem +from assemblyline.odm.base import ( + DOMAIN_ONLY_REGEX, + FULL_URI, + IP_ONLY_REGEX, + MD5_REGEX, + SHA1_REGEX, + SHA256_REGEX, + SSDEEP_REGEX, + TLSH_REGEX, +) from assemblyline_v4_service.updater.updater import ServiceUpdater IOC_CHECK = { "ip": re.compile(IP_ONLY_REGEX).match, "domain": re.compile(DOMAIN_ONLY_REGEX).match, "uri": re.compile(FULL_URI).match, + "sha256": re.compile(SHA256_REGEX).match, + "sha1": re.compile(SHA1_REGEX).match, + "md5": re.compile(MD5_REGEX).match, + "ssdeep": re.compile(SSDEEP_REGEX).match, + "tlsh": re.compile(TLSH_REGEX).match, "malware_family": lambda x: True, } NETWORK_IOC_TYPES = ["ip", "domain", "uri"] +FILEHASH_TYPES = ["sha256", "sha1", "md5", "ssdeep", "tlsh"] class SetEncoder(json.JSONEncoder): @@ -32,6 +46,9 @@ def __init__(self, *args, **kwargs): self.malware_families: Set[str] = set() self.attributions: Set[str] = set() + def do_local_update(self): + ... + # A sanity check to make sure we do in fact have things to send to services def _inventory_check(self) -> bool: success = True @@ -85,7 +102,7 @@ def sanitize_data(data: str, type: str, validate=True) -> List[str]: # Normalize data (parsing based off Malpedia API output) data = data.split(".", 1)[-1] - data = data.replace("-", "").replace("_", "").replace("#", "").replace('"', "").lower() + data = data.replace("-", "").replace("_", "").replace("#", "").replace('"', "").upper() data = data.split(",") if "," in data else [data] if not validate: @@ -97,17 +114,33 @@ def sanitize_data(data: str, type: str, validate=True) -> List[str]: return [d for d in data if d in self.attributions] def update_blocklist( - ioc_type: str, ioc_value: str, malware_family: List[str], attribution: List[str], references: List[str] + ioc_type: str, + ioc_value: str, + malware_family: List[str], + attribution: List[str], + references: List[str], + bl_type: str, ): + def prepare_item(bl_item): + # See if there's any attribution details we can add to the item before adding to the list + attr = source_cfg.get("default_attribution", {}) + if malware_family: + attr["family"] = list(set(malware_family)) + + if attribution: + attr["actor"] = list(set(attribution)) + + bl_item["attribution"] = attr + references = [r for r in references if re.match(FULL_URI, r)] + badlist_items = [] # Normalize IOC values for when performing lookups ioc_value = ioc_value.lower() # Build item for badlist - badlist_item = { - "type": "tag", - "tag": {"type": f"network.static.{ioc_type}", "value": ioc_value}, + badlist_item_base = { + "classification": default_classification, "sources": [ { "classification": default_classification, @@ -118,18 +151,31 @@ def update_blocklist( ], } - # See if there's any attribution details we can add to the item before adding to the list - attr = {} - if malware_family: - attr["family"] = list(set(malware_family)) - - if attribution: - attr["actor"] = list(set(attribution)) - - if attr: - badlist_item["attribution"] = attr + if bl_type == "tag": + if ioc_type in NETWORK_IOC_TYPES: + # Tag applies to both static and dynamic + for network_type in ["static", "dynamic"]: + badlist_item = deepcopy(badlist_item_base) + badlist_item.update( + { + "type": "tag", + "tag": {"type": f"network.{network_type}.{ioc_type}", "value": ioc_value}, + } + ) + badlist_items.append(badlist_item) + elif bl_type == "file": + # Set hash information + badlist_item = deepcopy(badlist_item_base) + badlist_item.update( + { + "type": "file", + "hashes": {ioc_type: ioc_value}, + } + ) + badlist_items.append(badlist_item) - al_client.badlist.add_update(badlist_item) + [prepare_item(bl_item) for bl_item in badlist_items] + al_client.badlist.add_update_many(badlist_items) source_cfg = self._service.config["updater"][source_name] @@ -141,6 +187,9 @@ def update_blocklist( for file, _ in files_sha256: with open(file, "r") as fp: for row in list(csv.reader(fp, delimiter=","))[start_index:]: + if not row: + # If no data in row, skip + continue row = [r.strip(' "') for r in row] joined_row = ",".join(row) if any(t in joined_row for t in ignore_terms) or joined_row.startswith("#"): @@ -163,7 +212,7 @@ def update_blocklist( ) # Iterate over all IOC types - for ioc_type in NETWORK_IOC_TYPES: + for ioc_type in NETWORK_IOC_TYPES + FILEHASH_TYPES: if source_cfg.get(ioc_type) is None: continue ioc_value = row[source_cfg[ioc_type]] @@ -175,7 +224,14 @@ def update_blocklist( # If there are multiple IOC types in the same column, verify the IOC type if not IOC_CHECK[ioc_type](ioc_value): continue - update_blocklist(ioc_type, ioc_value, malware_family, attribution, references) + update_blocklist( + ioc_type, + ioc_value, + malware_family, + attribution, + references, + bl_type="tag" if ioc_type in NETWORK_IOC_TYPES else "file", + ) elif source_cfg["format"] == "json": for file, _ in files_sha256: @@ -197,10 +253,17 @@ def update_blocklist( # Get attribution attribution = sanitize_data(data.get(source_cfg.get("attribution")), type="attribution") - for ioc_type in NETWORK_IOC_TYPES: + for ioc_type in NETWORK_IOC_TYPES + FILEHASH_TYPES: ioc_value = data.get(source_cfg.get(ioc_type)) if ioc_value: - update_blocklist(ioc_type, ioc_value, malware_family, attribution, references) + update_blocklist( + ioc_type, + ioc_value, + malware_family, + attribution, + references, + bl_type="tag" if ioc_type in NETWORK_IOC_TYPES else "file", + ) elif source_cfg["type"] == "malware_family_list": # This source is meant to contributes to the list of valid malware families diff --git a/service_manifest.yml b/service_manifest.yml index 50e17ad..3e762dc 100644 --- a/service_manifest.yml +++ b/service_manifest.yml @@ -85,6 +85,36 @@ config: malpedia_attribution: type: attribution_list format: list + "threatview.io C2 Hunt Feed": + type: blocklist + format: csv + start: 3 + ip: 0 + domain: 2 + reference: 5 + default_attribution: + family: ["COBALT STRIKE"] + "threatview.io IP Blocklist": + type: blocklist + format: csv + ip: 0 + "threatview.io Domain Blocklist": + type: blocklist + format: csv + domain: 0 + "threatview.io MD5 Hash Blocklist": + type: blocklist + format: csv + md5: 0 + "threatview.io URL Blocklist": + type: blocklist + format: csv + uri: 0 + "threatview.io SHA Hash Blocklist": + type: blocklist + format: csv + sha1: 0 + docker_config: image: ${REGISTRY}cccs/assemblyline-service-badlist:$SERVICE_TAG @@ -125,5 +155,23 @@ update_config: - name: threatfox uri: https://threatfox.abuse.ch/export/csv/full pattern: .*\.csv + - name: "threatview.io C2 Hunt Feed" + uri: https://threatview.io/Downloads/High-Confidence-CobaltStrike-C2%20-Feeds.txt + pattern: .*\.txt + - name: "threatview.io IP Blocklist" + uri: https://threatview.io/Downloads/IP-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io Domain Blocklist" + uri: https://threatview.io/Downloads/DOMAIN-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io MD5 Hash Blocklist" + uri: https://threatview.io/Downloads/MD5-HASH-ALL.txt + pattern: .*\.txt + - name: "threatview.io URL Blocklist" + uri: https://threatview.io/Downloads/URL-High-Confidence-Feed.txt + pattern: .*\.txt + - name: "threatview.io SHA Hash Blocklist" + uri: https://threatview.io/Downloads/SHA-HASH-FEED.txt + pattern: .*\.txt update_interval_seconds: 900 # Every 15 minutes wait_for_update: false From 7ddcc89adb97e54fb0d68ac805f3e424ff02f36d Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Thu, 18 Jan 2024 17:05:13 +0000 Subject: [PATCH 5/5] Batch adding items to the badlist --- badlist/update_server.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/badlist/update_server.py b/badlist/update_server.py index f3413e2..0a15eed 100644 --- a/badlist/update_server.py +++ b/badlist/update_server.py @@ -1,7 +1,9 @@ import csv import json +import os import re from copy import deepcopy +from queue import Queue from typing import List, Set from assemblyline.odm.base import ( @@ -16,6 +18,8 @@ ) from assemblyline_v4_service.updater.updater import ServiceUpdater +BLOCKLIST_UPDATE_BATCH = int(os.environ.get("BLOCKLIST_UPDATE_BATCH", "1000")) + IOC_CHECK = { "ip": re.compile(IP_ONLY_REGEX).match, "domain": re.compile(DOMAIN_ONLY_REGEX).match, @@ -45,6 +49,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.malware_families: Set[str] = set() self.attributions: Set[str] = set() + self.update_queue = Queue() def do_local_update(self): ... @@ -52,24 +57,18 @@ def do_local_update(self): # A sanity check to make sure we do in fact have things to send to services def _inventory_check(self) -> bool: success = True - - def _trigger_update(source): - self._current_source = source - self.set_source_update_time(0) - self.trigger_update() - if not self.attributions: - # Trigger an update for any sources that contribute to attributions list + # Queue an update for any sources that contribute to attributions list [ - _trigger_update(_s.name) + self.update_queue.put(_s.name) for _s in self._service.update_config.sources if self._service.config["updater"][_s.name]["type"] == "attribution_list" ] if not self.malware_families: - # Trigger an update for any sources that contribute to the malware families list + # Queue an update for any sources that contribute to the malware families list [ - _trigger_update(_s.name) + self.update_queue.put(_s.name) for _s in self._service.update_config.sources if self._service.config["updater"][_s.name]["type"] == "malware_family_list" ] @@ -91,11 +90,15 @@ def _trigger_update(source): success = True # Trigger an update for the blocklists that are missing - [_trigger_update(source) for source in missing_blocklists] + if missing_blocklists: + [self.update_queue.put(source) for source in missing_blocklists] + self.trigger_update() return success def import_update(self, files_sha256, al_client, source_name, default_classification): + blocklist_batch = [] + def sanitize_data(data: str, type: str, validate=True) -> List[str]: if not data: return [] @@ -175,7 +178,10 @@ def prepare_item(bl_item): badlist_items.append(badlist_item) [prepare_item(bl_item) for bl_item in badlist_items] - al_client.badlist.add_update_many(badlist_items) + blocklist_batch.extend(badlist_items) + if len(blocklist_batch) > BLOCKLIST_UPDATE_BATCH: + al_client.badlist.add_update_many(blocklist_batch) + blocklist_batch.clear() source_cfg = self._service.config["updater"][source_name] @@ -264,6 +270,8 @@ def prepare_item(bl_item): references, bl_type="tag" if ioc_type in NETWORK_IOC_TYPES else "file", ) + if blocklist_batch: + al_client.badlist.add_update_many(blocklist_batch) elif source_cfg["type"] == "malware_family_list": # This source is meant to contributes to the list of valid malware families