Merge branch 'master' into master

mandiant · Jun 7, 2024 · 85aeb1e · 85aeb1e
2 parents 2202a98 + 76a4a58
commit 85aeb1e
Show file tree

Hide file tree

Showing 26 changed files with 1,034 additions and 283 deletions.
diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini
@@ -1,8 +1,5 @@
 [mypy]
 
-[mypy-halo.*]
-ignore_missing_imports = True
-
 [mypy-tqdm.*]
 ignore_missing_imports = True
 

diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec
@@ -1,10 +1,19 @@
 # -*- mode: python -*-
 # Copyright (C) 2020 Mandiant, Inc. All Rights Reserved.
-import os.path
-import subprocess
+import sys
 
 import wcwidth
+import capa.rules.cache
 
+from pathlib import Path
+
+# SPECPATH is a global provided by PyInstaller: the directory that contains this .spec file
+capa_dir = Path(SPECPATH).parent.parent
+rules_dir = capa_dir / 'rules'
+cache_dir = capa_dir / 'cache'
+
+if not capa.rules.cache.generate_rule_cache(rules_dir, cache_dir):
+    sys.exit(-1)
 
 a = Analysis(
     # when invoking pyinstaller from the project root,
@@ -26,7 +35,7 @@ a = Analysis(
         # so we manually embed the wcwidth resources here.
         #
         # ref: https://stackoverflow.com/a/62278462/87207
-        (os.path.dirname(wcwidth.__file__), "wcwidth"),
+        (Path(wcwidth.__file__).parent, "wcwidth"),
     ],
     # when invoking pyinstaller from the project root,
     # this gets run from the project root.

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -52,8 +52,6 @@ jobs:
         run: python -m pip install --upgrade pip setuptools
       - name: Install capa with build requirements
         run: pip install -e .[build]
-      - name: Cache the rule set
-        run: python ./scripts/cache-ruleset.py ./rules/ ./cache/
       - name: Build standalone executable
         run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec
       - name: Does it run (PE)?

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -45,7 +45,9 @@ jobs:
     - name: Lint with flake8
       run: pre-commit run flake8 --hook-stage manual
     - name: Check types with mypy
-      run:  pre-commit run mypy --hook-stage manual
+      run: pre-commit run mypy --hook-stage manual
+    - name: Check imports against dependencies
+      run: pre-commit run deptry --hook-stage manual
 
   rule_linter:
     runs-on: ubuntu-20.04
@@ -106,7 +108,7 @@ jobs:
     name: Binary Ninja tests for ${{ matrix.python-version }}
     env:
       BN_SERIAL: ${{ secrets.BN_SERIAL }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     needs: [tests]
     strategy:
       fail-fast: false

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -127,3 +127,13 @@ repos:
         -   "--ignore=tests/test_scripts.py"
         always_run: true
         pass_filenames: false
+
+-   repo: local
+    hooks:
+    -   id: deptry
+        name: deptry
+        stages: [push, manual]
+        language: system
+        entry: deptry .
+        always_run: true
+        pass_filenames: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,11 +8,14 @@
 - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
 - Add json to sarif conversion script @reversingwithme
+- render maec/* fields #843 @s-ff
+- replace Halo spinner with Rich #2086 @s-ff
+- optimize rule matching #2080 @williballenthin
 
 ### Breaking Changes
 
 
-### New Rules (8)
+### New Rules (17)
 
 - impact/wipe-disk/delete-drive-layout-via-ioctl [email protected]
 - host-interaction/driver/interact-with-driver-via-ioctl [email protected]
@@ -22,6 +25,15 @@
 - nursery/unmount-volume-via-ioctl [email protected]
 - data-manipulation/encryption/rc4/encrypt-data-using-rc4-via-systemfunction033 [email protected]
 - anti-analysis/anti-forensic/self-deletion/self-delete-using-alternate-data-streams [email protected]
+- nursery/change-memory-permission-on-linux [email protected]
+- nursery/check-file-permission-on-linux [email protected]
+- nursery/check-if-process-is-running-under-android-emulator-on-android [email protected]
+- nursery/map-or-unmap-memory-on-linux [email protected]
+- persistence/act-as-share-provider-dll [email protected]
+- persistence/act-as-windbg-extension [email protected]
+- persistence/act-as-time-provider-dll [email protected]
+- host-interaction/gui/window/hide/hide-graphical-window-from-taskbar [email protected]
+- compiler/dart/compiled-with-dart [email protected]
 -
 
 ### Bug Fixes
@@ -42,6 +54,8 @@
 - ci: update github workflows to use latest version of actions that were using a deprecated version of node #1967 #2003 capa-rules#883 @sjha2048 @Ana06
 - ci: update binja version to stable 4.0 #2016 @xusheng6
 - ci: update github workflows to reflect the latest ghidrathon installation and bumped up jep, ghidra versions  #2020 @psahithireddy
+- ci: include rule caching in PyInstaller build process #2097 @s-ff
+- ci: add deptry to check imports against declared dependencies #1497 @s-ff
 
 ### Raw diffs
 - [capa v7.0.1...master](https://github.com/mandiant/capa/compare/v7.0.1...master)
@@ -295,7 +309,7 @@ For those that use capa as a library, we've introduced some limited breaking cha
 - [capa-rules v5.1.0...v6.0.0](https://github.com/mandiant/capa-rules/compare/v5.1.0...v6.0.0)
 
 ## v5.1.0
-capa version 5.1.0 adds a Protocol Buffers (protobuf) format for result documents. Additionally, the [Vector35](https://vector35.com/) team contributed a new feature extractor using Binary Ninja. Other new features are a new CLI flag to override the detected operating system, functionality to read and render existing result documents, and a output color format that's easier to read.
+capa version 5.1.0 adds a Protocol Buffers (protobuf) format for result documents. Additionally, the [Vector35](https://vector35.com/) team contributed a new feature extractor using Binary Ninja. Other new features are a new CLI flag to override the detected operating system, functionality to read and render existing result documents, and an output color format that's easier to read.
 
 Over 25 capa rules have been added and improved.
 
@@ -1494,7 +1508,7 @@ The IDA Pro integration is now distributed as a real plugin, instead of a script
   - updates distributed PyPI/`pip install --upgrade` without touching your `%IDADIR%`
   - generally doing thing the "right way"
 
-How to get this new version? Its easy: download [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory and update your capa installation (incidentally, this is a good opportunity to migrate to `pip install flare-capa` instead of git checkouts). Now you should see the plugin listed in the `Edit > Plugins > FLARE capa explorer` menu in IDA. 
+How to get this new version? It's easy: download [capa_explorer.py](https://raw.githubusercontent.com/mandiant/capa/master/capa/ida/plugin/capa_explorer.py) to your IDA plugins directory and update your capa installation (incidentally, this is a good opportunity to migrate to `pip install flare-capa` instead of git checkouts). Now you should see the plugin listed in the `Edit > Plugins > FLARE capa explorer` menu in IDA. 
 
 Please refer to the plugin [readme](https://github.com/mandiant/capa/blob/master/capa/ida/plugin/README.md) for additional information on installing and using the IDA Pro plugin.
 

diff --git a/capa/engine.py b/capa/engine.py
@@ -270,6 +270,14 @@ def evaluate(self, features: FeatureSet, short_circuit=True):
 MatchResults = Mapping[str, List[Tuple[Address, Result]]]
 
 
+def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]:
+    namespace = rule.meta.get("namespace")
+    if namespace:
+        while namespace:
+            yield namespace
+            namespace, _, _ = namespace.rpartition("/")
+
+
 def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: Iterable[Address]):
     """
     record into the given featureset that the given rule matched at the given locations.
@@ -280,11 +288,8 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations:
     updates `features` in-place. doesn't modify the remaining arguments.
     """
     features[capa.features.common.MatchedRule(rule.name)].update(locations)
-    namespace = rule.meta.get("namespace")
-    if namespace:
-        while namespace:
-            features[capa.features.common.MatchedRule(namespace)].update(locations)
-            namespace, _, _ = namespace.rpartition("/")
+    for namespace in get_rule_namespaces(rule):
+        features[capa.features.common.MatchedRule(namespace)].update(locations)
 
 
 def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]:

diff --git a/capa/features/common.py b/capa/features/common.py
@@ -128,7 +128,7 @@ def __eq__(self, other):
 
     def __lt__(self, other):
         # implementing sorting by serializing to JSON is a huge hack.
-        # its slow, inelegant, and probably doesn't work intuitively;
+        # it's slow, inelegant, and probably doesn't work intuitively;
         # however, we only use it for deterministic output, so it's good enough for now.
 
         # circular import
@@ -227,7 +227,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
             if self.value in feature.value:
                 matches[feature.value].update(locations)
                 if short_circuit:
-                    # we found one matching string, thats sufficient to match.
+                    # we found one matching string, that's sufficient to match.
                     # don't collect other matching strings in this mode.
                     break
 
@@ -322,7 +322,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
             if self.re.search(feature.value):
                 matches[feature.value].update(locations)
                 if short_circuit:
-                    # we found one matching string, thats sufficient to match.
+                    # we found one matching string, that's sufficient to match.
                     # don't collect other matching strings in this mode.
                     break
 
@@ -385,10 +385,12 @@ def __init__(self, value: bytes, description=None):
         self.value = value
 
     def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True):
+        assert isinstance(self.value, bytes)
+
         capa.perf.counters["evaluate.feature"] += 1
         capa.perf.counters["evaluate.feature.bytes"] += 1
+        capa.perf.counters["evaluate.feature.bytes." + str(len(self.value))] += 1
 
-        assert isinstance(self.value, bytes)
         for feature, locations in features.items():
             if not isinstance(feature, (Bytes,)):
                 continue
@@ -486,6 +488,6 @@ def __init__(self, value: str, description=None):
 def is_global_feature(feature):
     """
     is this a feature that is extracted at every scope?
-    today, these are OS and arch features.
+    today, these are OS, arch, and format features.
     """
-    return isinstance(feature, (OS, Arch))
+    return isinstance(feature, (OS, Arch, Format))
diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py
@@ -34,7 +34,7 @@ def __init__(self):
                 # https://ghidra.re/ghidra_docs/api/ghidra/program/model/listing/Program.html
                 #
                 # the hashes are stored in the database, not computed on the fly,
-                # so its probably not trivial to add SHA1.
+                # so it's probably not trivial to add SHA1.
                 sha1="",
                 sha256=capa.ghidra.helpers.get_file_sha256(),
             )

diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py
@@ -485,7 +485,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str:
                     address=Address.from_capa(addr),
                     feature=feature_from_capa(feature),
                 )  # type: ignore
-                # Mypy is unable to recognise `basic_block` as a argument due to alias
+                # Mypy is unable to recognise `basic_block` as an argument due to alias
                 for feature, addr in extractor.extract_thread_features(p, t)
             ]
 

diff --git a/capa/ida/plugin/__init__.py b/capa/ida/plugin/__init__.py
@@ -125,7 +125,7 @@ def install_icon():
         return False
 
     # resource leak here. need to call `ida_kernwin.free_custom_icon`?
-    # however, since we're not cycling this icon a lot, its probably ok.
+    # however, since we're not cycling this icon a lot, it's probably ok.
     # expect to leak exactly one icon per application load.
     icon = ida_kernwin.load_custom_icon(data=ICON)
 

diff --git a/capa/loader.py b/capa/loader.py
@@ -11,7 +11,7 @@
 from typing import Set, Dict, List, Optional
 from pathlib import Path
 
-import halo
+from rich.console import Console
 from typing_extensions import assert_never
 
 import capa.perf
@@ -176,6 +176,10 @@ def get_extractor(
       UnsupportedArchError
       UnsupportedOSError
     """
+
+    # send the spinner to stderr (not stdout) so users can cleanly redirect capa's output; quiet=True suppresses it entirely.
+    console = Console(stderr=True, quiet=disable_progress)
+
     if backend == BACKEND_CAPE:
         import capa.features.extractors.cape.extractor
 
@@ -222,7 +226,7 @@ def get_extractor(
             if os_ == OS_AUTO and not is_supported_os(input_path):
                 raise UnsupportedOSError()
 
-        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+        with console.status("analyzing program...", spinner="dots"):
             bv: BinaryView = binaryninja.load(str(input_path))
             if bv is None:
                 raise RuntimeError(f"Binary Ninja cannot open file {input_path}")
@@ -247,7 +251,7 @@ def get_extractor(
             if os_ == OS_AUTO and not is_supported_os(input_path):
                 raise UnsupportedOSError()
 
-        with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
+        with console.status("analyzing program...", spinner="dots"):
             vw = get_workspace(input_path, input_format, sigpaths)
 
             if should_save_workspace:

diff --git a/capa/render/default.py b/capa/render/default.py
@@ -102,7 +102,11 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):
 
     if rows:
         ostream.write(
-            tabulate.tabulate(rows, headers=[width("Capability", 50), width("Namespace", 50)], tablefmt="mixed_outline")
+            tabulate.tabulate(
+                rows,
+                headers=[width("Capability", 50), width("Namespace", 50)],
+                tablefmt="mixed_outline",
+            )
         )
         ostream.write("\n")
     else:
@@ -148,7 +152,55 @@ def render_attack(doc: rd.ResultDocument, ostream: StringIO):
     if rows:
         ostream.write(
             tabulate.tabulate(
-                rows, headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)], tablefmt="mixed_grid"
+                rows,
+                headers=[width("ATT&CK Tactic", 20), width("ATT&CK Technique", 80)],
+                tablefmt="mixed_grid",
+            )
+        )
+        ostream.write("\n")
+
+
+def render_maec(doc: rd.ResultDocument, ostream: StringIO):
+    """
+    example::
+
+        +--------------------------+-----------------------------------------------------------+
+        | MAEC Category            | MAEC Value                                                |
+        |--------------------------+-----------------------------------------------------------|
+        | analysis-conclusion      | malicious                                                 |
+        |--------------------------+-----------------------------------------------------------|
+        | malware-category         | downloader                                                |
+        |                          | launcher                                                  |
+        |--------------------------+-----------------------------------------------------------|
+        | malware-family           | PlugX                                                     |
+        +--------------------------+-----------------------------------------------------------+
+    """
+    maec_categories = {
+        "analysis_conclusion",
+        "analysis_conclusion_ov",
+        "malware_family",
+        "malware_category",
+        "malware_category_ov",
+    }
+    maec_table = collections.defaultdict(set)
+    for rule in rutils.maec_rules(doc):
+        for maec_category in maec_categories:
+            maec_value = getattr(rule.meta.maec, maec_category, None)
+            if maec_value:
+                maec_table[maec_category].add(maec_value)
+
+    rows = []
+    for category in sorted(maec_categories):
+        values = maec_table.get(category, set())
+        if values:
+            rows.append((rutils.bold(category.replace("_", "-")), "\n".join(sorted(values))))
+
+    if rows:
+        ostream.write(
+            tabulate.tabulate(
+                rows,
+                headers=[width("MAEC Category", 25), width("MAEC Value", 75)],
+                tablefmt="mixed_grid",
             )
         )
         ostream.write("\n")
@@ -191,7 +243,9 @@ def render_mbc(doc: rd.ResultDocument, ostream: StringIO):
     if rows:
         ostream.write(
             tabulate.tabulate(
-                rows, headers=[width("MBC Objective", 25), width("MBC Behavior", 75)], tablefmt="mixed_grid"
+                rows,
+                headers=[width("MBC Objective", 25), width("MBC Behavior", 75)],
+                tablefmt="mixed_grid",
             )
         )
         ostream.write("\n")
@@ -204,6 +258,8 @@ def render_default(doc: rd.ResultDocument):
     ostream.write("\n")
     render_attack(doc, ostream)
     ostream.write("\n")
+    render_maec(doc, ostream)
+    ostream.write("\n")
     render_mbc(doc, ostream)
     ostream.write("\n")
     render_capabilities(doc, ostream)

diff --git a/capa/render/result_document.py b/capa/render/result_document.py
@@ -418,7 +418,7 @@ def from_capa(
                             # doc[locations] contains all matches for the given namespace.
                             # for example, the feature might be `match: anti-analysis/packer`
                             # which matches against "generic unpacker" and "UPX".
-                            # in this case, doc[locations] contains locations for *both* of thse.
+                            # in this case, doc[locations] contains locations for *both* of those.
                             #
                             # rule_matches contains the matches for the specific rule.
                             # this is a subset of doc[locations].