
VMRay and dynamic improvements #2537

Open
wants to merge 5 commits into base: master
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,9 @@

### Bug Fixes

- vmray: load more analysis archives @mr-tz
- dynamic: only check file limitations for static file formats @mr-tz

### capa Explorer Web

### capa Explorer IDA Pro plugin
20 changes: 17 additions & 3 deletions capa/features/extractors/vmray/__init__.py
@@ -34,9 +34,10 @@ class VMRayMonitorProcess:
pid: int # process ID assigned by OS
ppid: int # parent process ID assigned by OS
monitor_id: int # unique ID assigned to process by VMRay
origin_monitor_id: int # unique VMRay ID of parent process
image_name: str
filename: str
cmd_line: str
filename: Optional[str] = ""
cmd_line: Optional[str] = ""


class VMRayAnalysis:
@@ -165,6 +166,7 @@ def _compute_monitor_processes(self):
process.os_pid,
ppid,
process.monitor_id,
process.origin_monitor_id,
process.image_name,
process.filename,
process.cmd_line,
@@ -176,6 +178,7 @@ def _compute_monitor_processes(self):
monitor_process.os_pid,
monitor_process.os_parent_pid,
monitor_process.process_id,
monitor_process.parent_id,
monitor_process.image_name,
monitor_process.filename,
monitor_process.cmd_line,
@@ -185,7 +188,18 @@ def _compute_monitor_processes(self):
self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
else:
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to equal
assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process
# to ensure this, we compare the pid, monitor_id, and origin_monitor_id
# for the other fields we've observed cases with slight deviations, e.g.,
# the ppid for a process in flog.xml is not set correctly, all other data is equal
sv2p = self.monitor_processes[monitor_process.process_id]
if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process:
logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process)

assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == (
vmray_monitor_process.pid,
vmray_monitor_process.monitor_id,
vmray_monitor_process.origin_monitor_id,
)
Comment on lines +198 to +202

Collaborator Author:
thoughts on being even more lenient here and not asserting this but just reporting it?
I'm encountering more inconsistencies between the two files, e.g., the monitor_id not being set

Collaborator Author:
so we could do the check and just log it

Collaborator:
I'm hesitant to be lenient here because we rely on sane process and thread monitor IDs for indexing. I'd consider differences between SummaryV2.json and flog.xml to be VMRay bugs and, if true, can we trust capa's results at that point?

Collaborator Author:
Right, let's leave as is until we find more samples that fail.
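The "check and just log" alternative discussed above can be sketched with stdlib dataclasses. The names below are hypothetical stand-ins for capa's `VMRayMonitorProcess`; only the comparison logic mirrors the diff:

```python
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class MonitorProcess:
    # hypothetical stand-in for capa's VMRayMonitorProcess
    pid: int
    monitor_id: int
    origin_monitor_id: int
    image_name: str = ""
    cmd_line: Optional[str] = ""


def ids(p: MonitorProcess) -> tuple[int, int, int]:
    # the fields relied on for indexing processes and threads
    return (p.pid, p.monitor_id, p.origin_monitor_id)


def is_consistent(sv2p: MonitorProcess, flogp: MonitorProcess) -> bool:
    if sv2p != flogp:
        # tolerated deviations (e.g. ppid unset in flog.xml): log only
        logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, flogp)
    # hard requirement: the IDs must agree across SummaryV2.json and flog.xml
    return ids(sv2p) == ids(flogp)
```

A fully lenient variant would return `True` unconditionally and keep only the debug log; the version merged here keeps the ID check as a hard assertion.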


def _compute_monitor_threads(self):
for monitor_thread in self.flog.analysis.monitor_threads:
7 changes: 4 additions & 3 deletions capa/features/extractors/vmray/models.py
@@ -276,7 +276,7 @@ class ElfFileHeader(BaseModel):

class ElfFile(BaseModel):
# file_header: ElfFileHeader
sections: list[ElfFileSection]
sections: list[ElfFileSection] = []

Collaborator:
incidentally, is this the correct way to set the default value, particularly as a list? i see this pattern used throughout the file.

my worry is that the default value = [] uses the same instance of a mutable list, rather than copies of it. sorta like when you have a kwarg parameter def foo(bar=[]).

in the past, i've used pydantic.Field for these. but maybe pydantic is extra smart and doesn't require this. @mr-tz @mike-hunhoff

Collaborator Author:
hm, great question, this is how mypy accepted the change and I saw the pattern throughout. Other files use Optional[list[<foo>]] = None or Field, we should cleanup the inconsistencies (separately).
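To the mutable-default question above: pydantic deep-copies mutable field defaults for each model instance (both v1 and v2 document this), so `sections: list[ElfFileSection] = []` does not share one list across instances. The pitfall the reviewer describes only bites for plain function defaults; a minimal stdlib demonstration of that case:

```python
def append_one(items=[]):  # classic mutable-default pitfall
    # the default list is created once, at function definition time,
    # and that same instance is reused on every call without an argument
    items.append(1)
    return items

first = append_one()
second = append_one()
# both calls mutated and returned the single shared default list: [1, 1]
```

`pydantic.Field(default_factory=list)` (or `Optional[list[...]] = None`) expresses the same default more explicitly, which is the cleanup mentioned above.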



class StaticData(BaseModel):
@@ -314,10 +314,11 @@ class Process(BaseModel):
# is_ioc: bool
monitor_id: int
# monitor_reason: str
origin_monitor_id: int # VMRay ID of parent process
os_pid: int
filename: SanitizedString
filename: Optional[SanitizedString] = ""
image_name: str
cmd_line: SanitizedString
cmd_line: Optional[SanitizedString] = ""
ref_parent_process: Optional[GenericReference] = None


13 changes: 7 additions & 6 deletions capa/main.py
@@ -748,15 +748,13 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[F
args:
args: The parsed command line arguments from `install_common_args`.

Dynamic feature extractors can handle packed samples and do not need to be considered here.
mr-tz marked this conversation as resolved.

raises:
ShouldExitError: if the program is invoked incorrectly and should exit.
"""
found_file_limitation = False
for file_extractor in file_extractors:
if isinstance(file_extractor, DynamicFeatureExtractor):
# Dynamic feature extractors can handle packed samples
continue

try:
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
except PEFormatError as e:
@@ -962,8 +960,11 @@ def main(argv: Optional[list[str]] = None):
ensure_input_exists_from_cli(args)
input_format = get_input_format_from_cli(args)
rules = get_rules_from_cli(args)
file_extractors = get_file_extractors_from_cli(args, input_format)
found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
found_file_limitation = False
if input_format in STATIC_FORMATS:
# only static extractors have file limitations
file_extractors = get_file_extractors_from_cli(args, input_format)
found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
except ShouldExitError as e:
return e.status_code

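The main.py change above gates the file-limitation check on the input format. The decision can be sketched in isolation; the format names below are illustrative stand-ins, not capa's exact constants:

```python
# illustrative format sets; capa defines the real STATIC_FORMATS elsewhere
STATIC_FORMATS = {"pe", "elf", "dotnet", "sc32", "sc64"}
DYNAMIC_FORMATS = {"cape", "drakvuf", "vmray"}


def needs_file_limitation_check(input_format: str) -> bool:
    # dynamic feature extractors can handle packed samples, so only
    # static input formats are checked for file limitations
    return input_format in STATIC_FORMATS
```

This also avoids constructing file extractors at all for dynamic inputs, matching the reordered calls in the diff.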