diff --git a/CHANGELOG.md b/CHANGELOG.md index a0c53ad9f..ec3369aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ ### Bug Fixes +- vmray: load more analysis archives @mr-tz +- dynamic: only check file limitations for static file formats @mr-tz + ### capa Explorer Web ### capa Explorer IDA Pro plugin diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index dc719211a..94226a5af 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -34,9 +34,10 @@ class VMRayMonitorProcess: pid: int # process ID assigned by OS ppid: int # parent process ID assigned by OS monitor_id: int # unique ID assigned to process by VMRay + origin_monitor_id: int # unique VMRay ID of parent process image_name: str - filename: str - cmd_line: str + filename: Optional[str] = "" + cmd_line: Optional[str] = "" class VMRayAnalysis: @@ -165,6 +166,7 @@ def _compute_monitor_processes(self): process.os_pid, ppid, process.monitor_id, + process.origin_monitor_id, process.image_name, process.filename, process.cmd_line, @@ -176,6 +178,7 @@ def _compute_monitor_processes(self): monitor_process.os_pid, monitor_process.os_parent_pid, monitor_process.process_id, + monitor_process.parent_id, monitor_process.image_name, monitor_process.filename, monitor_process.cmd_line, @@ -185,7 +188,18 @@ def _compute_monitor_processes(self): self.monitor_processes[monitor_process.process_id] = vmray_monitor_process else: # we expect monitor processes recorded in both SummaryV2.json and flog.xml to equal - assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process + # to ensure this, we compare the pid, monitor_id, and origin_monitor_id + # for the other fields we've observed cases with slight deviations, e.g., + # the ppid for a process in flog.xml is not set correctly, all other data is equal + sv2p = self.monitor_processes[monitor_process.process_id] + if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process: + logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process) + + assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == ( + vmray_monitor_process.pid, + vmray_monitor_process.monitor_id, + vmray_monitor_process.origin_monitor_id, + ) def _compute_monitor_threads(self): for monitor_thread in self.flog.analysis.monitor_threads: diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index 755f494fe..761a879ba 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -276,7 +276,7 @@ class ElfFileHeader(BaseModel): class ElfFile(BaseModel): # file_header: ElfFileHeader - sections: list[ElfFileSection] + sections: list[ElfFileSection] = [] class StaticData(BaseModel): @@ -314,10 +314,11 @@ class Process(BaseModel): # is_ioc: bool monitor_id: int # monitor_reason: str + origin_monitor_id: int # VMRay ID of parent process os_pid: int - filename: SanitizedString + filename: Optional[SanitizedString] = "" image_name: str - cmd_line: SanitizedString + cmd_line: Optional[SanitizedString] = "" ref_parent_process: Optional[GenericReference] = None diff --git a/capa/main.py b/capa/main.py index 2e3a5900c..4e9dbd13a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -748,15 +748,13 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[F args: args: The parsed command line arguments from `install_common_args`. + Dynamic feature extractors can handle packed samples and do not need to be considered here. + raises: ShouldExitError: if the program is invoked incorrectly and should exit. """ found_file_limitation = False for file_extractor in file_extractors: - if isinstance(file_extractor, DynamicFeatureExtractor): - # Dynamic feature extractors can handle packed samples - continue - try: pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {}) except PEFormatError as e: @@ -962,8 +960,11 @@ def main(argv: Optional[list[str]] = None): ensure_input_exists_from_cli(args) input_format = get_input_format_from_cli(args) rules = get_rules_from_cli(args) - file_extractors = get_file_extractors_from_cli(args, input_format) - found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors) + found_file_limitation = False + if input_format in STATIC_FORMATS: + # only static extractors have file limitations + file_extractors = get_file_extractors_from_cli(args, input_format) + found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors) except ShouldExitError as e: return e.status_code