Skip to content

Commit

Permalink
Update call.py to extract call features and map data types
Browse files Browse the repository at this point in the history
  • Loading branch information
r-sm2024 committed Jul 5, 2024
1 parent f684e88 commit 4eeefea
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 130 deletions.
3 changes: 2 additions & 1 deletion capa/features/address.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

class Address(abc.ABC):
@abc.abstractmethod
def __eq__(self, other): ...
def __eq__(self, other):
...

@abc.abstractmethod
def __lt__(self, other):
Expand Down
9 changes: 6 additions & 3 deletions capa/features/extractors/vmray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

from typing import Dict, List
from collections import defaultdict

Expand Down Expand Up @@ -59,16 +60,18 @@ def _compute_exports(self):
self.exports[export.address] = export.api.name

def _compute_imports(self):
# TODO (meh): https://github.com/mandiant/capa/issues/2148
...
if self.sample_file_static_data.pe:
for imports in self.sample_file_static_data.pe.exports:
self.imports[imports.address] = imports.api.name

def _compute_sections(self):
if self.sample_file_static_data.pe:
for section in self.sample_file_static_data.pe.sections:
self.sections[section.virtual_address] = section.name

def _compute_process_threads(self):
# logs/flog.xml appears to be the only file that contains thread-related
# logs/flog.xml
# appears to be the only file that contains thread-related data,
# so we use it here to map processes to threads
for function_call in self.flog.analysis.function_calls:
pid: int = int(function_call.process_id)
Expand Down
118 changes: 85 additions & 33 deletions capa/features/extractors/vmray/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,115 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Tuple, Iterator

from capa.helpers import assert_never
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address

from capa.features.extractors.vmray.models import FunctionCall
from capa.features.extractors.vmray.models import In_Out, FunctionCall
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

logger = logging.getLogger(__name__)


def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
def extract_function_calls(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
"""
this method extracts the given call's features (such as API name and arguments),
and returns them as API, Number, and String features.
This method extracts the given call's features (such as API name and
arguments), and returns them as API, Number, and String features.
args:
call: FunctionCall object representing the XML fncall element
Args:
ph: process handle (for defining the extraction scope)
th: thread handle (for defining the extraction scope)
ch: call handle (for defining the extraction scope)
yields: Feature, address; where Feature is either: API, Number, or String.
Yields:
Feature, address; where Feature is either: API, Number, or String.
"""

# TODO (meh): update for new models https://github.com/mandiant/capa/issues/2148
# print(ch)

# TODO(meh): update for new models
# https://github.com/mandiant/capa/issues/2148

# Extract API name
yield API(ch.inner.name), ch.inner.address
call: FunctionCall = ch.inner

# Extract arguments from <in>
for param in ch.inner.in_:
value = param.value
if isinstance(value, str):
yield String(value), ch.inner.address

elif isinstance(value, int):
yield Number(value), ch.inner.address

else:
assert_never(value)
if call is not None:
if call.in_ is not None:
for param in call.in_:
if isinstance(param, In_Out):
type: str = param.type
value: str = param.value
if type is not None:
if type in [
"signed_8bit",
"unsigned_8bit",
"signed_16bit",
"unsigned_16bit",
"signed_32bit",
"unsigned_32bit",
"signed_64bit",
"unsigned_64bit",
"double",
]:
yield Number(value), ch.address
elif type in [
"unknown",
"void",
"bool",
"void_ptr",
"ptr",
"str",
"array",
"container",
"bindata",
"undefined_type",
]:
yield String(value), ch.address
else:
assert_never(value)

# Extract return value from <out>
if ch.inner.out is not None:
value = ch.inner.out.value
if isinstance(value, str):
yield String(value), ch.inner.address

elif isinstance(value, int):
yield Number(value), ch.inner.address
if call is not None:
if call.out_ is not None:
for param in call.out_:
if isinstance(param, In_Out):
type = param.type
value = param.value
if type is not None:
if type in [
"signed_8bit",
"unsigned_8bit",
"signed_16bit",
"unsigned_16bit",
"signed_32bit",
"unsigned_32bit",
"signed_64bit",
"unsigned_64bit",
"double",
]:
yield Number(value), ch.address
elif type in [
"unknown",
"void",
"bool",
"void_ptr",
"ptr",
"str",
"array",
"container",
"bindata",
"undefined_type",
]:
yield String(value), ch.address
else:
assert_never(value)

else:
assert_never(value)
# Extract API name
yield API(call.name), ch.address


def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]:
Expand All @@ -68,6 +122,4 @@ def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Ite
yield feature, addr


CALL_HANDLERS = (
extract_function_calls,
extract_features,)
CALL_HANDLERS = (extract_function_calls,)
61 changes: 0 additions & 61 deletions capa/features/extractors/vmray/example.py

This file was deleted.

19 changes: 10 additions & 9 deletions capa/features/extractors/vmray/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import capa.features.extractors.vmray.call
import capa.features.extractors.vmray.file
import capa.features.extractors.vmray.global_
from capa.features.common import String, Feature, Characteristic, Feature
from capa.features.common import Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, ThreadAddress, DynamicCallAddress, AbsoluteVirtualAddress
from capa.features.extractors.vmray import VMRayAnalysis
from capa.features.extractors.vmray.models import Flog, Process, SummaryV2, FunctionCall
Expand Down Expand Up @@ -46,7 +46,8 @@ def __init__(self, analysis: VMRayAnalysis):
self.global_features = list(capa.features.extractors.vmray.global_.extract_features(self.analysis))

def get_base_address(self) -> Address:
# value according to the PE header, the actual trace may use a different imagebase
# value according to the PE header
# the actual trace may use a different imagebase
return AbsoluteVirtualAddress(self.analysis.base_address)

def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]:
Expand All @@ -56,8 +57,7 @@ def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]:
yield from self.global_features

def get_import_names(self) -> Iterator[Tuple[Feature, Address]]:
for filename_id, filename_data in self.analysis.filenames.items():
yield String(filename_data["filename"]), NO_ADDRESS
yield from capa.features.extractors.vmray.file.extract_import_names(self.analysis)

def get_processes(self) -> Iterator[ProcessHandle]:
yield from capa.features.extractors.vmray.file.get_processes(self.analysis)
Expand Down Expand Up @@ -92,20 +92,21 @@ def extract_call_features(
self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle
) -> Iterator[Tuple[Feature, Address]]:
yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch)
def get_call_name(ph, th, ch) -> str:

def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> str:
fncallname: FunctionCall = ch.inner
return fncallname.name

@classmethod
def from_zipfile(cls, zipfile_path: Path):
with ZipFile(zipfile_path, "r") as zipfile:
# TODO (meh): is default password "infected" good enough?? https://github.com/mandiant/capa/issues/2148
# TODO (meh): is default password "infected" good enough?
# https://github.com/mandiant/capa/issues/2148
sv2_json = json.loads(zipfile.read("logs/summary_v2.json", pwd=b"infected"))
sv2 = SummaryV2.model_validate(sv2_json)

flog_xml = zipfile.read("logs/flog.xml", pwd=b"infected")
flog_json = xmltodict.parse(flog_xml, attr_prefix="")
flog = Flog.model_validate(flog_json)

return cls(VMRayAnalysis(sv2, flog))
return cls(VMRayAnalysis(sv2, flog))
24 changes: 18 additions & 6 deletions capa/features/extractors/vmray/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import logging
from typing import Dict, Tuple, Iterator

Expand All @@ -22,7 +23,8 @@ def get_processes(analysis: VMRayAnalysis) -> Iterator[ProcessHandle]:
processes: Dict[str, Process] = analysis.sv2.processes

for process in processes.values():
# TODO (meh): should we use the OS process ID or vmray-assigned ID? https://github.com/mandiant/capa/issues/2148
# TODO(meh): should we use the OS process ID or vmray-assigned ID?
# https://github.com/mandiant/capa/issues/2148
pid = process.monitor_id
ppid = processes[process.ref_parent_process.path[1]].monitor_id if process.ref_parent_process else 0

Expand All @@ -46,27 +48,37 @@ def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Ad
yield Section(name), AbsoluteVirtualAddress(addr)


def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_referenced_filenames(
analysis: VMRayAnalysis,
) -> Iterator[Tuple[Feature, Address]]:
for filename in analysis.sv2.filenames.values():
yield String(filename.filename), NO_ADDRESS


def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_referenced_mutex_names(
analysis: VMRayAnalysis,
) -> Iterator[Tuple[Feature, Address]]:
for mutex in analysis.sv2.mutexes.values():
yield String(mutex.name), NO_ADDRESS


def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_referenced_domain_names(
analysis: VMRayAnalysis,
) -> Iterator[Tuple[Feature, Address]]:
for domain in analysis.sv2.domains.values():
yield String(domain.domain), NO_ADDRESS


def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_referenced_ip_addresses(
analysis: VMRayAnalysis,
) -> Iterator[Tuple[Feature, Address]]:
for ip_address in analysis.sv2.ip_addresses.values():
yield String(ip_address.ip_address), NO_ADDRESS


def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]:
def extract_referenced_registry_key_names(
analysis: VMRayAnalysis,
) -> Iterator[Tuple[Feature, Address]]:
for registry_record in analysis.sv2.registry_records.values():
yield String(registry_record.reg_key_name), NO_ADDRESS

Expand Down
Loading

0 comments on commit 4eeefea

Please sign in to comment.