Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-tz authored Dec 4, 2024
2 parents 26363cd + f57f909 commit c05dfb6
Show file tree
Hide file tree
Showing 13 changed files with 179 additions and 141 deletions.
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repos:
hooks:
- id: isort
name: isort
stages: [commit, push, manual]
stages: [pre-commit, pre-push, manual]
language: system
entry: isort
args:
Expand All @@ -46,7 +46,7 @@ repos:
hooks:
- id: black
name: black
stages: [commit, push, manual]
stages: [pre-commit, pre-push, manual]
language: system
entry: black
args:
Expand All @@ -64,7 +64,7 @@ repos:
hooks:
- id: ruff
name: ruff
stages: [commit, push, manual]
stages: [pre-commit, pre-push, manual]
language: system
entry: ruff
args:
Expand All @@ -82,7 +82,7 @@ repos:
hooks:
- id: flake8
name: flake8
stages: [push, manual]
stages: [pre-push, manual]
language: system
entry: flake8
args:
Expand All @@ -101,7 +101,7 @@ repos:
hooks:
- id: mypy
name: mypy
stages: [push, manual]
stages: [pre-push, manual]
language: system
entry: mypy
args:
Expand All @@ -119,7 +119,7 @@ repos:
hooks:
- id: deptry
name: deptry
stages: [push, manual]
stages: [pre-push, manual]
language: system
entry: deptry .
always_run: true
Expand Down
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@

- allow call as valid subscope for call scoped rules @mr-tz
- support loading and analyzing a Binary Ninja database #2496 @xusheng6
- vmray: record process command line details @mr-tz

### Breaking Changes

- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz

### New Rules (10)
### New Rules (18)

- nursery/get-shadow-password-file-entry-on-linux [email protected]
- nursery/set-shadow-password-file-entry-on-linux [email protected]
Expand All @@ -23,6 +24,14 @@
- nursery/persist-via-print-processors-registry-key [email protected]
- linking/static/touchsocket/linked-against-touchsocket [email protected]
- runtime/dotnet/compiled-with-dotnet-aot [email protected]
- nursery/persist-via-errorhandler-script [email protected]
- nursery/persist-via-get-variable-hijack [email protected]
- nursery/persist-via-iphlpapi-dll-hijack [email protected]
- nursery/persist-via-lnk-shortcut [email protected]
- nursery/persist-via-powershell-profile [email protected]
- nursery/persist-via-windows-accessibility-tools [email protected]
- nursery/persist-via-windows-terminal-profile [email protected]
- nursery/write-to-browser-extension-directory [email protected]
-

### Bug Fixes
Expand All @@ -33,6 +42,9 @@
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
- cape: make Process model flexible and procmemory optional to load newest reports #2466 @mr-tz
- binja: fix unit test failure by fixing up the analysis for file al-khaser_x64.exe_ #2507 @xusheng6
- binja: move the stack string detection to function level #2516 @xusheng6

### capa Explorer Web

Expand Down
101 changes: 4 additions & 97 deletions capa/features/extractors/binja/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,111 +5,21 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import string
from typing import Iterator

from binaryninja import Function
from binaryninja import BasicBlock as BinjaBasicBlock
from binaryninja import (
BinaryView,
SymbolType,
RegisterValueType,
VariableSourceType,
MediumLevelILOperation,
MediumLevelILBasicBlock,
MediumLevelILInstruction,
)

from capa.features.common import Feature, Characteristic
from capa.features.address import Address
from capa.features.basicblock import BasicBlock
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


def get_printable_len_ascii(s: bytes) -> int:
    """Count the printable ASCII bytes of `s` that come before the first NUL byte.

    Scanning stops at the first zero byte or at the end of the buffer.
    Bytes that are not printable ASCII do not end the scan; they are simply not counted.
    """
    printable_count = 0
    for byte in s:
        if byte == 0:
            # NUL terminator: everything after it is ignored
            break
        if byte >= 127 or chr(byte) not in string.printable:
            continue
        printable_count += 1
    return printable_count


def get_printable_len_wide(s: bytes) -> int:
    """Return the printable length of `s` read as two-byte (UTF-16LE style) characters.

    Every odd-indexed byte must be zero; if any is not, the result is 0.
    Otherwise the even-indexed bytes are measured with get_printable_len_ascii.
    """
    high_bytes = s[1::2]
    if any(byte != 0x00 for byte in high_bytes):
        return 0

    low_bytes = s[::2]
    return get_printable_len_ascii(low_bytes)


def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
    """Return the printable length of a constant string that `il` copies to the stack.

    The instruction only counts when it is an MLIL call to a constant address whose symbol is
    one of the builtin copy routines (`__builtin_strncpy`, `__builtin_strcpy`, `__builtin_wcscpy`),
    its first parameter refers to a stack variable, and its second parameter is a constant data
    aggregate. The result is the larger of the ASCII and wide printable lengths of that data;
    any other instruction yields 0.
    """
    view: BinaryView = f.view

    if il.operation != MediumLevelILOperation.MLIL_CALL:
        return 0

    # the callee has to be a constant address so it can be resolved to a symbol
    callee = il.dest
    constant_operations = [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]
    if callee.operation not in constant_operations:
        return 0

    symbol = view.get_symbol_at(callee.value.value)
    if not symbol or symbol.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
        return 0

    copy_builtins = ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]
    if symbol.name not in copy_builtins:
        return 0

    params = il.params
    if len(params) < 2:
        return 0

    # first parameter: the copy destination, which must be a stack variable
    destination = params[0]
    if destination.operation not in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
        return 0

    stack_var = destination.src
    if stack_var.source_type != VariableSourceType.StackVariableSourceType:
        return 0

    # second parameter: the copy source, which must be constant aggregate data
    source = params[1]
    if source.value.type != RegisterValueType.ConstantDataAggregateValue:
        return 0

    data = bytes(f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, source.value.value))
    return max(get_printable_len_ascii(data), get_printable_len_wide(data))


def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """check an MLIL basic block for stackstring indicators

    sums get_stack_string_len over the block's instructions and reports True as soon as
    the running total exceeds MIN_STACKSTRING_LEN; False when the block ends first.
    """
    total = 0
    for insn in bb:
        total = total + get_stack_string_len(f, insn)
        if total <= MIN_STACKSTRING_LEN:
            continue
        # enough constant bytes seen; no need to look at the remaining instructions
        return True

    return False


def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract stackstring indicators from basic block

    expects `bbh.inner` to be a (disassembly basic block, MLIL basic block) pair;
    nothing is yielded when the MLIL block is None.
    """
    pair: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
    mlil_block = pair[1]
    if mlil_block is None:
        return

    if not bb_contains_stackstring(fh.inner, mlil_block):
        return

    yield Characteristic("stack string"), bbh.address


def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """extract tight loop indicators from a basic block

    a block is reported as a tight loop for each outgoing edge whose target starts at the
    block's own start address.

    `bbh.inner` is the disassembly basic block itself (not a tuple), so it is used directly
    rather than indexed.
    """
    bb: BinjaBasicBlock = bbh.inner
    for edge in bb.outgoing_edges:
        if edge.target.start == bb.start:
            yield Characteristic("tight loop"), bbh.address


Expand All @@ -121,7 +31,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur
yield BasicBlock(), bbh.address


# handlers invoked for every basic block; each takes (fh, bbh) and yields (feature, address) pairs.
# stack string detection is not registered here.
BASIC_BLOCK_HANDLERS = (extract_bb_tight_loop,)
24 changes: 4 additions & 20 deletions capa/features/extractors/binja/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import Iterator

import binaryninja as binja
from binaryninja import ILException

import capa.features.extractors.elf
import capa.features.extractors.binja.file
Expand Down Expand Up @@ -54,34 +53,19 @@ def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Featur

def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
    """Yield one BBHandle per disassembly basic block of the function in `fh`.

    Each handle's address is the block's start address and its `inner` is the
    Binary Ninja basic block itself. No MLIL is consulted, so functions whose
    IL is unavailable still produce their basic blocks.
    """
    f: binja.Function = fh.inner
    for bb in f.basic_blocks:
        yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)

def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
    """Yield the (feature, address) pairs for one basic block.

    Delegates to capa.features.extractors.binja.basicblock.extract_features.
    """
    bb_features = capa.features.extractors.binja.basicblock.extract_features(fh, bbh)
    yield from bb_features

def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
import capa.features.extractors.binja.helpers as binja_helpers

bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
addr = bb[0].start
bb: binja.BasicBlock = bbh.inner
addr = bb.start

for text, length in bb[0]:
for text, length in bb:
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
addr += length
Expand Down
110 changes: 108 additions & 2 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,27 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import string
from typing import Iterator

from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation
from binaryninja import (
Function,
BinaryView,
SymbolType,
ILException,
RegisterValueType,
VariableSourceType,
LowLevelILOperation,
MediumLevelILOperation,
MediumLevelILBasicBlock,
MediumLevelILInstruction,
)

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
from capa.features.extractors.base_extractor import FunctionHandle

Expand Down Expand Up @@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
yield FunctionName(name[1:]), sym.address


def get_printable_len_ascii(s: bytes) -> int:
    """Count the printable ASCII bytes of `s` that come before the first NUL byte.

    Only the part of the buffer up to the first zero byte (or the whole buffer when there
    is none) is considered. Bytes in that part that are not printable ASCII are skipped,
    not treated as a terminator.
    """
    terminator = s.find(0)
    candidate = s if terminator < 0 else s[:terminator]
    return sum(1 for byte in candidate if byte < 127 and chr(byte) in string.printable)


def get_printable_len_wide(s: bytes) -> int:
    """Return the printable length of `s` read as two-byte (UTF-16LE style) characters.

    Returns 0 unless every odd-indexed byte is zero; in that case the even-indexed bytes
    are measured with get_printable_len_ascii.
    """
    odd_bytes = s[1::2]
    looks_wide = not any(byte != 0x00 for byte in odd_bytes)
    if looks_wide:
        even_bytes = s[::2]
        return get_printable_len_ascii(even_bytes)
    return 0


def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
    """Return the printable length of a constant string that `il` copies to the stack.

    A non-zero result requires all of the following, checked in order:
      - `il` is an MLIL call whose destination is a constant (or constant pointer),
      - the symbol at that address is a library/symbolic function symbol named
        `__builtin_strncpy`, `__builtin_strcpy` or `__builtin_wcscpy`,
      - the call has at least two parameters, the first referring to a stack variable,
      - the second parameter's value is a constant data aggregate.

    The result is the larger of the ASCII and wide printable lengths of that constant data.
    """
    view: BinaryView = f.view

    if il.operation != MediumLevelILOperation.MLIL_CALL:
        return 0

    callee = il.dest
    constant_operations = [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]
    if callee.operation not in constant_operations:
        return 0

    symbol = view.get_symbol_at(callee.value.value)
    if not symbol or symbol.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
        return 0

    copy_builtins = ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]
    if symbol.name not in copy_builtins:
        return 0

    params = il.params
    if len(params) < 2:
        return 0

    # params[0] is the copy destination; only stack variables are of interest
    destination = params[0]
    if destination.operation not in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
        return 0

    stack_var = destination.src
    if stack_var.source_type != VariableSourceType.StackVariableSourceType:
        return 0

    # params[1] is the copy source; it must be constant aggregate data we can read back
    source = params[1]
    if source.value.type != RegisterValueType.ConstantDataAggregateValue:
        return 0

    data = bytes(f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, source.value.value))
    return max(get_printable_len_ascii(data), get_printable_len_wide(data))


def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
    """check an MLIL basic block for stackstring indicators

    adds up get_stack_string_len for each instruction of the block; True once the running
    total is greater than MIN_STACKSTRING_LEN, False if the block is exhausted before that.
    """
    copied_len = 0
    for insn in bb:
        copied_len = copied_len + get_stack_string_len(f, insn)
        if copied_len <= MIN_STACKSTRING_LEN:
            continue
        # threshold crossed; remaining instructions do not matter
        return True

    return False


def extract_stackstring(fh: FunctionHandle):
    """extract stackstring indicators

    walks the MLIL basic blocks of the function in `fh` and yields a "stack string"
    characteristic for every block accepted by bb_contains_stackstring. the reported
    address is the start of the disassembly block the MLIL block originates from.

    yields nothing when the function has no binary view, when accessing its MLIL raises
    ILException, or when the MLIL object is None.
    """
    func: Function = fh.inner
    bv: BinaryView = func.view
    if bv is None:
        return

    try:
        mlil = func.mlil
    except ILException:
        return

    # guard against a missing MLIL object so the attribute access below cannot fail
    if mlil is None:
        return

    for block in mlil.basic_blocks:
        if bb_contains_stackstring(func, block):
            # wrap the raw start address so the yielded value is an address object,
            # the same way basic block handles are addressed
            yield Characteristic("stack string"), AbsoluteVirtualAddress(block.source_block.start)


def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
    """Run each handler in FUNCTION_HANDLERS on `fh` and yield its (feature, address) pairs.

    Handlers are invoked in registration order and their results are yielded lazily.
    """
    for handler in FUNCTION_HANDLERS:
        for pair in handler(fh):
            feature, addr = pair
            yield feature, addr


# handlers invoked by extract_features for every function, in this order;
# each takes a FunctionHandle and yields (feature, address) pairs.
FUNCTION_HANDLERS = (
    extract_function_calls_to,
    extract_function_loop,
    extract_recursive_call,
    extract_function_name,
    extract_stackstring,
)
Loading

0 comments on commit c05dfb6

Please sign in to comment.