Skip to content

Commit

Permalink
Merge branch 'autogen-ruleset-cache' of github.com:s-ff/capa into aut…
Browse files Browse the repository at this point in the history
…ogen-ruleset-cache
  • Loading branch information
mr-tz committed Jul 3, 2024
2 parents d78db76 + a73ddc4 commit 98916be
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,16 @@ Special thanks to our repeat and new contributors:
- replace Halo spinner with Rich #2086 @s-ff
- optimize rule matching #2080 @williballenthin
- add aarch64 as a valid architecture #2144 [email protected] @williballenthin
- relax dependency version requirements for the capa library #2053 @williballenthin
- add scripts dependency group and update documentation #2145 @mr-tz
- regenerate ruleset cache automatically on source change (only in dev mode) #2133 @s-ff
### New Rules (25)

Expand Down
46 changes: 46 additions & 0 deletions capa/rules/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import capa.rules
import capa.helpers
import capa.version
import capa.rules.utils

logger = logging.getLogger(__name__)

Expand All @@ -27,6 +28,50 @@


def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier:
<<<<<<< HEAD
=======
# is this a development environment?
# if yes, include the modified files contents and/or commit hash
# in computing the cache identifier
try:
if capa.rules.utils.is_dev_environment():
modified_files = capa.rules.utils.get_modified_files()
commit_hash = capa.rules.utils.get_git_commit_hash()

if modified_files or commit_hash:
hash = hashlib.sha256()
hash.update(capa.version.__version__.encode("utf-8"))
hash.update(b"\x00")

for file in modified_files:
try:
file_content = file.read_bytes()
logger.debug("found modified source file %s", file)
hash.update(file_content)
hash.update(b"\x00")
except FileNotFoundError as e:
logger.error("modified file not found: %s", file)
logger.error("%s", e)

if commit_hash:
hash.update(commit_hash.encode("ascii"))
hash.update(b"\x00")

# include the hash of the rule contents
rule_hashes = sorted([hashlib.sha256(buf).hexdigest() for buf in rule_content])
for rule_hash in rule_hashes:
hash.update(rule_hash.encode("ascii"))
hash.update(b"\x00")

logger.debug(
"developer environment detected, ruleset cache will be auto-generated upon each source modification"
)
return hash.hexdigest()
except Exception as e:
logger.warning("failed to compute ruleset cache identifier in developer mode: %s", str(e))
logger.warning("falling back to default cache identifier based on rules contents")

>>>>>>> a73ddc41f41aaf451f2da6c00807404895f80c78
# this is not a development environment, only use rule contents in
# computing the cache identifier
hash = hashlib.sha256()
Expand Down Expand Up @@ -109,6 +154,7 @@ def get_ruleset_content(ruleset: capa.rules.RuleSet) -> List[bytes]:

def compute_ruleset_cache_identifier(ruleset: capa.rules.RuleSet) -> CacheIdentifier:
    """Compute the cache identifier for a ruleset.

    The ruleset's content is collected with ``get_ruleset_content`` and the
    identifier is derived from it by ``compute_cache_identifier``.
    """
    return compute_cache_identifier(get_ruleset_content(ruleset))


Expand Down
74 changes: 74 additions & 0 deletions capa/rules/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import sys
import shutil
import logging
import subprocess
from typing import List, Optional
from pathlib import Path

logger = logging.getLogger(__name__)


def is_dev_environment() -> bool:
    """Report whether this module is running from a git source checkout.

    Returns:
        True only when all of the following hold:
          - the interpreter is not frozen (e.g. a PyInstaller executable),
          - this module is not located under a ``site-packages`` directory,
          - a ``.git`` directory exists three levels above this file,
          - a ``git`` executable can be found on PATH.
        False otherwise.
    """
    # frozen builds and site-packages installs are never development setups
    if getattr(sys, "frozen", False) or "site-packages" in __file__:
        return False

    # the checkout root sits three levels above this module
    checkout_root = Path(__file__).resolve().parent.parent.parent
    if not (checkout_root / ".git").is_dir():
        return False

    # without a git executable we cannot query the checkout state
    return bool(shutil.which("git"))


def get_modified_files() -> List[Path]:
    """Return the tracked ``.py`` files that have uncommitted modifications.

    ``git status --porcelain`` is executed in the source checkout root (three
    levels above this module) rather than in the process's current working
    directory. Porcelain output lists paths relative to the repository root,
    so the returned paths are anchored to that root and can be opened
    regardless of where the process was started.

    NOTE(review): this assumes the checkout root is the git top-level
    directory, which holds when ``is_dev_environment()`` has returned True.

    Returns:
        Sorted list of paths to modified Python source files; an empty list
        when git is unavailable or the command fails.
    """
    repo_root = Path(__file__).resolve().parent.parent.parent

    try:
        # use git status to retrieve tracked modified files
        result = subprocess.run(
            ["git", "--no-pager", "status", "--porcelain", "--untracked-files=no"],
            capture_output=True,
            text=True,
            check=True,
            cwd=repo_root,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return []

    # retrieve .py source files; each line is "XY <path>" where X is the
    # index (staged) status and Y is the working tree (unstaged) status:
    #   'M ': the file has staged modifications
    #   ' M': the file has unstaged modifications
    #   'MM': the file has both staged and unstaged modifications
    files: List[Path] = [
        repo_root / line[3:]
        for line in result.stdout.splitlines()
        if line.startswith(("M ", "MM", " M")) and line.endswith(".py")
    ]

    return sorted(files)


def get_git_commit_hash() -> Optional[str]:
    """Return the commit hash that HEAD points to in the source checkout.

    ``git rev-parse HEAD`` is executed in the checkout root (three levels
    above this module) instead of the process's current working directory, so
    the result describes this source tree and not whichever repository the
    user happens to be standing in.

    Returns:
        The stripped output of ``git rev-parse HEAD``, or None when git is
        unavailable or the command fails.
    """
    repo_root = Path(__file__).resolve().parent.parent.parent

    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            capture_output=True,
            text=True,
            check=True,
            cwd=repo_root,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None

    commit_hash = result.stdout.strip()
    logger.debug("git commit hash %s", commit_hash)
    return commit_hash

0 comments on commit 98916be

Please sign in to comment.