diff --git a/fuzzing/search_chunks_fuzzer.py b/fuzzing/search_chunks_fuzzer.py
new file mode 100755
index 0000000000..c01cfbf7bd
--- /dev/null
+++ b/fuzzing/search_chunks_fuzzer.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""Atheris fuzzing harness that feeds raw bytes to unblob's search_chunks."""
+import logging
+import sys
+from pathlib import Path
+
+import atheris
+import structlog
+
+
+def set_unblob_log_level(level=logging.CRITICAL):
+    """Route structlog through the stdlib "unblob" logger and set its level."""
+    logger = logging.getLogger("unblob")
+
+    def logger_factory():
+        return logger
+
+    structlog.configure(logger_factory=logger_factory)
+    logger.setLevel(level)
+
+
+def extract(inpath: Path, outpath: Path):  # noqa: ARG001
+    """Do nothing; no-op replacement for ``Command.extract`` while fuzzing."""
+    return
+
+
+with atheris.instrument_imports(include=["unblob"], exclude=["unblob_native"]):
+    from unblob.extractors.command import Command
+    from unblob.file_utils import File
+    from unblob.finder import search_chunks
+    from unblob.models import Task, TaskResult
+    from unblob.processing import ExtractionConfig
+
+    # NOTE: monkey patch Command extractor so we don't lose time executing
+    # subprocesses. staticmethod (not classmethod) keeps the stub callable as
+    # ``extractor.extract(inpath, outpath)``; classmethod would also pass the
+    # class, one positional argument more than the stub accepts (TypeError).
+    Command.extract = staticmethod(extract)  # type: ignore
+
+
+@atheris.instrument_func
+def test_search_chunks(data):
+    """Fuzz entry point: run ``search_chunks`` over the fuzzer-provided data."""
+    # Skip empty inputs before doing any per-iteration setup work.
+    if not data:
+        return
+
+    config = ExtractionConfig(
+        extract_root=Path("/dev/shm"),  # noqa: S108
+        force_extract=True,
+        entropy_depth=0,
+        entropy_plot=False,
+        skip_magic=[],
+        skip_extension=[],
+        skip_extraction=False,
+        process_num=1,
+        keep_extracted_chunks=True,
+        verbose=0,
+    )
+
+    with File.from_bytes(data) as file:
+        task = Task(
+            path=Path("/dev/shm/nonexistent"), depth=0, blob_id=""  # noqa: S108
+        )
+        result = TaskResult(task)
+        search_chunks(file, len(data), config.handlers, result)
+
+
+if __name__ == "__main__":
+    set_unblob_log_level()
+    atheris.Setup(sys.argv, test_search_chunks)
+    atheris.Fuzz()
diff --git a/poetry.lock b/poetry.lock
index 4cadb92054..be01e8c0aa 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -21,6 +21,22 @@ files = [
     {file = "arpy-2.3.0.tar.gz", hash = "sha256:8302829a991cfcef2630b61e00f315db73164021cecbd7fb1fc18525f83f339c"},
 ]
 
+[[package]] +name = "atheris" +version = "2.3.0" +description = "A coverage-guided fuzzer for Python and Python extensions." +optional = false +python-versions = "*" +files = [ + {file = "atheris-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91cb296d60915c3efa4f6db48f09c4678b574cddb7ca98035f1cb9d9fb96f64"}, + {file = "atheris-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4e43d1ee4760916a84ff73c9c6cf9ac6eee80fc030479bbed43fe0b8e994981"}, + {file = "atheris-2.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d4e83c8a518e7f7d1c82ee52453255bf6309b7b335657534e83747d4fdfa110"}, + {file = "atheris-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c353952ee375bf851527e8b2ea57fdefd7ad16aadfe18143801998075485eccd"}, + {file = "atheris-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9877b6c2bd5386f9fbbb2989a939c717383d9639f5476411c06fe50fe2fe09a6"}, + {file = "atheris-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:005bde4b5a70e998b7fa097e9aa195972dcc2e04092156a0149cff7aa0de970e"}, + {file = "atheris-2.3.0.tar.gz", hash = "sha256:cf1fdf5fa220a41a2f262b32363fc566549502b2cb0addf4e1baad5531c0e825"}, +] + [[package]] name = "attrs" version = "23.2.0" @@ -1442,7 +1458,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = 
"PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1900,4 +1915,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "d16e1f61a2f49840197cc2aa4a9216941669dfa3e68801982026c08a12b10caa" +content-hash = "2cd4e7dbae462e2de26da10470c99d4393793d4c24e8f527504f1aeafc995e6c" diff --git a/pyproject.toml b/pyproject.toml index d0412f7dc1..21cdb2254e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ pre-commit = "^3.5.0" pytest-cov = "^3.0.0" ruff = "^0.1.13" pyyaml = "^6.0.1" +atheris = { version = "^2.3.0", python = "<3.12" } [tool.poetry.group.docs] optional = true