diff --git a/tests/test_cli.py b/tests/test_cli.py
index e38c58486b..720015859a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, Optional, Type
+from typing import Iterable, List, Optional, Type
 from unittest import mock
 
 import pytest
@@ -10,7 +10,12 @@
 from unblob.extractors.command import MultiFileCommand
 from unblob.handlers import BUILTIN_HANDLERS
 from unblob.models import DirectoryHandler, Glob, Handler, HexString, MultiFile
-from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, ExtractionConfig
+from unblob.processing import (
+    DEFAULT_DEPTH,
+    DEFAULT_PROCESS_NUM,
+    DEFAULT_SKIP_MAGIC,
+    ExtractionConfig,
+)
 from unblob.ui import (
     NullProgressReporter,
     ProgressReporter,
@@ -367,3 +372,52 @@ def test_skip_extraction(
     assert (
         process_file_mock.call_args.args[0].skip_extraction == skip_extraction
     ), fail_message
+
+
+@pytest.mark.parametrize(
+    "args, skip_magic, fail_message",
+    [
+        ([], DEFAULT_SKIP_MAGIC, "Should have kept default skip magics"),
+        (
+            ["--skip-magic", "SUPERMAGIC"],
+            (*DEFAULT_SKIP_MAGIC, "SUPERMAGIC"),
+            "Should have kept default skip magics",
+        ),
+        (["--clear-skip-magics"], [], "Should have cleared default skip magics"),
+        (
+            ["--clear-skip-magics", "--skip-magic", "SUPERMAGIC"],
+            ["SUPERMAGIC"],
+            "Should have cleared default skip magics",
+        ),
+        (
+            ["--clear-skip-magics", "--skip-magic", DEFAULT_SKIP_MAGIC[1]],
+            [DEFAULT_SKIP_MAGIC[1]],
+            "Should allow user specified and remove the rest",
+        ),
+    ],
+)
+def test_clear_skip_magics(
+    args: List[str], skip_magic: Iterable[str], fail_message: str, tmp_path: Path
+):
+    """Check the skip_magic passed to process_file for each CLI flag combination."""
+    runner = CliRunner()
+    in_path = (
+        Path(__file__).parent
+        / "integration"
+        / "archive"
+        / "zip"
+        / "regular"
+        / "__input__"
+        / "apple.zip"
+    )
+    params = [*args, "--extract-dir", str(tmp_path), str(in_path)]
+
+    process_file_mock = mock.MagicMock()
+    with mock.patch.object(unblob.cli, "process_file", process_file_mock):
+        result = runner.invoke(unblob.cli.cli, params)
+
+    assert result.exit_code == 0
+    process_file_mock.assert_called_once()
+    assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted(
+        skip_magic
+    ), fail_message
diff --git a/unblob/cli.py b/unblob/cli.py
index 9a117c86e1..fd65675052 100755
--- a/unblob/cli.py
+++ b/unblob/cli.py
@@ -168,9 +168,11 @@ def __init__(
     "--skip-magic",
     "skip_magic",
     type=click.STRING,
-    default=DEFAULT_SKIP_MAGIC,
-    help="Skip processing files with given magic prefix",
-    show_default=True,
+    help=f"""Skip processing files with given magic prefix.
+    The provided values are appended to unblob's own skip magic list unless
+    --clear-skip-magics is provided.
+    [default: {', '.join(DEFAULT_SKIP_MAGIC)}]
+    """,
     multiple=True,
 )
 @click.option(
@@ -182,6 +184,14 @@ def __init__(
     show_default=True,
     multiple=True,
 )
+@click.option(
+    "--clear-skip-magics",
+    "clear_skip_magics",
+    is_flag=True,
+    show_default=True,
+    default=False,
+    help="Clear unblob's own skip magic list.",
+)
 @click.option(
     "-p",
     "--process-num",
@@ -246,6 +256,7 @@ def cli(
     entropy_depth: int,
     skip_magic: Iterable[str],
     skip_extension: Iterable[str],
+    clear_skip_magics: bool,  # noqa: FBT001
     skip_extraction: bool,  # noqa: FBT001
     keep_extracted_chunks: bool,  # noqa: FBT001
     handlers: Handlers,
@@ -263,6 +274,10 @@ def cli(
     extra_dir_handlers = plugin_manager.load_dir_handlers_from_plugins()
     dir_handlers += tuple(extra_dir_handlers)
 
+    # User magics extend the built-in list unless --clear-skip-magics was given.
+    extra_magics_to_skip = () if clear_skip_magics else DEFAULT_SKIP_MAGIC
+    skip_magic = tuple(sorted(set(skip_magic).union(extra_magics_to_skip)))
+
     config = ExtractionConfig(
         extract_root=extract_root,
         force_extract=force,
diff --git a/unblob/processing.py b/unblob/processing.py
index fd38c8fd2c..83eb9dc64d 100644
--- a/unblob/processing.py
+++ b/unblob/processing.py
@@ -52,31 +52,31 @@
 DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
 DEFAULT_SKIP_MAGIC = (
     "BFLT",
-    "JPEG",
+    "Composite Document File V2 Document",
+    "Erlang BEAM file",
     "GIF",
-    "PNG",
-    "SQLite",
-    "compiled Java class",
-    "TrueType Font data",
-    "PDF document",
-    "magic binary file",
-    "MS Windows icon resource",
-    "Web Open Font Format",
     "GNU message catalog",
-    "Xilinx BIT data",
+    "HP Printer Job Language",
+    "Intel serial flash for PCH ROM",
+    "JPEG",
+    "MPEG",
+    "MS Windows icon resource",
+    "Macromedia Flash data",
     "Microsoft Excel",
-    "Microsoft Word",
-    "Microsoft PowerPoint",
     "Microsoft OOXML",
+    "Microsoft PowerPoint",
+    "Microsoft Word",
     "OpenDocument",
-    "Macromedia Flash data",
-    "MPEG",
-    "HP Printer Job Language",
-    "Erlang BEAM file",
-    "python",  # (e.g. python 2.7 byte-compiled)
-    "Composite Document File V2 Document",
+    "PDF document",
+    "PNG",
+    "SQLite",
+    "TrueType Font data",
+    "Web Open Font Format",
     "Windows Embedded CE binary image",
-    "Intel serial flash for PCH ROM",
+    "Xilinx BIT data",
+    "compiled Java class",
+    "magic binary file",
+    "python",  # (e.g. python 2.7 byte-compiled)
 )
 DEFAULT_SKIP_EXTENSION = (".rlib",)
 