From 7bdb24bcba353799e796d7e5a37664fc511034d2 Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Mon, 25 Sep 2023 13:10:50 +0200 Subject: [PATCH] fix(handlers): add support for unix-compatible (aka v7) tar files. --- tests/handlers/archive/test_tar.py | 80 ++++++++++++++++++- .../archive/tar/__input__/cherry.v7.tar | 3 + .../cherry.v7.tar_extract/fruits/cherry1.txt | 3 + .../cherry.v7.tar_extract/fruits/cherry2.txt | 3 + .../cherry.v7.tar_extract/fruits/cherry3.txt | 3 + .../cherry.v7.tar_extract/fruits/cherry4.txt | 3 + unblob/handlers/__init__.py | 3 +- unblob/handlers/archive/tar.py | 31 ++++--- 8 files changed, 116 insertions(+), 13 deletions(-) create mode 100644 tests/integration/archive/tar/__input__/cherry.v7.tar create mode 100644 tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry1.txt create mode 100644 tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry2.txt create mode 100644 tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry3.txt create mode 100644 tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry4.txt diff --git a/tests/handlers/archive/test_tar.py b/tests/handlers/archive/test_tar.py index cefa8706f8..9ed985361c 100644 --- a/tests/handlers/archive/test_tar.py +++ b/tests/handlers/archive/test_tar.py @@ -2,7 +2,11 @@ from helpers import unhex from unblob.file_utils import File -from unblob.handlers.archive.tar import TarHandler, _get_tar_end_offset +from unblob.handlers.archive.tar import ( + TarUnixHandler, + TarUstarHandler, + _get_tar_end_offset, +) GNU_TAR_CONTENTS = unhex( """\ @@ -120,6 +124,58 @@ """ ) +UNIX_TAR_CONTENT = unhex( + """\ +00000000 66 72 75 69 74 73 2f 00 00 00 00 00 00 00 00 00 |fruits/.........| +00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000060 00 00 00 00 30 30 30 30 37 37 35 00 30 30 30 31 |....0000775.0001| +00000070 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000| +00000080 30 30 30 30 30 30 30 00 31 34 35 30 34 32 36 32 |0000000.14504262| +00000090 30 37 37 00 30 30 37 34 30 34 00 20 35 00 00 00 |077.007404. 5...| +000000a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000140 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000| +00000150 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........| +00000160 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000200 66 72 75 69 74 73 2f 61 70 70 6c 65 2e 74 78 74 |fruits/apple.txt| +00000210 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000260 00 00 00 00 30 30 30 30 36 36 34 00 30 30 30 31 |....0000664.0001| +00000270 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000| +00000280 30 30 30 30 30 30 36 00 31 34 35 30 34 32 36 32 |0000006.14504262| +00000290 30 37 31 00 30 31 31 31 35 34 00 20 00 00 00 00 |071.011154. ....| +000002a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000340 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000| +00000350 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........| +00000360 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000400 61 70 70 6c 65 0a 00 00 00 00 00 00 00 00 00 00 |apple...........| +00000410 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000600 66 72 75 69 74 73 2f 63 68 65 72 72 79 2e 74 78 |fruits/cherry.tx| +00000610 74 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |t...............| +00000620 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000660 00 00 00 00 30 30 30 30 36 36 34 00 30 30 30 31 |....0000664.0001| +00000670 37 35 30 00 30 30 30 31 37 35 30 00 30 30 30 30 |750.0001750.0000| +00000680 30 30 30 30 30 30 37 00 31 34 35 30 34 32 36 32 |0000007.14504262| +00000690 30 37 37 00 30 31 31 33 35 36 00 20 00 00 00 00 |077.011356. ....| +000006a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000740 00 00 00 00 00 00 00 00 00 30 30 30 30 30 30 30 |.........0000000| +00000750 00 30 30 30 30 30 30 30 00 00 00 00 00 00 00 00 |.0000000........| +00000760 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00000800 63 68 65 72 72 79 0a 00 00 00 00 00 00 00 00 00 |cherry..........| +00000810 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| +* +00002800 +""" +) + PADDING_TO_DEFAULT_BLOCKING_FACTOR = unhex( """\ 00000400 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| @@ -303,12 +359,30 @@ def test_different_blocking_factor(): pytest.param(b"some prefix ", id="nonzero-prefix"), ], ) -def test_calculate_chunk(prefix): +def test_calculate_chunk_ustar(prefix): tar_file = File.from_bytes(prefix + GNU_TAR_CONTENTS) - handler = TarHandler() + handler = TarUstarHandler() chunk = handler.calculate_chunk(tar_file, len(prefix)) assert chunk is not None assert chunk.start_offset == len(prefix) assert chunk.end_offset == len(prefix) + len(GNU_TAR_CONTENTS) + + +@pytest.mark.parametrize( + "prefix", + [ + pytest.param(b"", id="zero-prefix"), + pytest.param(b"some prefix ", id="nonzero-prefix"), + ], +) +def test_calculate_chunk_unix(prefix): + tar_file = File.from_bytes(prefix + UNIX_TAR_CONTENT) + handler = TarUnixHandler() + + chunk = handler.calculate_chunk(tar_file, len(prefix)) + + assert chunk is not None + assert chunk.start_offset == len(prefix) + assert chunk.end_offset == len(prefix) + len(UNIX_TAR_CONTENT) diff --git a/tests/integration/archive/tar/__input__/cherry.v7.tar b/tests/integration/archive/tar/__input__/cherry.v7.tar new file mode 100644 index 0000000000..2252cc0d91 --- /dev/null +++ b/tests/integration/archive/tar/__input__/cherry.v7.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f799945e335bcd22cae7f2a53033781b8a181a266a12bd04d6ab3ed3f5ba1fd2 +size 10240 diff --git a/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry1.txt b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry1.txt new file mode 100644 index 0000000000..16555f8308 --- /dev/null +++ b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry1.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7592083f2355ad7e207557efabb3594bf62c9e39677298e8265766a37d835c39 +size 8 diff --git a/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry2.txt b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry2.txt new file mode 100644 index 0000000000..19462226ea --- /dev/null +++ b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry2.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6f92894363eceff37d58287d2b8b37bb17a00b320312ed59924a8ec07004a6 +size 8 diff --git a/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry3.txt b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry3.txt new file mode 100644 index 0000000000..5d1ed100f4 --- /dev/null +++ b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry3.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d09282b5ac47cc58aa5dc4fe3e8d6829ac737adfe49ee32efee9cf4bf6cdf3 +size 8 diff --git a/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry4.txt b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry4.txt new file mode 100644 index 0000000000..328350b1a3 --- /dev/null +++ b/tests/integration/archive/tar/__output__/cherry.v7.tar_extract/fruits/cherry4.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7843a44cca6d57497113ed505d027b9f5ffac78f1de7809f81ae9b314d943e79 +size 8 diff --git a/unblob/handlers/__init__.py b/unblob/handlers/__init__.py index d06fe54061..6b60d73293 100644 --- a/unblob/handlers/__init__.py +++ b/unblob/handlers/__init__.py @@ -70,7 +70,8 @@ arc.ARCHandler, arj.ARJHandler, cab.CABHandler, - tar.TarHandler, + tar.TarUstarHandler, + tar.TarUnixHandler, cpio.PortableASCIIHandler, cpio.PortableASCIIWithCRCHandler, cpio.PortableOldASCIIHandler, diff --git a/unblob/handlers/archive/tar.py b/unblob/handlers/archive/tar.py index 92d3a76309..0a9b457e35 100644 --- a/unblob/handlers/archive/tar.py +++ b/unblob/handlers/archive/tar.py @@ -12,6 +12,7 @@ ExtractResult, File, HexString, + Regex, StructHandler, ValidChunk, ) @@ -98,17 +99,10 @@ def extract(self, inpath: Path, outdir: Path): return ExtractResult(reports=tarfile.reports) -class TarHandler(StructHandler): +class _TarHandler(StructHandler): NAME = "tar" - PATTERNS = [ - HexString("75 73 74 61 72 20 20 00"), - HexString("75 73 74 61 72 00 30 30"), - ] - - # Since the magic is at 257, we have to subtract that from the match offset - # to get to the start of the file. - PATTERN_MATCH_OFFSET = -MAGIC_OFFSET + PATTERNS = [] C_DEFINITIONS = r""" typedef struct posix_header @@ -146,3 +140,22 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk] if end_offset == -1: return None return ValidChunk(start_offset=start_offset, end_offset=end_offset) + + +class TarUstarHandler(_TarHandler): + PATTERNS = [ + HexString("75 73 74 61 72 20 20 00"), + HexString("75 73 74 61 72 00 30 30"), + ] + + # Since the magic is at 257, we have to subtract that from the match offset + # to get to the start of the file. + PATTERN_MATCH_OFFSET = -MAGIC_OFFSET + + +class TarUnixHandler(_TarHandler): + PATTERNS = [ + Regex( + r"[\w]{1,99}.*[\x30-\x37|\x20]{7}\x00[\x30-\x39|\x20]{7}\x00[\x30-\x39|\x20|\x00]{8}[\x30-\x39|\x20|\x00]{12}[\x30-\x39|\x20|\x00]{12}[\x30-\x39|\x20|\x00]{8}[\x00|\x30-\x31][\w|\x00]{100}" + ), + ]