Skip to content

Commit

Permalink
fix(handler): improve tar handler to support sparse archives.
Browse files Browse the repository at this point in the history
A custom TarInfo that keeps the actual entry size rather than the
original file size is used when calculating the end offset.
  • Loading branch information
qkaiser committed Dec 14, 2023
1 parent d086d25 commit e30118f
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 2 deletions.
3 changes: 3 additions & 0 deletions tests/integration/archive/tar/__input__/sparse.tar
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
11 changes: 11 additions & 0 deletions unblob/handlers/archive/_safe_tarfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import tarfile
from pathlib import Path
from tarfile import (
TarInfo,
)

from structlog import get_logger

Expand All @@ -13,6 +16,14 @@
MAX_PATH_LEN = 255


class SafeTarInfo(TarInfo):
    """TarInfo variant whose ``size`` stays at the value read from the header.

    While processing a member, the parent class may replace ``size`` (for
    sparse entries it becomes the size of the expanded file). Callers that
    need the number of bytes the entry occupies inside the archive want the
    header value instead, so it is put back after the parent is done.
    """

    def _proc_member(self, tarfile) -> "SafeTarInfo":
        """Process the member via the parent class, preserving the header size.

        :param tarfile: the ``TarFile`` object currently being read.
        :return: the member produced by the parent implementation.
        """
        header_size = self.size
        member = super()._proc_member(tarfile)
        # Extension headers (GNU long name/link, pax) make the parent return
        # the *following* member, a different object that has already been
        # through this method itself. Stamping this header's size (the length
        # of the extension payload) onto it would corrupt its size, so only
        # restore the value when the parent handed back this very object.
        if member is self:
            member.size = header_size
        return member


class SafeTarFile:
def __init__(self, inpath: Path):
self.inpath = inpath
Expand Down
4 changes: 2 additions & 2 deletions unblob/handlers/archive/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
StructHandler,
ValidChunk,
)
from ._safe_tarfile import SafeTarFile
from ._safe_tarfile import SafeTarFile, SafeTarInfo

logger = get_logger()

Expand All @@ -43,7 +43,7 @@ def _get_tar_end_offset(file: File, offset=0):

def _get_end_of_last_tar_entry(file) -> int:
try:
tf = tarfile.TarFile(mode="r", fileobj=file)
tf = tarfile.TarFile(mode="r", fileobj=file, tarinfo=SafeTarInfo)
except tarfile.TarError:
return -1

Expand Down

0 comments on commit e30118f

Please sign in to comment.