Merge pull request #908 from onekey-sec/ruff-update

Run up-to-date ruff from pre-commit together with `ruff format`
vlaci authored Jul 24, 2024
2 parents 18a2ce1 + 3a0c406 commit e0d9805
Showing 37 changed files with 112 additions and 91 deletions.

3 changes: 3 additions & 0 deletions .github/workflows/run-tests.yml
@@ -29,6 +29,9 @@ jobs:
- name: Setup Rust cache
uses: Swatinem/rust-cache@v2

- name: Setup 3rd party dependencies
uses: ./.github/actions/setup-dependencies

- name: Check pre-commit hook
uses: pre-commit/[email protected]

22 changes: 13 additions & 9 deletions .pre-commit-config.yaml
@@ -13,18 +13,22 @@ repos:
- id: check-yaml
- id: check-added-large-files

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.290"
- repo: local
hooks:
- id: ruff
name: Check python (ruff)
args: [--show-source, --fix, --exit-non-zero-on-fix]

- repo: https://github.com/psf/black
rev: 23.1.0
hooks:
- id: black
name: Check black
language: system
types_or: [python, pyi]
require_serial: true
entry: poetry run ruff check --force-exclude
args:
[--output-format=full, --fix, --show-fixes, --exit-non-zero-on-fix]
- id: ruff-format
name: Format python (ruff)
language: system
types_or: [python, pyi]
require_serial: true
entry: poetry run ruff format --force-exclude

- repo: https://github.com/jendrikseipp/vulture
rev: v2.7
12 changes: 6 additions & 6 deletions docs/development.md
@@ -581,13 +581,13 @@ code to see what it looks like in real world applications.

### Code style

We adhere to PEP8 and enforce proper formatting of source files using
[black](https://github.com/psf/black) so you should not worry about formatting
source code at all, `pre-commit` will take care of it.
We adhere to PEP8 and enforce proper formatting of source files using [ruff
format](https://docs.astral.sh/ruff/formatter/) so you should not worry about
formatting source code at all, `pre-commit` will take care of it.

For linting we use [ruff](https://ruff.rs). Lint errors can be shown in your
editor of choice by one of the [editor
plugins](https://beta.ruff.rs/docs/editor-integrations/).
For linting we use [ruff check](https://docs.astral.sh/ruff/linter/). Lint
errors can be shown in your editor of choice by one of the [editor
plugins](https://docs.astral.sh/ruff/editors/).
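The split described above — `ruff format` for formatting, `ruff check` for linting — can also be exercised by hand; a sketch, assuming a working `poetry` environment and the repository's `pre-commit` setup:

```shell
# Lint with autofixes, then format (ruff replaces the former black hook).
poetry run ruff check --fix .
poetry run ruff format .

# Or run both through the configured hooks, as CI does.
pre-commit run --all-files
```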

### File Format Correctness

29 changes: 26 additions & 3 deletions flake.nix
@@ -67,9 +67,21 @@
(system:
with nixpkgsFor.${system}; {
default = mkShell {
packages = [
venvDir = "./.venv";
buildInputs = [
# A Python interpreter including the 'venv' module is required to bootstrap
# the environment.
python3Packages.python

# This executes some shell code to initialize a venv in $venvDir before
# dropping into the shell
python3Packages.venvShellHook

# This hook is used to patch downloaded binaries in venv to use libraries
# from the nix store.
autoPatchelfHook

unblob.runtimeDeps
ruff
pyright
python3Packages.pytest
python3Packages.pytest-cov
@@ -78,7 +90,18 @@
nvfetcher
];

env.LD_LIBRARY_PATH = lib.makeLibraryPath [ file ];
postVenvCreation =
let
apply_patches = lib.concatMapStringsSep
"\n"
(patch: "patch -f -p1 -d $VIRTUAL_ENV/lib/python3*/site-packages < ${patch}")
pkgs.python3Packages.python-magic.patches;
in
''
poetry install --all-extras --sync --with dev
autoPatchelf "$VIRTUAL_ENV/"
${apply_patches}
'';
};
});

4 changes: 3 additions & 1 deletion fuzzing/search_chunks_fuzzer.py
@@ -52,7 +52,9 @@ def test_search_chunks(data):

with File.from_bytes(data) as file:
task = Task(
path=Path("/dev/shm/nonexistent"), depth=0, blob_id="" # noqa: S108
path=Path("/dev/shm/nonexistent"), # noqa: S108
depth=0,
blob_id="",
)
result = TaskResult(task)
search_chunks(file, len(data), config.handlers, result)
2 changes: 1 addition & 1 deletion poetry.lock

14 changes: 7 additions & 7 deletions pyproject.toml
@@ -39,7 +39,7 @@ pytest = "^8.0.0"
pyright = "^1.1.349"
pre-commit = "^3.5.0"
pytest-cov = ">=3,<6"
ruff = ">=0.1.13,<0.6.0"
ruff = "0.5.4"
pyyaml = "^6.0.1"
atheris = { version = "^2.3.0", python = "<3.12" }

@@ -59,6 +59,7 @@ unblob = "unblob.cli:main"
[tool.ruff]
target-version = "py38"

[tool.ruff.lint]
select = [
"A", # flake8-builtins
"ARG", # flake8-unused-arguments
@@ -72,7 +73,6 @@ select = [
"FBT", # flake8-boolean-trap
"G", # flake8-logging-format
"I", # isort
"ISC", # flake8-implicit-str-concats
"N", # pep8-naming
"PIE", # flake8-pie
"PL", # pylint
@@ -113,7 +113,7 @@ ignore = [
# Do not remove unused imports automatically in __init__.py files
ignore-init-module-imports = true

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
".github/*" = [
"T201" # print
]
@@ -122,20 +122,20 @@ ignore-init-module-imports = true
"FBT003", # boolean-positional-value-in-function-call: allowed test parameters
]

[tool.ruff.flake8-comprehensions]
[tool.ruff.lint.flake8-comprehensions]
allow-dict-calls-with-keyword-arguments = true # code like `dict(a=1, b=2)` is allowed

[tool.ruff.mccabe]
[tool.ruff.lint.mccabe]
max-complexity = 8

[tool.ruff.pylint]
[tool.ruff.lint.pylint]
allow-magic-value-types = [
"bytes",
"int", # bunch of ad-hoc bitmask values
"str",
]

[tool.ruff.flake8-pytest-style]
[tool.ruff.lint.flake8-pytest-style]
fixture-parentheses = false
mark-parentheses = false
parametrize-names-type = "csv"
1 change: 1 addition & 0 deletions tests/test_cleanup.py
@@ -2,6 +2,7 @@
The tests use zip files as inputs - for simplicity
"""

import io
import zipfile
from pathlib import Path
4 changes: 2 additions & 2 deletions tests/test_helpers.py
@@ -39,7 +39,7 @@ def test_hexdump(hexdump):
def test_with_squized_data():
binary = unhex(WITH_SQUEEZED_DATA)
assert binary[:0x10] == EXPECTED[:0x10]
assert binary[0x10:0x30] == b"\xFF\x00" * 0x10
assert binary[0x10:0x30] == b"\xff\x00" * 0x10
assert binary[0x30:] == EXPECTED[0x10:]


@@ -53,7 +53,7 @@ def test_with_squized_end():
def test_with_squized_end():
binary = unhex(WITH_SQUEEZED_END)
assert len(binary) == 0x40
assert binary == b"\xFF" * 0x40
assert binary == b"\xff" * 0x40
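The `\xFF` to `\xff` churn above is `ruff format` normalizing hex escape letters to lowercase; the byte values are unchanged, as a quick sketch shows:

```python
# The two spellings denote identical bytes; the change is purely cosmetic.
old_style = b"\xFF\x00" * 0x10
new_style = b"\xff\x00" * 0x10

assert old_style == new_style
assert new_style[0] == 0xFF == 255  # the escape letter case does not matter
```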


WITH_SQUEEZE_AFTER_SQUEEZE = """\
4 changes: 2 additions & 2 deletions tests/test_processing.py
@@ -438,10 +438,10 @@ def get_all(file_name, report_type: Type[ReportType]) -> List[ReportType]:
assert unknown_entropy.block_size == 1024
assert round(unknown_entropy.mean, 2) == 98.05 # noqa: PLR2004
assert unknown_entropy.highest == 100.0 # noqa: PLR2004
assert unknown_entropy.lowest == 0.0 # noqa: PLR2004
assert unknown_entropy.lowest == 0.0

# we should have entropy calculated for files without extractions, except for empty files
assert [] == get_all("empty.txt", EntropyReport)
assert get_all("empty.txt", EntropyReport) == []
assert [EntropyReport(percentages=[100.0], block_size=1024, mean=100.0)] == get_all(
"0-255.bin", EntropyReport
)
8 changes: 6 additions & 2 deletions unblob/cli.py
@@ -48,7 +48,9 @@ def get_version():


def show_version(
ctx: click.Context, _param: click.Option, value: bool # noqa: FBT001
ctx: click.Context,
_param: click.Option,
value: bool, # noqa: FBT001
) -> None:
if not value or ctx.resilient_parsing:
return
@@ -57,7 +59,9 @@ def show_external_dependencies(


def show_external_dependencies(
ctx: click.Context, _param: click.Option, value: bool # noqa: FBT001
ctx: click.Context,
_param: click.Option,
value: bool, # noqa: FBT001
) -> None:
if not value or ctx.resilient_parsing:
return
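The `# noqa: FBT001` markers kept above suppress flake8-boolean-trap, which flags boolean positional parameters; click fixes the callback signature, so the argument cannot be made keyword-only here. A sketch of what the rule guards against, with hypothetical names:

```python
# FBT001 flags this: a bare positional bool is opaque at the call site.
def set_visibility(visible: bool) -> str:
    return "shown" if visible else "hidden"


# Keyword-only booleans keep call sites self-describing.
def set_visibility_kw(*, visible: bool) -> str:
    return "shown" if visible else "hidden"


assert set_visibility(True) == "shown"            # True what? unclear
assert set_visibility_kw(visible=False) == "hidden"  # intent is explicit
```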
1 change: 1 addition & 0 deletions unblob/extractor.py
@@ -1,4 +1,5 @@
"""File extraction related functions."""

import errno
import os
from pathlib import Path
2 changes: 1 addition & 1 deletion unblob/extractors/command.py
@@ -37,7 +37,7 @@ def extract(self, inpath: Path, outdir: Path):
cmd = self._make_extract_command(inpath, outdir)
command = shlex.join(cmd)
logger.debug("Running extract command", command=command)
stdout_file: Union[int, "io.FileIO"] = subprocess.PIPE
stdout_file: Union[int, io.FileIO] = subprocess.PIPE

def no_op():
pass
2 changes: 1 addition & 1 deletion unblob/file_utils.py
@@ -613,7 +613,7 @@ def create_hardlink(self, src: Path, dst: Path):
)
self.record_problem(safe_link.format_report(not_enough_privileges))

def open( # noqa: A003
def open(
self, path, mode: Literal["wb+", "rb+", "xb+"] = "wb+"
) -> io.BufferedRandom:
"""Create/open binary file for random access read-writing.
1 change: 1 addition & 0 deletions unblob/finder.py
@@ -2,6 +2,7 @@
The main "entry point" is search_chunks_by_priority.
"""

from functools import lru_cache
from typing import List, Optional

2 changes: 1 addition & 1 deletion unblob/handlers/archive/arc.py
@@ -58,7 +58,7 @@ def valid_header(self, header) -> bool:
return False
if header.header_type > 0x07:
return False
if not self.valid_name(header.name):
if not self.valid_name(header.name): # noqa: SIM103
return False
return True

4 changes: 1 addition & 3 deletions unblob/handlers/archive/dlink/encrpted_img.py
@@ -53,9 +53,7 @@ class EncrptedHandler(StructHandler):
EXTRACTOR = EncrptedExtractor()

def is_valid_header(self, header) -> bool:
if header.size < len(header):
return False
return True
return header.size >= len(header)

def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
header = self.parse_header(file, endian=Endian.BIG)
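This rewrite is ruff's SIM103 (needless-bool) autofix: an `if cond: return False` followed by `return True` collapses into returning the comparison directly. The before/after shapes in isolation, with hypothetical names:

```python
def is_valid_verbose(size: int, header_len: int) -> bool:
    # Pre-change shape: SIM103 flags this as a needless bool.
    if size < header_len:
        return False
    return True


def is_valid(size: int, header_len: int) -> bool:
    # Post-change shape: return the (negated) condition directly.
    return size >= header_len


# Both forms agree on every input.
for size, header_len in [(0, 4), (4, 4), (10, 4)]:
    assert is_valid_verbose(size, header_len) == is_valid(size, header_len)
```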
4 changes: 1 addition & 3 deletions unblob/handlers/archive/dlink/shrs.py
@@ -76,9 +76,7 @@ def is_valid_header(self, header, file: File) -> bool:
digest = hashlib.sha512(file.read(header.file_size_no_padding)).digest()
# we seek back to where we were
file.seek(-header.file_size_no_padding, io.SEEK_CUR)
if digest != header.encrypted_digest:
return False
return True
return digest == header.encrypted_digest

def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
header = self.parse_header(file, endian=Endian.BIG)
2 changes: 1 addition & 1 deletion unblob/handlers/archive/instar/bneg.py
@@ -76,7 +76,7 @@ def is_valid_header(self, header) -> bool:
return False
if header.major != 0x1:
return False
if header.minor != 0x1:
if header.minor != 0x1: # noqa: SIM103
return False

return True
8 changes: 2 additions & 6 deletions unblob/handlers/archive/netgear/trx.py
@@ -44,18 +44,16 @@ def extract(self, inpath: Path, outdir: Path):
header = self._struct_parser.parse("trx_header_t", file, Endian.LITTLE)
file.seek(0, io.SEEK_END)
eof = file.tell()
i = 0
offsets = sorted(
[
offset
for offset in [*cast(Iterable, header.offsets), eof]
if offset > 0
]
)
for start_offset, end_offset in zip(offsets, offsets[1:]):
for i, (start_offset, end_offset) in enumerate(zip(offsets, offsets[1:])):
chunk = Chunk(start_offset=start_offset, end_offset=end_offset)
carve_chunk_to_file(outdir.joinpath(Path(f"part{i}")), file, chunk)
i += 1


class NetgearTRXBase(StructHandler):
@@ -75,9 +73,7 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
)

def is_valid_header(self, header) -> bool:
if header.len < len(header):
return False
return True
return header.len >= len(header)

def _is_crc_valid(self, file: File, start_offset: int, header) -> bool:
file.seek(start_offset + CRC_CONTENT_OFFSET)
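The TRX extractor change above swaps a manually maintained counter (`i = 0` / `i += 1`) for `enumerate` over consecutive offset pairs. The pattern in isolation, with made-up offsets:

```python
offsets = sorted({0, 16, 64, 128})

# enumerate() replaces the manual counter, while zip(offsets, offsets[1:])
# yields each consecutive (start, end) pair of the partition.
parts = [
    (f"part{i}", start, end)
    for i, (start, end) in enumerate(zip(offsets, offsets[1:]))
]

assert parts == [("part0", 0, 16), ("part1", 16, 64), ("part2", 64, 128)]
```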
1 change: 1 addition & 0 deletions unblob/handlers/archive/sevenzip.py
@@ -17,6 +17,7 @@
https://py7zr.readthedocs.io/en/latest/archive_format.html
"""

import binascii
from pathlib import Path
from typing import Optional
1 change: 1 addition & 0 deletions unblob/handlers/archive/stuffit.py
@@ -17,6 +17,7 @@
If you have the resources to add support for these archive formats,
feel free to do so !
"""

from typing import Optional

from structlog import get_logger
2 changes: 1 addition & 1 deletion unblob/handlers/archive/tar.py
@@ -94,7 +94,7 @@ def _find_end_of_padding(file, *, find_from: int) -> int:
class TarExtractor(Extractor):
def extract(self, inpath: Path, outdir: Path):
with contextlib.closing(SafeTarFile(inpath)) as tarfile:
tarfile.extractall(outdir)
tarfile.extractall(outdir) # noqa: S202 tarfile-unsafe-members
return ExtractResult(reports=tarfile.reports)


2 changes: 1 addition & 1 deletion unblob/handlers/archive/xiaomi/hdr.py
@@ -88,7 +88,7 @@ def is_valid_blob_header(blob_header) -> bool:
def is_valid_header(header) -> bool:
if header.signature_offset < len(header):
return False
if not header.blob_offsets[0]:
if not header.blob_offsets[0]: # noqa: SIM103
return False
return True

1 change: 1 addition & 0 deletions unblob/handlers/compression/compress.py
@@ -31,6 +31,7 @@
wild. The only ones I observed were followed by null bytes sentinels,
which helps identifying the exact end offset.
"""

import io
from typing import List, Optional

1 change: 1 addition & 0 deletions unblob/handlers/compression/gzip.py
@@ -14,6 +14,7 @@
BadGzipFile errors when gzip library tries to read the next stream
header.
"""

import gzip
import io
import struct