Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
ipitio committed Oct 18, 2024
1 parent 8b42f00 commit c5a5211
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 40 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}

defaults:
run:
# GitHub Actions run without a TTY device. This is a workaround to get one,
# based on https://github.com/actions/runner/issues/241#issuecomment-2019042651
shell: 'script --return --quiet --log-out /dev/null --command "bash -e {0}"'

jobs:
build-and-push-image:
runs-on: ubuntu-latest
Expand All @@ -20,6 +26,14 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4

- name: Run tests
run: |
docker run \
-v ./src:/app \
-v ./pdf:/app/pdf \
$(docker build -q ./src) \
bash src/test/example.sh
- name: Log in to the Container registry
uses: docker/[email protected]
with:
Expand Down
104 changes: 65 additions & 39 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from pathlib import Path

import pymupdf
from joblib import Parallel, delayed
from natsort import natsorted, ns
from PIL import Image

Expand All @@ -34,7 +33,7 @@ def predict(base: Path, input_file: Path, args: list[str]) -> None:
[
"bash",
"-c",
f"ocrmypdf --jobs 1 {' '.join(args)} {input_file} {output_file}",
f"ocrmypdf {' '.join(args)} {input_file} {output_file}",
],
check=True,
)
Expand All @@ -45,50 +44,77 @@ def predict(base: Path, input_file: Path, args: list[str]) -> None:
pass


def cleanup(root: str, files: list[str]) -> None:
    """
    Removes the directory if it contains no files

    Removal is best-effort: a directory that cannot be removed (for example
    because it still has subdirectories or has already been deleted) is left
    in place without raising.
    Args:
        root (str): The directory to remove
        files (list[str]): The files found directly inside root
    """
    if files:
        return

    try:
        os.rmdir(root)
    except OSError:
        # Filesystem refusals are expected here and deliberately ignored;
        # anything else (e.g. a wrong argument type) is a bug and propagates.
        pass


def merge(base: Path, root: str, files: list[str]) -> None:
    """
    Merges the PDFs of a directory into a single PDF saved next to it

    The PDFs found directly inside root are concatenated in natural,
    case-insensitive order of their paths and written to "<root>.pdf".
    Nothing is written when root is the top-level "done" directory of base
    or when files contains no PDF.
    Args:
        base (Path): The base directory
        root (str): The directory whose PDFs are merged
        files (list[str]): The names of the files inside root
    """
    proot = Path(root)
    if proot == base / "done":
        return

    pdf_paths = [proot / file for file in files if file.lower().endswith(".pdf")]
    if not pdf_paths:
        return

    merged = pymupdf.open()
    try:
        # Open one source at a time so a failure part-way through cannot leak
        # the handles of every PDF in the directory.
        for pdf_path in natsorted(pdf_paths, key=str, alg=ns.IGNORECASE):
            pdf = pymupdf.open(pdf_path)
            try:
                merged.insert_pdf(pdf)
            finally:
                pdf.close()

        merged.save(Path(root + ".pdf"), garbage=4, deflate=True)
    finally:
        merged.close()


if __name__ == "__main__":
pdfs = Path(sys.argv[1] if len(sys.argv) > 1 else ".")
pdfs.mkdir(exist_ok=True, parents=True)
(pdfs / "todo").mkdir(exist_ok=True, parents=True)
(pdfs / "done").mkdir(exist_ok=True, parents=True)

Parallel(n_jobs=-1)(
delayed(predict)(
pdfs,
Path(root) / file,
sys.argv[2:] if len(sys.argv) > 2 else ["--rotate-pages", "--deskew", "--skip-text", "--invalidate-digital-signatures", "--clean"],
)
for root, _, files in os.walk(pdfs / "todo")
for file in files
)
for root, _, files in os.walk(pdfs / "todo"):
for file in files:
predict(
pdfs,
Path(root) / file,
(
sys.argv[2:]
if len(sys.argv) > 2
else [
"--rotate-pages",
"--deskew",
"--skip-text",
"--invalidate-digital-signatures",
"--clean",
]
),
)

# Remove empty directories
for root, _, files in os.walk(pdfs / "todo"):
if not files:
try:
os.rmdir(root)
except Exception:
pass
cleanup(root, files)

# Merge PDFs
for root, _, files in os.walk(pdfs / "done"):
proot = Path(root)
if proot == pdfs / "done":
continue

pdf_list = [
pymupdf.open(proot / file)
for file in files
if file.lower().endswith(".pdf")
]
if not pdf_list:
continue

merged = pymupdf.open()
for pdf in natsorted(pdf_list, key=lambda x: x.name, alg=ns.IGNORECASE):
merged.insert_pdf(pdf)

merged.save(Path(root + ".pdf"), garbage=4, deflate=True)
merged.close()

for pdf in pdf_list:
pdf.close()
merge(pdfs, root, files)
1 change: 0 additions & 1 deletion src/predict.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ if ! apt_install "$langs"; then
fi

[ -d venv ] || python3 -m venv venv
export OMP_THREAD_LIMIT=1

if [[ -e venv/bin/pip3 ]]; then
source venv/bin/activate
Expand Down
14 changes: 14 additions & 0 deletions src/test/example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

set -e

# Black-box test: run the pipeline on a single PDF and check that the input
# leaves pdf/todo and an output appears in pdf/done.
black_box_single_pdf() {
    # Restore the fixture from its backup; the leading backslash bypasses any
    # `cp` alias.
    \cp -f pdf/todo/example.pdf.bak pdf/todo/example.pdf
    bash src/predict.sh pdf

    if [ -f pdf/todo/example.pdf ]; then
        echo "FAIL: pdf/todo/example.pdf still exists after predict.sh" >&2
        exit 1
    fi
    if [ ! -f pdf/done/example.pdf ]; then
        echo "FAIL: pdf/done/example.pdf was not created by predict.sh" >&2
        exit 1
    fi

    # Remove the generated output so the test can be run again.
    rm -f pdf/done/example.pdf
}

black_box_single_pdf
echo "All tests passed!"

0 comments on commit c5a5211

Please sign in to comment.