Make torch an optional Poetry dependency group for the OCR service.

* pyproject.toml: move `transformers[torch]` out of the main dependencies
  into a new `torch` group; poetry.lock content-hash updated to match.
* CI workflows install with `--with dev --with torch`; the dev Dockerfile
  and dev-env.yaml install with `--without=torch`.
* random_perspective_transform.py: import torch inside make_transform()
  instead of at module import time.
* ocr-benchmarks.yml: also run on pull requests touching OCR/** or the
  workflow file, install tesseract via apt (with -y, matching
  ocr-tests.yml), and drop the commented-out benchmark publishing steps.

NOTE(review): leading whitespace inside the hunks and the position of blank
context lines were reconstructed from a whitespace-collapsed copy of this
patch; confirm against the target files before applying. The post-image
blob id on the first `index` line predates the added `-y`.

diff --git a/.github/workflows/ocr-benchmarks.yml b/.github/workflows/ocr-benchmarks.yml
index fcafb89a..7de18cd2 100644
--- a/.github/workflows/ocr-benchmarks.yml
+++ b/.github/workflows/ocr-benchmarks.yml
@@ -1,5 +1,9 @@
 name: OCR Poetry Benchmark Tests
 on:
+  pull_request:
+    paths:
+      - .github/workflows/ocr-benchmarks.yml
+      - OCR/**
   push:
     branches:
       - main
@@ -16,31 +20,13 @@ jobs:
           python-version: "3.10"
       - name: Install dependencies
         run: |
+          sudo apt update && sudo apt install tesseract-ocr-eng tesseract-ocr -y
           cd OCR/tests
           python -m pip install --upgrade pip
           pip install poetry
           pip install pytest pytest-benchmark
-          poetry install --with dev
+          poetry install --with dev --with torch
       - name: Run tests
         run: |
           cd OCR/tests
           poetry run pytest benchmark_test.py -v --benchmark-json output.json
-      # - name: Store benchmark result
-      #   uses: benchmark-action/github-action-benchmark@v1
-      #   with:
-      #     tool: 'pytest'
-      #     output-file-path: OCR/tests/output.json
-      #     auto-push: false
-      # - name: Push benchmark result
-      #   run: git push 'https://CDCgov:${{ secrets.GITHUB_TOKEN }}@github.com/CDCgov/IDWA.git' gh-pages:gh-pages
-      # - name: Fetch data.js from gh-pages
-      #   run: |
-      #     git config --global user.name 'GitHub Action'
-      #     git config --global user.email 'action@github.com'
-      #     git fetch
-      #     git checkout gh-pages
-      #     rm ./dev/bench/index.html
-      #     cp ./dev/bench/index-template.html ./dev/bench/index.html
-      #     git add .
-      #     git commit -m "template to index"
-      #     git push origin gh-pages
\ No newline at end of file
diff --git a/.github/workflows/ocr-tests.yml b/.github/workflows/ocr-tests.yml
index c3754eee..abc703c1 100644
--- a/.github/workflows/ocr-tests.yml
+++ b/.github/workflows/ocr-tests.yml
@@ -20,6 +20,6 @@ jobs:
           sudo apt install tesseract-ocr-eng tesseract-ocr -y
           python -m pip install --upgrade pip
           pip install poetry
-          poetry install --with dev
+          poetry install --with dev --with torch
       - name: Run tests
         run: poetry run pytest tests/ocr_test.py tests/segmentation_template_test.py tests/alignment_test.py
diff --git a/OCR/dev-dockerfile b/OCR/dev-dockerfile
index d2e02d6b..2f401eed 100644
--- a/OCR/dev-dockerfile
+++ b/OCR/dev-dockerfile
@@ -12,4 +12,4 @@ COPY ./pyproject.toml /ocr/pyproject.toml
 COPY ./poetry.lock /ocr/poetry.lock
 
 WORKDIR /ocr
-RUN poetry install && poetry cache list | xargs -n1 poetry cache clear --all
+RUN poetry install --without=torch && poetry cache list | xargs -n1 poetry cache clear --all
diff --git a/OCR/ocr/services/alignment/backends/random_perspective_transform.py b/OCR/ocr/services/alignment/backends/random_perspective_transform.py
index 927d009b..3697d1df 100644
--- a/OCR/ocr/services/alignment/backends/random_perspective_transform.py
+++ b/OCR/ocr/services/alignment/backends/random_perspective_transform.py
@@ -4,7 +4,6 @@
 
 from pathlib import Path
 
-import torch
 import numpy as np
 import cv2 as cv
 from PIL import Image
@@ -20,6 +19,8 @@ def make_transform(self, distortion_scale: float) -> object:
         """
         Create a transformation matrix for a random perspective transform.
         """
+        import torch
+
         # From torchvision. BSD 3-clause
         height = self.image.height
         width = self.image.width
diff --git a/OCR/poetry.lock b/OCR/poetry.lock
index 73ee4c68..90c418d3 100644
--- a/OCR/poetry.lock
+++ b/OCR/poetry.lock
@@ -3570,4 +3570,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "45a5fb639bff707f562a3f4d44cb73158f74a9b0debb5c5bdd7c930931dd6944"
+content-hash = "b3fb86071ca8f555b215a55ddc3c646a24b4d905c27a892b774f50600c1f09a7"
diff --git a/OCR/pyproject.toml b/OCR/pyproject.toml
index eb95539b..1380120a 100644
--- a/OCR/pyproject.toml
+++ b/OCR/pyproject.toml
@@ -11,11 +11,13 @@ numpy = "^1.26.4"
 opencv-python = "^4.9.0.80"
 levenshtein = "^0.25.1"
 fastapi = {extras = ["standard"], version = "^0.112.1"}
-transformers = {extras = ["torch"], version = "^4.45.1"}
 pillow = "^10.3.0"
 datasets = "^3.0.1"
 tesserocr = "^2.7.1"
 
+[tool.poetry.group.torch.dependencies]
+transformers = {extras = ["torch"], version = "^4.45.1"}
+
 [tool.poetry.group.dev.dependencies]
 lxml = "^5.3.0"
 docopt = "^0.6.2"
diff --git a/dev-env.yaml b/dev-env.yaml
index 56da7afe..38acb615 100644
--- a/dev-env.yaml
+++ b/dev-env.yaml
@@ -8,7 +8,7 @@ services:
     volumes:
       - ./OCR:/ocr
     restart: "no"
-    command: "/bin/sh -c 'poetry install && poetry run fastapi dev ocr/api.py --host 0.0.0.0 --port 8000'"
+    command: "/bin/sh -c 'poetry install --without=torch --sync && poetry run fastapi dev ocr/api.py --host 0.0.0.0 --port 8000'"
   frontend:
     build:
       context: ./frontend