Skip to content

Commit

Permalink
Use -slim python image
Browse files Browse the repository at this point in the history
  • Loading branch information
jonchang committed Dec 5, 2024
1 parent 6606e6f commit aeb1964
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
9 changes: 4 additions & 5 deletions OCR/dev-dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
FROM python:3.10-bullseye
FROM python:3.10-slim

RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 tesseract-ocr-eng tesseract-ocr -y
# Install libgl1 and Tesseract with its English language pack. The apt
# package lists are removed in the same layer so they do not persist in the
# image; nothing later in this file runs apt-get again.
RUN apt-get update \
    && apt-get install -y libgl1 tesseract-ocr tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Use larger "best" training data, rather than "fast"
# Python one-liner because we don't have curl or wget
RUN python3 -c 'from urllib.request import urlopen; print(urlopen("https://github.com/tesseract-ocr/tessdata_best/raw/refs/heads/main/eng.traineddata").read())' > /usr/share/tesseract-ocr/4.00/tessdata/eng.traineddata
# print() on a bytes object emits its Python repr (b'...'), not the raw file
# contents, which would corrupt the model; write the downloaded bytes to
# stdout's binary buffer so the redirect stores the file verbatim.
RUN python3 -c 'import sys; from urllib.request import urlopen; sys.stdout.buffer.write(urlopen("https://github.com/tesseract-ocr/tessdata_best/raw/refs/heads/main/eng.traineddata").read())' > /usr/share/tesseract-ocr/5/tessdata/eng.traineddata

# Install Poetry without keeping pip's download cache in the image layer.
RUN pip install --no-cache-dir poetry

COPY ./pyproject.toml /ocr/pyproject.toml
COPY ./poetry.lock /ocr/poetry.lock

WORKDIR /ocr
RUN poetry install

# Install the project dependencies, then clear every cache reported by
# `poetry cache list` within the same layer.
RUN poetry install \
    && poetry cache list \
        | xargs -n1 poetry cache clear --all
1 change: 1 addition & 0 deletions OCR/ocr/services/tesseract_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def _guess_tessdata_path(wanted_lang="eng") -> bytes:
"/usr/local/share/tesseract/tessdata",
"/usr/share/tesseract/tessdata",
"/usr/share/tesseract-ocr/4.00/tessdata",
"/usr/share/tesseract-ocr/5/tessdata",
"/opt/homebrew/share/tessdata",
"/opt/local/share/tessdata",
]
Expand Down

0 comments on commit aeb1964

Please sign in to comment.