Skip to content

Commit

Permalink
remove first
Browse files (browse the repository at this point in the history)
  • Loading branch information
ipitio committed Oct 9, 2024
1 parent 6c9263a commit 596f3f2
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions src/main.py
Original file line number | Diff line number | Diff line change
Expand Up
@@ -25,22 +25,24 @@ def process_pdfs(base: Path = Path(".")):

def predict(input_file: Path, output_file: Path):
relative_path = input_file.relative_to(base / "todo")
+ images = convert_from_path(input_file, dpi=300, fmt="jpeg")
+
+ try:
+ input_file.unlink()
+ except:
+ pass

print(f"Processing {relative_path}...")

# Create a PDF file to store the OCR results
doc = fitz.open()

# Perform OCR on the images
- for image in convert_from_path(input_file, dpi=300, fmt="jpeg"):
+ for image in images:
prediction = pytesseract.image_to_pdf_or_hocr(image, extension="pdf")
doc.insert_pdf(fitz.open("pdf", prediction))
gc.collect()

- try:
- input_file.unlink()
- except:
- pass

# Save the OCR results to a new PDF file
doc.save(output_file, garbage=4, deflate=True)
doc.close()
Expand Down

0 comments on commit 596f3f2

Please sign in to comment.