Skip to content

Commit

Permalink
Update command line args
Browse files: browse the repository at this point in the history
* add a nicer interface for turning on / off various run settings
* enable tesseract option for benchmarking
  • Loading branch information
jonchang committed Dec 4, 2024
1 parent 590802e commit 6549a7d
Showing 1 changed file with 28 additions and 19 deletions.
47 changes: 28 additions & 19 deletions OCR/benchmark_main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,46 +3,55 @@
from ocr.services.batch_segmentation import BatchSegmentationOCR
from ocr.services.batch_metrics import BatchMetricsAnalysis

from ocr.services.tesseract_ocr import TesseractOCR, PSM
from ocr.services.image_ocr import ImageOCR

def main():
parser = argparse.ArgumentParser(description="Run OCR and metrics analysis.")
def parse_args():
parser = argparse.ArgumentParser(description="Run OCR and metrics analysis.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("image_folder", help="Path to the folder containing image files.")
parser.add_argument("segmentation_template", help="Path to the segmentation template.")
parser.add_argument("labels_path", help="Path to the labels file (JSON).")
parser.add_argument("output_folder", help="Path to the folder where OCR results will be saved.")
parser.add_argument("ground_truth_folder", help="Path to the folder with ground truth JSON files.")
parser.add_argument("csv_output_folder", help="Path to the folder where CSV metrics will be saved.")
parser.add_argument(
"run_type",
type=int,
choices=[1, 2, 3],
help="Choose run type: 1 for Segmentation Only, 2 for Metrics Only, 3 for Both.",
)
parser.add_argument("--ocr", action=argparse.BooleanOptionalAction, default=True, help="Run (or don't run) segmentation and OCR analysis")
parser.add_argument("--metrics", action=argparse.BooleanOptionalAction, default=True, help="Run (or don't run) metrics analysis")
parser.add_argument("--model", choices=["tesseract", "trocr"], default="trocr", help="OCR model to run for `--ocr` option.")

args = parser.parse_args()
return args


def main():
args = parse_args()
ocr_results = None

if args.run_type == 1: # Segmentation Only
print("Running segmentation and OCR...")
if args.ocr:
print(f"Running segmentation and OCR using {args.model}...")
ocr_results = run_segmentation_and_ocr(args)
elif args.run_type == 2: # Metrics Only
if args.metrics:
print("Running metrics analysis...")
run_metrics_analysis(args, ocr_results=None)
elif args.run_type == 3:
print("Running both segmentation,ocr and metrics analysis...")
ocr_results = run_segmentation_and_ocr(args)
run_metrics_analysis(args, ocr_results)
run_metrics_analysis(args, ocr_results=ocr_results)


def run_segmentation_and_ocr(args):
"""
Runs segmentation and OCR processing.
Returns OCR results with processing time.
"""
segmentation_ocr = BatchSegmentationOCR(
args.image_folder, args.segmentation_template, args.labels_path, args.output_folder
)

model = None

if args.model == "tesseract":
# Disable border rejection of text too close to the edge of the image as we, not tesseract, are doing segmentation
# Enforce single-line mode for tesseract
model = TesseractOCR(psm=PSM.SINGLE_LINE, variables=dict(
tessedit_image_border="0",
))
elif args.model == "trocr":
model = ImageOCR()

segmentation_ocr = BatchSegmentationOCR(args.image_folder, args.segmentation_template, args.labels_path, args.output_folder, model=model)
ocr_results = segmentation_ocr.process_images()
print(f"OCR results saved to: {args.output_folder}")
return ocr_results
Expand Down

0 comments on commit 6549a7d

Please sign in to comment.