Update command line args

* Add a nicer interface for turning various run settings on / off
* Enable the tesseract option for benchmarking
CDCgov · Dec 4, 2024 · 6549a7d · 6549a7d
1 parent 590802e
commit 6549a7d
Showing 1 changed file with 28 additions and 19 deletions.
diff --git a/OCR/benchmark_main.py b/OCR/benchmark_main.py
@@ -3,46 +3,55 @@
 from ocr.services.batch_segmentation import BatchSegmentationOCR
 from ocr.services.batch_metrics import BatchMetricsAnalysis
 
+from ocr.services.tesseract_ocr import TesseractOCR, PSM
+from ocr.services.image_ocr import ImageOCR
 
-def main():
-    parser = argparse.ArgumentParser(description="Run OCR and metrics analysis.")
+def parse_args():
+    parser = argparse.ArgumentParser(description="Run OCR and metrics analysis.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("image_folder", help="Path to the folder containing image files.")
     parser.add_argument("segmentation_template", help="Path to the segmentation template.")
     parser.add_argument("labels_path", help="Path to the labels file (JSON).")
     parser.add_argument("output_folder", help="Path to the folder where OCR results will be saved.")
     parser.add_argument("ground_truth_folder", help="Path to the folder with ground truth JSON files.")
     parser.add_argument("csv_output_folder", help="Path to the folder where CSV metrics will be saved.")
-    parser.add_argument(
-        "run_type",
-        type=int,
-        choices=[1, 2, 3],
-        help="Choose run type: 1 for Segmentation Only, 2 for Metrics Only, 3 for Both.",
-    )
+    parser.add_argument("--ocr", action=argparse.BooleanOptionalAction, default=True, help="Run (or don't run) segmentation and OCR analysis")
+    parser.add_argument("--metrics", action=argparse.BooleanOptionalAction, default=True, help="Run (or don't run) metrics analysis")
+    parser.add_argument("--model", choices=["tesseract", "trocr"], default="trocr", help="OCR model to run for `--ocr` option.")
 
     args = parser.parse_args()
+    return args
+
 
+def main():
+    args = parse_args()
     ocr_results = None
 
-    if args.run_type == 1:  # Segmentation Only
-        print("Running segmentation and OCR...")
+    if args.ocr:
+        print(f"Running segmentation and OCR using {args.model}...")
         ocr_results = run_segmentation_and_ocr(args)
-    elif args.run_type == 2:  # Metrics Only
+    if args.metrics:
         print("Running metrics analysis...")
-        run_metrics_analysis(args, ocr_results=None)
-    elif args.run_type == 3:
-        print("Running both segmentation,ocr and metrics analysis...")
-        ocr_results = run_segmentation_and_ocr(args)
-        run_metrics_analysis(args, ocr_results)
+        run_metrics_analysis(args, ocr_results=ocr_results)
 
 
 def run_segmentation_and_ocr(args):
     """
     Runs segmentation and OCR processing.
     Returns OCR results with processing time.
     """
-    segmentation_ocr = BatchSegmentationOCR(
-        args.image_folder, args.segmentation_template, args.labels_path, args.output_folder
-    )
+
+    model = None
+
+    if args.model == "tesseract":
+        # Disable border rejection of text too close to the edge of the image as we, not tesseract, are doing segmentation
+        # Enforce single-line mode for tesseract
+        model = TesseractOCR(psm=PSM.SINGLE_LINE, variables=dict(
+            tessedit_image_border="0",
+            ))
+    elif args.model == "trocr":
+        model = ImageOCR()
+
+    segmentation_ocr = BatchSegmentationOCR(args.image_folder, args.segmentation_template, args.labels_path, args.output_folder, model=model)
     ocr_results = segmentation_ocr.process_images()
     print(f"OCR results saved to: {args.output_folder}")
     return ocr_results