Merge pull request #259 from VikParuchuri/dev

Bugfixes and `pdftext` improvements
VikParuchuri · Dec 12, 2024 · b46d5ce
2 parents 0ce57a2 + a3fde2f
commit b46d5ce
Show file tree

Hide file tree

Showing 6 changed files with 590 additions and 757 deletions.
diff --git a/README.md b/README.md
@@ -230,13 +230,15 @@ Setting the `LAYOUT_BATCH_SIZE` env var properly will make a big difference when
 from PIL import Image
 from surya.detection import batch_text_detection
 from surya.layout import batch_layout_detection
-from surya.model.layout.model import load_model, load_processor
+from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
+from surya.model.layout.model import load_model as load_layout_model
+from surya.model.layout.processor import load_processor as load_layout_processor
 
 image = Image.open(IMAGE_PATH)
-model = load_model()
-processor = load_processor()
-det_model = load_model()
-det_processor = load_processor()
+model = load_layout_model()
+processor = load_layout_processor()
+det_model = load_det_model()
+det_processor = load_det_processor()
 
 # layout_predictions is a list of dicts, one per image
 line_predictions = batch_text_detection([image], det_model, det_processor)

diff --git a/ocr_app.py b/ocr_app.py
@@ -204,22 +204,22 @@ def page_count(pdf_file):
 if text_det:
     det_img, pred = text_detection(pil_image)
     with col1:
-        st.image(det_img, caption="Detected Text", use_column_width=True)
+        st.image(det_img, caption="Detected Text", use_container_width=True)
         st.json(pred.model_dump(exclude=["heatmap", "affinity_map"]), expanded=True)
 
 
 # Run layout
 if layout_det:
     layout_img, pred = layout_detection(pil_image)
     with col1:
-        st.image(layout_img, caption="Detected Layout", use_column_width=True)
+        st.image(layout_img, caption="Detected Layout", use_container_width=True)
         st.json(pred.model_dump(exclude=["segmentation_map"]), expanded=True)
 
 # Run OCR
 if text_rec:
     rec_img, pred = ocr(pil_image, pil_image_highres, languages)
     with col1:
-        st.image(rec_img, caption="OCR Result", use_column_width=True)
+        st.image(rec_img, caption="OCR Result", use_container_width=True)
         json_tab, text_tab = st.tabs(["JSON", "Text Lines (for debugging)"])
         with json_tab:
             st.json(pred.model_dump(), expanded=True)
@@ -230,8 +230,8 @@ def page_count(pdf_file):
 if table_rec:
     table_img, pred = table_recognition(pil_image, pil_image_highres, in_file, page_number - 1 if page_number else None, use_pdf_boxes, skip_table_detection)
     with col1:
-        st.image(table_img, caption="Table Recognition", use_column_width=True)
+        st.image(table_img, caption="Table Recognition", use_container_width=True)
         st.json([p.model_dump() for p in pred], expanded=True)
 
 with col2:
-    st.image(pil_image, caption="Uploaded Image", use_column_width=True)
+    st.image(pil_image, caption="Uploaded Image", use_container_width=True)