Parse model output using pre-determined keys.

ubclaunchpad · Dec 6, 2024 · b68912c · b68912c
1 parent cf9508e
commit b68912c
Show file tree

Hide file tree

Showing 5 changed files with 54 additions and 4 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,4 @@
-venv/
+venv*
 *.pyc
 *.pyo
 *.pyd

diff --git a/backend/src/routes/transcription-route.js b/backend/src/routes/transcription-route.js
@@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => {
     }
 
     const imageFile = req.files.image;
+    // const keyFile = req.files.keys; // TODO: implement ability to send key file
     const formData = new FormData();
     formData.append("image", imageFile.data, imageFile.name);
 
     const response = await axios.post(
-      "http://localhost:5000/transcribe",
+      "http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying
       formData,
       {
         headers: {

diff --git a/transcription/app.py b/transcription/app.py
@@ -4,6 +4,8 @@
 from transformers import AutoProcessor, AutoModelForCausalLM
 import torch
 
+from transcription import load_keys, parse_florence_output
+
 app = Flask(__name__)
 CORS(app)
 
@@ -16,6 +18,7 @@
 
 @app.route("/api/transcribe", methods=["POST"])
 def transcribe():
+    print("START OF ENDPOINT")
     if "image" not in request.files:
         return jsonify({"error": "No image file provided"}), 400
 
@@ -33,8 +36,9 @@ def transcribe():
             do_sample=False
         )
         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-
-        return jsonify({"transcription": generated_text})
+        keys = load_keys("keys.json")
+        json_result = parse_florence_output(generated_text, keys)
+        return json_result
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 

diff --git a/transcription/keys.json b/transcription/keys.json
@@ -0,0 +1,18 @@
+{
+    "keys": [
+        "Case No.",
+        "Patient ID",
+        "Type",
+        "Surgeon",
+        "OR Date",
+        "Age",
+        "M/F",
+        "Indication for Surgery/Reason for Referral",
+        "HPI",
+        "Meds",
+        "Allergies",
+        "ID",
+        "PMHx",
+        "Social"
+    ]
+}
diff --git a/transcription/transcription.py b/transcription/transcription.py
@@ -0,0 +1,27 @@
+import json
+import re
+
+
def load_keys(filePath):
    """Load the list of field names to extract from a JSON key file.

    The file must contain a JSON object with a top-level ``"keys"`` entry,
    e.g. ``{"keys": ["Case No.", "Patient ID"]}``.

    Args:
        filePath: Path to the JSON key file.

    Returns:
        The value stored under ``"keys"`` in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"keys"`` entry.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # locale encoding, which would mis-decode non-ASCII key names.
    with open(filePath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['keys']
+
def parse_florence_output(output, keys):
    """Extract ``key: value`` fields from model output and return them as JSON.

    The text is scanned for labels of the form ``<key>:`` where ``<key>`` is
    one of ``keys``. The value of a label is everything between it and the
    next label (or the end of the text), with surrounding whitespace removed.

    A label is only recognised when it is followed by a colon and is not
    preceded by a word character. This keeps a short key such as ``"ID"``
    from matching inside the longer label ``"Patient ID:"``, and keeps a key
    word that merely appears inside a value (``"Type 2 diabetes"``) from
    cutting that value short.

    Args:
        output: The model output, either a string or a dict (a dict is
            serialised to a JSON string before being scanned).
        keys: Iterable of key names (strings) to look for.

    Returns:
        A JSON string (4-space indent) mapping each key that was found to its
        value. Keys appear in the order given by ``keys``; keys that do not
        occur in the text are omitted. If a label occurs more than once, the
        first occurrence is used.
    """
    if isinstance(output, dict):
        output = json.dumps(output)  # convert to JSON-formatted string

    # With no keys the alternation below would be empty and match every colon.
    if not keys:
        return json.dumps({}, indent=4)

    # Build the label pattern once. Longest keys go first so the alternation
    # stays unambiguous when one key is a prefix of another.
    ordered_keys = sorted(dict.fromkeys(keys), key=len, reverse=True)
    alternatives = "|".join(re.escape(key) for key in ordered_keys)
    label_pattern = re.compile(rf"(?<!\w)({alternatives}):")

    labels = list(label_pattern.finditer(output))
    found = {}
    for index, label in enumerate(labels):
        key = label.group(1)
        if key in found:
            continue  # keep the first occurrence of a repeated label
        if index + 1 < len(labels):
            value_end = labels[index + 1].start()
        else:
            value_end = len(output)
        found[key] = output[label.end():value_end].strip()

    # Emit in the caller's key order rather than order of appearance.
    parsed_data = {key: found[key] for key in keys if key in found}

    # Convert the parsed data to JSON format
    return json.dumps(parsed_data, indent=4)
-Original file line number
+Diff line change
@@ -1,4 +1,4 @@
-    venv/
+    venv*
     *.pyc
     *.pyo
     *.pyd
@@ Expand Down @@