Skip to content

Commit

Permalink
Parse model output using pre-determined keys.
Browse files Browse the repository at this point in the history
  • Loading branch information
romansinkus committed Dec 6, 2024
1 parent cf9508e commit b68912c
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
venv/
venv*
*.pyc
*.pyo
*.pyd
Expand Down
3 changes: 2 additions & 1 deletion backend/src/routes/transcription-route.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => {
}

const imageFile = req.files.image;
// const keyFile = req.files.keys; // TODO: implement ability to send key file
const formData = new FormData();
formData.append("image", imageFile.data, imageFile.name);

const response = await axios.post(
"http://localhost:5000/transcribe",
"http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying
formData,
{
headers: {
Expand Down
8 changes: 6 additions & 2 deletions transcription/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from transformers import AutoProcessor, AutoModelForCausalLM
import torch

from transcription import load_keys, parse_florence_output

app = Flask(__name__)
CORS(app)

Expand All @@ -16,6 +18,7 @@

@app.route("/api/transcribe", methods=["POST"])
def transcribe():
print("START OF ENDPOINT")
if "image" not in request.files:
return jsonify({"error": "No image file provided"}), 400

Expand All @@ -33,8 +36,9 @@ def transcribe():
do_sample=False
)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

return jsonify({"transcription": generated_text})
keys = load_keys("keys.json")
json_result = parse_florence_output(generated_text, keys)
return json_result
except Exception as e:
return jsonify({"error": str(e)}), 500

Expand Down
18 changes: 18 additions & 0 deletions transcription/keys.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"keys": [
"Case No.",
"Patient ID",
"Type",
"Surgeon",
"OR Date",
"Age",
"M/F",
"Indication for Surgery/Reason for Referral",
"HPI",
"Meds",
"Allergies",
"ID",
"PMHx",
"Social"
]
}
27 changes: 27 additions & 0 deletions transcription/transcription.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import json
import re


def load_keys(filePath):
    """Load the list of field names used to parse the model output.

    Args:
        filePath: Path to a JSON file whose top-level object has a
            ``"keys"`` entry.

    Returns:
        The value stored under ``"keys"`` in the JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"keys"`` entry.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filePath, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data["keys"]

def parse_florence_output(output, keys):
    """Extract ``key: value`` fields from model output text.

    The text is scanned once for labels of the form ``<key>:``. The value
    of a label is everything up to the next label (or the end of the
    text), with surrounding whitespace stripped. Only the first occurrence
    of each key is kept.

    Args:
        output: The text to parse. A ``dict`` is first serialized with
            ``json.dumps`` and then parsed as text.
        keys: Iterable of field-name strings to look for. Empty strings
            are ignored.

    Returns:
        A JSON string (indented by 4 spaces) mapping each key that was
        found to its value, in the order the keys appear in ``keys``.
    """
    if isinstance(output, dict):
        output = json.dumps(output)  # convert to JSON-formatted string

    # Longest labels first so that, at a given position, "Patient ID" is
    # preferred over a shorter key sharing the same prefix. dict.fromkeys
    # removes duplicates while keeping a deterministic order.
    labels = sorted(
        dict.fromkeys(key for key in keys if key), key=len, reverse=True
    )
    if not labels:
        return json.dumps({}, indent=4)

    # A label only counts when it is followed by a colon and is not the
    # tail of a longer word; a bare key word inside a value (e.g. "Type 2
    # diabetes") therefore does not terminate that value.
    label_pattern = re.compile(
        r"(?<!\w)(" + "|".join(map(re.escape, labels)) + r"):"
    )
    matches = list(label_pattern.finditer(output))

    found = {}
    for index, match in enumerate(matches):
        label = match.group(1)
        if label in found:
            continue  # keep the first occurrence only
        if index + 1 < len(matches):
            value_end = matches[index + 1].start()
        else:
            value_end = len(output)
        found[label] = output[match.end():value_end].strip()

    # Emit fields in the caller's key order.
    parsed_data = {key: found[key] for key in keys if key in found}

    # Convert the parsed data to JSON format
    return json.dumps(parsed_data, indent=4)

0 comments on commit b68912c

Please sign in to comment.