From eaadc67c8620e708b3448935b97fbde1eea7b16e Mon Sep 17 00:00:00 2001
From: Arjun K Haridas <51917087+topguns837@users.noreply.github.com>
Date: Sun, 11 Feb 2024 00:02:37 +0530
Subject: [PATCH] Re-wrote YOLOv5 Plugin using torch instead of OpenCV (#59)

* Re-wrote YOLOv5 Plugin using torch instead of OpenCV

* Added copyright to YOLOv5 Detector

* Removing flake8 linting issues

* Removing flake8 linting issues

---------

Co-authored-by: topguns837 <arjun.k@orangewood.co>
---
 .../object_detection/Detectors/YOLOv5.py      | 139 +++---------------
 1 file changed, 20 insertions(+), 119 deletions(-)
 mode change 100644 => 100755 object_detection/object_detection/Detectors/YOLOv5.py

diff --git a/object_detection/object_detection/Detectors/YOLOv5.py b/object_detection/object_detection/Detectors/YOLOv5.py
old mode 100644
new mode 100755
index 3a7825d..4553004
--- a/object_detection/object_detection/Detectors/YOLOv5.py
+++ b/object_detection/object_detection/Detectors/YOLOv5.py
@@ -14,151 +14,71 @@
 
 import os
 
-import cv2
-import numpy as np
+import torch
 
 from ..DetectorBase import DetectorBase
 
 
 class YOLOv5(DetectorBase):
-    def __init__(self, conf_threshold=0.7,
-                 score_threshold=0.4, nms_threshold=0.25,
-                 is_cuda=1):
+    """Object detector that runs a YOLOv5 model loaded through torch.hub."""
 
+    def __init__(self, conf_threshold=0.7):
         super().__init__()
+        # Minimum detection confidence; applied to the model in build_model().
+        self.conf_threshold = conf_threshold
 
-        # opencv img input
-        self.frame = None
-        self.net = None
-        self.INPUT_WIDTH = 640
-        self.INPUT_HEIGHT = 640
-        self.CONFIDENCE_THRESHOLD = conf_threshold
-
-        self.is_cuda = is_cuda
-
-    # load model and prepare its backend to either run on GPU or CPU,
-    # see if it can be added in constructor
     def build_model(self, model_dir_path, weight_file_name):
+        """Load custom YOLOv5 weights from model_dir_path/weight_file_name.
+
+        Raises:
+            Exception: if loading fails (chained to the underlying error).
+        """
         model_path = os.path.join(model_dir_path, weight_file_name)
 
         try:
-            self.net = cv2.dnn.readNet(model_path)
+            self.model = torch.hub.load('ultralytics/yolov5:v6.0', 'custom', path=model_path,
+                                        force_reload=True)
+            # Without this the model keeps its own default confidence threshold.
+            self.model.conf = self.conf_threshold
         except Exception as e:
-            print("Loading the model failed with exception {}".format(e))
+            print("Loading model failed with exception: {}".format(e))
             raise Exception("Error loading given model from path: {}.".format(model_path) +
-                            "Maybe the file doesn't exist?")
+                            " Maybe the file doesn't exist?") from e
 
-        if self.is_cuda:
-            print("is_cuda was set to True. Attempting to use CUDA")
-            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
-            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
-        else:
-            print("is_cuda was set to False. Running on CPU")
-            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
-            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
-
-    # load classes.txt that contains mapping of model with labels
-    # TODO: add try/except to raise exception that tells the use to
-    # check the name if it is classes.txt
     def load_classes(self, model_dir_path):
+        """Read class names, one per line, from classes.txt in model_dir_path."""
         self.class_list = []
-        with open(model_dir_path + "/classes.txt", "r") as f:
-            self.class_list = [cname.strip() for cname in f.readlines()]
-        return self.class_list
-
-    def detect(self, image):
-        # convert image to 640x640
-        blob = cv2.dnn.blobFromImage(image, 1/255.0, (self.INPUT_WIDTH, self.INPUT_HEIGHT),
-                                     swapRB=True, crop=False)
-        self.net.setInput(blob)
-        preds = self.net.forward()
-        return preds
-
-    # extract bounding box, class IDs and confidences of detected objects
-    # YOLOv5 returns a 3D tensor of dimension 25200*(5 + n_classes)
-    def wrap_detection(self, input_image, output_data):
-        class_ids = []
-        confidences = []
-        boxes = []
-
-        rows = output_data.shape[0]
-
-        image_width, image_height, _ = input_image.shape
-
-        x_factor = image_width / self.INPUT_WIDTH
-        y_factor = image_height / self.INPUT_HEIGHT
-
-        # Iterate through all the 25200 vectors
-        for r in range(rows):
-            row = output_data[r]
-
-            # Continue only if Pc > conf_threshold
-            confidence = row[4]
-            if confidence >= self.CONFIDENCE_THRESHOLD:
 
-                # One-hot encoded vector representing class of object
-                classes_scores = row[5:]
-
-                # Returns min and max values in a array alongwith their indices
-                _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
-
-                # Extract the column index of the maximum values in classes_scores
-                class_id = max_indx[1]
-
-                # Continue of the class score is greater than a threshold
-                # class_score represents the probability of an object belonging to that class
-                if (classes_scores[class_id] > .25):
-
-                    confidences.append(confidence)
-
-                    class_ids.append(class_id)
-
-                    x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
-                    left = int((x - 0.5 * w) * x_factor)
-                    top = int((y - 0.5 * h) * y_factor)
-                    width = int(w * x_factor)
-                    height = int(h * y_factor)
-                    box = np.array([left, top, width, height])
-                    boxes.append(box)
-
-        # removing intersecting bounding boxes
-        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)
-
-        result_class_ids = []
-        result_confidences = []
-        result_boxes = []
-
-        for i in indexes:
-            result_confidences.append(confidences[i])
-            result_class_ids.append(class_ids[i])
-            result_boxes.append(boxes[i])
-
-        return result_class_ids, result_confidences, result_boxes
+        with open(os.path.join(model_dir_path, 'classes.txt')) as f:
+            self.class_list = [cname.strip() for cname in f.readlines()]
 
-    # makes image square with dimension max(h, w)
-    def format_yolov5(self):
-        row, col, _ = self.frame.shape
-        _max = max(col, row)
-        result = np.zeros((_max, _max, 3), np.uint8)
-        result[0:row, 0:col] = self.frame
-        return result
+        return self.class_list
 
     def get_predictions(self, cv_image):
-        # Clear list
-        self.predictions = []
-
+        """Run the model on cv_image and return the resulting predictions.
+
+        Returns (None, None) when cv_image is None; otherwise self.predictions,
+        which is presumably filled by DetectorBase.create_predictions_list().
+        """
         if cv_image is None:
             # TODO: show warning message (different color, maybe)
             return None, None
         else:
             self.frame = cv_image
+            class_id = []
+            confidence = []
+            boxes = []
 
-            # make image square
-            inputImage = self.format_yolov5()
+            # NOTE(review): YOLOv5 hub models expect RGB arrays; if cv_image is BGR
+            # (the OpenCV default) it should be converted first - confirm with caller.
+            results = self.model(self.frame)
 
-            outs = self.detect(inputImage)
-            class_ids, confidences, boxes = self.wrap_detection(inputImage, outs[0])
+            # Each detection row is (x1, y1, x2, y2, confidence, class index).
+            for *xyxy, conf, label in results.xyxy[0]:
+                class_id.append(int(label))
+                confidence.append(conf.item())
+                boxes.append([int(xy) for xy in xyxy])
 
-            super().create_predictions_list(class_ids, confidences, boxes)
+            super().create_predictions_list(class_id, confidence, boxes)
 
             return self.predictions