add openvino benchmark
Tony607 committed Feb 16, 2019
1 parent 24f732f commit 6ba95da
Showing 5 changed files with 552 additions and 69 deletions.
35 changes: 31 additions & 4 deletions README.md
# [How to train an object detection model easy for free](https://www.dlology.com/blog/how-to-train-an-object-detection-model-easy-for-free/) | DLology Blog



## How to Run

Easy way: run [this Colab Notebook](https://colab.research.google.com/github/Tony607/object_detection_demo/blob/master/tensorflow_object_detection_training_colab.ipynb).
```
python resize_images.py --raw-dir ./data/raw --save-dir ./data/images --ext jpg
```
Resized images are located in `./data/images/`
- Train/test split those files into two directories, `./data/images/train` and `./data/images/test` (a minimal sketch of this step follows this list)

- Annotate resized images with [labelImg](https://tzutalin.github.io/labelImg/), generate `xml` files inside `./data/images/train` and `./data/images/test` folders.

*Tips: use shortcuts (`w`: draw box, `d`: next file, `a`: previous file, etc.) to accelerate the annotation.*
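
The split can be done by hand or scripted. Below is a minimal sketch of the split step, assuming an 80/20 split and the `./data/images/` layout above; the split ratio and random seed are illustrative choices, not something this repo prescribes.
```python
import os
import random
import shutil

random.seed(42)  # illustrative seed for a reproducible split
src = "./data/images"
images = [f for f in os.listdir(src) if f.lower().endswith(".jpg")]
random.shuffle(images)
split = int(0.8 * len(images))  # assumed 80/20 train/test split
for subdir, names in (("train", images[:split]), ("test", images[split:])):
    os.makedirs(os.path.join(src, subdir), exist_ok=True)
    for name in names:
        shutil.move(os.path.join(src, name), os.path.join(src, subdir, name))
```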

## How to run inference on frozen TensorFlow graph

Requirements:
- `frozen_inference_graph.pb` Frozen TensorFlow object detection model downloaded from Colab after training.
- `label_map.pbtxt` Label map that maps predicted class indices to class names, also downloaded from Colab after training.

You can also opt to download my [copy](https://github.com/Tony607/REPO/releases/download/V0.1/checkpoint.zip) of those files from the GitHub Release page.


Run the following Jupyter notebook locally.
```
local_inference_test.ipynb
```
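A script version of the same test, `local_inference_test.py`, can also be run from the command line; see the benchmark examples below.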
# [How to run TensorFlow object detection model faster with Intel Graphics](https://www.dlology.com/blog/how-to-run-tensorflow-object-detection-model-faster-with-intel-graphics/) | DLology Blog

## How to deploy the trained custom object detection model with OpenVINO

Requirements:
- Frozen TensorFlow object detection model, i.e. `frozen_inference_graph.pb`, downloaded from Colab after training.
- The modified pipeline config file used for training, also downloaded from Colab after training.

You can also opt to download my [copy](https://github.com/Tony607/REPO/releases/download/V0.1/checkpoint.zip) of those files from the GitHub Release page.

Run the following Jupyter notebook locally and follow the instructions inside.
```
deploy/openvino_convert_tf_object_detection.ipynb
```
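For reference, the conversion the notebook walks through boils down to a single Model Optimizer call along these lines. This is a sketch assuming the 2018 R5-era `computer_vision_sdk` install, with `mo_tf.py` in `deployment_tools/model_optimizer` and the `ssd_v2_support.json` transform config shipped alongside it; the exact paths and the transform file are assumptions that depend on your install and model.
```
python mo_tf.py \
    --input_model frozen_inference_graph.pb \
    --tensorflow_object_detection_api_pipeline_config pipeline.config \
    --tensorflow_use_custom_operations_config ssd_v2_support.json \
    --data_type FP16 \
    --output_dir models/ssd_mobilenet_v2_custom_trained/FP16
```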
## Run the benchmark

Examples

Benchmark SSD MobileNet V2 on the Intel GPU with FP16 quantized weights.
```
cd ./deploy
python openvino_inference_benchmark.py \
    --model-dir ./models/ssd_mobilenet_v2_custom_trained/FP16 \
    --device GPU \
    --data-type FP16 \
    --img ../test/15.jpg
```
TensorFlow benchmark on the CPU
```
python local_inference_test.py \
    --model ./models/frozen_inference_graph.pb \
    --img ./test/15.jpg \
    --cpu
```
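Both scripts average 20 timed inference runs and report the result in the form `average(sec):<mean latency>,fps:<throughput>`.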
133 changes: 75 additions & 58 deletions deploy/openvino_convert_tf_object_detection.ipynb

Large diffs are not rendered by default.

160 changes: 160 additions & 0 deletions deploy/openvino_inference_benchmark.py
"""
## Example to benchmark SSD mobileNet V2 on Neural Compute stick.
```
python openvino_inference_benchmark.py\
--model-dir ./models/ssd_mobilenet_v2_custom_trained/FP16\
--device MYRIAD\
--data-type FP16\
--img ../test/15.jpg
```
"""

import os
import sys
import time
import glob
import platform

import numpy as np
from PIL import Image

# Check that a path like C:\Intel\computer_vision_sdk\python\python3.5 or
# ~/intel/computer_vision_sdk/python/python3.5 exists in PYTHONPATH.
is_win = "windows" in platform.platform().lower()
if is_win:
    message = "Please run `C:\\Intel\\computer_vision_sdk\\bin\\setupvars.bat` before running this."
else:
    message = "Add the following line to ~/.bashrc and re-run.\nsource ~/intel/computer_vision_sdk/bin/setupvars.sh"

assert "computer_vision_sdk" in os.environ.get("PYTHONPATH", ""), message


try:
    from openvino import inference_engine as ie
    from openvino.inference_engine import IENetwork, IEPlugin
except Exception as e:
    exception_type = type(e).__name__
    print(
        "The following error happened while importing Python API module:\n[ {} ] {}".format(
            exception_type, e
        )
    )
    sys.exit(1)


def pre_process_image(imagePath, img_shape):
    """Pre-process an image from an image file path.
    Arguments:
        imagePath {str} -- input image file path.
        img_shape {tuple} -- target height and width as a tuple.
    Returns:
        tuple -- preprocessed image in NCHW layout and the original image as np.array.
    """

    # Model input format.
    assert isinstance(img_shape, tuple) and len(img_shape) == 2

    n, c, h, w = [1, 3, img_shape[0], img_shape[1]]
    image = Image.open(imagePath)
    # PIL's resize() expects (width, height).
    processed_img = image.resize((w, h), resample=Image.BILINEAR)

    processed_img = np.array(processed_img).astype(np.uint8)

    # Change data layout from HWC to CHW.
    processed_img = processed_img.transpose((2, 0, 1))
    processed_img = processed_img.reshape((n, c, h, w))

    return processed_img, np.array(image)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="OpenVINO Inference speed benchmark.")
    parser.add_argument(
        "--model-dir",
        help="Directory where the OpenVINO IR .xml and .bin files exist.",
        type=str,
    )
    parser.add_argument(
        "--device", help="Device to run inference: GPU, CPU or MYRIAD.", type=str
    )
    parser.add_argument(
        "--data-type",
        help="Model data type: FP16 or FP32. Defaults to a value suited to the device.",
        type=str,
        default=None,
    )
    parser.add_argument("--img", help="Path to a sample image to inference.", type=str)
    args = parser.parse_args()

    # Directory containing the model .xml and .bin files.
    output_dir = args.model_dir
    assert os.path.isdir(output_dir), "`{}` does not exist".format(output_dir)

    # Devices: GPU (Intel), CPU or MYRIAD.
    plugin_device = args.device
    data_type = args.data_type
    # The converted model takes a fixed-size image as input;
    # we simply use the same size for image width and height.
    img_height = 300

    DATA_TYPE_MAP = {"GPU": "FP16", "CPU": "FP32", "MYRIAD": "FP16"}
    assert (
        plugin_device in DATA_TYPE_MAP
    ), "Unsupported device: `{}`, not found in `{}`".format(
        plugin_device, list(DATA_TYPE_MAP.keys())
    )

    if data_type is None:
        data_type = DATA_TYPE_MAP.get(plugin_device)

    # Path to a sample image to run inference on.
    img_fname = args.img
    assert os.path.isfile(img_fname)

    # Plugin initialization for the specified device; load an extensions library if specified.
    plugin_dir = None
    model_xml = glob.glob(os.path.join(output_dir, "*.xml"))[-1]
    model_bin = glob.glob(os.path.join(output_dir, "*.bin"))[-1]
    # Devices: GPU (Intel), CPU, MYRIAD.
    plugin = IEPlugin(plugin_device, plugin_dirs=plugin_dir)
    # Read the IR.
    net = IENetwork(model=model_xml, weights=model_bin)
    assert len(net.inputs.keys()) == 1
    assert len(net.outputs) == 1
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    # Load the network to the plugin.
    exec_net = plugin.load(network=net)
    del net

    # Run inference.
    img_shape = (img_height, img_height)
    processed_img, image = pre_process_image(img_fname, img_shape)
    res = exec_net.infer(inputs={input_blob: processed_img})

    print(res["DetectionOutput"].shape)

    # Each DetectionOutput row is [image_id, label, confidence, x_min, y_min, x_max, y_max].
    probability_threshold = 0.5
    preds = [
        pred for pred in res["DetectionOutput"][0][0] if pred[2] > probability_threshold
    ]

    for pred in preds:
        class_label = pred[1]
        probability = pred[2]
        print(
            "Predict class label:{}, with probability: {}".format(
                class_label, probability
            )
        )

    # The inference above doubles as a warm-up; now time 20 runs.
    times = []
    for i in range(20):
        start_time = time.time()
        res = exec_net.infer(inputs={input_blob: processed_img})
        delta = time.time() - start_time
        times.append(delta)
    mean_delta = np.array(times).mean()
    fps = 1 / mean_delta
    print("average(sec):{:.3f},fps:{:.2f}".format(mean_delta, fps))
137 changes: 130 additions & 7 deletions local_inference_test.ipynb

Large diffs are not rendered by default.

156 changes: 156 additions & 0 deletions local_inference_test.py
#!/usr/bin/env python
# coding: utf-8

import os
import time

import numpy as np
import tensorflow as tf
from PIL import Image

from object_detection.utils import ops as utils_ops


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="TensorFlow inference speed benchmark for object detection models."
    )
    parser.add_argument(
        "--model",
        help="Path to the frozen graph .pb file.",
        type=str,
        default="./models/frozen_inference_graph.pb",
    )

    parser.add_argument(
        "--cpu", help="Force the CPU to be used during inference.", action="store_true"
    )
    parser.add_argument("--img", help="Path to a sample image to inference.", type=str)
    args = parser.parse_args()

    # Path to the frozen detection graph. This is the actual model used for object detection.
    PATH_TO_CKPT = args.model

    image_path = args.img

    assert os.path.isfile(PATH_TO_CKPT)
    assert os.path.isfile(image_path)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, "rb") as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name="")

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return (
            np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
        )

    def run_inference_benchmark(image, graph, trial=20, gpu=True):
        """Run a TensorFlow inference benchmark.
        Arguments:
            image {np.array} -- input image as a NumPy array.
            graph {tf.Graph} -- TensorFlow graph object.
        Keyword Arguments:
            trial {int} -- number of inferences to run for averaging. (default: {20})
            gpu {bool} -- use an Nvidia GPU when available. (default: {True})
        Returns:
            float -- frames-per-second benchmark result.
        """

        with graph.as_default():
            if gpu:
                config = tf.ConfigProto()
            else:
                # Hiding all GPUs forces TensorFlow to run on the CPU.
                config = tf.ConfigProto(device_count={"GPU": 0})
            with tf.Session(config=config) as sess:
                # Get handles to the input and output tensors.
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {output.name for op in ops for output in op.outputs}
                tensor_dict = {}
                for key in [
                    "num_detections",
                    "detection_boxes",
                    "detection_scores",
                    "detection_classes",
                    "detection_masks",
                ]:
                    tensor_name = key + ":0"
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                            tensor_name
                        )
                if "detection_masks" in tensor_dict:
                    # The following processing is only for a single image.
                    detection_boxes = tf.squeeze(tensor_dict["detection_boxes"], [0])
                    detection_masks = tf.squeeze(tensor_dict["detection_masks"], [0])
                    # Reframing is required to translate the masks from box coordinates
                    # to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict["num_detections"][0], tf.int32
                    )
                    detection_boxes = tf.slice(
                        detection_boxes, [0, 0], [real_num_detection, -1]
                    )
                    detection_masks = tf.slice(
                        detection_masks, [0, 0, 0], [real_num_detection, -1, -1]
                    )
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, image.shape[0], image.shape[1]
                    )
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8
                    )
                    # Follow the convention by adding back the batch dimension.
                    tensor_dict["detection_masks"] = tf.expand_dims(
                        detection_masks_reframed, 0
                    )
                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    "image_tensor:0"
                )

                # Run inference.
                times = []
                # Warm-up run: the first inference takes longer than the ones that follow.
                output_dict = sess.run(
                    tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}
                )
                for i in range(trial):
                    start_time = time.time()
                    output_dict = sess.run(
                        tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}
                    )
                    delta = time.time() - start_time
                    times.append(delta)
                mean_delta = np.array(times).mean()
                fps = 1 / mean_delta
                print("average(sec):{:.3f},fps:{:.2f}".format(mean_delta, fps))

                return fps

    image = Image.open(image_path)
    # The array-based representation of the image, which the benchmark feeds to the model.
    image_np = load_image_into_numpy_array(image)
    # Actual detection benchmark. run_inference_benchmark adds the batch
    # dimension itself, so no expand_dims is needed here.
    fps = run_inference_benchmark(image_np, detection_graph, trial=20, gpu=not args.cpu)
