diff --git a/.gitignore b/.gitignore index 5ad9c3953..c38af9e2a 100644 --- a/.gitignore +++ b/.gitignore @@ -26,16 +26,24 @@ share/python-wheels/ *.egg-info/ .installed.cfg *.egg +*darwin.so MANIFEST - +*.idea # DepthAI-Specific dataset/ +.fw_cache/ depthai.calib +mesh_left.calib +mesh_right.calib +intrinsic.json +intrinsic_calib_m2 *.h264 *.h265 *.mkv *.mp4 -*.blob.sh*cmx* +*.blob* *.cmd +*.mvcmd *.so +*.prof diff --git a/README.md b/README.md index 46fb2d482..0dde257f0 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ python3 -m pip install -r requirements.txt Optional: For command line autocomplete when pressing TAB, only bash interpreter supported now: Add to .bashrc: -`echo 'eval "$(register-python-argcomplete depthai.py)"' >> ~/.bashrc` +`echo 'eval "$(register-python-argcomplete depthai_demo.py)"' >> ~/.bashrc` If you use any other interpreter: https://kislyuk.github.io/argcomplete/ diff --git a/calibrate.py b/calibrate.py index d657851ce..5efce3bbd 100755 --- a/calibrate.py +++ b/calibrate.py @@ -15,6 +15,9 @@ import shutil import consts.resource_paths import json +from depthai_helpers.config_manager import BlobManager +from depthai_helpers.version_check import check_depthai_version +check_depthai_version() use_cv = True try: @@ -126,6 +129,16 @@ class Main: def __init__(self): self.args = vars(parse_args()) + blob_man_args = { + 'cnn_model':'mobilenet-ssd', + 'cnn_model2':'', + 'model_compilation_target':'auto' + } + shaves = 7 + cmx_slices = 7 + NN_engines = 1 + blobMan = BlobManager(blob_man_args, True , shaves, cmx_slices,NN_engines) + self.config = { 'streams': ['left', 'right'] if not on_embedded else @@ -137,11 +150,11 @@ def __init__(self): }, 'ai': { - 'blob_file': consts.resource_paths.blob_fpath, - 'blob_file_config': consts.resource_paths.blob_config_fpath, - 'shaves' : 7, - 'cmx_slices' : 7, - 'NN_engines' : 1, + 'blob_file': blobMan.blob_file, + 'blob_file_config': blobMan.blob_file_config, + 'shaves' : shaves, + 'cmx_slices' : cmx_slices, + 'NN_engines' : NN_engines, }, 'board_config': { @@ -156,7 +169,7 @@ def __init__(self): 'mono': { # 1280x720, 1280x800, 640x400 (binning enabled) - 'resolution_h': 720, + 'resolution_h': 800, 'fps': 30.0, }, }, diff --git a/calibrate_and_test.py b/calibrate_and_test.py index 6bd2342e6..ae7c777af 100755 --- a/calibrate_and_test.py +++ b/calibrate_and_test.py @@ -24,7 +24,7 @@ def cleanup(): args+="'"+arg+"' " calibrate_cmd = "python3 calibrate.py " + args -test_cmd = """python3 depthai_demo.py -co '{"streams": [{"name": "depth_raw", "max_fps": 12.0}]}'""" +test_cmd = """python3 depthai_demo.py -co '{"streams": [{"name": "depth", "max_fps": 12.0}]}'""" atexit.register(cleanup) diff --git a/consts/resource_paths.py b/consts/resource_paths.py index 6384ccaf3..542305c7f 100644 --- a/consts/resource_paths.py +++ b/consts/resource_paths.py @@ -1,5 +1,3 @@ -import os -from os import path from pathlib import Path @@ -15,9 +13,19 @@ def relative_to_abs_path(relative_path): device_usb2_cmd_fpath = relative_to_abs_path('../depthai_usb2.cmd') boards_dir_path = relative_to_abs_path('../resources/boards') + "/" custom_calib_fpath = relative_to_abs_path('../resources/depthai.calib') +left_mesh_fpath = relative_to_abs_path('../resources/mesh_left.calib') +right_mesh_fpath = relative_to_abs_path('../resources/mesh_right.calib') + +right_map_x_fpath = relative_to_abs_path('../resources/map_x_right.calib') +right_map_y_fpath = relative_to_abs_path('../resources/map_y_right.calib') +left_map_x_fpath = 
relative_to_abs_path('../resources/map_x_left.calib') +left_map_y_fpath = relative_to_abs_path('../resources/map_y_left.calib') + nn_resource_path = relative_to_abs_path('../resources/nn')+"/" blob_fpath = relative_to_abs_path('../resources/nn/mobilenet-ssd/mobilenet-ssd.blob') blob_config_fpath = relative_to_abs_path('../resources/nn/mobilenet-ssd/mobilenet-ssd.json') +tests_functional_path = relative_to_abs_path('../testsFunctional/') + "/" + if custom_calib_fpath is not None and Path(custom_calib_fpath).exists(): calib_fpath = custom_calib_fpath diff --git a/depthai_demo.py b/depthai_demo.py index 7d617d01c..3c4910d3c 100755 --- a/depthai_demo.py +++ b/depthai_demo.py @@ -9,559 +9,374 @@ from datetime import datetime import cv2 import numpy as np - +import sys import depthai + print('Using depthai module from: ', depthai.__file__) +print('Depthai version installed: ', depthai.__version__) + +from depthai_helpers.version_check import check_depthai_version +check_depthai_version() import consts.resource_paths from depthai_helpers import utils -from depthai_helpers.cli_utils import cli_print, parse_args, PrintColors -from depthai_helpers.model_downloader import download_model +from depthai_helpers.cli_utils import cli_print, PrintColors + +from depthai_helpers.config_manager import DepthConfigManager +from depthai_helpers.arg_manager import CliArgs + +is_rpi = platform.machine().startswith('arm') or platform.machine().startswith('aarch64') + from depthai_helpers.object_tracker_handler import show_tracklets global args, cnn_model2 -try: - args = vars(parse_args()) -except: - os._exit(2) - -compile_model = args['shaves'] is not None and args['cmx_slices'] is not None and args['NN_engines'] - -stream_list = args['streams'] - -if args['config_overwrite']: - args['config_overwrite'] = json.loads(args['config_overwrite']) - -print("Using Arguments=",args) - -usb2_mode = False -if args['force_usb2']: - cli_print("FORCE USB2 MODE", PrintColors.WARNING) - usb2_mode = True -else: - usb2_mode = False - -debug_mode = False -cmd_file = '' -if args['dev_debug'] == None: - # Debug -debug flag NOT present, - debug_mode = False -elif args['dev_debug'] == '': - # If just -debug flag is present -> cmd_file = '' (wait for device to be connected beforehand) - debug_mode = True - cmd_file = '' -else: - debug_mode = True - cmd_file = args['dev_debug'] - - -calc_dist_to_bb = True -if args['disable_depth']: - calc_dist_to_bb = False - -from depthai_helpers.mobilenet_ssd_handler import decode_mobilenet_ssd, show_mobilenet_ssd -decode_nn=decode_mobilenet_ssd -show_nn=show_mobilenet_ssd - -if args['cnn_model'] == 'age-gender-recognition-retail-0013': - from depthai_helpers.age_gender_recognition_handler import decode_age_gender_recognition, show_age_gender_recognition - decode_nn=decode_age_gender_recognition - show_nn=show_age_gender_recognition - calc_dist_to_bb=False - -if args['cnn_model'] == 'emotions-recognition-retail-0003': - from depthai_helpers.emotion_recognition_handler import decode_emotion_recognition, show_emotion_recognition - decode_nn=decode_emotion_recognition - show_nn=show_emotion_recognition - calc_dist_to_bb=False - -if args['cnn_model'] == 'tiny-yolo': - from depthai_helpers.tiny_yolo_v3_handler import decode_tiny_yolo, show_tiny_yolo - decode_nn=decode_tiny_yolo - show_nn=show_tiny_yolo - calc_dist_to_bb=False - compile_model=False - -if args['cnn_model'] in ['facial-landmarks-35-adas-0002', 'landmarks-regression-retail-0009']: - from depthai_helpers.landmarks_recognition_handler import 
decode_landmarks_recognition, show_landmarks_recognition - decode_nn=decode_landmarks_recognition - show_nn=show_landmarks_recognition - calc_dist_to_bb=False - -if args['cnn_model']: - cnn_model_path = consts.resource_paths.nn_resource_path + args['cnn_model']+ "/" + args['cnn_model'] - blob_file = cnn_model_path + ".blob" - suffix="" - if calc_dist_to_bb: - suffix="_depth" - blob_file_config = cnn_model_path + suffix + ".json" - -blob_file2 = "" -blob_file_config2 = "" -cnn_model2 = None -if args['cnn_model2']: - print("Using CNN2:", args['cnn_model2']) - cnn_model2 = args['cnn_model2'] - cnn_model_path = consts.resource_paths.nn_resource_path + args['cnn_model2']+ "/" + args['cnn_model2'] - blob_file2 = cnn_model_path + ".blob" - blob_file_config2 = cnn_model_path + ".json" - if not Path(blob_file2).exists(): - cli_print("\nWARNING: NN2 blob not found in: " + blob_file2, PrintColors.WARNING) - os._exit(1) - if not Path(blob_file_config2).exists(): - cli_print("\nWARNING: NN2 json not found in: " + blob_file_config2, PrintColors.WARNING) - os._exit(1) - -blob_file_path = Path(blob_file) -blob_file_config_path = Path(blob_file_config) -if not blob_file_path.exists(): - cli_print("\nWARNING: NN blob not found in: " + blob_file, PrintColors.WARNING) - os._exit(1) - -if not blob_file_config_path.exists(): - cli_print("\nWARNING: NN json not found in: " + blob_file_config, PrintColors.WARNING) - os._exit(1) - -with open(blob_file_config) as f: - data = json.load(f) - -try: - labels = data['mappings']['labels'] -except: - labels = None - print("Labels not found in json!") - - -#print('depthai.__version__ == %s' % depthai.__version__) -#print('depthai.__dev_version__ == %s' % depthai.__dev_version__) - -if platform.system() == 'Linux': - ret = subprocess.call(['grep', '-irn', 'ATTRS{idVendor}=="03e7"', '/etc/udev/rules.d'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - if(ret != 0): - cli_print("\nWARNING: Usb rules not found", PrintColors.WARNING) - cli_print("\nSet rules: \n" - """echo 'SUBSYSTEM=="usb", ATTRS{idVendor}=="03e7", MODE="0666"' | sudo tee /etc/udev/rules.d/80-movidius.rules \n""" - "sudo udevadm control --reload-rules && udevadm trigger \n" - "Disconnect/connect usb cable on host! 
\n", PrintColors.RED) - os._exit(1) - -if args['cnn_camera'] == 'left_right': - if args['NN_engines'] is None: - args['NN_engines'] = 2 - args['shaves'] = 6 if args['shaves'] is None else args['shaves'] - args['shaves'] % 2 - args['cmx_slices'] = 6 if args['cmx_slices'] is None else args['cmx_slices'] - args['cmx_slices'] % 2 - compile_model = True - cli_print('Running NN on both cams requires 2 NN engines!', PrintColors.RED) - -default_blob=True -if compile_model: - default_blob=False - shave_nr = args['shaves'] - cmx_slices = args['cmx_slices'] - NCE_nr = args['NN_engines'] - - if NCE_nr == 2: - if shave_nr % 2 == 1 or cmx_slices % 2 == 1: - cli_print("shave_nr and cmx_slices config must be even number when NCE is 2!", PrintColors.RED) - exit(2) - shave_nr_opt = int(shave_nr / 2) - cmx_slices_opt = int(cmx_slices / 2) - else: - shave_nr_opt = int(shave_nr) - cmx_slices_opt = int(cmx_slices) - - outblob_file = blob_file + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr) - - if(not Path(outblob_file).exists()): - cli_print("Compiling model for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.RED) - ret = download_model(args['cnn_model'], shave_nr_opt, cmx_slices_opt, NCE_nr, outblob_file) - if(ret != 0): - cli_print("Model compile failed. Falling back to default.", PrintColors.WARNING) - default_blob=True - else: - blob_file = outblob_file - else: - cli_print("Compiled mode found: compiled for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.GREEN) - blob_file = outblob_file - - if args['cnn_model2']: - outblob_file = blob_file2 + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr) - if(not Path(outblob_file).exists()): - cli_print("Compiling model2 for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.RED) - ret = download_model(args['cnn_model2'], shave_nr_opt, cmx_slices_opt, NCE_nr, outblob_file) - if(ret != 0): - cli_print("Model compile failed. Falling back to default.", PrintColors.WARNING) - default_blob=True - else: - blob_file2 = outblob_file + +class DepthAI: + global is_rpi + process_watchdog_timeout=10 #seconds + nnet_packets = None + data_packets = None + runThread = True + + def reset_process_wd(self): + global wd_cutoff + wd_cutoff=monotonic()+self.process_watchdog_timeout + return + + def on_trackbar_change(self, value): + self.device.send_disparity_confidence_threshold(value) + return + + def stopLoop(self): + self.runThread = False + + def startLoop(self): + cliArgs = CliArgs() + args = vars(cliArgs.parse_args()) + + configMan = DepthConfigManager(args) + if is_rpi and args['pointcloud']: + raise NotImplementedError("Point cloud visualization is currently not supported on RPI") + # these are largely for debug and dev. + cmd_file, debug_mode = configMan.getCommandFile() + usb2_mode = configMan.getUsb2Mode() + + # decode_nn and show_nn are functions that are dependent on the neural network that's being run. + decode_nn = configMan.decode_nn + show_nn = configMan.show_nn + + # Labels for the current neural network. They are parsed from the blob config file. + labels = configMan.labels + NN_json = configMan.NN_config + + # This json file is sent to DepthAI. It communicates what options you'd like to enable and what model you'd like to run. 
+ config = configMan.jsonConfig + + # Create a list of enabled streams () + stream_names = [stream if isinstance(stream, str) else stream['name'] for stream in configMan.stream_list] + + enable_object_tracker = 'object_tracker' in stream_names + + # grab video file, if option exists + video_file = configMan.video_file + + + self.device = None + if debug_mode: + print('Cmd file: ', cmd_file, ' args["device_id"]: ', args['device_id']) + self.device = depthai.Device(cmd_file, args['device_id']) else: - cli_print("Compiled mode found: compiled for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.GREEN) - blob_file2 = outblob_file - -if default_blob: - #default - shave_nr = 7 - cmx_slices = 7 - NCE_nr = 1 - -# Do not modify the default values in the config Dict below directly. Instead, use the `-co` argument when running this script. -config = { - # Possible streams: - # ['left', 'right', 'jpegout', 'video', 'previewout', 'metaout', 'depth_raw', 'disparity', 'disparity_color'] - # If "left" is used, it must be in the first position. - # To test depth use: - # 'streams': [{'name': 'depth_raw', "max_fps": 12.0}, {'name': 'previewout', "max_fps": 12.0}, ], - 'streams': stream_list, - 'depth': - { - 'calibration_file': consts.resource_paths.calib_fpath, - 'padding_factor': 0.3, - 'depth_limit_m': 10.0, # In meters, for filtering purpose during x,y,z calc - 'confidence_threshold' : 0.5, #Depth is calculated for bounding boxes with confidence higher than this number - }, - 'ai': - { - 'blob_file': blob_file, - 'blob_file_config': blob_file_config, - 'blob_file2': blob_file2, - 'blob_file_config2': blob_file_config2, - 'calc_dist_to_bb': calc_dist_to_bb, - 'keep_aspect_ratio': not args['full_fov_nn'], - 'camera_input': args['cnn_camera'], - 'shaves' : shave_nr, - 'cmx_slices' : cmx_slices, - 'NN_engines' : NCE_nr, - }, - # object tracker - 'ot': - { - 'max_tracklets' : 20, #maximum 20 is supported - 'confidence_threshold' : 0.5, #object is tracked only for detections over this threshold - }, - 'board_config': - { - 'swap_left_and_right_cameras': args['swap_lr'], # True for 1097 (RPi Compute) and 1098OBC (USB w/onboard cameras) - 'left_fov_deg': args['field_of_view'], # Same on 1097 and 1098OBC - 'rgb_fov_deg': args['rgb_field_of_view'], - 'left_to_right_distance_cm': args['baseline'], # Distance between stereo cameras - 'left_to_rgb_distance_cm': args['rgb_baseline'], # Currently unused - 'store_to_eeprom': args['store_eeprom'], - 'clear_eeprom': args['clear_eeprom'], - 'override_eeprom': args['override_eeprom'], - }, - 'camera': - { - 'rgb': - { - # 3840x2160, 1920x1080 - # only UHD/1080p/30 fps supported for now - 'resolution_h': args['rgb_resolution'], - 'fps': args['rgb_fps'], - }, - 'mono': - { - # 1280x720, 1280x800, 640x400 (binning enabled) - 'resolution_h': args['mono_resolution'], - 'fps': args['mono_fps'], - }, - }, - 'app': - { - 'sync_video_meta_streams': args['sync_video_meta'], - }, - #'video_config': - #{ - # 'rateCtrlMode': 'cbr', # Options: cbr / vbr - # 'profile': 'h265_main', # Options: 'h264_baseline' / 'h264_main' / 'h264_high' / 'h265_main / 'mjpeg' ' - # 'bitrate': 8000000, # When using CBR (H264/H265 only) - # 'maxBitrate': 8000000, # When using CBR (H264/H265 only) - # 'keyframeFrequency': 30, (H264/H265 only) - # 'numBFrames': 0, (H264/H265 only) - # 'quality': 80 # (0 - 100%) When using VBR or MJPEG profile - #} - #'video_config': - #{ - # 'profile': 'mjpeg', - # 'quality': 95 - #} -} - -if args['board']: - 
board_path = Path(args['board']) - if not board_path.exists(): - board_path = Path(consts.resource_paths.boards_dir_path) / Path(args['board'].upper()).with_suffix('.json') - if not board_path.exists(): - print('ERROR: Board config not found: {}'.format(board_path)) - os._exit(2) - with open(board_path) as fp: - board_config = json.load(fp) - utils.merge(board_config, config) -if args['config_overwrite'] is not None: - config = utils.merge(args['config_overwrite'],config) - print("Merged Pipeline config with overwrite",config) - -if 'depth_raw' in config['streams'] and ('disparity_color' in config['streams'] or 'disparity' in config['streams']): - print('ERROR: depth_raw is mutually exclusive with disparity_color') - exit(2) - -# Append video stream if video recording was requested and stream is not already specified -video_file = None -if args['video'] is not None: - - # open video file - try: - video_file = open(args['video'], 'wb') - if config['streams'].count('video') == 0: - config['streams'].append('video') - except IOError: - print("Error: couldn't open video file for writing. Disabled video output stream") - if config['streams'].count('video') == 1: - config['streams'].remove('video') - - -# Create a list of enabled streams () -stream_names = [stream if isinstance(stream, str) else stream['name'] for stream in stream_list] - -enable_object_tracker = 'object_tracker' in stream_names - - -device = None -if debug_mode: - print('Cmd file: ', cmd_file, ' args["device_id"]: ', args['device_id']) - device = depthai.Device(cmd_file, args['device_id']) -else: - device = depthai.Device(args['device_id'], usb2_mode) - -print('Available streams: ' + str(device.get_available_streams())) - -# create the pipeline, here is the first connection with the device -p = device.create_pipeline(config=config) - -if p is None: - print('Pipeline is not created.') - exit(3) - -nn2depth = device.get_nn_to_depth_bbox_mapping() - - -t_start = time() -frame_count = {} -frame_count_prev = {} -nnet_prev = {} -nnet_prev["entries_prev"] = {} -nnet_prev["nnet_source"] = {} -frame_count['nn'] = {} -frame_count_prev['nn'] = {} - -NN_cams = {'rgb', 'left', 'right'} - -for cam in NN_cams: - nnet_prev["entries_prev"][cam] = [] - nnet_prev["nnet_source"][cam] = [] - frame_count['nn'][cam] = 0 - frame_count_prev['nn'][cam] = 0 - -stream_windows = [] -for s in stream_names: - if s == 'previewout': + self.device = depthai.Device(args['device_id'], usb2_mode) + + print(stream_names) + print('Available streams: ' + str(self.device.get_available_streams())) + + # create the pipeline, here is the first connection with the device + p = self.device.create_pipeline(config=config) + + if p is None: + print('Pipeline is not created.') + exit(3) + + + nn2depth = self.device.get_nn_to_depth_bbox_mapping() + + t_start = time() + frame_count = {} + frame_count_prev = {} + nnet_prev = {} + nnet_prev["entries_prev"] = {} + nnet_prev["nnet_source"] = {} + frame_count['nn'] = {} + frame_count_prev['nn'] = {} + + NN_cams = {'rgb', 'left', 'right'} + for cam in NN_cams: - stream_windows.append(s + '-' + cam) - else: - stream_windows.append(s) - -for w in stream_windows: - frame_count[w] = 0 - frame_count_prev[w] = 0 - -tracklets = None - -process_watchdog_timeout=10 #seconds -def reset_process_wd(): - global wd_cutoff - wd_cutoff=monotonic()+process_watchdog_timeout - return - -reset_process_wd() - - -def on_trackbar_change(value): - device.send_disparity_confidence_threshold(value) - return - -for stream in stream_names: - if stream in 
["disparity", "disparity_color", "depth_raw"]: - cv2.namedWindow(stream) - trackbar_name = 'Disparity confidence' - conf_thr_slider_min = 0 - conf_thr_slider_max = 255 - cv2.createTrackbar(trackbar_name, stream, conf_thr_slider_min, conf_thr_slider_max, on_trackbar_change) - cv2.setTrackbarPos(trackbar_name, stream, args['disparity_confidence_threshold']) -ops = 0 -prevTime = time() -while True: - # retreive data from the device - # data is stored in packets, there are nnet (Neural NETwork) packets which have additional functions for NNet result interpretation - nnet_packets, data_packets = p.get_available_nnet_and_data_packets(True) - - ### Uncomment to print ops - # ops = ops + 1 - # if time() - prevTime > 1.0: - # print('OPS: ', ops) - # ops = 0 - # prevTime = time() - - packets_len = len(nnet_packets) + len(data_packets) - if packets_len != 0: - reset_process_wd() - else: - cur_time=monotonic() - if cur_time > wd_cutoff: - print("process watchdog timeout") - os._exit(10) - - for _, nnet_packet in enumerate(nnet_packets): - meta = nnet_packet.getMetadata() - camera = 'rgb' - if meta != None: - camera = meta.getCameraName() - nnet_prev["nnet_source"][camera] = nnet_packet - nnet_prev["entries_prev"][camera] = decode_nn(nnet_packet, config=config) - frame_count['metaout'] += 1 - frame_count['nn'][camera] += 1 - - for packet in data_packets: - window_name = packet.stream_name - if packet.stream_name not in stream_names: - continue # skip streams that were automatically added - packetData = packet.getData() - if packetData is None: - print('Invalid packet data!') - continue - elif packet.stream_name == 'previewout': - meta = packet.getMetadata() - camera = 'rgb' - if meta != None: - camera = meta.getCameraName() - - window_name = 'previewout-' + camera - # the format of previewout image is CHW (Chanel, Height, Width), but OpenCV needs HWC, so we - # change shape (3, 300, 300) -> (300, 300, 3) - data0 = packetData[0,:,:] - data1 = packetData[1,:,:] - data2 = packetData[2,:,:] - frame = cv2.merge([data0, data1, data2]) - - nn_frame = show_nn(nnet_prev["entries_prev"][camera], frame, labels=labels, config=config) - if enable_object_tracker and tracklets is not None: - nn_frame = show_tracklets(tracklets, nn_frame, labels) - cv2.putText(nn_frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) - cv2.putText(nn_frame, "NN fps: " + str(frame_count_prev['nn'][camera]), (2, frame.shape[0]-4), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0)) - cv2.imshow(window_name, nn_frame) - elif packet.stream_name == 'left' or packet.stream_name == 'right' or packet.stream_name == 'disparity': - frame_bgr = packetData - cv2.putText(frame_bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) - cv2.putText(frame_bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) - if args['draw_bb_depth']: - camera = args['cnn_camera'] - if packet.stream_name == 'disparity': - if camera == 'left_right': - camera = 'right' - elif camera != 'rgb': - camera = packet.getMetadata().getCameraName() - show_nn(nnet_prev["entries_prev"][camera], frame_bgr, labels=labels, config=config, nn2depth=nn2depth) - cv2.imshow(window_name, frame_bgr) - elif packet.stream_name.startswith('depth') or packet.stream_name == 'disparity_color': - frame = packetData - - if len(frame.shape) == 2: - if frame.dtype == np.uint8: # grayscale - cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255)) - 
cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255)) - else: # uint16 - frame = (65535 // frame).astype(np.uint8) - #colorize depth map, comment out code below to obtain grayscale - frame = cv2.applyColorMap(frame, cv2.COLORMAP_HOT) - # frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET) - cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) - cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) - else: # bgr - cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255)) - cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) - - if args['draw_bb_depth']: - camera = args['cnn_camera'] - if camera == 'left_right': - camera = 'right' - show_nn(nnet_prev["entries_prev"][camera], frame, labels=labels, config=config, nn2depth=nn2depth) - cv2.imshow(window_name, frame) - - elif packet.stream_name == 'jpegout': - jpg = packetData - mat = cv2.imdecode(jpg, cv2.IMREAD_COLOR) - cv2.imshow('jpegout', mat) - - elif packet.stream_name == 'video': - videoFrame = packetData - videoFrame.tofile(video_file) - #mjpeg = packetData - #mat = cv2.imdecode(mjpeg, cv2.IMREAD_COLOR) - #cv2.imshow('mjpeg', mat) - - elif packet.stream_name == 'meta_d2h': - str_ = packet.getDataAsStr() - dict_ = json.loads(str_) - - print('meta_d2h Temp', - ' CSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['css']), - ' MSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['mss']), - ' UPA:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa0']), - ' DSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa1'])) - elif packet.stream_name == 'object_tracker': - tracklets = packet.getObjectTracker() - - frame_count[window_name] += 1 - - t_curr = time() - if t_start + 1.0 < t_curr: - t_start = t_curr - # print("metaout fps: " + str(frame_count_prev["metaout"])) + nnet_prev["entries_prev"][cam] = None + nnet_prev["nnet_source"][cam] = None + frame_count['nn'][cam] = 0 + frame_count_prev['nn'][cam] = 0 stream_windows = [] for s in stream_names: if s == 'previewout': for cam in NN_cams: stream_windows.append(s + '-' + cam) - frame_count_prev['nn'][cam] = frame_count['nn'][cam] - frame_count['nn'][cam] = 0 else: stream_windows.append(s) + for w in stream_windows: - frame_count_prev[w] = frame_count[w] frame_count[w] = 0 + frame_count_prev[w] = 0 - key = cv2.waitKey(1) - if key == ord('c'): - if 'jpegout' in stream_names == 0: - print("'jpegout' stream not enabled. 
Try settings -s jpegout to enable it") - else: - device.request_jpeg() - elif key == ord('f'): - device.request_af_trigger() - elif key == ord('1'): - device.request_af_mode(depthai.AutofocusMode.AF_MODE_AUTO) - elif key == ord('2'): - device.request_af_mode(depthai.AutofocusMode.AF_MODE_CONTINUOUS_VIDEO) - elif key == ord('q'): - break + tracklets = None + + self.reset_process_wd() + time_start = time() + def print_packet_info_header(): + print('[hostTimestamp streamName] devTstamp seq camSrc width height Bpp') + def print_packet_info(packet, stream_name): + meta = packet.getMetadata() + print("[{:.6f} {:15s}]".format(time()-time_start, stream_name), end='') + if meta is not None: + source = meta.getCameraName() + if stream_name.startswith('disparity') or stream_name.startswith('depth'): + source += '(rectif)' + print(" {:.6f}".format(meta.getTimestamp()), meta.getSequenceNum(), source, end='') + print('', meta.getFrameWidth(), meta.getFrameHeight(), meta.getFrameBytesPP(), end='') + print() + return + + def keypress_handler(self, key, stream_names): + cam_l = depthai.CameraControl.CamId.LEFT + cam_r = depthai.CameraControl.CamId.RIGHT + cmd_ae_region = depthai.CameraControl.Command.AE_REGION + cmd_exp_comp = depthai.CameraControl.Command.EXPOSURE_COMPENSATION + keypress_handler_lut = { + ord('f'): lambda: self.device.request_af_trigger(), + ord('1'): lambda: self.device.request_af_mode(depthai.AutofocusMode.AF_MODE_AUTO), + ord('2'): lambda: self.device.request_af_mode(depthai.AutofocusMode.AF_MODE_CONTINUOUS_VIDEO), + # 5,6,7,8,9,0: short example for using ISP 3A controls + ord('5'): lambda: self.device.send_camera_control(cam_l, cmd_ae_region, '0 0 200 200 1'), + ord('6'): lambda: self.device.send_camera_control(cam_l, cmd_ae_region, '1000 0 200 200 1'), + ord('7'): lambda: self.device.send_camera_control(cam_l, cmd_exp_comp, '-2'), + ord('8'): lambda: self.device.send_camera_control(cam_l, cmd_exp_comp, '+2'), + ord('9'): lambda: self.device.send_camera_control(cam_r, cmd_exp_comp, '-2'), + ord('0'): lambda: self.device.send_camera_control(cam_r, cmd_exp_comp, '+2'), + } + if key in keypress_handler_lut: + keypress_handler_lut[key]() + elif key == ord('c'): + if 'jpegout' in stream_names: + self.device.request_jpeg() + else: + print("'jpegout' stream not enabled. 
Try settings -s jpegout to enable it") + return + + for stream in stream_names: + if stream in ["disparity", "disparity_color", "depth"]: + cv2.namedWindow(stream) + trackbar_name = 'Disparity confidence' + conf_thr_slider_min = 0 + conf_thr_slider_max = 255 + cv2.createTrackbar(trackbar_name, stream, conf_thr_slider_min, conf_thr_slider_max, self.on_trackbar_change) + cv2.setTrackbarPos(trackbar_name, stream, args['disparity_confidence_threshold']) + + right_rectified = None + pcl_converter = None + + ops = 0 + prevTime = time() + if args['verbose']: print_packet_info_header() + while self.runThread: + # retreive data from the device + # data is stored in packets, there are nnet (Neural NETwork) packets which have additional functions for NNet result interpretation + self.nnet_packets, self.data_packets = p.get_available_nnet_and_data_packets(blocking=True) + + ### Uncomment to print ops + # ops = ops + 1 + # if time() - prevTime > 1.0: + # print('OPS: ', ops) + # ops = 0 + # prevTime = time() + + packets_len = len(self.nnet_packets) + len(self.data_packets) + if packets_len != 0: + self.reset_process_wd() + else: + cur_time=monotonic() + if cur_time > wd_cutoff: + print("process watchdog timeout") + os._exit(10) + + for _, nnet_packet in enumerate(self.nnet_packets): + if args['verbose']: print_packet_info(nnet_packet, 'NNet') + + meta = nnet_packet.getMetadata() + camera = 'rgb' + if meta != None: + camera = meta.getCameraName() + nnet_prev["nnet_source"][camera] = nnet_packet + nnet_prev["entries_prev"][camera] = decode_nn(nnet_packet, config=config, NN_json=NN_json) + frame_count['metaout'] += 1 + frame_count['nn'][camera] += 1 + + for packet in self.data_packets: + window_name = packet.stream_name + if packet.stream_name not in stream_names: + continue # skip streams that were automatically added + if args['verbose']: print_packet_info(packet, packet.stream_name) + packetData = packet.getData() + if packetData is None: + print('Invalid packet data!') + continue + elif packet.stream_name == 'previewout': + meta = packet.getMetadata() + camera = 'rgb' + if meta != None: + camera = meta.getCameraName() + + window_name = 'previewout-' + camera + # the format of previewout image is CHW (Chanel, Height, Width), but OpenCV needs HWC, so we + # change shape (3, 300, 300) -> (300, 300, 3) + data0 = packetData[0,:,:] + data1 = packetData[1,:,:] + data2 = packetData[2,:,:] + frame = cv2.merge([data0, data1, data2]) + if nnet_prev["entries_prev"][camera] is not None: + frame = show_nn(nnet_prev["entries_prev"][camera], frame, NN_json=NN_json, config=config) + if enable_object_tracker and tracklets is not None: + frame = show_tracklets(tracklets, frame, labels) + cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) + cv2.putText(frame, "NN fps: " + str(frame_count_prev['nn'][camera]), (2, frame.shape[0]-4), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0)) + cv2.imshow(window_name, frame) + elif packet.stream_name in ['left', 'right', 'disparity', 'rectified_left', 'rectified_right']: + frame_bgr = packetData + if args['pointcloud'] and packet.stream_name == 'rectified_right': + right_rectified = packetData + cv2.putText(frame_bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) + cv2.putText(frame_bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) + camera = None + if args['draw_bb_depth']: + camera = args['cnn_camera'] + if packet.stream_name == 'disparity': + if 
camera == 'left_right': + camera = 'right' + elif camera != 'rgb': + camera = packet.getMetadata().getCameraName() + if nnet_prev["entries_prev"][camera] is not None: + frame_bgr = show_nn(nnet_prev["entries_prev"][camera], frame_bgr, NN_json=NN_json, config=config, nn2depth=nn2depth) + cv2.imshow(window_name, frame_bgr) + elif packet.stream_name.startswith('depth') or packet.stream_name == 'disparity_color': + frame = packetData + + if len(frame.shape) == 2: + if frame.dtype == np.uint8: # grayscale + cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255)) + cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255)) + else: # uint16 + if args['pointcloud'] and "depth" in stream_names and "rectified_right" in stream_names and right_rectified is not None: + try: + from depthai_helpers.projector_3d import PointCloudVisualizer + except ImportError as e: + raise ImportError(f"\033[1;5;31mError occured when importing PCL projector: {e} \033[0m ") + if pcl_converter is None: + pcl_converter = PointCloudVisualizer(self.device.get_right_intrinsic(), 1280, 720) + right_rectified = cv2.flip(right_rectified, 1) + pcl_converter.rgbd_to_projection(frame, right_rectified) + pcl_converter.visualize_pcd() + + frame = (65535 // frame).astype(np.uint8) + #colorize depth map, comment out code below to obtain grayscale + frame = cv2.applyColorMap(frame, cv2.COLORMAP_HOT) + # frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET) + cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) + cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) + else: # bgr + cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255)) + cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255) + + if args['draw_bb_depth']: + camera = args['cnn_camera'] + if camera == 'left_right': + camera = 'right' + if nnet_prev["entries_prev"][camera] is not None: + frame = show_nn(nnet_prev["entries_prev"][camera], frame, NN_json=NN_json, config=config, nn2depth=nn2depth) + cv2.imshow(window_name, frame) + + elif packet.stream_name == 'jpegout': + jpg = packetData + mat = cv2.imdecode(jpg, cv2.IMREAD_COLOR) + cv2.imshow('jpegout', mat) + + elif packet.stream_name == 'video': + videoFrame = packetData + videoFrame.tofile(video_file) + #mjpeg = packetData + #mat = cv2.imdecode(mjpeg, cv2.IMREAD_COLOR) + #cv2.imshow('mjpeg', mat) + elif packet.stream_name == 'color': + meta = packet.getMetadata() + w = meta.getFrameWidth() + h = meta.getFrameHeight() + yuv420p = packetData.reshape( (h * 3 // 2, w) ) + bgr = cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV) + scale = configMan.getColorPreviewScale() + bgr = cv2.resize(bgr, ( int(w*scale), int(h*scale) ), interpolation = cv2.INTER_AREA) + cv2.putText(bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) + cv2.putText(bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0)) + cv2.imshow("color", bgr) + + elif packet.stream_name == 'meta_d2h': + str_ = packet.getDataAsStr() + dict_ = json.loads(str_) + + print('meta_d2h Temp', + ' CSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['css']), + ' MSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['mss']), + ' UPA:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa0']), + ' DSS:' + 
'{:6.2f}'.format(dict_['sensors']['temperature']['upa1'])) + elif packet.stream_name == 'object_tracker': + tracklets = packet.getObjectTracker() + + frame_count[window_name] += 1 + + t_curr = time() + if t_start + 1.0 < t_curr: + t_start = t_curr + # print("metaout fps: " + str(frame_count_prev["metaout"])) + + stream_windows = [] + for s in stream_names: + if s == 'previewout': + for cam in NN_cams: + stream_windows.append(s + '-' + cam) + frame_count_prev['nn'][cam] = frame_count['nn'][cam] + frame_count['nn'][cam] = 0 + else: + stream_windows.append(s) + for w in stream_windows: + frame_count_prev[w] = frame_count[w] + frame_count[w] = 0 + + key = cv2.waitKey(1) + if key == ord('q'): + break + else: + keypress_handler(self, key, stream_names) -del p # in order to stop the pipeline object should be deleted, otherwise device will continue working. This is required if you are going to add code after the main loop, otherwise you can ommit it. -del device + del p # in order to stop the pipeline object should be deleted, otherwise device will continue working. This is required if you are going to add code after the main loop, otherwise you can ommit it. + del self.device -# Close video output file if was opened -if video_file is not None: - video_file.close() + # Close video output file if was opened + if video_file is not None: + video_file.close() -print('py: DONE.') + print('py: DONE.') +if __name__ == "__main__": + dai = DepthAI() + dai.startLoop() diff --git a/depthai_helpers/age_gender_recognition_handler.py b/depthai_helpers/age_gender_recognition_handler.py index 868a1e4c3..c0eb1ac24 100644 --- a/depthai_helpers/age_gender_recognition_handler.py +++ b/depthai_helpers/age_gender_recognition_handler.py @@ -1,25 +1,41 @@ import cv2 import numpy as np - +import json def decode_age_gender_recognition(nnet_packet, **kwargs): - detections = [] - for _, e in enumerate(nnet_packet.entries()): - if e[1]["female"] > 0.8 or e[1]["male"] > 0.8: - detections.append(e[0]["age"]) - if e[1]["female"] > e[1]["male"]: - detections.append("female") - else: - detections.append("male") - return detections - -def show_age_gender_recognition(entries_prev, frame, **kwargs): - # img_h = frame.shape[0] - # img_w = frame.shape[1] - if len(entries_prev) != 0: - age = (int)(entries_prev[0]*100) + config = kwargs['config'] + + output_list = nnet_packet.getOutputsList() + + age_out = output_list[0] + age = age_out[0,0,0,0] + gender_out = output_list[1] + female_conf = gender_out[0,0,0,0] + male_conf = gender_out[0,1,0,0] + + detection = None + + conf_thr = 0.5 + if female_conf > conf_thr or male_conf > conf_thr: + gender = "male" + if female_conf > male_conf: + gender = "female" + age = (int)(age*100) + detection = dict(gender=gender, age=age) + + return detection + +def decode_age_gender_recognition_json(nnet_packet, **kwargs): + detections = decode_age_gender_recognition(nnet_packet, **kwargs) + return json.dumps(detections) + +def show_age_gender_recognition(decoded_nn, frame, **kwargs): + + if decoded_nn is not None: + gender = decoded_nn["gender"] + age = decoded_nn["age"] cv2.putText(frame, "Age: " + str(age), (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) - gender = entries_prev[1] cv2.putText(frame, "G: " + str(gender), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + frame = cv2.resize(frame, (300, 300)) return frame diff --git a/depthai_helpers/arg_manager.py b/depthai_helpers/arg_manager.py new file mode 100644 index 000000000..110e3b3b6 --- /dev/null +++ 
b/depthai_helpers/arg_manager.py @@ -0,0 +1,223 @@ +import os +import argparse +try: + import argcomplete +except ImportError: + raise ImportError('\033[1;5;31m argcomplete module not found, run python3 -m pip install -r requirements.txt \033[0m') +from argcomplete.completers import ChoicesCompleter + +from depthai_helpers.cli_utils import cli_print, PrintColors +import consts.resource_paths + + +def _get_immediate_subdirectories(a_dir): + return [name for name in os.listdir(a_dir) + if os.path.isdir(os.path.join(a_dir, name))] + +_stream_choices = ("metaout", "previewout", "jpegout", "left", "right", "depth", "disparity", "disparity_color", + "meta_d2h", "object_tracker", "rectified_left", "rectified_right", "color") +_CNN_choices = _get_immediate_subdirectories(consts.resource_paths.nn_resource_path) +_CNN2_choices = ['landmarks-regression-retail-0009', 'facial-landmarks-35-adas-0002', 'emotions-recognition-retail-0003'] + +def _stream_type(option): + max_fps = None + option_list = option.split(",") + option_args = len(option_list) + if option_args not in [1, 2]: + msg_string = "{0} format is invalid. See --help".format(option) + cli_print(msg_string, PrintColors.WARNING) + raise ValueError(msg_string) + + transition_map = {"depth_raw" : "depth"} + stream_name = option_list[0] + if stream_name in transition_map: + cli_print("Stream option " + stream_name + " is deprecated, use: " + transition_map[stream_name], PrintColors.WARNING) + stream_name = transition_map[stream_name] + + if stream_name not in _stream_choices: + msg_string = "{0} is not in available stream list: \n{1}".format(stream_name, _stream_choices) + cli_print(msg_string, PrintColors.WARNING) + raise ValueError(msg_string) + + if option_args == 1: + stream_dict = {"name": stream_name} + else: + try: + max_fps = float(option_list[1]) + except ValueError: + msg_string = "In option: {0} {1} is not a number!".format(option, option_list[1]) + cli_print(msg_string, PrintColors.WARNING) + + stream_dict = {"name": stream_name, "max_fps": max_fps} + return stream_dict + +class CliArgs: + args = [] + + def __init__(self): + super().__init__() + + def parse_args(self): + epilog_text = """ + Displays video streams captured by DepthAI. + + Example usage: + + ## Show the depth stream: + python3 depthai_demo.py -s disparity_color,12 + ## Show the depth stream and NN output: + python3 depthai_demo.py -s metaout previewout,12 disparity_color,12 + """ + parser = argparse.ArgumentParser(epilog=epilog_text, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("-co", "--config_overwrite", default=None, + type=str, required=False, + help="JSON-formatted pipeline config object. This will override the defaults used in this " + "script.") + + parser.add_argument("-brd", "--board", default=None, type=str, + help="BW1097, BW1098OBC - Board type from resources/boards/ (not case-sensitive). " + "Or path to a custom .json board config. 
Mutually exclusive with [-fv -rfv -b -r -w]") + + parser.add_argument("-sh", "--shaves", default=None, type=int, choices=range(1,15), metavar="[1-14]", + help="Number of shaves used by NN.") + + parser.add_argument("-cmx", "--cmx_slices", default=None, type=int, choices=range(1,15), metavar="[1-14]", + help="Number of cmx slices used by NN.") + + parser.add_argument("-nce", "--NN_engines", default=None, type=int, choices=[1, 2], metavar="[1-2]", + help="Number of NN_engines used by NN.") + + parser.add_argument("-mct", "--model-compilation-target", default="auto", + type=str, required=False, choices=["auto","local","cloud"], + help="Compile model locally or in the cloud?") + + parser.add_argument("-rgbr", "--rgb_resolution", default=1080, type=int, choices=[1080, 2160, 3040], + help="RGB cam res height: (1920x)1080, (3840x)2160 or (4056x)3040. Default: %(default)s") + + parser.add_argument("-rgbf", "--rgb_fps", default=30.0, type=float, + help="RGB cam fps: max 118.0 for H:1080, max 42.0 for H:2160. Default: %(default)s") + + parser.add_argument("-cs", "--color_scale", default=1.0, type=float, + help="Scale factor for 'color' stream preview window. Default: %(default)s") + + parser.add_argument("-monor", "--mono_resolution", default=720, type=int, choices=[400, 720, 800], + help="Mono cam res height: (1280x)720, (1280x)800 or (640x)400 - binning. Default: %(default)s") + + parser.add_argument("-monof", "--mono_fps", default=30.0, type=float, + help="Mono cam fps: max 60.0 for H:720 or H:800, max 120.0 for H:400. Default: %(default)s") + + parser.add_argument("-dct", "--disparity_confidence_threshold", default=200, type=int, + choices=range(0,256), metavar="[0-255]", + help="Disparity confidence threshold.") + + parser.add_argument("-med", "--stereo_median_size", default=7, type=int, choices=[0, 3, 5, 7], + help="Disparity / depth median filter kernel size (N x N). 0 = filtering disabled. Default: %(default)s") + + parser.add_argument("-lrc", "--stereo_lr_check", default=False, action="store_true", + help="Enable stereo 'Left-Right check' feature.") + parser.add_argument("-fv", "--field-of-view", default=None, type=float, + help="Horizontal field of view (HFOV) for the stereo cameras in [deg]. Default: 71.86deg.") + + parser.add_argument("-rfv", "--rgb-field-of-view", default=None, type=float, + help="Horizontal field of view (HFOV) for the RGB camera in [deg]. Default: 68.7938deg.") + + parser.add_argument("-b", "--baseline", default=None, type=float, + help="Left/Right camera baseline in [cm]. Default: 9.0cm.") + + parser.add_argument("-r", "--rgb-baseline", default=None, type=float, + help="Distance the RGB camera is from the Left camera. Default: 2.0cm.") + + parser.add_argument("-w", "--no-swap-lr", dest="swap_lr", default=None, action="store_false", + help="Do not swap the Left and Right cameras.") + + parser.add_argument("-e", "--store-eeprom", default=False, action="store_true", + help="Store the calibration and board_config (fov, baselines, swap-lr) in the EEPROM onboard") + + parser.add_argument("--clear-eeprom", default=False, action="store_true", + help="Invalidate the calib and board_config from EEPROM") + + parser.add_argument("-o", "--override-eeprom", default=False, action="store_true", + help="Use the calib and board_config from host, ignoring the EEPROM data if programmed") + + parser.add_argument("-dev", "--device-id", default="", type=str, + help="USB port number for the device to connect to. 
Use the word 'list' to show all devices " + "and exit.") + parser.add_argument("-debug", "--dev_debug", nargs="?", default=None, const='', type=str, required=False, + help="Used by board developers for debugging. Can take parameter to device binary") + parser.add_argument("-fusb2", "--force_usb2", default=None, action="store_true", + help="Force usb2 connection") + + parser.add_argument("-cnn", "--cnn_model", default="mobilenet-ssd", type=str, choices=_CNN_choices, + help="Cnn model to run on DepthAI") + + parser.add_argument("-cnn2", "--cnn_model2", default="", type=str, choices=_CNN2_choices, + help="Cnn model to run on DepthAI for second-stage inference") + + parser.add_argument('-cam', "--cnn_camera", default='rgb', + choices=['rgb', 'left', 'right', 'left_right', 'rectified_left', 'rectified_right', 'rectified_left_right'], + help='Choose camera input for CNN (default: %(default)s)') + + parser.add_argument("-dd", "--disable_depth", default=False, action="store_true", + help="Disable depth calculation on CNN models with bounding box output") + + parser.add_argument("-bb", "--draw-bb-depth", default=False, action="store_true", + help="Draw the bounding boxes over the left/right/depth* streams") + + parser.add_argument("-ff", "--full-fov-nn", default=False, action="store_true", + help="Full RGB FOV for NN, not keeping the aspect ratio") + + parser.add_argument("-sync", "--sync-video-meta", default=False, action="store_true", + help="Synchronize 'previewout' and 'metaout' streams") + + parser.add_argument("-seq", "--sync-sequence-numbers", default=False, action="store_true", + help="Synchronize sequence numbers for all packets. Experimental") + + parser.add_argument("-u", "--usb-chunk-KiB", default=64, type=int, + help="USB transfer chunk on device. Higher (up to megabytes) " + "may improve throughput, or 0 to disable chunking. Default: %(default)s") + + parser.add_argument("-fw", "--firmware", default=None, type=str, + help="Commit hash for custom FW, downloaded from Artifactory") + + parser.add_argument("-vv", "--verbose", default=False, action="store_true", + help="Verbose, print info about received packets.") + parser.add_argument("-s", "--streams", + nargs="+", + type=_stream_type, + dest="streams", + default=["metaout", "previewout"], + help=("Define which streams to enable \n" + "Format: stream_name or stream_name,max_fps \n" + "Example: -s metaout previewout \n" + "Example: -s metaout previewout,10 depth_sipp,10"))\ + .completer=ChoicesCompleter(_stream_choices) + + parser.add_argument("-v", "--video", default=None, type=str, required=False, + help="Path where to save video stream (existing file will be overwritten)") + + parser.add_argument("-pcl", "--pointcloud", default=False, action="store_true", + help="Convert Depth map image to point clouds") + + parser.add_argument("-mesh", "--use_mesh", default=False, action="store_true", + help="use mesh for rectification of the stereo images") + + parser.add_argument("-mirror_rectified", "--mirror_rectified", default='true', choices=['true', 'false'], + help="Normally, rectified_left/_right are mirrored for Stereo engine constraints. " + "If false, disparity/depth will be mirrored instead. 
Default: true") + argcomplete.autocomplete(parser) + + options = parser.parse_args() + any_options_set = any([options.field_of_view, options.rgb_field_of_view, options.baseline, options.rgb_baseline, + options.swap_lr]) + if (options.board is not None) and any_options_set: + parser.error("[-brd] is mutually exclusive with [-fv -rfv -b -r -w]") + + # Set some defaults after the above check + if not options.board: + options.field_of_view = 71.86 + options.rgb_field_of_view = 68.7938 + options.baseline = 9.0 + options.rgb_baseline = 2.0 + options.swap_lr = True + + return options diff --git a/depthai_helpers/calibration_utils.py b/depthai_helpers/calibration_utils.py index 6a4dd863a..f5cc5e72a 100644 --- a/depthai_helpers/calibration_utils.py +++ b/depthai_helpers/calibration_utils.py @@ -5,6 +5,8 @@ import numpy as np import re import time +import consts.resource_paths +import json # Creates a set of 13 polygon coordinates def setPolygonCoordinates(height, width): @@ -75,12 +77,72 @@ def calibrate(self, filepath, square_size, out_filepath, flags): # process images, detect corners, refine and save data self.process_images(filepath) - # run calibration procedure and construct Homography - self.stereo_calibrate() - + # run calibration procedure and construct Homography and mesh + self.stereo_calibrate_two_homography_calib() # save data to binary file - self.H.tofile(out_filepath) + R1_fp32 = self.R1.astype(np.float32) + R2_fp32 = self.R2.astype(np.float32) + M1_fp32 = self.M1.astype(np.float32) + M2_fp32 = self.M2.astype(np.float32) + R_fp32 = self.R.astype(np.float32) + T_fp32 = self.T.astype(np.float32) + M3_fp32 = np.zeros((3, 3), dtype = np.float32) + R_rgb_fp32 = np.zeros((3, 3), dtype = np.float32) + T_rgb_fp32 = np.zeros(3, dtype = np.float32) + d1_coeff_fp32 = self.d1.astype(np.float32) + d2_coeff_fp32 = self.d2.astype(np.float32) + d3_coeff_fp32 = np.zeros(14, dtype = np.float32) + + with open(out_filepath, "wb") as fp: + fp.write(R1_fp32.tobytes()) # goes to left camera + fp.write(R2_fp32.tobytes()) # goes to right camera + fp.write(M1_fp32.tobytes()) # left camera intrinsics + fp.write(M2_fp32.tobytes()) # right camera intrinsics + fp.write(R_fp32.tobytes()) # Rotation matrix left -> right + fp.write(T_fp32.tobytes()) # Translation vector left -> right + fp.write(M3_fp32.tobytes()) # rgb camera intrinsics ## Currently a zero matrix + fp.write(R_rgb_fp32.tobytes()) # Rotation matrix left -> rgb ## Currently Identity matrix + fp.write(T_rgb_fp32.tobytes()) # Translation vector left -> rgb ## Currently vector of zeros + fp.write(d1_coeff_fp32.tobytes()) # distortion coeff of left camera + fp.write(d2_coeff_fp32.tobytes()) # distortion coeff of right camera + fp.write(d3_coeff_fp32.tobytes()) # distortion coeff of rgb camera - currently zeros + + if 0: # Print matrices, to compare with device data + np.set_printoptions(suppress=True, precision=6) + print("\nR1 (left)"); print(R1_fp32) + print("\nR2 (right)"); print(R2_fp32) + print("\nM1 (left)"); print(M1_fp32) + print("\nM2 (right)"); print(M2_fp32) + print("\nR"); print(R_fp32) + print("\nT"); print(T_fp32) + print("\nM3 (rgb)"); print(M3_fp32) + + if 0: # Print computed homography, to compare with device data + np.set_printoptions(suppress=True, precision=6) + for res_height in [800, 720, 400]: + m1 = np.copy(M1_fp32) + m2 = np.copy(M2_fp32) + if res_height == 720: + m1[1,2] -= 40 + m2[1,2] -= 40 + if res_height == 400: + m_scale = [[0.5, 0, 0], + [ 0, 0.5, 0], + [ 0, 0, 1]] + m1 = np.matmul(m_scale, m1) + m2 = np.matmul(m_scale, m2) 
+ h1 = np.matmul(np.matmul(m2, R1_fp32), np.linalg.inv(m1)) + h2 = np.matmul(np.matmul(m2, R2_fp32), np.linalg.inv(m2)) + h1 = np.linalg.inv(h1) + h2 = np.linalg.inv(h2) + print('\nHomography H1, H2 for height =', res_height) + print(h1) + print() + print(h2) + + # self.create_save_mesh() + # append specific flags to file with open(out_filepath, "ab") as fp: fp.write(bytearray(flags)) @@ -88,8 +150,10 @@ def calibrate(self, filepath, square_size, out_filepath, flags): print("Calibration file written to %s." % (out_filepath)) print("\tTook %i seconds to run image processing." % (round(time.time() - start_time, 2))) # show debug output for visual inspection - print("\nRectifying dataset for visual inspection") - self.show_rectified_images(filepath, out_filepath) + print("\nRectifying dataset for visual inspection using Mesh") + self.show_rectified_images_two_calib(filepath, False) + print("\nRectifying dataset for visual inspection using Two Homography") + self.show_rectified_images_two_calib(filepath, True) def process_images(self, filepath): """Read images, detect corners, refine corners, and save data.""" @@ -112,10 +176,14 @@ def process_images(self, filepath): assert len(images_left) != 0, "ERROR: Images not read correctly, check directory" assert len(images_right) != 0, "ERROR: Images not read correctly, check directory" + self.temp_img_r_point_list = [] + self.temp_img_l_point_list = [] + for image_left, image_right in zip(images_left, images_right): img_l = cv2.imread(image_left, 0) img_r = cv2.imread(image_right, 0) + assert img_l is not None, "ERROR: Images not read correctly" assert img_r is not None, "ERROR: Images not read correctly" @@ -142,6 +210,8 @@ def process_images(self, filepath): rt = cv2.cornerSubPix(img_r, corners_r, (5, 5), (-1, -1), self.criteria) self.imgpoints_r.append(corners_r) + self.temp_img_l_point_list.append([corners_l]) + self.temp_img_r_point_list.append([corners_r]) self.calib_successes.append(polygon_from_image_name(image_left)) print("\t[OK]. Took %i seconds." % (round(time.time() - start_time, 2))) else: @@ -166,7 +236,7 @@ def ensure_valid_images(self): raise AssertionError("Missing valid image sets for %i polygons. Re-run calibration with the\n'-p %s' argument to re-capture images for these polygons." % (len(missing), arg_value)) else: return True - + def stereo_calibrate(self): """Calibrate camera and construct Homography.""" # init camera calibrations @@ -203,10 +273,7 @@ def stereo_calibrate(self): # construct Homography plane_depth = 40000000.0 # arbitrary plane depth #TODO: Need to understand effect of plane_depth. Why does this improve some boards' cals? 
- n = np.array([[0.0], [0.0], [-1.0]]) - d_inv = 1.0 / plane_depth - H = (R - d_inv * np.dot(T, n.transpose())) - self.H = np.dot(self.M2, np.dot(H, np.linalg.inv(self.M1))) + self.H = np.dot(self.M2, np.dot(R, np.linalg.inv(self.M1))) self.H /= self.H[2, 2] # rectify Homography for right camera disparity = (self.M1[0, 0] * T[0] / plane_depth) @@ -215,6 +282,506 @@ def stereo_calibrate(self): print("Rectifying Homography...") print(self.H) + def stereo_calibrate_two_homography_calib(self): + """Calibrate camera and construct Homography.""" + # init camera calibrations + rt, self.M1, self.d1, self.r1, self.t1 = cv2.calibrateCamera( + self.objpoints, self.imgpoints_l, self.img_shape, None, None) + rt, self.M2, self.d2, self.r2, self.t2 = cv2.calibrateCamera( + self.objpoints, self.imgpoints_r, self.img_shape, None, None) + + # config + flags = 0 + #flags |= cv2.CALIB_FIX_ASPECT_RATIO + flags |= cv2.CALIB_USE_INTRINSIC_GUESS + #flags |= cv2.CALIB_SAME_FOCAL_LENGTH + #flags |= cv2.CALIB_ZERO_TANGENT_DIST + flags |= cv2.CALIB_RATIONAL_MODEL + #flags |= cv2.CALIB_FIX_K1 + #flags |= cv2.CALIB_FIX_K2 + #flags |= cv2.CALIB_FIX_K3 + #flags |= cv2.CALIB_FIX_K4 + #flags |= cv2.CALIB_FIX_K5 + #flags |= cv2.CALIB_FIX_K6 + #flags |= cv::CALIB_ZERO_TANGENT_DIST + + stereocalib_criteria = (cv2.TERM_CRITERIA_COUNT + + cv2.TERM_CRITERIA_EPS, 100, 1e-5) + + # stereo calibration procedure + ret, self.M1, self.d1, self.M2, self.d2, self.R, self.T, E, F = cv2.stereoCalibrate( + self.objpoints, self.imgpoints_l, self.imgpoints_r, + self.M1, self.d1, self.M2, self.d2, self.img_shape, + criteria=stereocalib_criteria, flags=flags) + + self.R1, self.R2, self.P1, self.P2, self.Q, validPixROI1, validPixROI2 = cv2.stereoRectify( + self.M1, + self.d1, + self.M2, + self.d2, + self.img_shape, self.R, self.T) + + self.H1 = np.matmul(np.matmul(self.M2, self.R1), np.linalg.inv(self.M1)) + self.H2 = np.matmul(np.matmul(self.M2, self.R2), np.linalg.inv(self.M2)) + + + def create_save_mesh(self): #, output_path): + + map_x_l, map_y_l = cv2.initUndistortRectifyMap(self.M1, self.d1, self.R1, self.M2, self.img_shape, cv2.CV_32FC1) + map_x_r, map_y_r = cv2.initUndistortRectifyMap(self.M2, self.d2, self.R2, self.M2, self.img_shape, cv2.CV_32FC1) + print("Distortion coeff left cam") + print(self.d1) + print("Distortion coeff right cam ") + print(self.d2) + + # print(str(type(map_x_l))) + # map_x_l.tofile(consts.resource_paths.left_mesh_fpath) + # map_y_l.tofile(out_filepath) + # map_x_r.tofile(out_filepath) + # map_y_r.tofile(out_filepath) + + map_x_l_fp32 = map_x_l.astype(np.float32) + map_y_l_fp32 = map_y_l.astype(np.float32) + map_x_r_fp32 = map_x_r.astype(np.float32) + map_y_r_fp32 = map_y_r.astype(np.float32) + + with open(consts.resource_paths.left_mesh_fpath, "ab") as fp: + fp.write(map_x_l_fp32.tobytes()) + fp.write(map_y_l_fp32.tobytes()) + + with open(consts.resource_paths.right_mesh_fpath, "ab") as fp: + fp.write(map_x_r_fp32.tobytes()) + fp.write(map_y_r_fp32.tobytes()) + + print("shape of maps") + print(map_x_l.shape) + print(map_y_l.shape) + print(map_x_r.shape) + print(map_y_r.shape) + + + + # meshCellSize = 16 + # mesh_left = [] + # mesh_right = [] + + # for y in range(map_x_l.shape[0] + 1): + # if y % meshCellSize == 0: + # row_left = [] + # row_right = [] + # for x in range(map_x_l.shape[1] + 1): + # if x % meshCellSize == 0: + # if y == map_x_l.shape[0] and x == map_x_l.shape[1]: + # row_left.append(map_y_l[y - 1, x - 1]) + # row_left.append(map_x_l[y - 1, x - 1]) + # row_right.append(map_y_r[y - 1, x - 1]) + # 
row_right.append(map_x_r[y - 1, x - 1]) + # elif y == map_x_l.shape[0]: + # row_left.append(map_y_l[y - 1, x]) + # row_left.append(map_x_l[y - 1, x]) + # row_right.append(map_y_r[y - 1, x]) + # row_right.append(map_x_r[y - 1, x]) + # elif x == map_x_l.shape[1]: + # row_left.append(map_y_l[y, x - 1]) + # row_left.append(map_x_l[y, x - 1]) + # row_right.append(map_y_r[y, x - 1]) + # row_right.append(map_x_r[y, x - 1]) + # else: + # row_left.append(map_y_l[y, x]) + # row_left.append(map_x_l[y, x]) + # row_right.append(map_y_r[y, x]) + # row_right.append(map_x_r[y, x]) + # if (map_x_l.shape[1] % meshCellSize) % 2 != 0: + # row_left.append(0) + # row_left.append(0) + # row_right.append(0) + # row_right.append(0) + + # mesh_left.append(row_left) + # mesh_right.append(row_right) + + # mesh_left = np.array(mesh_left) + # mesh_right = np.array(mesh_right) + # mesh_left.tofile(consts.resource_paths.left_mesh_fpath) + # mesh_right.tofile(consts.resource_paths.right_mesh_fpath) + + def show_rectified_images_two_calib(self, dataset_dir, use_homo): + images_left = glob.glob(dataset_dir + '/left/*.png') + images_right = glob.glob(dataset_dir + '/right/*.png') + images_left.sort() + images_right.sort() + assert len(images_left) != 0, "ERROR: Images not read correctly" + assert len(images_right) != 0, "ERROR: Images not read correctly" + + if not use_homo: + mapx_l, mapy_l = cv2.initUndistortRectifyMap(self.M1, self.d1, self.R1, self.P1, self.img_shape, cv2.CV_32FC1) + mapx_r, mapy_r = cv2.initUndistortRectifyMap(self.M2, self.d2, self.R2, self.P2, self.img_shape, cv2.CV_32FC1) + # mapx_l, mapy_l = self.rectify_map(self.M1, self.d1[0], self.R1) + # mapx_r, mapy_r = self.rectify_map(self.M2, self.d2[0], self.R2) + + image_data_pairs = [] + for image_left, image_right in zip(images_left, images_right): + # read images + img_l = cv2.imread(image_left, 0) + img_r = cv2.imread(image_right, 0) + if not use_homo: + img_l = cv2.remap(img_l, mapx_l, mapy_l, cv2.INTER_LINEAR) + img_r = cv2.remap(img_r, mapx_r, mapy_r, cv2.INTER_LINEAR) + else: + # img_l = cv2.undistort(img_l, self.M1, self.d1, None, self.M1) + # img_r = cv2.undistort(img_r, self.M2, self.d2, None, self.M2) + + # warp right image + img_l = cv2.warpPerspective(img_l, self.H1, img_l.shape[::-1], + cv2.INTER_CUBIC + + cv2.WARP_FILL_OUTLIERS + + cv2.WARP_INVERSE_MAP) + + img_r = cv2.warpPerspective(img_r, self.H2, img_r.shape[::-1], + cv2.INTER_CUBIC + + cv2.WARP_FILL_OUTLIERS + + cv2.WARP_INVERSE_MAP) + + + image_data_pairs.append((img_l, img_r)) + + # compute metrics + imgpoints_r = [] + imgpoints_l = [] + for image_data_pair in image_data_pairs: + flags = 0 + flags |= cv2.CALIB_CB_ADAPTIVE_THRESH + flags |= cv2.CALIB_CB_NORMALIZE_IMAGE + flags |= cv2.CALIB_CB_FAST_CHECK + ret_l, corners_l = cv2.findChessboardCorners(image_data_pair[0], + (9, 6), flags) + ret_r, corners_r = cv2.findChessboardCorners(image_data_pair[1], + (9, 6), flags) + + # termination criteria + self.criteria = (cv2.TERM_CRITERIA_MAX_ITER + + cv2.TERM_CRITERIA_EPS, 10, 0.05) + + # if corners are found in both images, refine and add data + if ret_l and ret_r: + rt = cv2.cornerSubPix(image_data_pair[0], corners_l, (5, 5), + (-1, -1), self.criteria) + rt = cv2.cornerSubPix(image_data_pair[1], corners_r, (5, 5), + (-1, -1), self.criteria) + imgpoints_l.extend(corners_l) + imgpoints_r.extend(corners_r) + # epi_error_sum = 0 + # for l_pt, r_pt in zip(corners_l, corners_r): + # epi_error_sum += abs(l_pt[0][1] - r_pt[0][1]) + + # print("Average Epipolar Error per image on host: " + 
str(epi_error_sum / len(corners_l))) + + epi_error_sum = 0 + for l_pt, r_pt in zip(imgpoints_l, imgpoints_r): + epi_error_sum += abs(l_pt[0][1] - r_pt[0][1]) + + print("Average Epipolar Error: " + str(epi_error_sum / len(imgpoints_r))) + + for image_data_pair in image_data_pairs: + img_concat = cv2.hconcat([image_data_pair[0], image_data_pair[1]]) + img_concat = cv2.cvtColor(img_concat, cv2.COLOR_GRAY2RGB) + + # draw epipolar lines for debug purposes + line_row = 0 + while line_row < img_concat.shape[0]: + cv2.line(img_concat, + (0, line_row), (img_concat.shape[1], line_row), + (0, 255, 0), 1) + line_row += 30 + + # show image + print("Displaying Stereo Pair for visual inspection. Press the [ESC] key to exit.") + while (1): + cv2.imshow('Stereo Pair', img_concat) + k = cv2.waitKey(33) + if k == 32: # Esc key to stop + break + elif k == 27: + os._exit(0) + # raise SystemExit() + elif k == -1: # normally -1 returned,so don't print it + continue + + + def rectify_map(self, M, d, R): + fx = M[0,0] + fy = M[1,1] + u0 = M[0,2] + v0 = M[1,2] + # distortion coefficients + print("distortion coeff") + print(d) + print(d.shape) + k1 = d[0] + k2 = d[1] + p1 = d[2] + p2 = d[3] + k3 = d[4] + s4 = d[11] + k4 = d[5] + k5 = d[6] + k6 = d[7] + s1 = d[8] + s2 = d[9] + s3 = d[10] + tauX = d[12] + tauY = d[13] + + + + matTilt = np.identity(3, dtype=np.float32) + ir = np.linalg.inv(np.matmul(self.M2, R)) ## Change it to using LU + # s_x = [] ## NOT USED + # s_y = [] + # s_w = [] + # for i in range(8): + # s_x.append(ir[0,0] * i) + # s_y.append(ir[1,0] * i) + # s_w.append(ir[2,0] * i) + map_x = np.zeros((800,1280),dtype=np.float32) + map_y = np.zeros((800,1280),dtype=np.float32) + + for i in range(800): + _x = i * ir[0,1] + ir[0,2] + _y = i * ir[1,1] + ir[1,2] + _w = i * ir[2,1] + ir[2,2] + + for j in range(1280): + _x += ir[0,0] + _y += ir[1,0] + _w += ir[2,0] + w = 1/_w + x = _x * w + y = _y * w + + x2 = x*x + y2 = y*y + r2 = x2 + y2 + _2xy = 2*x*y + kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2) + xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2) + yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2) + vec_3d = np.array([xd,yd,1]).reshape(3,1) + vecTilt = np.matmul(matTilt, vec_3d); + # Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1); + invProj = 1./vecTilt[2] if vecTilt[2] else 1 + # double invProj = vecTilt(2) ? 
1./vecTilt(2) : 1; + # double u = fx*invProj*vecTilt(0) + u0; + # double v = fy*invProj*vecTilt(1) + v0; + u = fx*invProj*vecTilt[0] + u0; # u0 and v0 is from the M1 + v = fy*invProj*vecTilt[1] + v0; + map_x[i,j] = u + map_y[i,j] = v + print("map built") + return map_x, map_y + + + def stereo_calibrate_two_homography_uncalib(self): + """Calibrate camera and construct Homography.""" + # init camera calibrations + rt, self.M1, self.d1, self.r1, self.t1 = cv2.calibrateCamera( + self.objpoints, self.imgpoints_l, self.img_shape, None, None) + rt, self.M2, self.d2, self.r2, self.t2 = cv2.calibrateCamera( + self.objpoints, self.imgpoints_r, self.img_shape, None, None) + + # config + flags = 0 + #flags |= cv2.CALIB_FIX_ASPECT_RATIO + flags |= cv2.CALIB_USE_INTRINSIC_GUESS + #flags |= cv2.CALIB_SAME_FOCAL_LENGTH + #flags |= cv2.CALIB_ZERO_TANGENT_DIST + flags |= cv2.CALIB_RATIONAL_MODEL + #flags |= cv2.CALIB_FIX_K1 + #flags |= cv2.CALIB_FIX_K2 + #flags |= cv2.CALIB_FIX_K3 + #flags |= cv2.CALIB_FIX_K4 + #flags |= cv2.CALIB_FIX_K5 + #flags |= cv2.CALIB_FIX_K6 + stereocalib_criteria = (cv2.TERM_CRITERIA_COUNT + + cv2.TERM_CRITERIA_EPS, 100, 1e-5) + + # stereo calibration procedure + ret, self.M1, self.d1, self.M2, self.d2, R, T, E, F = cv2.stereoCalibrate( + self.objpoints, self.imgpoints_l, self.imgpoints_r, + self.M1, self.d1, self.M2, self.d2, self.img_shape, + criteria=stereocalib_criteria, flags=flags) + + assert ret < 1.0, "[ERROR] Calibration RMS error < 1.0 (%i). Re-try image capture." % (ret) + print("[OK] Calibration successful w/ RMS error=" + str(ret)) + + F_test, mask = cv2.findFundamentalMat(np.array(self.imgpoints_l).reshape(-1, 2), + np.array(self.imgpoints_r).reshape(-1, 2), + cv2.FM_RANSAC, 2) # try ransac and other methods too. + + res, self.H1, self.H2 = cv2.stereoRectifyUncalibrated(np.array(self.imgpoints_l).reshape(-1, 2), + np.array(self.imgpoints_r).reshape(-1, 2), + F_test, + self.img_shape, 2) + + def test_img_vis(self, dataset_dir): + images_left = glob.glob(dataset_dir + '/left/*.png') + images_right = glob.glob(dataset_dir + '/right/*.png') + images_left.sort() + images_right.sort() + + map1_l, map2_l = cv2.initUndistortRectifyMap(self.M1, self.d1, self.R1, self.M1, self.img_shape, cv2.CV_32FC1) + map1_r, map2_r = cv2.initUndistortRectifyMap(self.M2, self.d2, self.R2, self.M2, self.img_shape, cv2.CV_32FC1) + + map1_l_review, map2_l_review = self.rectify_map(self.M1, self.d1[0], self.R1) + map1_r_review, map2_r_review = self.rectify_map(self.M2, self.d2[0], self.R2) + + image_data_pairs = [] + for image_left, image_right in zip(images_left, images_right): + + img_l = cv2.imread(image_left, 0) + img_r = cv2.imread(image_right, 0) + + img_l_cv2 = cv2.remap(img_l, map1_l, map2_l, cv2.INTER_CUBIC) + img_r_cv2 = cv2.remap(img_r, map1_r, map2_r, cv2.INTER_CUBIC) + + img_l_review = cv2.remap(img_l, map1_l_review, map2_l_review, cv2.INTER_CUBIC) + img_r_review = cv2.remap(img_r, map1_r_review, map2_r_review, cv2.INTER_CUBIC) + img_concat = cv2.hconcat([img_l_review, img_l_cv2]) + img_concat = cv2.cvtColor(img_concat, cv2.COLOR_GRAY2RGB) + while(1): + cv2.imshow('Stereo Pair', img_concat) + k = cv2.waitKey(33) + if k == 32: + break + elif k == 27: # Esc key to stop + break + # raise SystemExit() + elif k == -1: # normally -1 returned,so don't print it + continue + img_concat = cv2.hconcat([img_r_review, img_r_cv2]) + img_concat = cv2.cvtColor(img_concat, cv2.COLOR_GRAY2RGB) + while(1): + cv2.imshow('Stereo Pair', img_concat) + k = cv2.waitKey(33) + if k == 32: + break + elif k == 
27: # Esc key to stop + break + # raise SystemExit() + elif k == -1: # normally -1 returned, so don't print it + continue + + + def show_rectified_images_two_uncalib(self, dataset_dir): + images_left = glob.glob(dataset_dir + '/left/*.png') + images_right = glob.glob(dataset_dir + '/right/*.png') + images_left.sort() + images_right.sort() + + assert len(images_left) != 0, "ERROR: Images not read correctly" + assert len(images_right) != 0, "ERROR: Images not read correctly" + + # H = np.fromfile(calibration_file, dtype=np.float32).reshape((3, 3)) + + # print("Using Homography from file, with values: ") + # print(H) + + # H1 = np.linalg.inv(H1) + # H2 = np.linalg.inv(H2) + R1 = np.matmul(np.matmul(np.linalg.inv(self.M1), self.H1), self.M1) + R2 = np.matmul(np.matmul(np.linalg.inv(self.M2), self.H2), self.M2) + img_l = cv2.imread(images_left[0], 0) + map1_l, map2_l = cv2.initUndistortRectifyMap(self.M1, self.d1, R1, self.M1, self.img_shape, cv2.CV_32FC1) + map1_r, map2_r = cv2.initUndistortRectifyMap(self.M2, self.d2, R2, self.M2, self.img_shape, cv2.CV_32FC1) + + + + image_data_pairs = [] + for image_left, image_right in zip(images_left, images_right): + # read images + img_l = cv2.imread(image_left, 0) + img_r = cv2.imread(image_right, 0) + # img_l = cv2.remap(img_l, map1_l, map2_l, cv2.INTER_CUBIC) + # img_r = cv2.remap(img_r, map1_r, map2_r, cv2.INTER_CUBIC) + + # cv2.initUndistortRectifyMap(self.M1) + + + # # warp right image + img_l = cv2.warpPerspective(img_l, self.H1, img_l.shape[::-1], + cv2.INTER_CUBIC + + cv2.WARP_FILL_OUTLIERS + + cv2.WARP_INVERSE_MAP) + + img_r = cv2.warpPerspective(img_r, self.H2, img_r.shape[::-1], + cv2.INTER_CUBIC + + cv2.WARP_FILL_OUTLIERS + + cv2.WARP_INVERSE_MAP) + + + image_data_pairs.append((img_l, img_r)) + + # compute metrics + imgpoints_r = [] + imgpoints_l = [] + for image_data_pair in image_data_pairs: + flags = 0 + flags |= cv2.CALIB_CB_ADAPTIVE_THRESH + flags |= cv2.CALIB_CB_NORMALIZE_IMAGE + ret_l, corners_l = cv2.findChessboardCorners(image_data_pair[0], + (9, 6), flags) + ret_r, corners_r = cv2.findChessboardCorners(image_data_pair[1], + (9, 6), flags) + + # termination criteria + self.criteria = (cv2.TERM_CRITERIA_MAX_ITER + + cv2.TERM_CRITERIA_EPS, 30, 0.001) + + # if corners are found in both images, refine and add data + if ret_l and ret_r: + rt = cv2.cornerSubPix(image_data_pair[0], corners_l, (5, 5), + (-1, -1), self.criteria) + rt = cv2.cornerSubPix(image_data_pair[1], corners_r, (5, 5), + (-1, -1), self.criteria) + imgpoints_l.extend(corners_l) + imgpoints_r.extend(corners_r) + + epi_error_sum = 0 + for l_pt, r_pt in zip(imgpoints_l, imgpoints_r): + epi_error_sum += abs(l_pt[0][1] - r_pt[0][1]) + + print("Average Epipolar Error: " + str(epi_error_sum / len(imgpoints_r))) + + for image_data_pair in image_data_pairs: + img_concat = cv2.hconcat([image_data_pair[0], image_data_pair[1]]) + img_concat = cv2.cvtColor(img_concat, cv2.COLOR_GRAY2RGB) + + # draw epipolar lines for debug purposes + line_row = 0 + while line_row < img_concat.shape[0]: + cv2.line(img_concat, + (0, line_row), (img_concat.shape[1], line_row), + (0, 255, 0), 1) + line_row += 30 + + # show image + print("Displaying Stereo Pair for visual inspection. 
Press the [ESC] key to exit.") + while (1): + cv2.imshow('Stereo Pair', img_concat) + k = cv2.waitKey(33) + if k == 32: # Esc key to stop + break + elif k == 27: + os._exit(0) + # raise SystemExit() + elif k == -1: # normally -1 returned,so don't print it + continue + + + + + + def show_rectified_images(self, dataset_dir, calibration_file): images_left = glob.glob(dataset_dir + '/left/*.png') images_right = glob.glob(dataset_dir + '/right/*.png') @@ -258,7 +825,7 @@ def show_rectified_images(self, dataset_dir, calibration_file): # termination criteria self.criteria = (cv2.TERM_CRITERIA_MAX_ITER + - cv2.TERM_CRITERIA_EPS, 30, 0.001) + cv2.TERM_CRITERIA_EPS, 10, 0.05) # if corners are found in both images, refine and add data if ret_l and ret_r: @@ -268,6 +835,8 @@ def show_rectified_images(self, dataset_dir, calibration_file): (-1, -1), self.criteria) imgpoints_l.extend(corners_l) imgpoints_r.extend(corners_r) + epi_error_sum = 0 + epi_error_sum = 0 for l_pt, r_pt in zip(imgpoints_l, imgpoints_r): diff --git a/depthai_helpers/cli_utils.py b/depthai_helpers/cli_utils.py index 950f0f104..254252d97 100644 --- a/depthai_helpers/cli_utils.py +++ b/depthai_helpers/cli_utils.py @@ -1,57 +1,9 @@ #!/usr/bin/env python3 -import argparse -try: - import argcomplete -except ImportError: - raise ImportError('argcomplete module not found, run python3 -m pip install -r requirements.txt ') -from argcomplete.completers import ChoicesCompleter - from enum import Enum import os import consts.resource_paths -def _get_immediate_subdirectories(a_dir): - return [name for name in os.listdir(a_dir) - if os.path.isdir(os.path.join(a_dir, name))] - -_stream_choices = ("metaout", "previewout", "jpegout", "left", "right", "depth_raw", "disparity", "disparity_color", "meta_d2h", "object_tracker") -_CNN_choices = _get_immediate_subdirectories(consts.resource_paths.nn_resource_path) - -def _stream_type(option): - max_fps = None - option_list = option.split(",") - option_args = len(option_list) - if option_args not in [1, 2]: - msg_string = "{0} format is invalid. 
See --help".format(option) - cli_print(msg_string, PrintColors.WARNING) - raise ValueError(msg_string) - - deprecated_choices = ("depth_sipp", "depth_color_h") - transition_map = {"depth_sipp" : "disparity_color", "depth_color_h" : "disparity_color"} - stream_name = option_list[0] - if stream_name in deprecated_choices: - cli_print("Stream option " + stream_name + " is deprecated, use: " + transition_map[stream_name], PrintColors.WARNING) - stream_name = transition_map[stream_name] - - if stream_name not in _stream_choices: - msg_string = "{0} is not in available stream list: \n{1}".format(stream_name, _stream_choices) - cli_print(msg_string, PrintColors.WARNING) - raise ValueError(msg_string) - - if option_args == 1: - stream_dict = {"name": stream_name} - else: - try: - max_fps = float(option_list[1]) - except ValueError: - msg_string = "In option: {0} {1} is not a number!".format(option, option_list[1]) - cli_print(msg_string, PrintColors.WARNING) - - stream_dict = {"name": stream_name, "max_fps": max_fps} - return stream_dict - - class RangeFloat(object): def __init__(self, start, end): self.start = start @@ -80,7 +32,6 @@ class PrintColors(Enum): BOLD = "\033[1m" UNDERLINE = "\033[4m" - def cli_print(msg, print_color): """ Prints to console with input print color type @@ -88,135 +39,3 @@ def cli_print(msg, print_color): if not isinstance(print_color, PrintColors): raise ValueError("Must use PrintColors type in cli_print") print("{0}{1}{2}".format(print_color.value, msg, PrintColors.ENDC.value)) - - -def parse_args(): - epilog_text = """ - Displays video streams captured by DepthAI. - - Example usage: - - ## Show the depth stream: - python3 test.py -s disparity_color,12 - ## Show the depth stream and NN output: - python3 test.py -s metaout previewout,12 disparity_color,12 - """ - parser = argparse.ArgumentParser(epilog=epilog_text, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument("-co", "--config_overwrite", default=None, - type=str, required=False, - help="JSON-formatted pipeline config object. This will be override defaults used in this " - "script.") - - parser.add_argument("-brd", "--board", default=None, type=str, - help="BW1097, BW1098OBC - Board type from resources/boards/ (not case-sensitive). " - "Or path to a custom .json board config. Mutually exclusive with [-fv -rfv -b -r -w]") - - parser.add_argument("-sh", "--shaves", default=None, type=int, choices=range(1,15), metavar="[1-14]", - help="Number of shaves used by NN.") - - parser.add_argument("-cmx", "--cmx_slices", default=None, type=int, choices=range(1,15), metavar="[1-14]", - help="Number of cmx slices used by NN.") - - parser.add_argument("-nce", "--NN_engines", default=None, type=int, choices=range(0,3), metavar="[0-2]", - help="Number of NN_engines used by NN.") - - parser.add_argument("-rgbr", "--rgb_resolution", default=1080, type=int, choices=[1080, 2160, 3040], - help="RGB cam res height: (1920x)1080, (3840x)2160 or (4056x)3040. Default: %(default)s") - - parser.add_argument("-rgbf", "--rgb_fps", default=30.0, type=float, - help="RGB cam fps: max 118.0 for H:1080, max 42.0 for H:2160. Default: %(default)s") - - parser.add_argument("-monor", "--mono_resolution", default=720, type=int, choices=[400, 720, 800], - help="Mono cam res height: (1280x)720, (1280x)800 or (640x)400 - binning. Default: %(default)s") - - parser.add_argument("-monof", "--mono_fps", default=30.0, type=float, - help="Mono cam fps: max 60.0 for H:720 or H:800, max 120.0 for H:400. 
Default: %(default)s") - - parser.add_argument("-dct", "--disparity_confidence_threshold", default=200, type=int, - choices=range(0,256), metavar="[0-255]", - help="Disparity_confidence_threshold.") - - parser.add_argument("-fv", "--field-of-view", default=None, type=float, - help="Horizontal field of view (HFOV) for the stereo cameras in [deg]. Default: 71.86deg.") - - parser.add_argument("-rfv", "--rgb-field-of-view", default=None, type=float, - help="Horizontal field of view (HFOV) for the RGB camera in [deg]. Default: 68.7938deg.") - - parser.add_argument("-b", "--baseline", default=None, type=float, - help="Left/Right camera baseline in [cm]. Default: 9.0cm.") - - parser.add_argument("-r", "--rgb-baseline", default=None, type=float, - help="Distance the RGB camera is from the Left camera. Default: 2.0cm.") - - parser.add_argument("-w", "--no-swap-lr", dest="swap_lr", default=None, action="store_false", - help="Do not swap the Left and Right cameras.") - - parser.add_argument("-e", "--store-eeprom", default=False, action="store_true", - help="Store the calibration and board_config (fov, baselines, swap-lr) in the EEPROM onboard") - - parser.add_argument("--clear-eeprom", default=False, action="store_true", - help="Invalidate the calib and board_config from EEPROM") - - parser.add_argument("-o", "--override-eeprom", default=False, action="store_true", - help="Use the calib and board_config from host, ignoring the EEPROM data if programmed") - - parser.add_argument("-dev", "--device-id", default="", type=str, - help="USB port number for the device to connect to. Use the word 'list' to show all devices " - "and exit.") - parser.add_argument("-debug", "--dev_debug", nargs="?", default=None, const='', type=str, required=False, - help="Used by board developers for debugging. 
Can take parameter to device binary") - parser.add_argument("-fusb2", "--force_usb2", default=None, action="store_true", - help="Force usb2 connection") - - parser.add_argument("-cnn", "--cnn_model", default="mobilenet-ssd", type=str, choices=_CNN_choices, - help="Cnn model to run on DepthAI") - - parser.add_argument("-cnn2", "--cnn_model2", default="", type=str, choices=_CNN_choices, - help="Cnn model to run on DepthAI for second-stage inference") - - parser.add_argument('-cam', "--cnn_camera", default='rgb', choices=['rgb', 'left', 'right', 'left_right'], - help='Choose camera input for CNN (default: %(default)s)') - - parser.add_argument("-dd", "--disable_depth", default=False, action="store_true", - help="Disable depth calculation on CNN models with bounding box output") - - parser.add_argument("-bb", "--draw-bb-depth", default=False, action="store_true", - help="Draw the bounding boxes over the left/right/depth* streams") - - parser.add_argument("-ff", "--full-fov-nn", default=False, action="store_true", - help="Full RGB FOV for NN, not keeping the aspect ratio") - - parser.add_argument("-sync", "--sync-video-meta", default=False, action="store_true", - help="Synchronize 'previewout' and 'metaout' streams") - - parser.add_argument("-s", "--streams", - nargs="+", - type=_stream_type, - dest="streams", - default=["metaout", "previewout"], - help=("Define which streams to enable \n" - "Format: stream_name or stream_name,max_fps \n" - "Example: -s metaout previewout \n" - "Example: -s metaout previewout,10 depth_sipp,10"))\ - .completer=ChoicesCompleter(_stream_choices) - - parser.add_argument("-v", "--video", default=None, type=str, required=False, - help="Path where to save video stream (existing file will be overwritten)") - - argcomplete.autocomplete(parser) - - options = parser.parse_args() - any_options_set = any([options.field_of_view, options.rgb_field_of_view, options.baseline, options.rgb_baseline, - options.swap_lr]) - if (options.board is not None) and any_options_set: - parser.error("[-brd] is mutually exclusive with [-fv -rfv -b -r -w]") - - # Set some defaults after the above check - if not options.board: - options.field_of_view = 71.86 - options.rgb_field_of_view = 68.7938 - options.baseline = 9.0 - options.rgb_baseline = 2.0 - options.swap_lr = True - - return options diff --git a/depthai_helpers/config_manager.py b/depthai_helpers/config_manager.py new file mode 100644 index 000000000..85b0e18ac --- /dev/null +++ b/depthai_helpers/config_manager.py @@ -0,0 +1,441 @@ +import os +import json +import platform +import subprocess +from pathlib import Path +import consts.resource_paths +import urllib.request + +from depthai_helpers import utils +from model_compiler.model_compiler import download_and_compile_NN_model +from consts.resource_paths import nn_resource_path as model_zoo_folder +from depthai_helpers.cli_utils import cli_print, PrintColors + +class DepthConfigManager: + labels = "" + NN_config = None + custom_fw_commit = '' + + def __init__(self, args): + self.args = args + self.stream_list = args['streams'] + self.calc_dist_to_bb = not self.args['disable_depth'] + + # Prepare handler methods (decode_nn, show_nn) for the network we want to run. 
+ self.importAndSetCallbacksForNN() + self.generateJsonConfig() + + + def getUsb2Mode(self): + usb2_mode = False + if self.args['force_usb2']: + cli_print("FORCE USB2 MODE", PrintColors.WARNING) + usb2_mode = True + else: + usb2_mode = False + return usb2_mode + + def getColorPreviewScale(self): + if self.args['color_scale']: + return self.args['color_scale'] + else: + return 1.0 + + def getCustomFirmwarePath(self, commit): + fwdir = '.fw_cache/' + if not os.path.exists(fwdir): + os.mkdir(fwdir) + fw_variant = '' + if self.getUsb2Mode(): + fw_variant = 'usb2-' + fname = 'depthai-' + fw_variant + commit + '.cmd' + path = fwdir + fname + if not Path(path).exists(): + url = 'https://artifacts.luxonis.com/artifactory/luxonis-myriad-snapshot-local/depthai-device-side/' + url += commit + '/' + fname + print('Downloading custom FW:', url) + # Need this to avoid "HTTP Error 403: Forbidden" + class CustomURLopener(urllib.request.FancyURLopener): + version = "Mozilla/5.0" + # FancyURLopener doesn't report by default errors like 404 + def http_error_default(self, url, fp, errcode, errmsg, headers): + raise ValueError(errcode) + url_opener = CustomURLopener() + with url_opener.open(url) as response, open(path, 'wb') as outf: + outf.write(response.read()) + return path + + def getCommandFile(self): + debug_mode = False + cmd_file = '' + if self.args['firmware'] != None: + self.custom_fw_commit = self.args['firmware'] + if self.args['dev_debug'] == None: + # Debug -debug flag NOT present, check first for custom firmware + if self.custom_fw_commit == '': + debug_mode = False + else: + debug_mode = True + cmd_file = self.getCustomFirmwarePath(self.custom_fw_commit) + elif self.args['dev_debug'] == '': + # If just -debug flag is present -> cmd_file = '' (wait for device to be connected beforehand) + debug_mode = True + else: + debug_mode = True + cmd_file = self.args['dev_debug'] + + return cmd_file, debug_mode + + + def importAndSetCallbacksForNN(self): + # why inline imports? Could just make a dict for all these to make managing them easier and less verbose. 
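The comment above asks whether a dispatch table would be tidier than the chain of per-model if-blocks and inline imports that follows. A sketch of that alternative, reusing module and function names that exist under depthai_helpers in this diff; the table layout and the load_handlers helper itself are assumptions, not part of the patch:

import importlib

# cnn_model -> (module under depthai_helpers, decode fn, show fn, json decode fn, supports depth)
HANDLER_TABLE = {
    'mobilenet-ssd': ('mobilenet_ssd_handler', 'decode_mobilenet_ssd',
                      'show_mobilenet_ssd', 'decode_mobilenet_ssd_json', True),
    'emotions-recognition-retail-0003': ('emotion_recognition_handler', 'decode_emotion_recognition',
                                         'show_emotion_recognition', 'decode_emotion_recognition_json', False),
    'age-gender-recognition-retail-0013': ('age_gender_recognition_handler', 'decode_age_gender_recognition',
                                           'show_age_gender_recognition', 'decode_age_gender_recognition_json', False),
}

def load_handlers(cnn_model):
    # Look up the handler entry and import its module lazily, like the inline imports below do.
    module_name, decode_name, show_name, json_name, supports_depth = HANDLER_TABLE[cnn_model]
    module = importlib.import_module('depthai_helpers.' + module_name)
    return (getattr(module, decode_name), getattr(module, show_name),
            getattr(module, json_name), supports_depth)

Models that need extra behaviour (disabling depth, or remapping legacy names such as 'openpose') could keep a small post-lookup step on top of the table.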
+ from depthai_helpers.mobilenet_ssd_handler import decode_mobilenet_ssd, show_mobilenet_ssd, decode_mobilenet_ssd_json + self.decode_nn=decode_mobilenet_ssd + self.show_nn=show_mobilenet_ssd + self.decode_nn_json=decode_mobilenet_ssd_json + + if self.args['cnn_model'] == 'age-gender-recognition-retail-0013': + from depthai_helpers.age_gender_recognition_handler import decode_age_gender_recognition, show_age_gender_recognition, decode_age_gender_recognition_json + self.decode_nn=decode_age_gender_recognition + self.show_nn=show_age_gender_recognition + self.decode_nn_json=decode_age_gender_recognition_json + self.calc_dist_to_bb=False + + if self.args['cnn_model'] == 'emotions-recognition-retail-0003': + from depthai_helpers.emotion_recognition_handler import decode_emotion_recognition, show_emotion_recognition, decode_emotion_recognition_json + self.decode_nn=decode_emotion_recognition + self.show_nn=show_emotion_recognition + self.decode_nn_json=decode_emotion_recognition_json + self.calc_dist_to_bb=False + + if self.args['cnn_model'] in ['tiny-yolo-v3', 'yolo-v3']: + from depthai_helpers.tiny_yolo_v3_handler import decode_tiny_yolo, show_tiny_yolo, decode_tiny_yolo_json + self.decode_nn=decode_tiny_yolo + self.show_nn=show_tiny_yolo + self.decode_nn_json=decode_tiny_yolo_json + + if self.args['cnn_model'] in ['facial-landmarks-35-adas-0002', 'landmarks-regression-retail-0009']: + from depthai_helpers.landmarks_recognition_handler import decode_landmarks_recognition, show_landmarks_recognition, decode_landmarks_recognition_json + self.decode_nn=decode_landmarks_recognition + self.show_nn=show_landmarks_recognition + self.decode_nn_json=decode_landmarks_recognition_json + self.calc_dist_to_bb=False + + # backward compatibility + if self.args['cnn_model'] == 'openpose': + self.args['cnn_model'] = 'human-pose-estimation-0001' + if self.args['cnn_model'] == 'human-pose-estimation-0001': + from depthai_helpers.openpose_handler import decode_openpose, show_openpose + self.decode_nn=decode_openpose + self.show_nn=show_openpose + self.calc_dist_to_bb=False + + if self.args['cnn_model'] == 'openpose2': + self.args['cnn_model'] = 'mobileNetV2-PoseEstimation' + if self.args['cnn_model'] == 'mobileNetV2-PoseEstimation': + from depthai_helpers.openpose2_handler import decode_openpose, show_openpose + self.decode_nn=decode_openpose + self.show_nn=show_openpose + self.calc_dist_to_bb=False + + if self.args['cnn_model'] == 'deeplabv3p_person': + from depthai_helpers.deeplabv3p_person import decode_deeplabv3p, show_deeplabv3p + self.decode_nn=decode_deeplabv3p + self.show_nn=show_deeplabv3p + self.calc_dist_to_bb=False + + + def linuxCheckApplyUsbRules(self): + if platform.system() == 'Linux': + ret = subprocess.call(['grep', '-irn', 'ATTRS{idVendor}=="03e7"', '/etc/udev/rules.d'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + if(ret != 0): + cli_print("\nWARNING: Usb rules not found", PrintColors.WARNING) + cli_print("\nSet rules: \n" + """echo 'SUBSYSTEM=="usb", ATTRS{idVendor}=="03e7", MODE="0666"' | sudo tee /etc/udev/rules.d/80-movidius.rules \n""" + "sudo udevadm control --reload-rules && udevadm trigger \n" + "Disconnect/connect usb cable on host! 
\n", PrintColors.RED) + os._exit(1) + + def getMaxShaveNumbers(self): + stream_names = [stream if isinstance(stream, str) else stream['name'] for stream in self.stream_list] + max_shaves = 14 + if self.args['rgb_resolution'] != 1080: + max_shaves = 11 + if 'object_tracker' in stream_names: + max_shaves = 9 + elif 'object_tracker' in stream_names: + max_shaves = 12 + + return max_shaves + + def generateJsonConfig(self): + + # something to verify usb rules are good? + self.linuxCheckApplyUsbRules() + + max_shave_nr = self.getMaxShaveNumbers() + shave_nr = max_shave_nr if self.args['shaves'] is None else self.args['shaves'] + cmx_slices = shave_nr if self.args['cmx_slices'] is None else self.args['cmx_slices'] + NCE_nr = 1 if self.args['NN_engines'] is None else self.args['NN_engines'] + + # left_right double NN check. + if self.args['cnn_camera'] in ['left_right', 'rectified_left_right']: + if NCE_nr != 2: + cli_print('Running NN on both cams requires 2 NN engines!', PrintColors.RED) + NCE_nr = 2 + + if NCE_nr == 2: + shave_nr = shave_nr - (shave_nr % 2) + cmx_slices = cmx_slices - (cmx_slices % 2) + + # Get blob files + blobMan = BlobManager(self.args, self.calc_dist_to_bb, shave_nr, cmx_slices, NCE_nr) + self.NN_config = blobMan.getNNConfig() + try: + self.labels = self.NN_config['mappings']['labels'] + except: + self.labels = None + print("Labels not found in json!") + + try: + output_format = self.NN_config['NN_config']['output_format'] + except: + NN_json = {} + NN_json['NN_config'] = {} + NN_json['NN_config']['output_format'] = "raw" + self.NN_config = NN_json + output_format = "raw" + + if output_format == "raw" and self.calc_dist_to_bb == True: + cli_print("WARNING: Depth calculation with raw output format is not supported! It's only supported for YOLO/mobilenet based NNs, disabling calc_dist_to_bb", PrintColors.WARNING) + self.calc_dist_to_bb = False + + stream_names = [stream if isinstance(stream, str) else stream['name'] for stream in self.stream_list] + if ('disparity' in stream_names or 'disparity_color' in stream_names) and self.calc_dist_to_bb == True: + cli_print("WARNING: Depth calculation with disparity/disparity_color streams is not supported! Disabling calc_dist_to_bb", PrintColors.WARNING) + self.calc_dist_to_bb = False + + # check for known bad configurations + if 'depth' in stream_names and ('disparity' in stream_names or 'disparity_color' in stream_names): + print('ERROR: depth is mutually exclusive with disparity/disparity_color') + exit(2) + + if self.args['stereo_lr_check'] == True: + raise ValueError("Left-right check option is still under development. Don;t enable it.") + + # Do not modify the default values in the config Dict below directly. Instead, use the `-co` argument when running this script. + config = { + # Possible streams: + # ['left', 'right', 'jpegout', 'video', 'previewout', 'metaout', 'depth_sipp', 'disparity', 'depth_color_h'] + # If "left" is used, it must be in the first position. 
+ # To test depth use: + # 'streams': [{'name': 'depth_sipp', "max_fps": 12.0}, {'name': 'previewout', "max_fps": 12.0}, ], + 'streams': self.args['streams'], + 'depth': + { + 'calibration_file': consts.resource_paths.calib_fpath, + 'left_mesh_file': consts.resource_paths.left_mesh_fpath, + 'right_mesh_file': consts.resource_paths.right_mesh_fpath, + 'padding_factor': 0.3, + 'depth_limit_m': 10.0, # In meters, for filtering purpose during x,y,z calc + 'median_kernel_size': self.args['stereo_median_size'], + 'lr_check': self.args['stereo_lr_check'], + 'warp_rectify': + { + 'use_mesh' : self.args['use_mesh'], # if False, will use homography + 'mirror_frame': self.args['mirror_rectified'] == 'true', # if False, the disparity will be mirrored instead + 'edge_fill_color': 0, # gray 0..255, or -1 to replicate pixel values + }, + }, + 'ai': + { + 'blob_file': blobMan.blob_file, + 'blob_file_config': blobMan.blob_file_config, + 'blob_file2': blobMan.blob_file2, + 'blob_file_config2': blobMan.blob_file_config2, + 'calc_dist_to_bb': self.calc_dist_to_bb, + 'keep_aspect_ratio': not self.args['full_fov_nn'], + 'camera_input': self.args['cnn_camera'], + 'shaves' : shave_nr, + 'cmx_slices' : cmx_slices, + 'NN_engines' : NCE_nr, + }, + # object tracker + 'ot': + { + 'max_tracklets' : 20, #maximum 20 is supported + 'confidence_threshold' : 0.5, #object is tracked only for detections over this threshold + }, + 'board_config': + { + 'swap_left_and_right_cameras': self.args['swap_lr'], # True for 1097 (RPi Compute) and 1098OBC (USB w/onboard cameras) + 'left_fov_deg': self.args['field_of_view'], # Same on 1097 and 1098OBC + 'rgb_fov_deg': self.args['rgb_field_of_view'], + 'left_to_right_distance_cm': self.args['baseline'], # Distance between stereo cameras + 'left_to_rgb_distance_cm': self.args['rgb_baseline'], # Currently unused + 'store_to_eeprom': self.args['store_eeprom'], + 'clear_eeprom': self.args['clear_eeprom'], + 'override_eeprom': self.args['override_eeprom'], + }, + 'camera': + { + 'rgb': + { + # 3840x2160, 1920x1080 + # only UHD/1080p/30 fps supported for now + 'resolution_h': self.args['rgb_resolution'], + 'fps': self.args['rgb_fps'], + }, + 'mono': + { + # 1280x720, 1280x800, 640x400 (binning enabled) + 'resolution_h': self.args['mono_resolution'], + 'fps': self.args['mono_fps'], + }, + }, + 'app': + { + 'sync_video_meta_streams': self.args['sync_video_meta'], + 'sync_sequence_numbers' : self.args['sync_sequence_numbers'], + 'usb_chunk_KiB' : self.args['usb_chunk_KiB'], + }, + #'video_config': + #{ + # 'rateCtrlMode': 'cbr', # Options: cbr / vbr + # 'profile': 'h265_main', # Options: 'h264_baseline' / 'h264_main' / 'h264_high' / 'h265_main / 'mjpeg' ' + # 'bitrate': 8000000, # When using CBR (H264/H265 only) + # 'maxBitrate': 8000000, # When using CBR (H264/H265 only) + # 'keyframeFrequency': 30, (H264/H265 only) + # 'numBFrames': 0, (H264/H265 only) + # 'quality': 80 # (0 - 100%) When using VBR or MJPEG profile + #} + #'video_config': + #{ + # 'profile': 'mjpeg', + # 'quality': 95 + #} + } + + self.jsonConfig = self.postProcessJsonConfig(config) + + + def postProcessJsonConfig(self, config): + # merge board config, if exists + if self.args['board']: + board_path = Path(self.args['board']) + if not board_path.exists(): + board_path = Path(consts.resource_paths.boards_dir_path) / Path(self.args['board'].upper()).with_suffix('.json') + if not board_path.exists(): + print('ERROR: Board config not found: {}'.format(board_path)) + os._exit(2) + with open(board_path) as fp: + board_config = 
json.load(fp) + utils.merge(board_config, config) + + # handle config overwrite option. + if self.args['config_overwrite']: + self.args['config_overwrite'] = json.loads(self.args['config_overwrite']) + config = utils.merge(self.args['config_overwrite'],config) + print("Merged Pipeline config with overwrite",config) + + # Append video stream if video recording was requested and stream is not already specified + self.video_file = None + if self.args['video'] is not None: + + # open video file + try: + self.video_file = open(self.args['video'], 'wb') + if config['streams'].count('video') == 0: + config['streams'].append('video') + except IOError: + print("Error: couldn't open video file for writing. Disabled video output stream") + if config['streams'].count('video') == 1: + config['streams'].remove('video') + + return config + + + +class BlobManager: + def __init__(self, args, calc_dist_to_bb, shave_nr, cmx_slices, NCE_nr): + self.args = args + self.calc_dist_to_bb = calc_dist_to_bb + self.shave_nr = shave_nr + self.cmx_slices = cmx_slices + self.NCE_nr = NCE_nr + + if self.args['cnn_model']: + self.blob_file, self.blob_file_config = self.getBlobFiles(self.args['cnn_model']) + + self.blob_file2 = "" + self.blob_file_config2 = "" + if self.args['cnn_model2']: + print("Using CNN2:", self.args['cnn_model2']) + self.blob_file2, self.blob_file_config2 = self.getBlobFiles(self.args['cnn_model2'], False) + + # compile models + self.blob_file = self.compileBlob(self.args['cnn_model'], self.args['model_compilation_target']) + if self.args['cnn_model2']: + self.blob_file2 = self.compileBlob(self.args['cnn_model2'], self.args['model_compilation_target']) + + # verify the first blob files exist? I just copied this logic from before the refactor. Not sure if it's necessary. This makes it so this script won't run unless we have a blob file and config. + self.verifyBlobFilesExist(self.blob_file, self.blob_file_config) + + def getNNConfig(self): + # try and load labels + NN_json = None + if Path(self.blob_file_config).exists(): + with open(self.blob_file_config) as f: + if f is not None: + NN_json = json.load(f) + f.close() + + return NN_json + + def verifyBlobFilesExist(self, verifyBlob, verifyConfig): + verifyBlobPath = Path(verifyBlob) + verifyConfigPath = Path(verifyConfig) + if not verifyBlobPath.exists(): + cli_print("\nWARNING: NN blob not found in: " + verifyBlob, PrintColors.WARNING) + os._exit(1) + + if not verifyConfigPath.exists(): + print("NN config not found in: " + verifyConfig + '. 
Defaulting to "raw" output format!') + + def getBlobFiles(self, cnnModel, isFirstNN=True): + cnn_model_path = consts.resource_paths.nn_resource_path + cnnModel + "/" + cnnModel + blobFile = cnn_model_path + ".blob" + blobFileConfig = cnn_model_path + ".json" + + return blobFile, blobFileConfig + + def compileBlob(self, nn_model, model_compilation_target): + blob_file, _ = self.getBlobFiles(nn_model) + + shave_nr = self.shave_nr + cmx_slices = self.cmx_slices + NCE_nr = self.NCE_nr + + if NCE_nr == 2: + if shave_nr % 2 == 1 or cmx_slices % 2 == 1: + raise ValueError("shave_nr and cmx_slices config must be even number when NCE is 2!") + shave_nr_opt = int(shave_nr / 2) + cmx_slices_opt = int(cmx_slices / 2) + else: + shave_nr_opt = int(shave_nr) + cmx_slices_opt = int(cmx_slices) + + outblob_file = blob_file + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr) + if(not Path(outblob_file).exists()): + cli_print("Compiling model for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.RED) + ret = download_and_compile_NN_model(nn_model, model_zoo_folder, shave_nr_opt, cmx_slices_opt, NCE_nr, outblob_file, model_compilation_target) + if(ret != 0): + cli_print("Model compile failed. Falling back to default.", PrintColors.WARNING) + raise RuntimeError("Model compilation failed! Not connected to the internet?") + else: + blob_file = outblob_file + else: + cli_print("Compiled mode found: compiled for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.GREEN) + blob_file = outblob_file + + return blob_file diff --git a/depthai_helpers/deeplabv3p_person.py b/depthai_helpers/deeplabv3p_person.py new file mode 100644 index 000000000..10447ab4e --- /dev/null +++ b/depthai_helpers/deeplabv3p_person.py @@ -0,0 +1,18 @@ +import cv2 +import numpy as np + +class_colors = [[0,0,0], [0,255,0]] +class_colors = np.asarray(class_colors, dtype=np.uint8) +output_colors = None + +def decode_deeplabv3p(nnet_packet, **kwargs): + output_tensor = nnet_packet.get_tensor(0) + output = output_tensor[0][0] + output_colors = np.take(class_colors, output, axis=0) + return output_colors + +def show_deeplabv3p(output_colors, frame, **kwargs): + if type(output_colors) is not list: + frame = cv2.addWeighted(frame,1, output_colors,0.2,0) + return frame + diff --git a/depthai_helpers/emotion_recognition_handler.py b/depthai_helpers/emotion_recognition_handler.py index 150375aa2..6d06907c4 100644 --- a/depthai_helpers/emotion_recognition_handler.py +++ b/depthai_helpers/emotion_recognition_handler.py @@ -1,23 +1,28 @@ import cv2 import numpy as np - +import json def decode_emotion_recognition(nnet_packet, **kwargs): + em_tensor = nnet_packet.get_tensor(0) detections = [] - for i in range(len(nnet_packet.entries()[0][0])): - detections.append(nnet_packet.entries()[0][0][i]) + for i in em_tensor[0]: + detections.append(i[0][0]) return detections +def decode_emotion_recognition_json(nnet_packet, **kwargs): + detections = decode_emotion_recognition(nnet_packet, **kwargs) + return json.dumps(detections) + def show_emotion_recognition(entries_prev, frame, **kwargs): - # img_h = frame.shape[0] - # img_w = frame.shape[1] - e_states = kwargs['labels'] + + NN_metadata = kwargs['NN_json'] + labels = NN_metadata['mappings']['labels'] if len(entries_prev) != 0: max_confidence = max(entries_prev) if(max_confidence > 0.7): - emotion = e_states[np.argmax(entries_prev)] + emotion = 
labels[np.argmax(entries_prev)] cv2.putText(frame, emotion, (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) frame = cv2.resize(frame, (300, 300)) - return frame \ No newline at end of file + return frame diff --git a/depthai_helpers/landmarks_recognition_handler.py b/depthai_helpers/landmarks_recognition_handler.py index afdfdea7f..202ba89da 100644 --- a/depthai_helpers/landmarks_recognition_handler.py +++ b/depthai_helpers/landmarks_recognition_handler.py @@ -1,29 +1,30 @@ import cv2 import numpy as np - +import json def decode_landmarks_recognition(nnet_packet, **kwargs): + landmarks_tensor = nnet_packet.get_tensor(0) landmarks = [] - for i in range(len(nnet_packet.entries()[0][0])): - landmarks.append(nnet_packet.entries()[0][0][i]) - - landmarks = list(zip(*[iter(landmarks)]*2)) + + landmarks_tensor = landmarks_tensor.reshape(landmarks_tensor.shape[1]) + landmarks = list(zip(*[iter(landmarks_tensor)]*2)) return landmarks -def show_landmarks_recognition(entries_prev, frame, **kwargs): +def decode_landmarks_recognition_json(nnet_packet, **kwargs): + landmarks = decode_landmarks_recognition(nnet_packet, **kwargs) + return json.dumps(landmarks) + +def show_landmarks_recognition(landmarks, frame, **kwargs): img_h = frame.shape[0] img_w = frame.shape[1] - if len(entries_prev) != 0: - for i in entries_prev: - try: - x = int(i[0]*img_h) - y = int(i[1]*img_w) - except: - continue + if len(landmarks) != 0: + for i in landmarks: + x = int(i[0]*img_h) + y = int(i[1]*img_w) # # print(x,y) cv2.circle(frame, (x,y), 3, (0, 0, 255)) frame = cv2.resize(frame, (300, 300)) - return frame \ No newline at end of file + return frame diff --git a/depthai_helpers/mobilenet_ssd_handler.py b/depthai_helpers/mobilenet_ssd_handler.py index 535fc4f7f..1826c019f 100644 --- a/depthai_helpers/mobilenet_ssd_handler.py +++ b/depthai_helpers/mobilenet_ssd_handler.py @@ -1,22 +1,88 @@ import cv2 +import json import numpy as np from datetime import datetime def decode_mobilenet_ssd(nnet_packet, **kwargs): + NN_metadata = kwargs['NN_json'] + output_format = NN_metadata['NN_config']['output_format'] config = kwargs['config'] - detections = [] - # the result of the MobileSSD has detection rectangles (here: entries), and we can iterate through them - for _, e in enumerate(nnet_packet.entries()): - # for MobileSSD entries are sorted by confidence - # {id == -1} or {confidence == 0} is the stopper (special for OpenVINO models and MobileSSD architecture) - if e[0]['id'] == -1.0 or e[0]['confidence'] == 0.0: - break - # save entry for further usage (as image package may arrive not the same time as nnet package) - # the lower confidence threshold - the more we get false positives - if e[0]['confidence'] > config['depth']['confidence_threshold']: - detections.append(e) + + detections_list = [] + if output_format == "detection": + detections = nnet_packet.getDetectedObjects() + for detection in detections: + detection_dict = detection.get_dict() + detections_list.append(detection_dict) + + else: + confidence_threshold = NN_metadata['NN_config']['confidence_threshold'] + res = nnet_packet.get_tensor(0) + decoding_lut = {"valid_detection" : 0, + "label" : 1, + "confidence" : 2, + "x_min" : 3, + "y_min" : 4, + "x_max" : 5, + "y_max" : 6 } + # iterate through pre-saved entries & draw rectangle & text on image: + for obj in res[0][0]: + confidence = obj[decoding_lut["confidence"]] + if obj[decoding_lut["valid_detection"]] == -1.0 or confidence == 0.0: + break + label = int(obj[decoding_lut["label"]]) + x_min = 
obj[decoding_lut["x_min"]] + y_min = obj[decoding_lut["y_min"]] + x_max = obj[decoding_lut["x_max"]] + y_max = obj[decoding_lut["y_max"]] + if confidence > confidence_threshold: + det_dict = dict(x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, label=label, confidence=confidence) + detections_list.append(det_dict) + + + stage2_detections=None + # Second-stage NN + if any(x in config['ai']['blob_file2'] for x in ['landmarks-regression-retail-0009', 'facial-landmarks-35-adas-0002'] ): + landmarks_tensor = nnet_packet.get_tensor(1) + landmarks = [] + + landmarks_tensor = landmarks_tensor.reshape(landmarks_tensor.shape[1]) + landmarks = list(zip(*[iter(landmarks_tensor)]*2)) + stage2_detections=landmarks + if 'emotions-recognition-retail-0003' in config['ai']['blob_file2']: + em_tensor = nnet_packet.get_tensor(1) + # Decode + emotion_data = [] + for i in em_tensor[0]: + emotion_data.append(i[0][0]) + stage2_detections=emotion_data + + + detections = dict(stage1=detections_list, stage2=stage2_detections) return detections +def decode_mobilenet_ssd_json(nnet_packet, **kwargs): + convertList = [] + + detections = decode_mobilenet_ssd(nnet_packet, **kwargs) + + for entry in detections: + jsonConvertDict = {} + jsonConvertDict["id"] = entry[0]["id"] + jsonConvertDict["label"] = entry[0]["label"] + jsonConvertDict["confidence"] = entry[0]["confidence"] + jsonConvertDict["left"] = entry[0]["left"] + jsonConvertDict["right"] = entry[0]["right"] + jsonConvertDict["top"] = entry[0]["top"] + jsonConvertDict["bottom"] = entry[0]["bottom"] + jsonConvertDict["distance_x"] = entry[0]["distance_x"] + jsonConvertDict["distance_y"] = entry[0]["distance_y"] + jsonConvertDict["distance_z"] = entry[0]["distance_z"] + convertList.append([jsonConvertDict]) + + jsonStr = json.dumps(convertList) + + return jsonStr def nn_to_depth_coord(x, y, nn2depth): x_depth = int(nn2depth['off_x'] + x * nn2depth['max_w']) @@ -31,29 +97,35 @@ def average_depth_coord(pt1, pt2, padding_factor): avg_pt2 = (pt2[0] - x_shift), (pt2[1] - y_shift) return avg_pt1, avg_pt2 -def show_mobilenet_ssd(entries_prev, frame, **kwargs): + +def show_mobilenet_ssd(detections, frame, **kwargs): is_depth = 'nn2depth' in kwargs if is_depth: nn2depth = kwargs['nn2depth'] config = kwargs['config'] - labels = kwargs['labels'] - img_h = frame.shape[0] - img_w = frame.shape[1] + NN_metadata = kwargs['NN_json'] + try: + labels = NN_metadata['mappings']['labels'] + except: + labels = None + + frame_h = frame.shape[0] + frame_w = frame.shape[1] last_detected = datetime.now() # iterate through pre-saved entries & draw rectangle & text on image: - iteration = 0 - for e in entries_prev: + for idx, detection in enumerate(detections["stage1"]): + # print(detection) if is_depth: - pt1 = nn_to_depth_coord(e[0]['left'], e[0]['top'], nn2depth) - pt2 = nn_to_depth_coord(e[0]['right'], e[0]['bottom'], nn2depth) + pt1 = nn_to_depth_coord(detection["x_min"], detection["y_min"], nn2depth) + pt2 = nn_to_depth_coord(detection["x_max"], detection["y_max"], nn2depth) color = (255, 0, 0) # bgr avg_pt1, avg_pt2 = average_depth_coord(pt1, pt2, config['depth']['padding_factor']) cv2.rectangle(frame, avg_pt1, avg_pt2, color) color = (255, 255, 255) # bgr else: - pt1 = int(e[0]['left'] * img_w), int(e[0]['top'] * img_h) - pt2 = int(e[0]['right'] * img_w), int(e[0]['bottom'] * img_h) + pt1 = int(detection["x_min"] * frame_w), int(detection["y_min"] * frame_h) + pt2 = int(detection["x_max"] * frame_w), int(detection["y_max"] * frame_h) color = (0, 0, 255) # bgr x1, y1 = pt1 @@ -61,60 
+133,53 @@ def show_mobilenet_ssd(entries_prev, frame, **kwargs): cv2.rectangle(frame, pt1, pt2, color) # Handles case where TensorEntry object label is out if range - if e[0]['label'] > len(labels): - print("Label index=",e[0]['label'], "is out of range. Not applying text to rectangle.") + if labels is not None: + label = labels[detection["label"]] else: - pt_t1 = x1, y1 + 20 - cv2.putText(frame, labels[int(e[0]['label'])], pt_t1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) - - pt_t2 = x1, y1 + 40 - cv2.putText(frame, '{:.2f}'.format(100*e[0]['confidence']) + ' %', pt_t2, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) - if config['ai']['calc_dist_to_bb']: - pt_t3 = x1, y1 + 60 - cv2.putText(frame, 'x:' '{:7.3f}'.format(e[0]['distance_x']) + ' m', pt_t3, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) - - pt_t4 = x1, y1 + 80 - cv2.putText(frame, 'y:' '{:7.3f}'.format(e[0]['distance_y']) + ' m', pt_t4, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) - - pt_t5 = x1, y1 + 100 - cv2.putText(frame, 'z:' '{:7.3f}'.format(e[0]['distance_z']) + ' m', pt_t5, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) - # Second-stage NN - if iteration == 0: # For now we run second-stage only on first detection - if 'landmarks-regression-retail-0009' in config['ai']['blob_file2']: - # Decode - landmarks = [] - for i in range(len(e[1])): - landmarks.append(e[1][i]) - landmarks = list(zip(*[iter(landmarks)]*2)) - # Show - bb_w = x2 - x1 - bb_h = y2 - y1 - for i in landmarks: - try: - x = x1 + int(i[0]*bb_w) - y = y1 + int(i[1]*bb_h) - except: - continue - cv2.circle(frame, (x,y), 4, (255, 0, 0)) - if 'emotions-recognition-retail-0003' in config['ai']['blob_file2']: - # Decode - emotion_data = [] - for i in range(len(e[1])): - emotion_data.append(e[1][i]) - # Show - e_states = { - 0 : "neutral", - 1 : "happy", - 2 : "sad", - 3 : "surprise", - 4 : "anger" - } - pt_t3 = x2-50, y2-10 - max_confidence = max(emotion_data) - if(max_confidence > 0.7): - emotion = e_states[np.argmax(emotion_data)] - if (datetime.now() - last_detected).total_seconds() < 100: - cv2.putText(frame, emotion, pt_t3, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255, 0), 2) - iteration += 1 - return frame + label = str(detection["label"]) + pt_t1 = x1, y1 + 20 + cv2.putText(frame, label, pt_t1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2) + + pt_t2 = x1, y1 + 40 + cv2.putText(frame, '{:.2f}'.format(100*detection["confidence"]) + ' %', pt_t2, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) + if config['ai']['calc_dist_to_bb']: + pt_t3 = x1, y1 + 60 + cv2.putText(frame, 'x:' '{:7.3f}'.format(detection["depth_x"]) + ' m', pt_t3, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) + + pt_t4 = x1, y1 + 80 + cv2.putText(frame, 'y:' '{:7.3f}'.format(detection["depth_y"]) + ' m', pt_t4, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) + + pt_t5 = x1, y1 + 100 + cv2.putText(frame, 'z:' '{:7.3f}'.format(detection["depth_z"]) + ' m', pt_t5, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color) + + # Second-stage NN + if idx == 0: # For now we run second-stage only on first detection + if any(x in config['ai']['blob_file2'] for x in ['landmarks-regression-retail-0009', 'facial-landmarks-35-adas-0002'] ): + landmarks = detections["stage2"] + # Show + bb_w = x2 - x1 + bb_h = y2 - y1 + for i in landmarks: + x = x1 + int(i[0]*bb_w) + y = y1 + int(i[1]*bb_h) + cv2.circle(frame, (x,y), 4, (255, 0, 0)) + if 'emotions-recognition-retail-0003' in config['ai']['blob_file2']: + # Decode + emotion_data = detections["stage2"] + # Show + e_states = { + 0 : "neutral", + 1 : "happy", + 2 : "sad", + 3 : "surprise", + 4 : "anger" + } + pt_t3 = x2-50, y2-10 + max_confidence = 
max(emotion_data) + if(max_confidence > 0.6): + emotion = e_states[np.argmax(emotion_data)] + if (datetime.now() - last_detected).total_seconds() < 100: + cv2.putText(frame, emotion, pt_t3, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255, 0), 2) + + return frame diff --git a/depthai_helpers/model_downloader.py b/depthai_helpers/model_downloader.py deleted file mode 100644 index 5b77b8688..000000000 --- a/depthai_helpers/model_downloader.py +++ /dev/null @@ -1,30 +0,0 @@ -import requests - - -def download_model(model, shaves, cmx_slices, nces, output_file): - PLATFORM="VPU_MYRIAD_2450" if nces == 0 else "VPU_MYRIAD_2480" - - url = "http://luxonis.com:8080/" - payload = { - 'compile_type': 'zoo', - 'model_name': model, - 'model_downloader_params': '--precisions FP16 --num_attempts 5', - 'intermediate_compiler_params': '--data_type=FP16 --mean_values [127.5,127.5,127.5] --scale_values [255,255,255]', - 'compiler_params': '-ip U8 -VPU_MYRIAD_PLATFORM ' + PLATFORM + ' -VPU_NUMBER_OF_SHAVES ' + str(shaves) +' -VPU_NUMBER_OF_CMX_SLICES ' + str(cmx_slices) - } - try: - response = requests.request("POST", url, data=payload) - except: - print("Connection timed out!") - return 1 - - if response.status_code == 200: - blob_file = open(output_file, 'wb') - blob_file.write(response.content) - blob_file.close() - else: - print("Model compilation failed with error code: " + str(response.status_code)) - print(str(response.text.encode('utf8'))) - return 2 - - return 0 \ No newline at end of file diff --git a/depthai_helpers/openpose2_handler.py b/depthai_helpers/openpose2_handler.py new file mode 100644 index 000000000..74ac6b171 --- /dev/null +++ b/depthai_helpers/openpose2_handler.py @@ -0,0 +1,186 @@ +import cv2 +import numpy as np +import depthai + +keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip', 'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear'] +POSE_PAIRS = [[1,2], [1,5], [2,3], [3,4], [5,6], [6,7], [1,8], [8,9], [9,10], [1,11], [11,12], [12,13], [1,0], [0,14], [14,16], [0,15], [15,17], [2,17], [5,16]] +mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], [55,56], [37,38], [45,46]] +colors = [[0,100,255], [0,100,255], [0,255,255], [0,100,255], [0,255,255], [0,100,255], [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255], [0,0,255], [255,0,0], [200,200,0], [255,0,0], [200,200,0], [0,0,0]] + +def getKeypoints(probMap, threshold=0.2): + + mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0) + mapMask = np.uint8(mapSmooth>threshold) + keypoints = [] + contours = None + try: + #OpenCV4.x + contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + except: + #OpenCV3.x + _, contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + for cnt in contours: + blobMask = np.zeros(mapMask.shape) + blobMask = cv2.fillConvexPoly(blobMask, cnt, 1) + maskedProbMap = mapSmooth * blobMask + _, maxVal, _, maxLoc = cv2.minMaxLoc(maskedProbMap) + keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],)) + + return keypoints + + +def getValidPairs(outputs, w, h,detected_keypoints): + valid_pairs = [] + invalid_pairs = [] + n_interp_samples = 10 + paf_score_th = 0.2 + conf_th = 0.4 + for k in range(len(mapIdx)): + + pafA = outputs[0, mapIdx[k][0], :, :] + pafB = outputs[0, mapIdx[k][1], :, :] + pafA = cv2.resize(pafA, (w, h)) + pafB = cv2.resize(pafB, (w, h)) + candA = 
detected_keypoints[POSE_PAIRS[k][0]] + + candB = detected_keypoints[POSE_PAIRS[k][1]] + + nA = len(candA) + nB = len(candB) + + if( nA != 0 and nB != 0): + valid_pair = np.zeros((0,3)) + for i in range(nA): + max_j=-1 + maxScore = -1 + found = 0 + for j in range(nB): + d_ij = np.subtract(candB[j][:2], candA[i][:2]) + norm = np.linalg.norm(d_ij) + if norm: + d_ij = d_ij / norm + else: + continue + interp_coord = list(zip(np.linspace(candA[i][0], candB[j][0], num=n_interp_samples), + np.linspace(candA[i][1], candB[j][1], num=n_interp_samples))) + paf_interp = [] + for k in range(len(interp_coord)): + paf_interp.append([pafA[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))], + pafB[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))] ]) + paf_scores = np.dot(paf_interp, d_ij) + avg_paf_score = sum(paf_scores)/len(paf_scores) + + if ( len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples ) > conf_th : + if avg_paf_score > maxScore: + max_j = j + maxScore = avg_paf_score + found = 1 + if found: + valid_pair = np.append(valid_pair, [[candA[i][3], candB[max_j][3], maxScore]], axis=0) + + valid_pairs.append(valid_pair) + else: + invalid_pairs.append(k) + valid_pairs.append([]) + return valid_pairs, invalid_pairs + + +def getPersonwiseKeypoints(valid_pairs, invalid_pairs,keypoints_list): + personwiseKeypoints = -1 * np.ones((0, 19)) + + for k in range(len(mapIdx)): + if k not in invalid_pairs: + partAs = valid_pairs[k][:,0] + partBs = valid_pairs[k][:,1] + indexA, indexB = np.array(POSE_PAIRS[k]) + + for i in range(len(valid_pairs[k])): + found = 0 + person_idx = -1 + for j in range(len(personwiseKeypoints)): + if personwiseKeypoints[j][indexA] == partAs[i]: + person_idx = j + found = 1 + break + + if found: + personwiseKeypoints[person_idx][indexB] = partBs[i] + personwiseKeypoints[person_idx][-1] += keypoints_list[partBs[i].astype(int), 2] + valid_pairs[k][i][2] + + elif not found and k < 17: + row = -1 * np.ones(19) + row[indexA] = partAs[i] + row[indexB] = partBs[i] + row[-1] = sum(keypoints_list[valid_pairs[k][i,:2].astype(int), 2]) + valid_pairs[k][i][2] + personwiseKeypoints = np.vstack([personwiseKeypoints, row]) + return personwiseKeypoints + + + +threshold = 0.3 +nPoints = 18 +w=432 +h=368 +detected_keypoints = [] +def decode_openpose(nnet_packet, **kwargs): + # openpose from openvino has pafs in first output, keyints in second + output_list = nnet_packet.getOutputsList() + + # out_layers = nnet_packet.getOutputLayersInfo() + # print((out_layers)) + + # print(first_layer) + # print(in_layers) + + in_layers = nnet_packet.getInputLayersInfo() + first_layer = in_layers[0] + w = first_layer.get_dimension(depthai.TensorInfo.Dimension.WIDTH) + h = first_layer.get_dimension(depthai.TensorInfo.Dimension.HEIGHT) + + outputs = np.copy(output_list[0]) + outputs = outputs.astype('float32') + + detected_keypoints = [] + keypoints_list = np.zeros((0, 3)) + keypoint_id = 0 + + for part in range(nPoints): + probMap = outputs[0,part,:,:] + probMap = cv2.resize(probMap, (w, h)) # (456, 256) + keypoints = getKeypoints(probMap, threshold) + keypoints_with_id = [] + + for i in range(len(keypoints)): + keypoints_with_id.append(keypoints[i] + (keypoint_id,)) + keypoints_list = np.vstack([keypoints_list, keypoints[i]]) + keypoint_id += 1 + + detected_keypoints.append(keypoints_with_id) + + valid_pairs, invalid_pairs = getValidPairs(outputs, w, h,detected_keypoints) + personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs,keypoints_list) + keypoints_limbs = 
[detected_keypoints,personwiseKeypoints,keypoints_list] + + return keypoints_limbs + +def show_openpose(keypoints_limbs, frame, **kwargs): + frame = np.uint8(frame.copy()) + if len(keypoints_limbs) == 3: + detected_keypoints = keypoints_limbs[0] + personwiseKeypoints = keypoints_limbs[1] + keypoints_list = keypoints_limbs[2] + + for i in range(nPoints): + for j in range(len(detected_keypoints[i])): + cv2.circle(frame, detected_keypoints[i][j][0:2], 5, colors[i], -1, cv2.LINE_AA) + for i in range(17): + for n in range(len(personwiseKeypoints)): + index = personwiseKeypoints[n][np.array(POSE_PAIRS[i])] + if -1 in index: + continue + B = np.int32(keypoints_list[index.astype(int), 0]) + A = np.int32(keypoints_list[index.astype(int), 1]) + cv2.line(frame, (B[0], A[0]), (B[1], A[1]), colors[i], 3, cv2.LINE_AA) + return frame + diff --git a/depthai_helpers/openpose_handler.py b/depthai_helpers/openpose_handler.py new file mode 100644 index 000000000..16321ad18 --- /dev/null +++ b/depthai_helpers/openpose_handler.py @@ -0,0 +1,178 @@ +import cv2 +import numpy as np + +keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip', 'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear'] +POSE_PAIRS = [[1,2], [1,5], [2,3], [3,4], [5,6], [6,7], [1,8], [8,9], [9,10], [1,11], [11,12], [12,13], [1,0], [0,14], [14,16], [0,15], [15,17], [2,17], [5,16]] +mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], [55,56], [37,38], [45,46]] +colors = [[0,100,255], [0,100,255], [0,255,255], [0,100,255], [0,255,255], [0,100,255], [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255], [0,0,255], [255,0,0], [200,200,0], [255,0,0], [200,200,0], [0,0,0]] + +def getKeypoints(probMap, threshold=0.2): + + mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0) + mapMask = np.uint8(mapSmooth>threshold) + keypoints = [] + contours = None + try: + #OpenCV4.x + contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + except: + #OpenCV3.x + _, contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + for cnt in contours: + blobMask = np.zeros(mapMask.shape) + blobMask = cv2.fillConvexPoly(blobMask, cnt, 1) + maskedProbMap = mapSmooth * blobMask + _, maxVal, _, maxLoc = cv2.minMaxLoc(maskedProbMap) + keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],)) + + return keypoints + + +def getValidPairs(outputs, w, h,detected_keypoints): + valid_pairs = [] + invalid_pairs = [] + n_interp_samples = 10 + paf_score_th = 0.2 + conf_th = 0.4 + for k in range(len(mapIdx)): + + pafA = outputs[0, mapIdx[k][0], :, :] + pafB = outputs[0, mapIdx[k][1], :, :] + pafA = cv2.resize(pafA, (w, h)) + pafB = cv2.resize(pafB, (w, h)) + candA = detected_keypoints[POSE_PAIRS[k][0]] + + candB = detected_keypoints[POSE_PAIRS[k][1]] + + nA = len(candA) + nB = len(candB) + + if( nA != 0 and nB != 0): + valid_pair = np.zeros((0,3)) + for i in range(nA): + max_j=-1 + maxScore = -1 + found = 0 + for j in range(nB): + d_ij = np.subtract(candB[j][:2], candA[i][:2]) + norm = np.linalg.norm(d_ij) + if norm: + d_ij = d_ij / norm + else: + continue + interp_coord = list(zip(np.linspace(candA[i][0], candB[j][0], num=n_interp_samples), + np.linspace(candA[i][1], candB[j][1], num=n_interp_samples))) + paf_interp = [] + for k in range(len(interp_coord)): + paf_interp.append([pafA[int(round(interp_coord[k][1])), 
int(round(interp_coord[k][0]))], + pafB[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))] ]) + paf_scores = np.dot(paf_interp, d_ij) + avg_paf_score = sum(paf_scores)/len(paf_scores) + + if ( len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples ) > conf_th : + if avg_paf_score > maxScore: + max_j = j + maxScore = avg_paf_score + found = 1 + if found: + valid_pair = np.append(valid_pair, [[candA[i][3], candB[max_j][3], maxScore]], axis=0) + + valid_pairs.append(valid_pair) + else: + invalid_pairs.append(k) + valid_pairs.append([]) + return valid_pairs, invalid_pairs + + +def getPersonwiseKeypoints(valid_pairs, invalid_pairs,keypoints_list): + personwiseKeypoints = -1 * np.ones((0, 19)) + + for k in range(len(mapIdx)): + if k not in invalid_pairs: + partAs = valid_pairs[k][:,0] + partBs = valid_pairs[k][:,1] + indexA, indexB = np.array(POSE_PAIRS[k]) + + for i in range(len(valid_pairs[k])): + found = 0 + person_idx = -1 + for j in range(len(personwiseKeypoints)): + if personwiseKeypoints[j][indexA] == partAs[i]: + person_idx = j + found = 1 + break + + if found: + personwiseKeypoints[person_idx][indexB] = partBs[i] + personwiseKeypoints[person_idx][-1] += keypoints_list[partBs[i].astype(int), 2] + valid_pairs[k][i][2] + + elif not found and k < 17: + row = -1 * np.ones(19) + row[indexA] = partAs[i] + row[indexB] = partBs[i] + row[-1] = sum(keypoints_list[valid_pairs[k][i,:2].astype(int), 2]) + valid_pairs[k][i][2] + personwiseKeypoints = np.vstack([personwiseKeypoints, row]) + return personwiseKeypoints + + + +threshold = 0.3 +nPoints = 18 +w=456 +h=256 +detected_keypoints = [] +def decode_openpose(nnet_packet, **kwargs): + # openpose from openvino has pafs in first output, keypoints in second + output_list = nnet_packet.getOutputsList() + heatmaps = np.copy(output_list[1]) + pafs = np.copy(output_list[0]) + + heatmaps = heatmaps.astype('float32') + pafs = pafs.astype('float32') + + outputs = np.concatenate((heatmaps,pafs),axis=1) + + detected_keypoints = [] + keypoints_list = np.zeros((0, 3)) + keypoint_id = 0 + + for part in range(nPoints): + probMap = outputs[0,part,:,:] + probMap = cv2.resize(probMap, (456, 256)) # (456, 256) + keypoints = getKeypoints(probMap, threshold) + keypoints_with_id = [] + + for i in range(len(keypoints)): + keypoints_with_id.append(keypoints[i] + (keypoint_id,)) + keypoints_list = np.vstack([keypoints_list, keypoints[i]]) + keypoint_id += 1 + + detected_keypoints.append(keypoints_with_id) + + valid_pairs, invalid_pairs = getValidPairs(outputs, 456, 256,detected_keypoints) + personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs,keypoints_list) + keypoints_limbs = [detected_keypoints,personwiseKeypoints,keypoints_list] + + return keypoints_limbs + +def show_openpose(keypoints_limbs, frame, **kwargs): + frame = np.uint8(frame.copy()) + if len(keypoints_limbs) == 3: + detected_keypoints = keypoints_limbs[0] + personwiseKeypoints = keypoints_limbs[1] + keypoints_list = keypoints_limbs[2] + + for i in range(nPoints): + for j in range(len(detected_keypoints[i])): + cv2.circle(frame, detected_keypoints[i][j][0:2], 5, colors[i], -1, cv2.LINE_AA) + for i in range(17): + for n in range(len(personwiseKeypoints)): + index = personwiseKeypoints[n][np.array(POSE_PAIRS[i])] + if -1 in index: + continue + B = np.int32(keypoints_list[index.astype(int), 0]) + A = np.int32(keypoints_list[index.astype(int), 1]) + cv2.line(frame, (B[0], A[0]), (B[1], A[1]), colors[i], 3, cv2.LINE_AA) + return frame + diff --git 
a/depthai_helpers/projector_3d.py b/depthai_helpers/projector_3d.py new file mode 100644 index 000000000..c665babc9 --- /dev/null +++ b/depthai_helpers/projector_3d.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 + +import numpy as np +import open3d as o3d + +class PointCloudVisualizer(): + def __init__(self, intrinsic_matrix, width, height): + self.depth_map = None + self.rgb = None + self.pcl = None + # self.depth_scale = depth_scale + # self.depth_trunc = depth_trunc + # self.rgbd_mode = rgbd_mode + # self.pinhole_camera_intrinsic = o3d.io.read_pinhole_camera_intrinsic(intrinsic_file) + + self.pinhole_camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(width, + height, + intrinsic_matrix[0][0], + intrinsic_matrix[1][1], + intrinsic_matrix[0][2], + intrinsic_matrix[1][2]) + self.vis = o3d.visualization.Visualizer() + self.vis.create_window() + self.isstarted = False + + + # intrinsic = o3d.camera.PinholeCameraIntrinsic() + # print(str(type(intrinsic))) + # open3d::camera::PinholeCameraIntrinsic instrinsic; # TODO: Try this + # instrinsic.SetIntrinsics(480, 272, 282.15, 321.651, 270, 153.9); + # self.vis.add_geometry(self.pcl) + + + # def dummy_pcl(self): + # x = np.linspace(-3, 3, 401) + # mesh_x, mesh_y = np.meshgrid(x, x) + # z = np.sinc((np.power(mesh_x, 2) + np.power(mesh_y, 2))) + # z_norm = (z - z.min()) / (z.max() - z.min()) + # xyz = np.zeros((np.size(mesh_x), 3)) + # xyz[:, 0] = np.reshape(mesh_x, -1) + # xyz[:, 1] = np.reshape(mesh_y, -1) + # xyz[:, 2] = np.reshape(z_norm, -1) + # pcd = o3d.geometry.PointCloud() + # pcd.points = o3d.utility.Vector3dVector(xyz) + # return pcd + + def rgbd_to_projection(self, depth_map, rgb): + self.depth_map = depth_map + self.rgb = rgb + rgb_o3d = o3d.geometry.Image(self.rgb) + depth_o3d = o3d.geometry.Image(self.depth_map) + rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(rgb_o3d, depth_o3d) + if self.pcl is None: + self.pcl = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, self.pinhole_camera_intrinsic) + else: + pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, self.pinhole_camera_intrinsic) + self.pcl.points = pcd.points + self.pcl.colors = pcd.colors + return self.pcl + + + def visualize_pcd(self): + if not self.isstarted: + self.vis.add_geometry(self.pcl) + origin = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0]) + self.vis.add_geometry(origin) + self.isstarted = True + else: + self.vis.update_geometry(self.pcl) + self.vis.poll_events() + self.vis.update_renderer() + + # def depthmap_to_projection(self, depth_map): + # # depth_map[depth_map == 65535] = 0 + # # self.depth_map = 65535 // depth_map + # self.depth_map = depth_map + # img = o3d.geometry.Image(self.depth_map) + # + # if not self.isstarted: + # print("inhere") + # self.pcl = o3d.geometry.PointCloud.create_from_depth_image(img, self.pinhole_camera_intrinsic) + # self.vis.add_geometry(self.pcl) + # self.isstarted = True + # else: + # print("not inhere") + # pcd = o3d.geometry.PointCloud.create_from_depth_image(img, self.pinhole_camera_intrinsic, depth_scale = 1000, depth_trunc = 50) + # self.pcl.points = pcd.points + # self.vis.update_geometry(self.pcl) + # self.vis.poll_events() + # self.vis.update_renderer() + + def close_window(self): + self.vis.destroy_window() + + + +def depthmap_to_projection(depth_map, intrinsic, stride = 1, depth_scale = 1, depth_trunc = 96): + # o3d_intrinsic_matrix = o3d.camera.PinholeCameraIntrinsicParameters() + # pinhole_camera_intrinsic + # 
o3d_intrinsic_matrix.set_intrinsics(depth_map.shape[1], depth_map.shape[0], + # intrinsic[0, 0], intrinsic[1, 1], + # intrinsic[0, 2], intrinsic[1, 2]) + + img = o3d.geometry.Image((depth_map)) + pinhole_camera_intrinsic = o3d.io.read_pinhole_camera_intrinsic("intrinsic.json") + print(pinhole_camera_intrinsic.intrinsic_matrix) + + # pcd = o3d.geometry.PointCloud.create_from_depth_image(depth_map, pinhole_camera_intrinsic, depth_scale=depth_scale, + # depth_trunc=depth_trunc, stride=stride) + pcd = o3d.geometry.PointCloud.create_from_depth_image(img, pinhole_camera_intrinsic) + + return pcd + +def visualize(pcd): + + # o3d.visualization.draw_geometries([pcd], zoom=0.3412, + # front=[0.4257, -0.2125, -0.8795], + # lookat=[2.6172, 2.0475, 1.532], + # up=[-0.0694, -0.9768, 0.2024]) + # downpcd = pcd.voxel_down_sample(voxel_size=0.05) + o3d.visualization.draw_geometries([pcd]) + +# def depthmap_to_projection(depth_map, M): # can add step size which does subsampling +# c_x = M[2,2] +# c_y = M[2,2] +# f_x = M[0,0] +# f_y = M[1,1] +# point_cloud = [] +# rows, cols = depth_map.shape +# for u in range(rows): +# for v in range(cols): +# if(depth_map[u,v] == 65535 or depth_map[u,v] == 0) # Not sure about zero +# continue +# z = depth_map[u,v] +# x = (u - c_x) * z / f_x +# y = (v - c_y) * z / f_y +# point_cloud.append((x,y,z)) +# return point_cloud + + diff --git a/depthai_helpers/tiny_yolo_v3_handler.py b/depthai_helpers/tiny_yolo_v3_handler.py index bb4b21134..c4436bad2 100644 --- a/depthai_helpers/tiny_yolo_v3_handler.py +++ b/depthai_helpers/tiny_yolo_v3_handler.py @@ -2,24 +2,19 @@ import cv2 import numpy as np from time import time - -# Adjust these thresholds -detection_threshold = 0.60 -IOU_THRESHOLD = 0.4 +import json +import depthai class YoloParams: # ------------------------------------------- Extracting layer parameters ------------------------------------------ - def __init__(self, side): + def __init__(self, side, mask, coords, classes, anchors): self.num = 3 - self.coords = 4 - self.classes = 3 - self.anchors = [10,14, 23,27, 37,58, 81,82, 135,169, 344,319] + self.coords = coords + self.classes = classes + self.anchors = anchors + + self.num = len(mask) - if side ==26: - mask=[0,1,2] - self.num = len(mask) - else: - mask=[3,4,5] maskedAnchors = [] for idx in mask: maskedAnchors += [self.anchors[idx * 2], self.anchors[idx * 2 + 1]] @@ -117,54 +112,91 @@ def intersection_over_union(box_1, box_2): def decode_tiny_yolo(nnet_packet, **kwargs): - raw_detections = nnet_packet.get_tensor("out") - raw_detections.dtype = np.float16 - NN_outputs = nnet_packet.entries()[0] - output_shapes = [(1, 24, 26, 26), (1, 24, 13, 13)] - - NN_output_list = [] - - prev_offset = 0 - for i in range(len(nnet_packet.entries()[0])): #outblob - n_size = len(NN_outputs[i]) - output = raw_detections[prev_offset:prev_offset+n_size] - output = np.reshape(output, output_shapes[i]) - NN_output_list.append(output) - prev_offset += n_size - - #render_time = 0 - #parsing_time = 0 - - # ----------------------------------------------- 6. 
Doing inference ----------------------------------------------- - #log.info("Starting inference...") - - objects = list() - resized_image_shape =[416,416] - original_image_shape =[416,416] - threshold = 0.5 - iou_threshold=0.5 - - - start_time = time() - for out_blob in NN_output_list: - - l_params = YoloParams(out_blob.shape[2]) - objects += parse_yolo_region(out_blob, resized_image_shape, - original_image_shape, l_params, - threshold) - parsing_time = time() - start_time + NN_metadata = kwargs['NN_json'] + output_format = NN_metadata['NN_config']['output_format'] + + in_layers = nnet_packet.getInputLayersInfo() + # print(in_layers) + input_width = in_layers[0].get_dimension(depthai.TensorInfo.Dimension.W) + input_height = in_layers[0].get_dimension(depthai.TensorInfo.Dimension.H) + + if output_format == "detection": + detections = nnet_packet.getDetectedObjects() + objects = list() + # detection_nr = detections.size + # for i in range(detection_nr): + # detection =detections[i] + for detection in detections: + confidence = detection.confidence + class_id = detection.label + xmin = int(detection.x_min * input_width) + xmax = int(detection.x_max * input_width) + ymin = int(detection.y_min * input_height) + ymax = int(detection.y_max * input_height) + depth_x = detection.depth_x + depth_y = detection.depth_y + depth_z = detection.depth_z + scaled_object = dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, class_id=class_id, confidence=confidence, depth_x=depth_x, depth_y=depth_y, depth_z=depth_z) + objects.append(scaled_object) + + return objects + else: + + output_list = nnet_packet.getOutputsList() + + objects = list() + resized_image_shape =[input_width,input_height] + original_image_shape =[input_width,input_height] + iou_threshold = NN_metadata['NN_config']['NN_specific_metadata']['iou_threshold'] + + + start_time = time() + for out_blob in output_list: + side = out_blob.shape[2] + side_str = 'side' + str(side) + mask = NN_metadata['NN_config']['NN_specific_metadata']['anchor_masks'][side_str] + coords = NN_metadata['NN_config']['NN_specific_metadata']['coordinates'] + classes = NN_metadata['NN_config']['NN_specific_metadata']['classes'] + anchors = NN_metadata['NN_config']['NN_specific_metadata']['anchors'] + detection_threshold = NN_metadata['NN_config']['NN_specific_metadata']['confidence_threshold'] + + l_params = YoloParams(side, mask, coords, classes, anchors) + objects += parse_yolo_region(out_blob, resized_image_shape, + original_image_shape, l_params, + detection_threshold) + parsing_time = time() - start_time # Filtering overlapping boxes with respect to the --iou_threshold CLI parameter - objects = sorted(objects, key=lambda obj : obj['confidence'], reverse=True) - for i in range(len(objects)): - if objects[i]['confidence'] == 0: - continue - for j in range(i + 1, len(objects)): - if intersection_over_union(objects[i], objects[j]) > iou_threshold: - objects[j]['confidence'] = 0 - - filtered_objects=objects - return filtered_objects + objects = sorted(objects, key=lambda obj : obj['confidence'], reverse=True) + for i in range(len(objects)): + if objects[i]['confidence'] == 0: + continue + for j in range(i + 1, len(objects)): + if intersection_over_union(objects[i], objects[j]) > iou_threshold: + objects[j]['confidence'] = 0 + + objects = [obj for obj in objects if obj['confidence'] >= detection_threshold] + + return objects + +def decode_tiny_yolo_json(nnet_packet, **kwargs): + convertList = [] + + filtered_objects = decode_tiny_yolo(nnet_packet, **kwargs) + for entry in 
filtered_objects: + jsonConvertDict = {} + jsonConvertDict["xmin"] = entry["xmin"] + jsonConvertDict["ymin"] = entry["ymin"] + jsonConvertDict["xmax"] = entry["xmax"] + jsonConvertDict["ymax"] = entry["ymax"] + if type(entry["confidence"]) is np.float16: + jsonConvertDict["confidence"] = entry["confidence"].item() + else: + jsonConvertDict["confidence"] = entry["confidence"] + jsonConvertDict["class_id"] = entry["class_id"] + convertList.append(jsonConvertDict) + + return json.dumps(convertList) BOX_COLOR = (0,255,0) LABEL_BG_COLOR = (70, 120, 70) # greyish green background for text @@ -172,21 +204,30 @@ def decode_tiny_yolo(nnet_packet, **kwargs): TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX def show_tiny_yolo(filtered_objects, frame, **kwargs): - labels = kwargs['labels'] - filtered_objects = [obj for obj in filtered_objects if obj['confidence'] >= detection_threshold] - for object_index in range(len(filtered_objects)): + NN_metadata = kwargs['NN_json'] + labels = NN_metadata['mappings']['labels'] + config = kwargs['config'] + + for detection in filtered_objects: # get all values from the filtered object list - xmin = filtered_objects[object_index]['xmin'] - ymin = filtered_objects[object_index]['ymin'] - xmax = filtered_objects[object_index]['xmax'] - ymax = filtered_objects[object_index]['ymax'] - confidence = filtered_objects[object_index]['confidence'] - class_id = filtered_objects[object_index]['class_id'] + xmin = detection['xmin'] + ymin = detection['ymin'] + xmax = detection['xmax'] + ymax = detection['ymax'] + confidence = detection['confidence'] + class_id = detection['class_id'] - # Set up the text for display + # Set up the text for display cv2.rectangle(frame,(xmin, ymin), (xmax, ymin+20), LABEL_BG_COLOR, -1) cv2.putText(frame, labels[class_id] + ': %.2f' % confidence, (xmin+5, ymin+15), TEXT_FONT, 0.5, TEXT_COLOR, 1) - # Set up the bounding box + # Set up the bounding box cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), BOX_COLOR, 1) - return frame \ No newline at end of file + if config['ai']['calc_dist_to_bb']: + depth_x = detection['depth_x'] + depth_y = detection['depth_y'] + depth_z = detection['depth_z'] + cv2.putText(frame, 'x:' '{:7.3f}'.format(depth_x) + ' m', (xmin, ymin+60), TEXT_FONT, 0.5, TEXT_COLOR) + cv2.putText(frame, 'y:' '{:7.3f}'.format(depth_y) + ' m', (xmin, ymin+80), TEXT_FONT, 0.5, TEXT_COLOR) + cv2.putText(frame, 'z:' '{:7.3f}'.format(depth_z) + ' m', (xmin, ymin+100), TEXT_FONT, 0.5, TEXT_COLOR) + return frame diff --git a/depthai_helpers/version_check.py b/depthai_helpers/version_check.py new file mode 100644 index 000000000..c43cab664 --- /dev/null +++ b/depthai_helpers/version_check.py @@ -0,0 +1,31 @@ +import depthai +from pathlib import Path + + +def get_version_from_requirements(): + with Path('requirements.txt').open() as f: + datafile = f.readlines() + for line in datafile: + if 'depthai' in line: + #not commented out + if not line.startswith('#'): + try: + version = line.split('==')[1] + #remove any whitespace + version = version.strip() + except: + version = None + return version + return None + +def check_depthai_version(): + version_required = get_version_from_requirements() + if version_required is not None: + print('Depthai version required: ', version_required) + if depthai.__version__.endswith('+dev'): + print('Depthai development version found, skipping check.') + elif version_required != depthai.__version__: + raise ValueError(f"\033[1;5;31mVersion mismatch \033[0m\033[91m between installed depthai lib and the required one by the 
script.\033[0m \n\ + Required: {version_required}\n\ + Installed: {depthai.__version__}\n\ + \033[91mRun: ./install_requirements.sh \033[0m") \ No newline at end of file diff --git a/depthai_profiler.py b/depthai_profiler.py new file mode 100755 index 000000000..f76f87d8a --- /dev/null +++ b/depthai_profiler.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 + +#depthai function profiler +import subprocess +import sys +import numpy as np + +#this is a debugging tool, that's why it's not added to requirements.txt +try: + import snakeviz +except ImportError: + raise ImportError('\033[1;5;31m snakeviz module not found, run: \033[0m python3 -m pip install snakeviz ') + +if __name__ == "__main__": + output_profile_file = 'depthai.prof' + cmd = ["python3", "-m", "cProfile", "-o", output_profile_file, "-s", "tottime", "depthai_demo.py"] + cmd = np.concatenate((cmd, sys.argv[1:])) + print(cmd) + + subprocess.run(cmd) + subprocess.run(["snakeviz", output_profile_file]) diff --git a/depthai_supervisor.py b/depthai_supervisor.py index 275097c9d..fe8a3dea9 100755 --- a/depthai_supervisor.py +++ b/depthai_supervisor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -#process watchdog, used to recover depthai.py -#on any userspace error in depthai.py (segfault for example) +#process watchdog, used to recover depthai-demo.py +#on any userspace error in depthai-demo.py (segfault for example) import os import signal import subprocess @@ -36,7 +36,7 @@ def cleanup(): while(run): - p = subprocess.Popen(cmd, shell=True, preexec_fn=os.setsid) + p = subprocess.Popen(cmd, shell=True) p.wait() return_code = p.returncode p=None diff --git a/install_requirements.sh b/install_requirements.sh new file mode 100755 index 000000000..bc826f024 --- /dev/null +++ b/install_requirements.sh @@ -0,0 +1,4 @@ +python3 -m pip install pip -U +# temporary workaroud for issue between main and develop +python3 -m pip uninstall depthai --yes +python3 -m pip install -r requirements.txt diff --git a/integration_test.py b/integration_test.py index 63e48bc69..8e1f44483 100755 --- a/integration_test.py +++ b/integration_test.py @@ -73,7 +73,7 @@ def cleanup(): "metaout" : 5 * 1024, "left" : 1280*720*1*30, "right" : 1280*720*1*30, - "depth_raw" : 1280*720*2*30, + "depth" : 1280*720*2*30, } streams = [ @@ -81,7 +81,7 @@ def cleanup(): "metaout", "left", "right", - "depth_raw"] + "depth"] diff --git a/model_compiler/download_and_compile.sh b/model_compiler/download_and_compile.sh deleted file mode 100755 index 3ee13eec1..000000000 --- a/model_compiler/download_and_compile.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash - -#example usage: -#cloud compile: -#./download_and_compile.sh mobilenet-ssd 4 4 1 CLOUD_COMPILE -#local compile -#./download_and_compile.sh mobilenet-ssd 4 4 1 - - -if [ "$#" -lt 4 ]; then - echo "Invalid number of arguments." 
- exit 1 -fi - -cd "$(dirname "$0")" - -echo_and_run() { echo -e "\$ $* \n" ; "$@" ; } - -MODEL_NAME=$1 -SHAVE_NR=$2 -CMX_NR=$3 -NCE_NR=$4 - -if [ "$NCE_NR" = "0" ] -then -VPU_MYRIAD_PLATFORM="VPU_MYRIAD_2450" -else -VPU_MYRIAD_PLATFORM="VPU_MYRIAD_2480" -fi - -if [ "$NCE_NR" = "2" ]; then -CMX_NR_OPT=$(($CMX_NR/2)) -SHAVE_NR_OPT=$(($SHAVE_NR/2)) -else -CMX_NR_OPT=$CMX_NR -SHAVE_NR_OPT=$SHAVE_NR -fi - - -if [ "$5" = "CLOUD_COMPILE" ] -then -CLOUD_COMPILE="yes" -fi - -CUR_DIR=$PWD -NN_PATH=`realpath ../resources/nn` - -if [ "$CLOUD_COMPILE" = "yes" ] -then -CLOUD_COMPILE_SCRIPT=$PWD/model_converter.py -else - -OPENVINO_VERSION="2020.1.023" - -DOWNLOADS_DIR=`realpath downloads` -OPENVINO_PATH=`realpath /opt/intel/openvino_$OPENVINO_VERSION` - -if [ ! -d "$OPENVINO_PATH" ]; then -echo "OPENVINO_PATH doesn't exist! Openvino $OPENVINO_VERSION is not installed?" -exit 2 -fi - -if [ ! -d "$NN_PATH" ]; then -echo "NN_PATH doesn't exist" -exit 3 -fi - -source $OPENVINO_PATH/bin/setupvars.sh - -MODEL_DOWNLOADER_OPTIONS="--precisions FP16 --output_dir $DOWNLOADS_DIR --cache_dir $DOWNLOADS_DIR --num_attempts 5 --name $MODEL_NAME" -MODEL_DOWNLOADER_PATH="$OPENVINO_PATH/deployment_tools/tools/model_downloader/downloader.py" - - -MYRIAD_COMPILE_OPTIONS="-ip U8 -VPU_MYRIAD_PLATFORM $VPU_MYRIAD_PLATFORM -VPU_NUMBER_OF_SHAVES $SHAVE_NR_OPT -VPU_NUMBER_OF_CMX_SLICES $CMX_NR_OPT" -MYRIAD_COMPILE_PATH="$OPENVINO_PATH/deployment_tools/inference_engine/lib/intel64/myriad_compile" - -echo_and_run python3 $MODEL_DOWNLOADER_PATH $MODEL_DOWNLOADER_OPTIONS - -if [ -d "$DOWNLOADS_DIR/public" ]; then -cd $DOWNLOADS_DIR/public -if [ -d "$MODEL_NAME" ]; then - # $MODEL_NAME is a directory - mkdir -p $DOWNLOADS_DIR/intel/$MODEL_NAME/FP16/ - echo_and_run $OPENVINO_PATH/deployment_tools/model_optimizer/mo.py --input_model $DOWNLOADS_DIR/public/$MODEL_NAME/$MODEL_NAME.caffemodel --input_proto $DOWNLOADS_DIR/public/$MODEL_NAME/$MODEL_NAME.prototxt --data_type=FP16 --mean_values [127.5,127.5,127.5] --scale_values [255,255,255] -o $DOWNLOADS_DIR/intel/$MODEL_NAME/FP16/ -fi -fi - -fi - -BLOB_SUFFIX='.sh'$SHAVE_NR'cmx'$CMX_NR'NCE'$NCE_NR - -BLOB_OUT=$NN_PATH/$MODEL_NAME/$MODEL_NAME.blob$BLOB_SUFFIX - -if [ -f "$BLOB_OUT" ]; then - echo "$MODEL_NAME.blob$BLOB_SUFFIX already compiled." - exit 0 -fi - -cd $NN_PATH -if [ -d "$MODEL_NAME" ]; then - if [ "$CLOUD_COMPILE" = "yes" ] - then - echo_and_run python3 $CLOUD_COMPILE_SCRIPT -i $MODEL_NAME -o $BLOB_OUT --shaves $SHAVE_NR_OPT --cmx_slices $CMX_NR_OPT --NCEs $NCE_NR - else - echo_and_run $MYRIAD_COMPILE_PATH $MYRIAD_COMPILE_OPTIONS -m $DOWNLOADS_DIR/intel/$MODEL_NAME/FP16/$MODEL_NAME.xml -o $BLOB_OUT - fi - if [ $? 
!= 0 ]; then - rm -f $BLOB_OUT - exit 4 - fi -fi - - -cd $CUR_DIR diff --git a/model_compiler/download_and_compile_all.sh b/model_compiler/download_and_compile_all.sh deleted file mode 100755 index af7566511..000000000 --- a/model_compiler/download_and_compile_all.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -#example usage: -#cloud compile: -#./download_and_compile_all.sh 4 4 1 CLOUD_COMPILE -#local compile -#./download_and_compile_all.sh 4 4 1 - -RED='\033[0;31m' -NC='\033[0m' # No Color - -SCRIPT_DIR="$(dirname "$0")" -cd $SCRIPT_DIR -SCRIPT_DIR=`realpath .` - -echo_and_run() { echo -e "\$ $* \n" ; "$@" ; } - -SHAVE_NR=$1 -CMX_NR=$2 -NCE_NR=$3 - -if [ "$NCE_NR" = "2" ]; then -cmx_odd=$(($CMX_NR%2)) -shave_odd=$(($SHAVE_NR%2)) -if [ "$cmx_odd" = "1" ]; then - printf "${RED}CMX_NR config must be even number when NCE is 2!${NC}\n" - exit 1 -fi - -if [ "$shave_odd" = "1" ]; then - printf "${RED}SHAVE_NR config must be even number when NCE is 2!${NC}\n" - exit 1 -fi - -fi - - -if [ "$4" = "CLOUD_COMPILE" ] -then -CLOUD_COMPILE="CLOUD_COMPILE" -else -CLOUD_COMPILE="" -fi - -NN_PATH=`realpath ../resources/nn` - -cd $NN_PATH -for f in *; do - if [ -z "$f" ]; then - continue - fi - if [ -d "$f" ]; then - echo_and_run $SCRIPT_DIR/download_and_compile.sh $f $SHAVE_NR $CMX_NR $NCE_NR $CLOUD_COMPILE - if [ $? != 0 ]; then - printf "${RED}Model compile failed with error: $? ${NC}\n" - fi - fi -done diff --git a/model_compiler/downloader/README.md b/model_compiler/downloader/README.md new file mode 100644 index 000000000..2d36d823f --- /dev/null +++ b/model_compiler/downloader/README.md @@ -0,0 +1,458 @@ +Model Downloader and other automation tools +=========================================== + +This directory contains scripts that automate certain model-related tasks +based on configuration files in the models' directories. + +* `downloader.py` (model downloader) downloads model files from online sources + and, if necessary, patches them to make them more usable with Model + Optimizer; + +* `converter.py` (model converter) converts the models that are not in the + Inference Engine IR format into that format using Model Optimizer. + +* `info_dumper.py` (model information dumper) prints information about the models + in a stable machine-readable format. + +Please use these tools instead of attempting to parse the configuration files +directly. Their format is undocumented and may change in incompatible ways in +future releases. + +Prerequisites +------------- + +1. Install Python (version 3.5.2 or higher) +2. Install the tools' dependencies with the following command: + +```sh +python3 -mpip install --user -r ./requirements.in +``` + +For the model converter, you will also need to install the OpenVINO™ +toolkit and the prerequisite libraries for Model Optimizer. See the +[OpenVINO toolkit documentation](https://docs.openvinotoolkit.org/) for details. + +If you using models from PyTorch or Caffe2 framework, you will also need to use intermediate +conversion to ONNX format. To use automatic conversion install additional dependencies. + +For models from PyTorch: +```sh +python3 -mpip install --user -r ./requirements-pytorch.in +``` +For models from Caffe2: +```sh +python3 -mpip install --user -r ./requirements-caffe2.in +``` + +When running the model downloader with Python 3.5.x on macOS, you may encounter +an error similar to the following: + +> requests.exceptions.SSLError: [...] 
(Caused by SSLError(SSLError(1, '[SSL: TLSV1_ALERT_PROTOCOL_VERSION] +tlsv1 alert protocol version (\_ssl.c:719)'),)) + +You can work around this by installing additional packages: + +```sh +python3 -mpip install --user 'requests[security]' +``` + +Alternatively, upgrade to Python 3.6 or a later version. + +Model downloader usage +---------------------- + +The basic usage is to run the script like this: + +```sh +./downloader.py --all +``` + +This will download all models into a directory tree rooted in the current +directory. To download into a different directory, use the `-o`/`--output_dir` +option: + +```sh +./downloader.py --all --output_dir my/download/directory +``` + +The `--all` option can be replaced with other filter options to download only +a subset of models. See the "Shared options" section. + +You may use `--precisions` flag to specify comma separated precisions of weights +to be downloaded. + +```sh +./downloader.py --name face-detection-retail-0004 --precisions FP16,INT8 +``` + +By default, the script will attempt to download each file only once. You can use +the `--num_attempts` option to change that and increase the robustness of the +download process: + +```sh +./downloader.py --all --num_attempts 5 # attempt each download five times +``` + +You can use the `--cache_dir` option to make the script use the specified directory +as a cache. The script will place a copy of each downloaded file in the cache, or, +if it is already there, retrieve it from the cache instead of downloading it again. + +```sh +./downloader.py --all --cache_dir my/cache/directory +``` + +The cache format is intended to remain compatible in future Open Model Zoo +versions, so you can use a cache to avoid redownloading most files when updating +Open Model Zoo. + +By default, the script outputs progress information as unstructured, human-readable +text. If you want to consume progress information programmatically, use the +`--progress_format` option: + +```sh +./downloader.py --all --progress_format=json +``` + +When this option is set to `json`, the script's standard output is replaced by +a machine-readable progress report, whose format is documented in the +"JSON progress report format" section. This option does not affect errors and +warnings, which will still be printed to the standard error stream in a +human-readable format. + +You can also set this option to `text` to explicitly request the default text +format. + +See the "Shared options" section for information on other options accepted by +the script. + +### JSON progress report format + +This section documents the format of the progress report produced by the script +when the `--progress_format=json` option is specified. + +The report consists of a sequence of events, where each event is represented +by a line containing a JSON-encoded object. Each event has a member with the +name `$type` whose value determines the type of the event, as well as which +additional members it contains. + +The following event types are currently defined: + +* `model_download_begin` + + Additional members: `model` (string), `num_files` (integer). + + The script started downloading the model named by `model`. + `num_files` is the number of files that will be downloaded for this model. + + This event will always be followed by a corresponding `model_download_end` event. + +* `model_download_end` + + Additional members: `model` (string), `successful` (boolean). + + The script stopped downloading the model named by `model`. 
+ `successful` is true if every file was downloaded successfully. + +* `model_file_download_begin` + + Additional members: `model` (string), `model_file` (string), `size` (integer). + + The script started downloading the file named by `model_file` of the model named + by `model`. `size` is the size of the file in bytes. + + This event will always occur between `model_download_begin` and `model_download_end` + events for the model, and will always be followed by a corresponding + `model_file_download_end` event. + +* `model_file_download_end` + + Additional members: `model` (string), `model_file` (string), `successful` (boolean). + + The script stopped downloading the file named by `model_file` of the model named + by `model`. `successful` is true if the file was downloaded successfully. + +* `model_file_download_progress` + + Additional members: `model` (string), `model_file` (string), `size` (integer). + + The script downloaded `size` bytes of the file named by `model_file` of + the model named by `model` so far. Note that `size` can decrease in a subsequent + event if the download is interrupted and retried. + + This event will always occur between `model_file_download_begin` and + `model_file_download_end` events for the file. + +* `model_postprocessing_begin` + + Additional members: `model`. + + The script started post-download processing on the model named by `model`. + + This event will always be followed by a corresponding `model_postprocessing_end` event. + +* `model_postprocessing_end` + + Additional members: `model`. + + The script stopped post-download processing on the model named by `model`. + +Additional event types and members may be added in the future. + +Tools parsing the machine-readable format should avoid relying on undocumented details. +In particular: + +* Tools should not assume that any given event will occur for a given model/file + (unless specified otherwise above) or will only occur once. + +* Tools should not assume that events will occur in a certain order beyond + the ordering constraints specified above. Note that future versions of the script + may interleave the downloading of different files or models. + +Model converter usage +--------------------- + +The basic usage is to run the script like this: + +```sh +./converter.py --all +``` + +This will convert all models into the Inference Engine IR format. Models that +were originally in that format are ignored. Models in PyTorch and Caffe2 formats will be +converted in ONNX format first. + +The current directory must be the root of a download tree created by the model +downloader. To specify a different download tree path, use the `-d`/`--download_dir` +option: + +```sh +./converter.py --all --download_dir my/download/directory +``` + +By default, the converted models are placed into the download tree. To place them +into a different directory tree, use the `-o`/`--output_dir` option: + +```sh +./converter.py --all --output_dir my/output/directory +``` +>Note: models in intermediate format are placed to this directory too. + +The `--all` option can be replaced with other filter options to convert only +a subset of models. See the "Shared options" section. + +By default, the script will produce models in every precision that is supported +for conversion. To only produce models in a specific precision, use the `--precisions` +option: + +```sh +./converter.py --all --precisions=FP16 +``` + +If the specified precision is not supported for a model, that model will be skipped. 
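For a quick end-to-end run, the downloader and converter can be chained. The sketch below is a hypothetical wrapper script (it is not part of this tree); it uses only flags documented on this page, picks `face-detection-retail-0004` purely as an example name, and assumes it is started from the directory that contains `downloader.py` and `converter.py`:

```python
#!/usr/bin/env python3
# Hypothetical helper, shown for illustration only: download one model and
# convert it to FP16 IR using the documented downloader/converter options.
import subprocess
import sys

MODEL = "face-detection-retail-0004"  # example name; any name accepted by --name works
TREE = "downloads"                    # root of the download tree (see --output_dir/--download_dir)

def run(tool, *extra):
    cmd = [sys.executable, tool, "--name", MODEL, "--precisions", "FP16"] + list(extra)
    print("$", " ".join(cmd), flush=True)
    subprocess.run(cmd, check=True)   # stop if the tool reports a failure

if __name__ == "__main__":
    # Retry each download up to five times (see --num_attempts above).
    run("downloader.py", "--num_attempts", "5", "--output_dir", TREE)
    # Convert from the same tree; models already in IR format are simply skipped.
    run("converter.py", "--download_dir", TREE, "--output_dir", TREE)
```

Passing the same path to `--download_dir` and `--output_dir` mirrors the default behaviour of placing converted files into the download tree.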
+ +The script will attempt to locate Model Optimizer using the environment +variables set by the OpenVINO™ toolkit's `setupvars.sh`/`setupvars.bat` +script. You can override this heuristic with the `--mo` option: + +```sh +./converter.py --all --mo my/openvino/path/model_optimizer/mo.py +``` + +You can add extra Model Optimizer arguments to the ones specified in the model +configuration by using the `--add-mo-arg` option. The option can be repeated +to add multiple arguments: + +```sh +./converter.py --name=caffenet --add-mo-arg=--reverse_input_channels --add-mo-arg=--silent +``` + +By default, the script will run Model Optimizer using the same Python executable +that was used to run the script itself. To use a different Python executable, +use the `-p`/`--python` option: + +```sh +./converter.py --all --python my/python +``` + +The script can run multiple conversion commands concurrently. To enable this, +use the `-j`/`--jobs` option: + +```sh +./converter.py --all -j8 # run up to 8 commands at a time +``` + +The argument to the option must be either a maximum number of concurrently +executed commands, or "auto", in which case the number of CPUs in the system is used. +By default, all commands are run sequentially. + +The script can print the conversion commands without actually running them. +To do this, use the `--dry-run` option: + +```sh +./converter.py --all --dry-run +``` + +See the "Shared options" section for information on other options accepted by +the script. + +Model information dumper usage +------------------------------ + +The basic usage is to run the script like this: + +```sh +./info_dumper.py --all +``` + +This will print to standard output information about all models. + +The only options accepted by the script are those described in the "Shared options" +section. + +The script's output is a JSON array, each element of which is a JSON object +describing a single model. Each such object has the following keys: + +* `name`: the identifier of the model, as accepted by the `--name` option. + +* `description`: text describing the model. Paragraphs are separated by line feed characters. + +* `framework`: a string identifying the framework whose format the model is downloaded in. + Current possible values are `dldt` (Inference Engine IR), `caffe`, `caffe2`, `mxnet`, `onnx`, + `pytorch` and `tf` (TensorFlow). Additional possible values might be added in the future. + +* `license_url`: an URL for the license that the model is distributed under. + +* `precisions`: the list of precisions that the model has IR files for. For models downloaded + in a format other than the Inference Engine IR format, these are the precisions that the model + converter can produce IR files in. Current possible values are: + + * `FP16` + * `FP16-INT1` + * `FP16-INT8` + * `FP32` + * `FP32-INT1` + * `FP32-INT8` + * `INT1` + * `INT8` + + Additional possible values might be added in the future. + +* `subdirectory`: the subdirectory of the output tree into which the downloaded or converted files + will be placed by the downloader or the converter, respectively. + +* `task_type`: a string identifying the type of task that the model performs. 
Current possible values + are: + + * `action_recognition` + * `classification` + * `detection` + * `face_recognition` + * `feature_extraction` + * `head_pose_estimation` + * `human_pose_estimation` + * `image_processing` + * `instance_segmentation` + * `object_attributes` + * `optical_character_recognition` + * `semantic_segmentation` + + Additional possible values might be added in the future. + +Shared options +-------------- + +The are certain options that both tools accept. + +`-h`/`--help` can be used to print a help message: + +```sh +./TOOL.py --help +``` + +There are several mutually exclusive filter options that select the models the +tool will process: + +* `--all` selects all models. + + ```sh + ./TOOL.py --all + ``` + +* `--name` takes a comma-separated list of patterns and selects models that match + at least one of these patterns. The patterns may contain shell-style wildcards. + + ```sh + ./TOOL.py --name 'mtcnn-p,densenet-*' + ``` + + See https://docs.python.org/3/library/fnmatch.html for a full description of + the pattern syntax. + +* `--list` takes a path to a file that must contain a list of patterns and + selects models that match at least one of those patterns. + + ```sh + ./TOOL.py --list my.lst + ``` + + The file must contain one pattern per line. The pattern syntax is the same + as for the `--name` option. Blank lines and comments starting with `#` are + ignored. For example: + + ``` + mtcnn-p + densenet-* # get all DenseNet variants + ``` + +To see the available models, you can use the `--print_all` option. When this +option is specified, the tool will print all model names defined in the +configuration file and exit: + +``` +$ ./TOOL.py --print_all +action-recognition-0001-decoder +action-recognition-0001-encoder +age-gender-recognition-retail-0013 +driver-action-recognition-adas-0002-decoder +driver-action-recognition-adas-0002-encoder +emotions-recognition-retail-0003 +face-detection-adas-0001 +face-detection-adas-binary-0001 +face-detection-retail-0004 +face-detection-retail-0005 +[...] +``` + +Either `--print_all` or one of the filter options must be specified. + +Deprecated options +------------------ + +In earlier releases, the tools used a single configuration file instead of +per-model configuration files. For compatibility, loading such a file is still +supported. However, this feature is deprecated and will be removed in a future release. + +To load a configuration file in the old format, use the `-c`/`--config` option: + +```sh +./TOOL.py --all --config my-config.yml +``` +__________ + +OpenVINO is a trademark of Intel Corporation or its subsidiaries in the U.S. +and/or other countries. + + +Copyright © 2018-2019 Intel Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
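As a concrete illustration of the JSON progress report described in the README above, here is a minimal, hypothetical consumer (not part of this changeset). It assumes `downloader.py` sits in the current directory and relies only on the event types and members documented there; anything unrecognized is skipped, which is the forward-compatible behaviour the README asks tools to follow:

```python
#!/usr/bin/env python3
# Hypothetical consumer of `downloader.py --progress_format=json`, for illustration.
import json
import subprocess
import sys

proc = subprocess.Popen(
    [sys.executable, "downloader.py", "--all", "--progress_format=json"],
    stdout=subprocess.PIPE, universal_newlines=True)

expected = {}  # model_file -> total size in bytes, from model_file_download_begin

for line in proc.stdout:
    if not line.strip():
        continue
    event = json.loads(line)          # one JSON object per line, per the README
    kind = event["$type"]
    if kind == "model_download_begin":
        print("{}: downloading {} file(s)".format(event["model"], event["num_files"]))
    elif kind == "model_file_download_begin":
        expected[event["model_file"]] = event["size"]
    elif kind == "model_file_download_progress":
        total = expected.get(event["model_file"]) or 1   # guard against zero-byte files
        print("  {} {}%".format(event["model_file"], 100 * event["size"] // total), end="\r")
    elif kind == "model_download_end":
        print("\n{}: {}".format(event["model"], "done" if event["successful"] else "FAILED"))
    # Other event types (file end, postprocessing) are ignored; new ones may appear later.

sys.exit(proc.wait())
```

Dispatching on `$type` and ignoring unknown events keeps such a consumer compatible with future downloader releases, which is exactly the contract the report-format section asks tools to honour.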
diff --git a/model_compiler/downloader/common.py b/model_compiler/downloader/common.py new file mode 100644 index 000000000..507ca35d1 --- /dev/null +++ b/model_compiler/downloader/common.py @@ -0,0 +1,503 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import contextlib +import fnmatch +import json +import re +import shlex +import shutil +import sys +import traceback + +from pathlib import Path + +import yaml + +DOWNLOAD_TIMEOUT = 5 * 60 + +# make sure to update the documentation if you modify these +KNOWN_FRAMEWORKS = { + 'caffe': None, + 'caffe2': 'caffe2_to_onnx.py', + 'dldt': None, + 'mxnet': None, + 'onnx': None, + 'pytorch': 'pytorch_to_onnx.py', + 'tf': None, +} +KNOWN_PRECISIONS = { + 'FP16', 'FP16-INT1', 'FP16-INT8', + 'FP32', 'FP32-INT1', 'FP32-INT8', + 'INT1', 'INT8', +} +KNOWN_TASK_TYPES = { + 'action_recognition', + 'classification', + 'detection', + 'face_recognition', + 'feature_extraction', + 'head_pose_estimation', + 'human_pose_estimation', + 'image_processing', + 'instance_segmentation', + 'object_attributes', + 'optical_character_recognition', + 'semantic_segmentation', +} + +RE_MODEL_NAME = re.compile(r'[0-9a-zA-Z._-]+') +RE_SHA256SUM = re.compile(r'[0-9a-fA-F]{64}') + +class Reporter: + GROUP_DECORATION = '#' * 16 + '||' + SECTION_DECORATION = '=' * 10 + ERROR_DECORATION = '#' * 10 + + def __init__(self, enable_human_output=True, enable_json_output=False, event_context={}): + self.enable_human_output = enable_human_output + self.enable_json_output = enable_json_output + self.event_context = event_context + + def print_group_heading(self, text): + if not self.enable_human_output: return + print(self.GROUP_DECORATION, text, self.GROUP_DECORATION[::-1]) + print() + + def print_section_heading(self, format, *args): + if not self.enable_human_output: return + print(self.SECTION_DECORATION, format.format(*args), flush=True) + + def print_progress(self, format, *args): + if not self.enable_human_output: return + print(format.format(*args), end='\r' if sys.stdout.isatty() else '\n', flush=True) + + def end_progress(self): + if not self.enable_human_output: return + if sys.stdout.isatty(): + print() + + def print(self, format='', *args, flush=False): + if not self.enable_human_output: return + print(format.format(*args), flush=flush) + + def log_warning(self, format, *args, exc_info=False): + if exc_info: + traceback.print_exc(file=sys.stderr) + print(self.ERROR_DECORATION, "Warning:", format.format(*args), file=sys.stderr) + + def log_error(self, format, *args, exc_info=False): + if exc_info: + traceback.print_exc(file=sys.stderr) + print(self.ERROR_DECORATION, "Error:", format.format(*args), file=sys.stderr) + + def log_details(self, format, *args): + print(self.ERROR_DECORATION, ' ', format.format(*args), file=sys.stderr) + + def emit_event(self, type, **kwargs): + if not self.enable_json_output: return + json.dump({'$type': type, **self.event_context, **kwargs}, sys.stdout, indent=None) + print() + + def 
with_event_context(self, **kwargs): + return Reporter( + enable_human_output=self.enable_human_output, + enable_json_output=self.enable_json_output, + event_context={**self.event_context, **kwargs}, + ) + +class DeserializationError(Exception): + def __init__(self, problem, contexts=()): + super().__init__(': '.join(contexts + (problem,))) + self.problem = problem + self.contexts = contexts + +@contextlib.contextmanager +def deserialization_context(context): + try: + yield None + except DeserializationError as exc: + raise DeserializationError(exc.problem, (context,) + exc.contexts) from exc + +def validate_string(context, value): + if not isinstance(value, str): + raise DeserializationError('{}: expected a string, got {!r}'.format(context, value)) + return value + +def validate_string_enum(context, value, known_values): + str_value = validate_string(context, value) + if str_value not in known_values: + raise DeserializationError('{}: expected one of {!r}, got {!r}'.format(context, known_values, value)) + return str_value + +def validate_relative_path(context, value): + path = Path(validate_string(context, value)) + + if path.anchor or '..' in path.parts: + raise DeserializationError('{}: disallowed absolute path or parent traversal'.format(context)) + + return path + +def validate_nonnegative_int(context, value): + if not isinstance(value, int) or value < 0: + raise DeserializationError( + '{}: expected a non-negative integer, got {!r}'.format(context, value)) + return value + +class TaggedBase: + @classmethod + def deserialize(cls, value): + try: + return cls.types[value['$type']].deserialize(value) + except KeyError: + raise DeserializationError('Unknown "$type": "{}"'.format(value['$type'])) + +class FileSource(TaggedBase): + types = {} + + @classmethod + def deserialize(cls, source): + if isinstance(source, str): + source = {'$type': 'http', 'url': source} + return super().deserialize(source) + +class FileSourceHttp(FileSource): + def __init__(self, url): + self.url = url + + @classmethod + def deserialize(cls, source): + return cls(validate_string('"url"', source['url'])) + + def start_download(self, session, chunk_size): + response = session.get(self.url, stream=True, timeout=DOWNLOAD_TIMEOUT) + response.raise_for_status() + + return response.iter_content(chunk_size=chunk_size) + +FileSource.types['http'] = FileSourceHttp + +class FileSourceGoogleDrive(FileSource): + def __init__(self, id): + self.id = id + + @classmethod + def deserialize(cls, source): + return cls(validate_string('"id"', source['id'])) + + def start_download(self, session, chunk_size): + URL = 'https://docs.google.com/uc?export=download' + response = session.get(URL, params={'id' : self.id}, stream=True, timeout=DOWNLOAD_TIMEOUT) + response.raise_for_status() + + for key, value in response.cookies.items(): + if key.startswith('download_warning'): + params = {'id': self.id, 'confirm': value} + response = session.get(URL, params=params, stream=True, timeout=DOWNLOAD_TIMEOUT) + response.raise_for_status() + + return response.iter_content(chunk_size=chunk_size) + +FileSource.types['google_drive'] = FileSourceGoogleDrive + +class ModelFile: + def __init__(self, name, size, sha256, source): + self.name = name + self.size = size + self.sha256 = sha256 + self.source = source + + @classmethod + def deserialize(cls, file): + name = validate_relative_path('"name"', file['name']) + + with deserialization_context('In file "{}"'.format(name)): + size = validate_nonnegative_int('"size"', file['size']) + + sha256 = 
validate_string('"sha256"', file['sha256']) + + if not RE_SHA256SUM.fullmatch(sha256): + raise DeserializationError( + '"sha256": got invalid hash {!r}'.format(sha256)) + + with deserialization_context('"source"'): + source = FileSource.deserialize(file['source']) + + return cls(name, size, sha256, source) + +class Postproc(TaggedBase): + types = {} + +class PostprocRegexReplace(Postproc): + def __init__(self, file, pattern, replacement, count): + self.file = file + self.pattern = pattern + self.replacement = replacement + self.count = count + + @classmethod + def deserialize(cls, postproc): + return cls( + validate_relative_path('"file"', postproc['file']), + re.compile(validate_string('"pattern"', postproc['pattern'])), + validate_string('"replacement"', postproc['replacement']), + validate_nonnegative_int('"count"', postproc.get('count', 0)), + ) + + def apply(self, reporter, output_dir): + postproc_file = output_dir / self.file + + reporter.print_section_heading('Replacing text in {}', postproc_file) + + postproc_file_text = postproc_file.read_text() + + orig_file = postproc_file.with_name(postproc_file.name + '.orig') + if not orig_file.exists(): + postproc_file.replace(orig_file) + + postproc_file_text, num_replacements = self.pattern.subn( + self.replacement, postproc_file_text, count=self.count) + + if num_replacements == 0: + raise RuntimeError('Invalid pattern: no occurrences found') + + if self.count != 0 and num_replacements != self.count: + raise RuntimeError('Invalid pattern: expected at least {} occurrences, but only {} found'.format( + self.count, num_replacements)) + + postproc_file.write_text(postproc_file_text) + +Postproc.types['regex_replace'] = PostprocRegexReplace + +class PostprocUnpackArchive(Postproc): + def __init__(self, file, format): + self.file = file + self.format = format + + @classmethod + def deserialize(cls, postproc): + return cls( + validate_relative_path('"file"', postproc['file']), + validate_string('"format"', postproc['format']), + ) + + def apply(self, reporter, output_dir): + postproc_file = output_dir / self.file + + reporter.print_section_heading('Unpacking {}', postproc_file) + + shutil.unpack_archive(str(postproc_file), str(output_dir), self.format) + postproc_file.unlink() # Remove the archive + +Postproc.types['unpack_archive'] = PostprocUnpackArchive + +class Model: + def __init__(self, name, subdirectory, files, postprocessing, mo_args, framework, + description, license_url, precisions, task_type, conversion_to_onnx_args): + self.name = name + self.subdirectory = subdirectory + self.files = files + self.postprocessing = postprocessing + self.mo_args = mo_args + self.framework = framework + self.description = description + self.license_url = license_url + self.precisions = precisions + self.task_type = task_type + self.conversion_to_onnx_args = conversion_to_onnx_args + self.converter_to_onnx = KNOWN_FRAMEWORKS[framework] + + @classmethod + def deserialize(cls, model, name, subdirectory): + with deserialization_context('In model "{}"'.format(name)): + if not RE_MODEL_NAME.fullmatch(name): + raise DeserializationError('Invalid name, must consist only of letters, digits or ._-') + + files = [] + file_names = set() + + for file in model['files']: + files.append(ModelFile.deserialize(file)) + + if files[-1].name in file_names: + raise DeserializationError( + 'Duplicate file name "{}"'.format(files[-1].name)) + file_names.add(files[-1].name) + + postprocessing = [] + + for i, postproc in enumerate(model.get('postprocessing', [])): + with 
deserialization_context('"postprocessing" #{}'.format(i)): + postprocessing.append(Postproc.deserialize(postproc)) + + framework = validate_string_enum('"framework"', model['framework'], KNOWN_FRAMEWORKS.keys()) + + conversion_to_onnx_args = model.get('conversion_to_onnx_args', None) + if KNOWN_FRAMEWORKS[framework]: + if not conversion_to_onnx_args: + raise DeserializationError('"conversion_to_onnx_args" is absent. ' + 'Framework "{}" is supported only by conversion to ONNX.' + .format(framework)) + conversion_to_onnx_args = [validate_string('"conversion_to_onnx_args" #{}'.format(i), arg) + for i, arg in enumerate(model['conversion_to_onnx_args'])] + else: + if conversion_to_onnx_args: + raise DeserializationError('Conversion to ONNX not supported for "{}" framework'.format(framework)) + + if 'model_optimizer_args' in model: + mo_args = [validate_string('"model_optimizer_args" #{}'.format(i), arg) + for i, arg in enumerate(model['model_optimizer_args'])] + + precisions = {'FP16', 'FP32'} + else: + if framework != 'dldt': + raise DeserializationError('Model not in IR format, but no conversions defined') + + mo_args = None + + files_per_precision = {} + + for file in files: + if len(file.name.parts) != 2: + raise DeserializationError('Can\'t derive precision from file name {!r}'.format(file.name)) + p = file.name.parts[0] + if p not in KNOWN_PRECISIONS: + raise DeserializationError( + 'Unknown precision {!r} derived from file name {!r}, expected one of {!r}'.format( + p, file.name, KNOWN_PRECISIONS)) + files_per_precision.setdefault(p, set()).add(file.name.parts[1]) + + for precision, precision_files in files_per_precision.items(): + for ext in ['xml', 'bin']: + if (name + '.' + ext) not in precision_files: + raise DeserializationError('No {} file for precision "{}"'.format(ext.upper(), precision)) + + precisions = set(files_per_precision.keys()) + + description = validate_string('"description"', model['description']) + + license_url = validate_string('"license"', model['license']) + + task_type = validate_string_enum('"task_type"', model['task_type'], KNOWN_TASK_TYPES) + + return cls(name, subdirectory, files, postprocessing, mo_args, framework, + description, license_url, precisions, task_type, conversion_to_onnx_args) + +def load_models(args): + models = [] + model_names = set() + + def add_model(model): + models.append(model) + + if models[-1].name in model_names: + raise DeserializationError( + 'Duplicate model name "{}"'.format(models[-1].name)) + model_names.add(models[-1].name) + + if args.config is None: # per-model configs + if args.model_root is None: + model_root = (Path(__file__).resolve().parent / './models/luxonis').resolve() + else: + model_root = Path(args.model_root) + + for config_path in sorted(model_root.glob('**/model.yml')): + subdirectory = config_path.parent.relative_to(model_root) + + with config_path.open('rb') as config_file, \ + deserialization_context('In config "{}"'.format(config_path)): + + model = yaml.safe_load(config_file) + + for bad_key in ['name', 'subdirectory']: + if bad_key in model: + raise DeserializationError('Unsupported key "{}"'.format(bad_key)) + + add_model(Model.deserialize(model, subdirectory.name, subdirectory)) + + else: # monolithic config + print('########## Warning: the --config option is deprecated and will be removed in a future release', + file=sys.stderr) + with args.config.open('rb') as config_file, \ + deserialization_context('In config "{}"'.format(args.config)): + for i, model in 
enumerate(yaml.safe_load(config_file)['topologies']): + with deserialization_context('In model #{}'.format(i)): + name = validate_string('"name"', model['name']) + if not name: raise DeserializationError('"name": must not be empty') + + with deserialization_context('In model "{}"'.format(name)): + subdirectory = validate_relative_path('"output"', model['output']) + + add_model(Model.deserialize(model, name, subdirectory)) + + return models + +def load_models_or_die(args): + try: + return load_models(args) + except DeserializationError as e: + indent = ' ' + + for i, context in enumerate(e.contexts): + print(indent * i + context + ':', file=sys.stderr) + print(indent * len(e.contexts) + e.problem, file=sys.stderr) + sys.exit(1) + +# requires the --print_all, --all, --name and --list arguments to be in `args` +def load_models_from_args(parser, args): + if args.print_all: + for model in load_models_or_die(args): + print(model.name) + sys.exit() + + filter_args_count = sum([args.all, args.name is not None, args.list is not None]) + + if filter_args_count > 1: + parser.error('at most one of "--all", "--name" or "--list" can be specified') + + if filter_args_count == 0: + parser.error('one of "--print_all", "--all", "--name" or "--list" must be specified') + + all_models = load_models_or_die(args) + + if args.all: + return all_models + elif args.name is not None or args.list is not None: + if args.name is not None: + patterns = args.name.split(',') + else: + patterns = [] + with args.list.open() as list_file: + for list_line in list_file: + tokens = shlex.split(list_line, comments=True) + if not tokens: continue + + patterns.append(tokens[0]) + # For now, ignore any other tokens in the line. + # We might use them as additional parameters later. + + models = collections.OrderedDict() # deduplicate models while preserving order + + for pattern in patterns: + matching_models = [model for model in all_models + if fnmatch.fnmatchcase(model.name, pattern)] + + if not matching_models: + sys.exit('No matching models: "{}"'.format(pattern)) + + for model in matching_models: + models[model.name] = model + + return list(models.values()) diff --git a/model_compiler/downloader/converter.py b/model_compiler/downloader/converter.py new file mode 100755 index 000000000..fcf3b7526 --- /dev/null +++ b/model_compiler/downloader/converter.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
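+# Example invocation (illustrative only; the directories below are assumptions, and
+# INTEL_OPENVINO_DIR must be set unless --mo points at mo.py):
+#
+#   python3 converter.py --all --precisions FP16 \
+#       --download_dir downloads --output_dir downloads --model_root ../resources/nn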
+ +import argparse +import concurrent.futures +import os +import platform +import queue +import re +import shlex +import string +import subprocess +import sys +import threading + +from pathlib import Path + +import common + +class JobContext: + def printf(self, format, *args, flush=False): + raise NotImplementedError + + def subprocess(self, args): + raise NotImplementedError + + +class DirectOutputContext(JobContext): + def printf(self, format, *args, flush=False): + print(format.format(*args), flush=flush) + + def subprocess(self, args): + return subprocess.run(args).returncode == 0 + + +class QueuedOutputContext(JobContext): + def __init__(self, output_queue): + self._output_queue = output_queue + + def printf(self, format, *args, flush=False): + self._output_queue.put(format.format(*args) + '\n') + + def subprocess(self, args): + with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + universal_newlines=True) as p: + for line in p.stdout: + self._output_queue.put(line) + return p.wait() == 0 + + +class JobWithQueuedOutput(): + def __init__(self, output_queue, future): + self._output_queue = output_queue + self._future = future + self._future.add_done_callback(lambda future: self._output_queue.put(None)) + + def complete(self): + for fragment in iter(self._output_queue.get, None): + print(fragment, end='', flush=True) # for simplicity, flush every fragment + + return self._future.result() + + def cancel(self): + self._future.cancel() + + +def quote_windows(arg): + if not arg: return '""' + if not re.search(r'\s|"', arg): return arg + # On Windows, only backslashes that precede a quote or the end of the argument must be escaped. + return '"' + re.sub(r'(\\+)$', r'\1\1', re.sub(r'(\\*)"', r'\1\1\\"', arg)) + '"' + +if platform.system() == 'Windows': + quote_arg = quote_windows +else: + quote_arg = shlex.quote + +def convert_to_onnx(context, model, output_dir, args): + context.printf('========= {}Converting {} to ONNX', + '(DRY RUN) ' if args.dry_run else '', model.name) + + conversion_to_onnx_args = [string.Template(arg).substitute(conv_dir=output_dir / model.subdirectory, + dl_dir=args.download_dir / model.subdirectory) + for arg in model.conversion_to_onnx_args] + cmd = [str(args.python), str(Path(__file__).absolute().parent / model.converter_to_onnx), *conversion_to_onnx_args] + + context.printf('Conversion to ONNX command: {}', ' '.join(map(quote_arg, cmd))) + context.printf('') + + success = True if args.dry_run else context.subprocess(cmd) + context.printf('') + + return success + +def num_jobs_arg(value_str): + if value_str == 'auto': + return os.cpu_count() or 1 + + try: + value = int(value_str) + if value > 0: return value + except ValueError: + pass + + raise argparse.ArgumentTypeError('must be a positive integer or "auto" (got {!r})'.format(value_str)) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=Path, metavar='CONFIG.YML', + help='model configuration file (deprecated)') + parser.add_argument('-d', '--download_dir', type=Path, metavar='DIR', + default=Path.cwd(), help='root of the directory tree with downloaded model files') + parser.add_argument('-o', '--output_dir', type=Path, metavar='DIR', + help='root of the directory tree to place converted files into') + parser.add_argument('--name', metavar='PAT[,PAT...]', + help='convert only models whose names match at least one of the specified patterns') + parser.add_argument('--list', type=Path, metavar='FILE.LST', + help='convert only models whose names 
match at least one of the patterns in the specified file') + parser.add_argument('--all', action='store_true', help='convert all available models') + parser.add_argument('--print_all', action='store_true', help='print all available models') + parser.add_argument('--precisions', metavar='PREC[,PREC...]', + help='run only conversions that produce models with the specified precisions') + parser.add_argument('-p', '--python', type=Path, metavar='PYTHON', default=sys.executable, + help='Python executable to run Model Optimizer with') + parser.add_argument('--mo', type=Path, metavar='MO.PY', + help='Model Optimizer entry point script') + parser.add_argument('--add-mo-arg', dest='extra_mo_args', metavar='ARG', action='append', + help='Extra argument to pass to Model Optimizer') + parser.add_argument('--dry-run', action='store_true', + help='Print the conversion commands without running them') + parser.add_argument('-j', '--jobs', type=num_jobs_arg, default=1, + help='number of conversions to run concurrently') + + parser.add_argument('--model_root', type=str, default=None, + help='path to models folder') + + args = parser.parse_args() + + mo_path = args.mo + if mo_path is None: + try: + mo_path = Path(os.environ['INTEL_OPENVINO_DIR']) / 'deployment_tools/model_optimizer/mo.py' + except KeyError: + sys.exit('Unable to locate Model Optimizer. ' + + 'Use --mo or run setupvars.sh/setupvars.bat from the OpenVINO toolkit.') + + extra_mo_args = args.extra_mo_args or [] + + if args.precisions is None: + requested_precisions = common.KNOWN_PRECISIONS + else: + requested_precisions = set(args.precisions.split(',')) + unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS + if unknown_precisions: + sys.exit('Unknown precisions specified: {}.'.format(', '.join(sorted(unknown_precisions)))) + + models = common.load_models_from_args(parser, args) + + output_dir = args.download_dir if args.output_dir is None else args.output_dir + + def convert(context, model): + if model.mo_args is None: + context.printf('========= Skipping {} (no conversions defined)', model.name) + context.printf('') + return True + + model_precisions = requested_precisions & model.precisions + if not model_precisions: + context.printf('========= Skipping {} (all conversions skipped)', model.name) + context.printf('') + return True + + model_format = model.framework + + if model.conversion_to_onnx_args: + if not convert_to_onnx(context, model, output_dir, args): + return False + model_format = 'onnx' + + expanded_mo_args = [ + string.Template(arg).substitute(dl_dir=args.download_dir / model.subdirectory, + mo_dir=mo_path.parent, + conv_dir=output_dir / model.subdirectory) + for arg in model.mo_args] + + for model_precision in sorted(model_precisions): + mo_cmd = [str(args.python), '--', str(mo_path), + '--framework={}'.format(model_format), + '--data_type={}'.format(model_precision), + '--output_dir={}'.format(output_dir / model.subdirectory / model_precision), + '--model_name={}'.format(model.name), + *expanded_mo_args, *extra_mo_args] + + context.printf('========= {}Converting {} to IR ({})', + '(DRY RUN) ' if args.dry_run else '', model.name, model_precision) + + context.printf('Conversion command: {}', ' '.join(map(quote_arg, mo_cmd))) + + if not args.dry_run: + context.printf('', flush=True) + + if not context.subprocess(mo_cmd): + return False + + context.printf('') + + return True + + if args.jobs == 1 or args.dry_run: + context = DirectOutputContext() + results = [convert(context, model) for model in models] + else: + 
with concurrent.futures.ThreadPoolExecutor(args.jobs) as executor: + def start(model): + output_queue = queue.Queue() + return JobWithQueuedOutput( + output_queue, + executor.submit(convert, QueuedOutputContext(output_queue), model)) + + jobs = list(map(start, models)) + + try: + results = [job.complete() for job in jobs] + except: + for job in jobs: job.cancel() + raise + + failed_models = [model.name for model, successful in zip(models, results) if not successful] + + if failed_models: + print('FAILED:') + print(*sorted(failed_models), sep='\n') + sys.exit(1) + +if __name__ == '__main__': + main() diff --git a/model_compiler/downloader/downloader.py b/model_compiler/downloader/downloader.py new file mode 100755 index 000000000..e7029d360 --- /dev/null +++ b/model_compiler/downloader/downloader.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 + +""" + Copyright (c) 2018 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +import argparse +import hashlib +import re +import requests +import shlex +import shutil +import ssl +import sys +import tempfile +import time + +from pathlib import Path + +import common + +CHUNK_SIZE = 1 << 15 if sys.stdout.isatty() else 1 << 20 + +def process_download(reporter, chunk_iterable, size, file): + start_time = time.monotonic() + progress_size = 0 + + try: + for chunk in chunk_iterable: + if chunk: + duration = time.monotonic() - start_time + progress_size += len(chunk) + if duration != 0: + speed = int(progress_size / (1024 * duration)) + percent = str(progress_size * 100 // size) + + reporter.print_progress('... 
{}%, {} KB, {} KB/s, {} seconds passed', + percent, int(progress_size / 1024), speed, int(duration)) + reporter.emit_event('model_file_download_progress', size=progress_size) + + file.write(chunk) + + # don't attempt to finish a file if it's bigger than expected + if progress_size > size: + break + + return progress_size + finally: + reporter.end_progress() + +def try_download(reporter, file, num_attempts, start_download, size): + for attempt in range(num_attempts): + if attempt != 0: + retry_delay = 10 + reporter.print("Will retry in {} seconds...", retry_delay, flush=True) + time.sleep(retry_delay) + + try: + chunk_iterable = start_download() + file.seek(0) + file.truncate() + actual_size = process_download(reporter, chunk_iterable, size, file) + + if actual_size > size: + reporter.log_error("Remote file is longer than expected ({} B), download aborted", size) + # no sense in retrying - if the file is longer, there's no way it'll fix itself + return False + elif actual_size < size: + reporter.log_error("Downloaded file is shorter ({} B) than expected ({} B)", + actual_size, size) + # it's possible that we got disconnected before receiving the full file, + # so try again + else: + return True + except (requests.exceptions.RequestException, ssl.SSLError): + reporter.log_error("Download failed", exc_info=True) + + return False + +def verify_hash(reporter, file, expected_hash, path, model_name): + actual_hash = hashlib.sha256() + while True: + chunk = file.read(1 << 20) + if not chunk: break + actual_hash.update(chunk) + + if actual_hash.digest() != bytes.fromhex(expected_hash): + reporter.log_error('Hash mismatch for "{}"', path) + reporter.log_details('Expected: {}', expected_hash) + reporter.log_details('Actual: {}', actual_hash.hexdigest()) + return False + return True + +class NullCache: + def has(self, hash): return False + def put(self, hash, path): pass + +class DirCache: + _FORMAT = 1 # increment if backwards-incompatible changes to the format are made + _HASH_LEN = hashlib.sha256().digest_size * 2 + + def __init__(self, cache_dir): + self._cache_dir = cache_dir / str(self._FORMAT) + self._cache_dir.mkdir(parents=True, exist_ok=True) + + self._staging_dir = self._cache_dir / 'staging' + self._staging_dir.mkdir(exist_ok=True) + + def _hash_path(self, hash): + hash = hash.lower() + assert len(hash) == self._HASH_LEN + assert re.fullmatch('[0-9a-f]+', hash) + return self._cache_dir / hash[:2] / hash[2:] + + def has(self, hash): + return self._hash_path(hash).exists() + + def get(self, hash, path): + shutil.copyfile(str(self._hash_path(hash)), str(path)) + + def put(self, hash, path): + # A file in the cache must have the hash implied by its name. So when we upload a file, + # we first copy it to a temporary file and then atomically move it to the desired name. + # This prevents interrupted runs from corrupting the cache. 
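+        # (Path.replace() maps to os.replace(), which is an atomic rename when source and
+        # destination are on the same filesystem; that is why the staging directory is
+        # kept inside the cache directory rather than in the system temp location.)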
+ with path.open('rb') as src_file: + with tempfile.NamedTemporaryFile(dir=str(self._staging_dir), delete=False) as staging_file: + staging_path = Path(staging_file.name) + shutil.copyfileobj(src_file, staging_file) + + hash_path = self._hash_path(hash) + hash_path.parent.mkdir(parents=True, exist_ok=True) + staging_path.replace(self._hash_path(hash)) + +def try_retrieve_from_cache(reporter, cache, files): + try: + if all(cache.has(file[0]) for file in files): + for hash, destination in files: + reporter.print_section_heading('Retrieving {} from the cache', destination) + cache.get(hash, destination) + reporter.print() + return True + except Exception: + reporter.log_warning('Cache retrieval failed; falling back to downloading', exc_info=True) + reporter.print() + + return False + +def try_update_cache(reporter, cache, hash, source): + try: + cache.put(hash, source) + except Exception: + reporter.log_warning('Failed to update the cache', exc_info=True) + +def try_retrieve(reporter, name, destination, model_file, cache, num_attempts, start_download): + destination.parent.mkdir(parents=True, exist_ok=True) + + if try_retrieve_from_cache(reporter, cache, [[model_file.sha256, destination]]): + return True + + reporter.print_section_heading('Downloading {}', destination) + + success = False + + with destination.open('w+b') as f: + if try_download(reporter, f, num_attempts, start_download, model_file.size): + f.seek(0) + if verify_hash(reporter, f, model_file.sha256, destination, name): + try_update_cache(reporter, cache, model_file.sha256, destination) + success = True + + reporter.print() + return success + +class DownloaderArgumentParser(argparse.ArgumentParser): + def error(self, message): + sys.stderr.write('error: %s\n' % message) + self.print_help() + sys.exit(2) + +def positive_int_arg(value_str): + try: + value = int(value_str) + if value > 0: return value + except ValueError: + pass + + raise argparse.ArgumentTypeError('must be a positive integer (got {!r})'.format(value_str)) + +def main(): + parser = DownloaderArgumentParser() + parser.add_argument('-c', '--config', type=Path, metavar='CONFIG.YML', + help='model configuration file (deprecated)') + parser.add_argument('--name', metavar='PAT[,PAT...]', + help='download only models whose names match at least one of the specified patterns') + parser.add_argument('--list', type=Path, metavar='FILE.LST', + help='download only models whose names match at least one of the patterns in the specified file') + parser.add_argument('--all', action='store_true', help='download all available models') + parser.add_argument('--print_all', action='store_true', help='print all available models') + parser.add_argument('--precisions', metavar='PREC[,PREC...]', + help='download only models with the specified precisions (actual for DLDT networks)') + parser.add_argument('-o', '--output_dir', type=Path, metavar='DIR', + default=Path.cwd(), help='path where to save models') + parser.add_argument('--cache_dir', type=Path, metavar='DIR', + help='directory to use as a cache for downloaded files') + parser.add_argument('--num_attempts', type=positive_int_arg, metavar='N', default=1, + help='attempt each download up to N times') + parser.add_argument('--progress_format', choices=('text', 'json'), default='text', + help='which format to use for progress reporting') + + parser.add_argument('--model_root', type=str, default=None, + help='path to models folder') + + args = parser.parse_args() + + reporter = common.Reporter( + enable_human_output=args.progress_format 
== 'text', + enable_json_output=args.progress_format == 'json') + + cache = NullCache() if args.cache_dir is None else DirCache(args.cache_dir) + models = common.load_models_from_args(parser, args) + + failed_models = set() + + if args.precisions is None: + requested_precisions = common.KNOWN_PRECISIONS + else: + requested_precisions = set(args.precisions.split(',')) + unknown_precisions = requested_precisions - common.KNOWN_PRECISIONS + if unknown_precisions: + sys.exit('Unknown precisions specified: {}.'.format(', '.join(sorted(unknown_precisions)))) + + reporter.print_group_heading('Downloading models') + with requests.Session() as session: + for model in models: + reporter.emit_event('model_download_begin', model=model.name, num_files=len(model.files)) + + output = args.output_dir / model.subdirectory + output.mkdir(parents=True, exist_ok=True) + + for model_file in model.files: + if len(model_file.name.parts) == 2: + p = model_file.name.parts[0] + if p in common.KNOWN_PRECISIONS and p not in requested_precisions: + continue + + model_file_reporter = reporter.with_event_context(model=model.name, model_file=model_file.name.as_posix()) + model_file_reporter.emit_event('model_file_download_begin', size=model_file.size) + + destination = output / model_file.name + + if not try_retrieve(model_file_reporter, model.name, destination, model_file, cache, args.num_attempts, + lambda: model_file.source.start_download(session, CHUNK_SIZE)): + shutil.rmtree(str(output)) + failed_models.add(model.name) + model_file_reporter.emit_event('model_file_download_end', successful=False) + reporter.emit_event('model_download_end', model=model.name, successful=False) + break + + model_file_reporter.emit_event('model_file_download_end', successful=True) + else: + reporter.emit_event('model_download_end', model=model.name, successful=True) + + reporter.print_group_heading('Post-processing') + for model in models: + if model.name in failed_models or not model.postprocessing: continue + + reporter.emit_event('model_postprocessing_begin', model=model.name) + + output = args.output_dir / model.subdirectory + + for postproc in model.postprocessing: + postproc.apply(reporter, output) + + reporter.emit_event('model_postprocessing_end', model=model.name) + + if failed_models: + reporter.print('FAILED:') + for failed_model_name in failed_models: + reporter.print(failed_model_name) + sys.exit(1) + +if __name__ == '__main__': + main() diff --git a/model_compiler/downloader/info_dumper.py b/model_compiler/downloader/info_dumper.py new file mode 100755 index 000000000..5d6a63bff --- /dev/null +++ b/model_compiler/downloader/info_dumper.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
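+# Dumps machine-readable metadata about the selected models to stdout: to_info()
+# below collects name, description, framework, license_url, precisions,
+# subdirectory and task_type for each model into a JSON array.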
+ +import argparse +import json +import sys + +from pathlib import Path + +import common + +def to_info(model): + return { + 'name': model.name, + + 'description': model.description, + 'framework': model.framework, + 'license_url': model.license_url, + 'precisions': sorted(model.precisions), + 'subdirectory': str(model.subdirectory), + 'task_type': str(model.task_type), + } + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=Path, metavar='CONFIG.YML', + help='model configuration file (deprecated)') + parser.add_argument('--name', metavar='PAT[,PAT...]', + help='only dump info for models whose names match at least one of the specified patterns') + parser.add_argument('--list', type=Path, metavar='FILE.LST', + help='only dump info for models whose names match at least one of the patterns in the specified file') + parser.add_argument('--all', action='store_true', help='dump info for all available models') + parser.add_argument('--print_all', action='store_true', help='print all available models') + args = parser.parse_args() + + models = common.load_models_from_args(parser, args) + + json.dump(list(map(to_info, models)), sys.stdout, indent=4) + print() # add a final newline + +if __name__ == '__main__': + main() diff --git a/model_compiler/downloader/requirements.in b/model_compiler/downloader/requirements.in new file mode 100644 index 000000000..1c6d8b40e --- /dev/null +++ b/model_compiler/downloader/requirements.in @@ -0,0 +1,2 @@ +pyyaml +requests diff --git a/model_compiler/model_compiler.py b/model_compiler/model_compiler.py new file mode 100644 index 000000000..c83be0fad --- /dev/null +++ b/model_compiler/model_compiler.py @@ -0,0 +1,211 @@ +import subprocess +import numpy as np +from pathlib import Path +import os, sys +import requests + +supported_openvino_version = '2020.1.023' + +def relative_to_abs_path(relative_path): + dirname = Path(__file__).parent + try: + return str((dirname / relative_path).resolve()) + except FileNotFoundError: + return None + +model_downloader_path = relative_to_abs_path('downloader/downloader.py') +ir_converter_path = relative_to_abs_path('downloader/converter.py') +download_folder_path = relative_to_abs_path('downloads') + "/" + + +def download_model(model, model_zoo_folder): + + model_downloader_options=f"--precisions FP16 --output_dir {download_folder_path} --cache_dir {download_folder_path}/.cache --num_attempts 5 --name {model} --model_root {model_zoo_folder}" + model_downloader_options = model_downloader_options.split() + downloader_cmd = ["python3", f"{model_downloader_path}"] + downloader_cmd = np.concatenate((downloader_cmd, model_downloader_options)) + # print(downloader_cmd) + result = subprocess.run(downloader_cmd) + if result.returncode != 0: + raise RuntimeError("Model downloader failed! 
Not connected to the internet?")
+
+    download_location = Path(download_folder_path) / model
+    if(not download_location.exists()):
+        raise RuntimeError(f"{download_location} doesn't exist for downloaded model!")
+
+    return download_location
+
+
+def convert_model_to_ir(model, model_zoo_folder):
+
+    converter_path = Path(ir_converter_path)
+
+    model_converter_options=f"--precisions FP16 --output_dir {download_folder_path} --download_dir {download_folder_path} --name {model} --model_root {model_zoo_folder}"
+    model_converter_options = model_converter_options.split()
+    converter_cmd = ["python3", f"{converter_path}"]
+    converter_cmd = np.concatenate((converter_cmd, model_converter_options))
+    # print(converter_cmd)
+    if subprocess.run(converter_cmd).returncode != 0:
+        raise RuntimeError("Model converter failed!")
+
+    ir_model_location = Path(download_folder_path) / model / "FP16"
+
+    return ir_model_location
+
+
+def myriad_compile_model_local(shaves, cmx_slices, nces, xml_path, output_file):
+
+    try:
+        myriad_compile_path = Path(os.environ['INTEL_OPENVINO_DIR']) / 'deployment_tools/inference_engine/lib/intel64/myriad_compile'
+    except KeyError:
+        sys.exit('Unable to locate myriad_compile. '
+                 'Run setupvars.sh/setupvars.bat from the OpenVINO toolkit.')
+
+    PLATFORM="VPU_MYRIAD_2450" if nces == 0 else "VPU_MYRIAD_2480"
+
+    myriad_compiler_options = f'-ip U8 -VPU_MYRIAD_PLATFORM {PLATFORM} -VPU_NUMBER_OF_SHAVES {shaves} -VPU_NUMBER_OF_CMX_SLICES {cmx_slices} -m {xml_path} -o {output_file}'
+    myriad_compiler_options = myriad_compiler_options.split()
+
+    myriad_compile_cmd = np.concatenate(([myriad_compile_path], myriad_compiler_options))
+    # print(myriad_compile_cmd)
+
+    if subprocess.run(myriad_compile_cmd).returncode != 0:
+        raise RuntimeError("Myriad compiler failed!")
+
+    return 0
+
+
+def myriad_compile_model_cloud(xml, bin, shaves, cmx_slices, nces, output_file):
+    PLATFORM="VPU_MYRIAD_2450" if nces == 0 else "VPU_MYRIAD_2480"
+
+    # Use 69.164.214.171 instead of luxonis.com to bypass Cloudflare's maximum upload size limit.
+    url = "http://69.164.214.171:8080/"
+    payload = {
+        'compile_type': 'myriad',
+        'compiler_params': '-ip U8 -VPU_MYRIAD_PLATFORM ' + PLATFORM + ' -VPU_NUMBER_OF_SHAVES ' + str(shaves) + ' -VPU_NUMBER_OF_CMX_SLICES ' + str(cmx_slices)
+    }
+    files = [
+        ('definition', open(Path(xml), 'rb')),
+        ('weights', open(Path(bin), 'rb'))
+    ]
+    try:
+        response = requests.request("POST", url, data=payload, files=files)
+    except requests.exceptions.RequestException:
+        print("Request to the cloud compile server failed (timeout or no connection)!")
+        return 1
+    if response.status_code == 200:
+        blob_file = open(output_file,'wb')
+        blob_file.write(response.content)
+        blob_file.close()
+    else:
+        print("Model compilation failed with error code: " + str(response.status_code))
+        print(str(response.text.encode('utf8')))
+        return 2
+
+    return 0
+
+def download_and_compile_NN_model(model, model_zoo_folder, shaves, cmx_slices, nces, output_file, model_compilation_target='auto'):
+
+    if model_compilation_target == 'auto' or model_compilation_target == 'local':
+        try:
+            openvino_dir = os.environ['INTEL_OPENVINO_DIR']
+            print(f'Openvino installation detected {openvino_dir}')
+            if supported_openvino_version in openvino_dir:
+                model_compilation_target = 'local'
+                print(f'Supported openvino version installed: {supported_openvino_version}')
+            else:
+                model_compilation_target = 'cloud'
+                print(f'Unsupported openvino version installed at {openvino_dir}, supported version is: {supported_openvino_version}')
+
+        except KeyError:
+            model_compilation_target = 'cloud'
+
+    print(f'model_compilation_target: {model_compilation_target}')
+    output_location = Path(model_zoo_folder) / model / output_file
+
+    download_location = download_model(model, model_zoo_folder)
+
+    if model_compilation_target == 'local':
+
+        ir_model_location = convert_model_to_ir(model, model_zoo_folder)
+
+        if(not ir_model_location.exists()):
+            raise RuntimeError(f"{ir_model_location} doesn't exist for downloaded model!")
+        xml_path = ir_model_location / (model + ".xml")
+        if(not xml_path.exists()):
+            raise RuntimeError(f"{xml_path} doesn't exist for downloaded model!")
+
+        return myriad_compile_model_local(shaves, cmx_slices, nces, xml_path, output_file)
+
+    elif model_compilation_target == 'cloud':
+
+        ir_model_location = Path(download_location) / "FP16"
+        if(not ir_model_location.exists()):
+            raise RuntimeError(f"{ir_model_location} doesn't exist for downloaded model!")
+        xml_path = ir_model_location / (model + ".xml")
+        if(not xml_path.exists()):
+            raise RuntimeError(f"{xml_path} doesn't exist for downloaded model!")
+        bin_path = ir_model_location / (model + ".bin")
+        if(not bin_path.exists()):
+            raise RuntimeError(f"{bin_path} doesn't exist for downloaded model!")
+
+        result = myriad_compile_model_cloud(xml=xml_path, bin=bin_path, shaves=shaves, cmx_slices=cmx_slices, nces=nces, output_file=output_location)
+        if result != 0:
+            raise RuntimeError("Model compiler failed! Not connected to the internet?")
+    else:
+        raise ValueError('model_compilation_target must be one of: ["auto", "local", "cloud"]')
+
+    return 0
+
+def main(args):
+
+    model = args['model_name']
+    model_zoo_folder = args['model_zoo_folder']
+
+    shaves = args['shaves']
+    cmx_slices = args['cmx_slices']
+    nces = args['nces']
+    output_file = args['output']
+    model_compilation_target = args['model_compilation_target']
+
+    return download_and_compile_NN_model(model, model_zoo_folder, shaves, cmx_slices, nces, output_file, model_compilation_target)
+
+if __name__ == '__main__':
+    import argparse
+    from argparse import ArgumentParser
+    def parse_args():
+        epilog_text = '''
+        Myriad blob compiler.
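+
+        Example (illustrative values only):
+          python3 model_compiler.py -model mobilenet-ssd -sh 7 -cmx 7 -nce 1 -o mobilenet-ssd.blob -mz ../resources/nn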
+        '''
+        parser = ArgumentParser(epilog=epilog_text,formatter_class=argparse.RawDescriptionHelpFormatter)
+        parser.add_argument("-model", "--model_name", default=None,
+                            type=str, required=True,
+                            help="model name")
+        parser.add_argument("-sh", "--shaves", default=4, type=int,
+                            help="Number of shaves used by NN.")
+        parser.add_argument("-cmx", "--cmx_slices", default=4, type=int,
+                            help="Number of cmx slices used by NN.")
+        parser.add_argument("-nce", "--nces", default=1, type=int,
+                            help="Number of NCEs used by NN.")
+        parser.add_argument("-o", "--output", default=None,
+                            type=Path, required=True,
+                            help=".blob output")
+        parser.add_argument("-mct", "--model-compilation-target", default="auto",
+                            type=str, required=False, choices=["auto","local","cloud"],
+                            help="Compile model locally or in the cloud?")
+        parser.add_argument("-mz", "--model-zoo-folder", default=None,
+                            type=str, required=True,
+                            help="Path to folder with models")
+        options = parser.parse_args()
+        return options
+
+    args = vars(parse_args())
+    ret = main(args)
+    exit(ret)
\ No newline at end of file
diff --git a/model_compiler/model_converter.py b/model_compiler/model_zoo_converter.py
similarity index 100%
rename from model_compiler/model_converter.py
rename to model_compiler/model_zoo_converter.py
diff --git a/requirements.txt b/requirements.txt
index e1f57665b..4ec36d9a6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,6 @@ opencv-python==4.2.0.34; platform_machine != "armv7l"
 opencv-python==4.1.0.25; platform_machine == "armv7l"
 requests==2.24.0
 argcomplete==1.12.0
-depthai==0.2.0.1
+open3d==0.10.0.0; platform_machine != "armv7l"
+pyyaml==5.3.1
+depthai==0.3.0.0
diff --git a/resources/boards/ACME01.json b/resources/boards/ACME01.json
new file mode 100644
index 000000000..e94758047
--- /dev/null
+++ b/resources/boards/ACME01.json
@@ -0,0 +1,13 @@
+{
+    "board_config":
+    {
+        "name": "ACME01",
+        "revision": "V1.2",
+        "swap_left_and_right_cameras": false,
+        "left_fov_deg": 71.86,
+        "rgb_fov_deg": 68.7938,
+        "left_to_right_distance_cm": 7.5,
+        "left_to_rgb_distance_cm": 3.75
+    }
+}
+
diff --git a/resources/nn/CONTRIBUTE.md b/resources/nn/CONTRIBUTE.md
new file mode 100644
index 000000000..22f4c1ee3
--- /dev/null
+++ b/resources/nn/CONTRIBUTE.md
@@ -0,0 +1,21 @@
+
+1. Create a folder with the model name, e.g. `my-little-model`
+2. Create a `.yml` with the model name, e.g. `my-little-model.yml`
+3. Use a `.yml` template, e.g. `mobilenet-ssd/mobilenet-ssd.yml`
+Change the fields in the `.yml` according to the model.
+`size` can be obtained with: `wc -c < filename`
+`sha256` can be obtained with: `sha256sum filename`
+`name` : "FP16/`model_name`.xml/bin", e.g. `FP16/my-little-model.xml`
+`source:`
+* Model stored on Google Drive (public link):
+```
+  $type: google_drive
+  id: 11-PX4EDxAnhymbuvnyb91ptvZAW3oPOn
+```
+Where `id` is the `id` of the link, e.g. in https://drive.google.com/file/d/1pdC4eNWxyfewCJ7T0i9SXLHKt39gBDZV/view?usp=sharing
+the `id` is `1pdC4eNWxyfewCJ7T0i9SXLHKt39gBDZV`
+
+* Model not stored on Google Drive: `source` is the link to the file to download.
+
+`framework:` `dldt` only for now.
+`license:` link to the license file for the model.
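+
+For reference, the same `size` and `sha256` values can also be computed with a short
+Python snippet (a minimal sketch; replace the path with the actual model file):
+```
+import hashlib
+from pathlib import Path
+
+path = Path("FP16/my-little-model.bin")
+print("size:", path.stat().st_size)                              # same as `wc -c < filename`
+print("sha256:", hashlib.sha256(path.read_bytes()).hexdigest())  # same as `sha256sum filename`
+```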
\ No newline at end of file diff --git a/resources/nn/age-gender-recognition-retail-0013/age-gender-recognition-retail-0013.json b/resources/nn/age-gender-recognition-retail-0013/age-gender-recognition-retail-0013.json deleted file mode 100644 index fb070d04d..000000000 --- a/resources/nn/age-gender-recognition-retail-0013/age-gender-recognition-retail-0013.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 1, 1], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": - [ - ["age"] - ], - "output_properties_type": "f16" - }, - { - "output_tensor_name": "out1", - "output_dimensions": [1, 2, 1, 1], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": - [ - ["female", "male"] - ], - "output_properties_type": "f16" - } - ] -} - - - diff --git a/resources/nn/age-gender-recognition-retail-0013/model.yml b/resources/nn/age-gender-recognition-retail-0013/model.yml new file mode 100644 index 000000000..e17643ac1 --- /dev/null +++ b/resources/nn/age-gender-recognition-retail-0013/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Age & gender classification. Used in Audience Analytics. 
+documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_age_gender_recognition_retail_0013_description_age_gender_recognition_retail_0013.html +task_type: object_attributes +files: + - name: FP32/age-gender-recognition-retail-0013.xml + size: 30006 + sha256: ca2c4826b9370038643d97a434bb9621adea4baf4279e5b9c3a83002456fd755 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.xml + - name: FP32/age-gender-recognition-retail-0013.bin + size: 8552076 + sha256: 4dab79cfedebd628f3327367a91c9736a32fbf9cc733cbbf1629d16910d4ace7 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.bin + - name: FP16/age-gender-recognition-retail-0013.xml + size: 29996 + sha256: a375a5d8617ff1913dfc1210f5bb07493d72293c0efa87c1abcdfe181ae8a2b0 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml + - name: FP16/age-gender-recognition-retail-0013.bin + size: 4276038 + sha256: 401101e0a01b3d68add39deae833bcf9f54238d37dd13e3fb1534aa8fbe4719d + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.bin + - name: FP32-INT8/age-gender-recognition-retail-0013.xml + size: 64387 + sha256: 930b5e2ef06d890a79e91f16ecbe54ce08abcb6923cfb56afa464750588d7a21 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP32-INT8/age-gender-recognition-retail-0013.xml + - name: FP32-INT8/age-gender-recognition-retail-0013.bin + size: 8565952 + sha256: 0efcef2c78fa5208d85d2eb5fc69955c6d97a2ba2948c67a75b0dcb3b74da41e + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/age-gender-recognition-retail-0013/FP32-INT8/age-gender-recognition-retail-0013.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/deeplabv3p_person/model.yml b/resources/nn/deeplabv3p_person/model.yml new file mode 100644 index 000000000..4815afba0 --- /dev/null +++ b/resources/nn/deeplabv3p_person/model.yml @@ -0,0 +1,19 @@ +description: >- + deeplab_v3_plus_mnv2_decoder_256 +documentation: https://github.com/PINTO0309/PINTO_model_zoo/blob/master/026_mobile-deeplabv3-plus/01_float32/download.sh +task_type: semantic_segmentation +files: + - name: FP16/deeplabv3p_person.xml + size: 138536 + sha256: bd03684c902e99df3156d331167d94d965ad12193f2a8e26903be27ac17414fd + source: + $type: google_drive + id: 1DvYCLYNeEB8Q74IEZdLb4-iY23k3Usnd + - name: FP16/deeplabv3p_person.bin + size: 4481868 + sha256: f0a667920462ed264cf7c2fc0654c2c0c3e311c763abfe99cc15232adf6bcff4 + source: + $type: google_drive + id: 1vUfUCLM9ABvN2tdF7g-IARi9xU63uBFj +framework: dldt +license: https://raw.githubusercontent.com/PINTO0309/PINTO_model_zoo/master/LICENSE diff --git a/resources/nn/emotions-recognition-retail-0003/emotions-recognition-retail-0003.json b/resources/nn/emotions-recognition-retail-0003/emotions-recognition-retail-0003.json index 85742933b..221ea4958 100644 --- a/resources/nn/emotions-recognition-retail-0003/emotions-recognition-retail-0003.json +++ 
b/resources/nn/emotions-recognition-retail-0003/emotions-recognition-retail-0003.json @@ -1,15 +1,8 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 5, 1, 1], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": [], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "raw" + }, "mappings": { "labels": diff --git a/resources/nn/emotions-recognition-retail-0003/model.yml b/resources/nn/emotions-recognition-retail-0003/model.yml new file mode 100644 index 000000000..67947e1e0 --- /dev/null +++ b/resources/nn/emotions-recognition-retail-0003/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Recognizes 5 emotions for a face. Targeted for Retail Audience Analytics. +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_emotions_recognition_retail_0003_description_emotions_recognition_retail_0003.html +task_type: object_attributes +files: + - name: FP32/emotions-recognition-retail-0003.xml + size: 37686 + sha256: 702e374b7dc11b369c13c2f9c5fc638c617d19a9c625eba5fdac9d190fddba41 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml + - name: FP32/emotions-recognition-retail-0003.bin + size: 9930028 + sha256: bcb9b1a910fa3cd18a638bb1dbb0597c4ef7a080d1b83008c8e8c2c3c42b99dd + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.bin + - name: FP16/emotions-recognition-retail-0003.xml + size: 37662 + sha256: 76ba0e171289711e1d1b8418e2bdebf4940485c6c2e0a75d2572ef7dbcee762f + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.xml + - name: FP16/emotions-recognition-retail-0003.bin + size: 4965014 + sha256: e62fb4b819b3b3ad8aafcd308d4353db2f164a1a31d78de6cf5970837aeb6f7b + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP16/emotions-recognition-retail-0003.bin + - name: FP32-INT8/emotions-recognition-retail-0003.xml + size: 102858 + sha256: 0b739388e9341f083df2d9b23506a13c234782d53e08f2e8051c0189e6e926ba + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP32-INT8/emotions-recognition-retail-0003.xml + - name: FP32-INT8/emotions-recognition-retail-0003.bin + size: 9947612 + sha256: ff8ee9cee69cdc7043ec94522459770dfda865ff979a0c8beb73d7515f4a1df2 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/emotions-recognition-retail-0003/FP32-INT8/emotions-recognition-retail-0003.bin +framework: dldt +license: 
https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/face-detection-adas-0001/face-detection-adas-0001.json b/resources/nn/face-detection-adas-0001/face-detection-adas-0001.json index 028d0e9ad..f189dfd67 100644 --- a/resources/nn/face-detection-adas-0001/face-detection-adas-0001.json +++ b/resources/nn/face-detection-adas-0001/face-detection-adas-0001.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/face-detection-adas-0001/face-detection-adas-0001_depth.json b/resources/nn/face-detection-adas-0001/face-detection-adas-0001_depth.json deleted file mode 100644 index 3b5b870ee..000000000 --- a/resources/nn/face-detection-adas-0001/face-detection-adas-0001_depth.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "unknown", - "face" - ] - } -} - diff --git a/resources/nn/face-detection-adas-0001/model.yml b/resources/nn/face-detection-adas-0001/model.yml new file mode 100644 index 000000000..8567eef5f --- /dev/null +++ b/resources/nn/face-detection-adas-0001/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +description: >- + Face Detection (MobileNet with reduced channels + SSD with weights sharing) +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_face_detection_adas_0001_description_face_detection_adas_0001.html +task_type: detection +files: + - name: FP32/face-detection-adas-0001.xml + size: 232998 + sha256: 62b4bf7dead77e16a47428b541aa4f3c506cdf3c7e31a317aa75771dd907557c + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP32/face-detection-adas-0001.xml + - name: FP32/face-detection-adas-0001.bin + size: 4212072 + sha256: 85a9334e031289692884e2aefbcb4ca401b003a3f25ff4dd0e669ba32f98cc0b + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP32/face-detection-adas-0001.bin + - name: FP16/face-detection-adas-0001.xml + size: 232967 + sha256: c0b9d34eba1fe6c76755fada4dc65634068eddf343a219b501ac71439348eac1 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP16/face-detection-adas-0001.xml + - name: FP16/face-detection-adas-0001.bin + size: 2106088 + sha256: df0f5799d801c6afb355d1c4771693c782efbb58d2eb2238982eac8fe84bc821 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP16/face-detection-adas-0001.bin + - name: FP32-INT8/face-detection-adas-0001.xml + size: 503962 + sha256: 8959f287e3caf098948b2faee40d67e019c4b794948476fcdc8a28e58cb79125 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP32-INT8/face-detection-adas-0001.xml + - name: FP32-INT8/face-detection-adas-0001.bin + size: 4291532 + sha256: d3ffb0da33361253d4027def5acddf104a6919c920582f084974a4f9d8529ffd + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP32-INT8/face-detection-adas-0001.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/face-detection-retail-0004/face-detection-retail-0004.json b/resources/nn/face-detection-retail-0004/face-detection-retail-0004.json index 028d0e9ad..f189dfd67 100644 --- a/resources/nn/face-detection-retail-0004/face-detection-retail-0004.json +++ b/resources/nn/face-detection-retail-0004/face-detection-retail-0004.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/face-detection-retail-0004/face-detection-retail-0004_depth.json b/resources/nn/face-detection-retail-0004/face-detection-retail-0004_depth.json deleted file mode 100644 index 3b5b870ee..000000000 --- a/resources/nn/face-detection-retail-0004/face-detection-retail-0004_depth.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", 
"left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "unknown", - "face" - ] - } -} - diff --git a/resources/nn/face-detection-retail-0004/model.yml b/resources/nn/face-detection-retail-0004/model.yml new file mode 100644 index 000000000..88648ecd2 --- /dev/null +++ b/resources/nn/face-detection-retail-0004/model.yml @@ -0,0 +1,46 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Face Detection (SqNet1.0modif+single scale) without BatchNormalization trained with + negatives. +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_face_detection_retail_0004_description_face_detection_retail_0004.html +task_type: detection +files: + - name: FP32/face-detection-retail-0004.xml + size: 101815 + sha256: 81d31d708214bb4dd4dcef469fa18c611ad5c3f88447ba0986963696cfad8401 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml + - name: FP32/face-detection-retail-0004.bin + size: 2352984 + sha256: 89349ce12dd21c5263fb302cd3ffd4b73c35ea12ed98aff863d03a2cf3a32464 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin + - name: FP16/face-detection-retail-0004.xml + size: 101773 + sha256: 7e8e76df3b70ac24967f625f096b1a9c6f2474e099e94210b177be0a5175e521 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP16/face-detection-retail-0004.xml + - name: FP16/face-detection-retail-0004.bin + size: 1176544 + sha256: ab7def342edab22e69ba1ef4e971983ea4e0f337c43b0a49c7ef3a7627f6cf1a + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP16/face-detection-retail-0004.bin + - name: FP32-INT8/face-detection-retail-0004.xml + size: 231496 + sha256: 4bac1bae22ab8f34c15a7504bd97ce3e57a71a5b63ec08435a1bdf180b38ab18 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32-INT8/face-detection-retail-0004.xml + - name: FP32-INT8/face-detection-retail-0004.bin + size: 2372492 + sha256: f696f0651da3cf3b07915356fa6f7332dde671ac51b26e3be8209df3d7f7e5e0 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32-INT8/face-detection-retail-0004.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/facial-landmarks-35-adas-0002/facial-landmarks-35-adas-0002.json b/resources/nn/facial-landmarks-35-adas-0002/facial-landmarks-35-adas-0002.json deleted file mode 100644 index 67d8a717b..000000000 --- a/resources/nn/facial-landmarks-35-adas-0002/facial-landmarks-35-adas-0002.json +++ /dev/null @@ -1,14 
+0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 70], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": [], - "output_properties_type": "f16" - } - ] -} - diff --git a/resources/nn/facial-landmarks-35-adas-0002/model.yml b/resources/nn/facial-landmarks-35-adas-0002/model.yml new file mode 100644 index 000000000..610ecf572 --- /dev/null +++ b/resources/nn/facial-landmarks-35-adas-0002/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Custom-architecture convolutional neural network for 35 facial landmarks estimation. +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_facial_landmarks_35_adas_0002_description_facial_landmarks_35_adas_0002.html +task_type: object_attributes +files: + - name: FP32/facial-landmarks-35-adas-0002.xml + size: 237139 + sha256: 4cb90657a60311184f3505ef973563180729bb7e8d5a2f6a42f9109c1af7bfdb + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.xml + - name: FP32/facial-landmarks-35-adas-0002.bin + size: 18381152 + sha256: bd41c25201c2ea688a75549c11b9e10aa98e51dcc5a6dd783e84cdcf55a1011e + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.bin + - name: FP16/facial-landmarks-35-adas-0002.xml + size: 237002 + sha256: 702d476efcab1e89519ff14b76812c61cd5123f86f5fc343512fa416a9a94e27 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.xml + - name: FP16/facial-landmarks-35-adas-0002.bin + size: 9190584 + sha256: 88a2064849da0e6a31557f489ca4d652aa99e5a9676da0c5c857ee5d8cc26fe4 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP16/facial-landmarks-35-adas-0002.bin + - name: FP32-INT8/facial-landmarks-35-adas-0002.xml + size: 588492 + sha256: d41ba7372ca2db93bd269db32567022109d3e2a9b7aed3da0ef1e8e021f95be2 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP32-INT8/facial-landmarks-35-adas-0002.xml + - name: FP32-INT8/facial-landmarks-35-adas-0002.bin + size: 18441156 + sha256: f81ebf7c25f30138c2fa44530f51f516783ea790c5b6cacbaa5b31d95c73cbb7 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/facial-landmarks-35-adas-0002/FP32-INT8/facial-landmarks-35-adas-0002.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/human-pose-estimation-0001/model.yml b/resources/nn/human-pose-estimation-0001/model.yml new file mode 100644 index 000000000..96f7b00ac --- /dev/null +++ 
b/resources/nn/human-pose-estimation-0001/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + 2D human pose estimation with tuned mobilenet backbone (based on OpenPose). +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_human_pose_estimation_0001_description_human_pose_estimation_0001.html +task_type: human_pose_estimation +files: + - name: FP32/human-pose-estimation-0001.xml + size: 143782 + sha256: 4d2cc5d3a8c395affa74ae640a63ba9bae9bd5871527d65f62a87cc5b40831e9 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml + - name: FP32/human-pose-estimation-0001.bin + size: 16394712 + sha256: 1cd32cb5f9f4633b8ee1451144f749ddd75038c4c5fe862e704e489908b79a78 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP32/human-pose-estimation-0001.bin + - name: FP16/human-pose-estimation-0001.xml + size: 143706 + sha256: 75bda095e6fa1b1c118e085c4e5f4be9d9d9db512578fd6ec57b3113d5b42dae + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml + - name: FP16/human-pose-estimation-0001.bin + size: 8197356 + sha256: c1c828d5d1ea2b03035692a8600f06d787fe1a20e79dac159c4293ed7063a382 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP16/human-pose-estimation-0001.bin + - name: FP32-INT8/human-pose-estimation-0001.xml + size: 408100 + sha256: 6e22b77c4e5f2c39bb14603c3bfa2d902210a3fdb3fe89443eab5fb149c3a470 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP32-INT8/human-pose-estimation-0001.xml + - name: FP32-INT8/human-pose-estimation-0001.bin + size: 16513820 + sha256: 482635ee949716049bd7dd5a69ab66a6d5f29a5cc86598c0369035be2d1fa274 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/human-pose-estimation-0001/FP32-INT8/human-pose-estimation-0001.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/json_validation_schema.json b/resources/nn/json_validation_schema.json deleted file mode 100644 index 28a9ce7ad..000000000 --- a/resources/nn/json_validation_schema.json +++ /dev/null @@ -1,160 +0,0 @@ -{ - "definitions": {}, - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "http://example.com/root.json", - "type": "object", - "title": "The Root Schema", - "required": [ - "input", - "tensors", - "mappings" - ], - "properties": { - "tensors": { - "$id": "#/properties/tensors", - "type": "array", - "title": "The Tensors Schema", - "items": { - "$id": "#/properties/tensors/items", - "type": "object", - "title": "The Items Schema", - "required": [ - 
"output_tensor_name", - "output_dimensions", - "output_entry_iteration_index", - "output_properties_dimensions", - "property_key_mapping", - "property_value_mapping", - "output_properties_type" - ], - "properties": { - "output_tensor_name": { - "$id": "#/properties/tensors/items/properties/output_tensor_name", - "type": "string", - "title": "The Output_tensor_name Schema", - "default": "", - "examples": [ - "out" - ], - "pattern": "^(.*)$" - }, - "output_dimensions": { - "$id": "#/properties/tensors/items/properties/output_dimensions", - "type": "array", - "title": "The Output_dimensions Schema", - "items": { - "$id": "#/properties/tensors/items/properties/output_dimensions/items", - "type": "integer", - "title": "The Items Schema", - "default": 0, - "examples": [ - 1, - 1, - 100, - 7 - ] - } - }, - "output_entry_iteration_index": { - "$id": "#/properties/tensors/items/properties/output_entry_iteration_index", - "type": "integer", - "title": "The Output_entry_iteration_index Schema", - "default": 0, - "examples": [ - 2 - ] - }, - "output_properties_dimensions": { - "$id": "#/properties/tensors/items/properties/output_properties_dimensions", - "type": "array", - "title": "The Output_properties_dimensions Schema", - "items": { - "$id": "#/properties/tensors/items/properties/output_properties_dimensions/items", - "type": "integer", - "title": "The Items Schema", - "default": 0, - "examples": [ - 3 - ] - } - }, - "property_key_mapping": { - "$id": "#/properties/tensors/items/properties/property_key_mapping", - "type": "array", - "title": "The Property_key_mapping Schema", - "items": { - "$id": "#/properties/tensors/items/properties/property_key_mapping/items", - "type": "array", - "title": "The Items Schema" - } - }, - "property_value_mapping": { - "$id": "#/properties/tensors/items/properties/property_value_mapping", - "type": "array", - "title": "The Property_value_mapping Schema", - "items": { - "$id": "#/properties/tensors/items/properties/property_value_mapping/items", - "type": "array", - "title": "The Items Schema" - } - }, - "output_properties_type": { - "$id": "#/properties/tensors/items/properties/output_properties_type", - "type": "string", - "title": "The Output_properties_type Schema", - "default": "", - "examples": [ - "f16" - ], - "pattern": "^(.*)$" - } - } - } - }, - "mappings": { - "$id": "#/properties/mappings", - "type": "object", - "title": "The Mappings Schema", - "required": [ - "labels" - ], - "properties": { - "labels": { - "$id": "#/properties/mappings/properties/labels", - "type": "array", - "title": "The Labels Schema", - "items": { - "$id": "#/properties/mappings/properties/labels/items", - "type": "string", - "title": "The Items Schema", - "default": "", - "examples": [ - "background", - "aeroplane", - "bicycle", - "bird", - "boat", - "bottle", - "bus", - "car", - "cat", - "chair", - "cow", - "diningtable", - "dog", - "horse", - "motorbike", - "person", - "pottedplant", - "sheep", - "sofa", - "train", - "tvmonitor" - ], - "pattern": "^(.*)$" - } - } - } - } - } -} diff --git a/resources/nn/landmarks-regression-retail-0009/landmarks-regression-retail-0009.json b/resources/nn/landmarks-regression-retail-0009/landmarks-regression-retail-0009.json deleted file mode 100644 index 927c9c65f..000000000 --- a/resources/nn/landmarks-regression-retail-0009/landmarks-regression-retail-0009.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 10], - "output_entry_iteration_index": 0, - 
"output_properties_dimensions": [0], - "property_key_mapping": [], - "output_properties_type": "f16" - } - ] -} - diff --git a/resources/nn/landmarks-regression-retail-0009/model.yml b/resources/nn/landmarks-regression-retail-0009/model.yml new file mode 100644 index 000000000..445d580f1 --- /dev/null +++ b/resources/nn/landmarks-regression-retail-0009/model.yml @@ -0,0 +1,46 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Landmark's detection. Used in Smart Classroom. The model is identical to 0002 but + trained on internal dataset with improved regression loss. +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_landmarks_regression_retail_0009_description_landmarks_regression_retail_0009.html +task_type: object_attributes +files: + - name: FP32/landmarks-regression-retail-0009.xml + size: 42715 + sha256: ef7aa8f3f2b9730df8612fc7221bd7beb423b6001202a452723efd86993358a3 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml + - name: FP32/landmarks-regression-retail-0009.bin + size: 762464 + sha256: 46795837d35e8199b7c5b57e1f76297827bf516a150c0d5643197d8c325f1dbc + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.bin + - name: FP16/landmarks-regression-retail-0009.xml + size: 42692 + sha256: 01a0c7b4bbb0a12919351d78d551bbbe36a1fa62812c0f6ee1f49d7cc7bc10c0 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.xml + - name: FP16/landmarks-regression-retail-0009.bin + size: 381248 + sha256: 973b13e64b3576f754a690266b069438b531bc657233d09d7b382ca36e6cf1e4 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.bin + - name: FP32-INT8/landmarks-regression-retail-0009.xml + size: 78229 + sha256: 1bf202dd4ea57ebc17762ea3173e1c482c1c8efb2aeeec13fa073429bfd766a2 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP32-INT8/landmarks-regression-retail-0009.xml + - name: FP32-INT8/landmarks-regression-retail-0009.bin + size: 767188 + sha256: 2a4bbc1f41d80e3ad2125d9297825d49379647ee2f61d982962135d1a146ecb7 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/landmarks-regression-retail-0009/FP32-INT8/landmarks-regression-retail-0009.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/mobileNetV2-PoseEstimation/model.yml b/resources/nn/mobileNetV2-PoseEstimation/model.yml new file mode 100644 index 000000000..56402c066 --- /dev/null +++ 
b/resources/nn/mobileNetV2-PoseEstimation/model.yml @@ -0,0 +1,15 @@ +description: >- + 2D human pose estimation from PINTO03009 +documentation: https://github.com/PINTO0309/MobileNetV2-PoseEstimation/tree/master/models/train/test/openvino/mobilenet_v2_1.4_224/FP16 +task_type: human_pose_estimation +files: + - name: FP16/mobileNetV2-PoseEstimation.xml + size: 151699 + sha256: a8e6929e4b67472fe8086a05c4426d5f49af7e4383c9e9dfda8a5eae48f2529d + source: https://raw.githubusercontent.com/PINTO0309/MobileNetV2-PoseEstimation/master/models/train/test/openvino/mobilenet_v2_1.4_224/FP16/frozen-model.xml + - name: FP16/mobileNetV2-PoseEstimation.bin + size: 4409440 + sha256: 4f5d51729dc1cda4da7b402fe3e0af0c0858ac5f0288973623f8a747fa7a77f0 + source: https://github.com/PINTO0309/MobileNetV2-PoseEstimation/blob/master/models/train/test/openvino/mobilenet_v2_1.4_224/FP16/frozen-model.bin?raw=true +framework: dldt +license: https://github.com/PINTO0309/MobileNetV2-PoseEstimation/tree/master/models/train/test/openvino/mobilenet_v2_1.4_224/FP16 diff --git a/resources/nn/mobilenet-ssd/mobilenet-ssd.json b/resources/nn/mobilenet-ssd/mobilenet-ssd.json index 75de1ad47..165f708ba 100644 --- a/resources/nn/mobilenet-ssd/mobilenet-ssd.json +++ b/resources/nn/mobilenet-ssd/mobilenet-ssd.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/mobilenet-ssd/mobilenet-ssd_depth.json b/resources/nn/mobilenet-ssd/mobilenet-ssd_depth.json deleted file mode 100644 index bb5305f99..000000000 --- a/resources/nn/mobilenet-ssd/mobilenet-ssd_depth.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "background", - "aeroplane", - "bicycle", - "bird", - "boat", - "bottle", - "bus", - "car", - "cat", - "chair", - "cow", - "diningtable", - "dog", - "horse", - "motorbike", - "person", - "pottedplant", - "sheep", - "sofa", - "train", - "tvmonitor" - ] - } -} - diff --git a/resources/nn/mobilenet-ssd/model.yml b/resources/nn/mobilenet-ssd/model.yml new file mode 100644 index 000000000..7f151f9e0 --- /dev/null +++ b/resources/nn/mobilenet-ssd/model.yml @@ -0,0 +1,43 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +description: >- + The `mobilenet-ssd` model is a Single-Shot multibox Detection (SSD) network + intended to perform object detection. This model is implemented using the Caffe\* + framework. For details about this model, check out the repository . + + The model input is a blob that consists of a single image of 1x3x300x300 in + BGR order, also like the `densenet-121` model. The BGR mean values need to be + subtracted as follows: [127.5, 127.5, 127.5] before passing the image blob into + the network. In addition, values must be divided by 0.007843. + + The model output is a typical vector containing the tracked object data, as + previously described. +documentation: https://github.com/openvinotoolkit/open_model_zoo/blob/efd238d02035f8a5417b7b1e25cd4c997d44351f/models/public/mobilenet-ssd/mobilenet-ssd.md +task_type: detection +files: + - name: FP16/mobilenet-ssd.xml + size: 177219 + sha256: ec3a3931faf1a4a5d70aa12576cc2a5f1b5b0d0be2517cc8f9c42f616fa10b2f + source: + $type: google_drive + id: 11-PX4EDxAnhymbuvnyb91ptvZAW3oPOn + - name: FP16/mobilenet-ssd.bin + size: 11566946 + sha256: db075a98c8d3e4636bb4206048b3d20f164a5a3730a5aa6b6b0cdbbfd2607fab + source: + $type: google_drive + id: 1pdC4eNWxyfewCJ7T0i9SXLHKt39gBDZV +framework: dldt +license: https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/LICENSE diff --git a/resources/nn/openpose b/resources/nn/openpose new file mode 120000 index 000000000..e60308758 --- /dev/null +++ b/resources/nn/openpose @@ -0,0 +1 @@ +human-pose-estimation-0001/ \ No newline at end of file diff --git a/resources/nn/openpose2 b/resources/nn/openpose2 new file mode 120000 index 000000000..23f8c2c70 --- /dev/null +++ b/resources/nn/openpose2 @@ -0,0 +1 @@ +mobileNetV2-PoseEstimation/ \ No newline at end of file diff --git a/resources/nn/pedestrian-detection-adas-0002/model.yml b/resources/nn/pedestrian-detection-adas-0002/model.yml new file mode 100644 index 000000000..b993d4176 --- /dev/null +++ b/resources/nn/pedestrian-detection-adas-0002/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Pedestrian detector based on ssd + mobilenet with reduced channels number. 
+documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_pedestrian_detection_adas_0002_description_pedestrian_detection_adas_0002.html +task_type: detection +files: + - name: FP32/pedestrian-detection-adas-0002.xml + size: 245421 + sha256: 69dc36c52afc27f2ffa752e28e1710ca7653b3b7abda6e25c1fde7eb0767b480 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.xml + - name: FP32/pedestrian-detection-adas-0002.bin + size: 4660248 + sha256: de3e3b12eb631d0fa67db14d7cd3c3aaea7edc78e460485dcdae3a447f4d4288 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.bin + - name: FP16/pedestrian-detection-adas-0002.xml + size: 245369 + sha256: 233519498b393adc8506bd80dd09d19f89e202b544565e7821a7fdff48bfa306 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP16/pedestrian-detection-adas-0002.xml + - name: FP16/pedestrian-detection-adas-0002.bin + size: 2330176 + sha256: 693753b7466da913b537016b31e3012f8ce6a885ac0897e3986eeca66d4ef8e6 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP16/pedestrian-detection-adas-0002.bin + - name: FP32-INT8/pedestrian-detection-adas-0002.xml + size: 478047 + sha256: d04b25d198b5a007d5301fa60d2b67634beaa30886b11c03ddc76df8506aed9a + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP32-INT8/pedestrian-detection-adas-0002.xml + - name: FP32-INT8/pedestrian-detection-adas-0002.bin + size: 4740408 + sha256: 38c3e849bd872a604a1374dffdb18520718c3c2d16fe13ce07642a986f68a273 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/pedestrian-detection-adas-0002/FP32-INT8/pedestrian-detection-adas-0002.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002.json b/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002.json index 3b74557e2..7950ab79e 100644 --- a/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002.json +++ b/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002_depth.json b/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002_depth.json deleted file mode 100644 index fabe18e40..000000000 --- a/resources/nn/pedestrian-detection-adas-0002/pedestrian-detection-adas-0002_depth.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - 
"property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "unknown", - "pedestrian" - ] - } -} - diff --git a/resources/nn/person-detection-retail-0013/model.yml b/resources/nn/person-detection-retail-0013/model.yml new file mode 100644 index 000000000..9b8f64293 --- /dev/null +++ b/resources/nn/person-detection-retail-0013/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Pedestrian detection (RMNet with lrelu + SSD) +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_person_detection_retail_0013_description_person_detection_retail_0013.html +task_type: detection +files: + - name: FP32/person-detection-retail-0013.xml + size: 354260 + sha256: d83a9b8a82a9e6fb52c73f63c4ab6e71d57592c8b2068a85f14548df7cef8eae + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP32/person-detection-retail-0013.xml + - name: FP32/person-detection-retail-0013.bin + size: 2891364 + sha256: 6f09cb7061328942f9d5e9fc81631a4234be66a26daa50cd672d4077ee82ad44 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP32/person-detection-retail-0013.bin + - name: FP16/person-detection-retail-0013.xml + size: 354107 + sha256: 603502ad82af0ee6b0ee9948257aece2599ac2b153df1691b3fe5b0c33fe3a88 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP16/person-detection-retail-0013.xml + - name: FP16/person-detection-retail-0013.bin + size: 1445736 + sha256: 56eeccbeb3f27144046edacb95b459d60f3930f54051ef5b9e5253923c07b672 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP16/person-detection-retail-0013.bin + - name: FP32-INT8/person-detection-retail-0013.xml + size: 931279 + sha256: 8f7e71c2db19f1f641346ff9cd99553e650b42c00a96c9bbfaf2bcbbd6daa49d + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP32-INT8/person-detection-retail-0013.xml + - name: FP32-INT8/person-detection-retail-0013.bin + size: 2969676 + sha256: 523b79294453845934e068f4a906b9bf88b4c70ab705f2137e089439f4555b92 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-detection-retail-0013/FP32-INT8/person-detection-retail-0013.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/person-detection-retail-0013/person-detection-retail-0013.json b/resources/nn/person-detection-retail-0013/person-detection-retail-0013.json index 901dd4c0b..3f514c758 100644 --- 
a/resources/nn/person-detection-retail-0013/person-detection-retail-0013.json +++ b/resources/nn/person-detection-retail-0013/person-detection-retail-0013.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/person-detection-retail-0013/person-detection-retail-0013_depth.json b/resources/nn/person-detection-retail-0013/person-detection-retail-0013_depth.json deleted file mode 100644 index bc2997e77..000000000 --- a/resources/nn/person-detection-retail-0013/person-detection-retail-0013_depth.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "unknown", - "person" - ] - } -} - diff --git a/resources/nn/person-vehicle-bike-detection-crossroad-1016/model.yml b/resources/nn/person-vehicle-bike-detection-crossroad-1016/model.yml new file mode 100644 index 000000000..ebdbc2cad --- /dev/null +++ b/resources/nn/person-vehicle-bike-detection-crossroad-1016/model.yml @@ -0,0 +1,46 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +description: >- + Multiclass (person - vehicle - non-vehicle) detector based on SSD detection architecture + - MobileNetV2 backbone) +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_person_vehicle_bike_detection_crossroad_1016_description_person_vehicle_bike_detection_crossroad_1016.html +task_type: detection +files: + - name: FP32/person-vehicle-bike-detection-crossroad-1016.xml + size: 194333 + sha256: 040bded9370548d6ca02286b764334291c4ce6edb3ca6facd1758490a6433230 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.xml + - name: FP32/person-vehicle-bike-detection-crossroad-1016.bin + size: 11548004 + sha256: 61c9deaca903bb107aecfc31faeb402c894e0d43ab3817db8d9d05d43dfa68cf + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.bin + - name: FP16/person-vehicle-bike-detection-crossroad-1016.xml + size: 194219 + sha256: 3392a26b381ebbfa837cafb976a828fd8519ff58be08aac72cb54f4f40b58184 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP16/person-vehicle-bike-detection-crossroad-1016.xml + - name: FP16/person-vehicle-bike-detection-crossroad-1016.bin + size: 5774054 + sha256: a0ba3bc9ca17330c731b395153651c22a666050cd06413c01059070681dc79a7 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP16/person-vehicle-bike-detection-crossroad-1016.bin + - name: FP32-INT8/person-vehicle-bike-detection-crossroad-1016.xml + size: 510391 + sha256: 82d95cf77a1a5a4c2d0d8a6c364229700f293ccd8df883a081c2dc33f4025462 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP32-INT8/person-vehicle-bike-detection-crossroad-1016.xml + - name: FP32-INT8/person-vehicle-bike-detection-crossroad-1016.bin + size: 11777480 + sha256: 0abdc201405b2f910f3ddd49307afd89ee43ab80009f9827b4f16e7aa7274157 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/person-vehicle-bike-detection-crossroad-1016/FP32-INT8/person-vehicle-bike-detection-crossroad-1016.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016.json b/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016.json index 6bd947476..18bc8233f 100644 --- a/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016.json +++ b/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016.json @@ -1,27 +1,16 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": [ 
"bike", - "vechicle", + "vehicle", "person" ] } diff --git a/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016_depth.json b/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016_depth.json deleted file mode 100644 index 6fdc3a771..000000000 --- a/resources/nn/person-vehicle-bike-detection-crossroad-1016/person-vehicle-bike-detection-crossroad-1016_depth.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "bike", - "vechicle", - "person" - ] - } -} - diff --git a/resources/nn/tiny-yolo-v3/model.yml b/resources/nn/tiny-yolo-v3/model.yml new file mode 100644 index 000000000..3dda1f788 --- /dev/null +++ b/resources/nn/tiny-yolo-v3/model.yml @@ -0,0 +1,18 @@ +description: >- + Frozen darknet tiny yolo v3. +task_type: detection +files: + - name: FP16/tiny-yolo-v3.xml + size: 43911 + sha256: 6d6c7f93456942ef9a2f4d1e3c5534a9621ec8e40fee2be81b4392cd1f615831 + source: + $type: google_drive + id: 1ZBL9hDEv6_4SYjNJNm4bumQs9UhhsGD7 + - name: FP16/tiny-yolo-v3.bin + size: 17698384 + sha256: 1140fdc55128e311012aec83f107c87b2def967b2dfcb6292654806abaf57c82 + source: + $type: google_drive + id: 1fqkVq2uVJPi5SpJ9Yz8ToEQJYzx-P5RM +framework: dldt +license: https://raw.githubusercontent.com/PINTO0309/PINTO_model_zoo/master/LICENSE diff --git a/resources/nn/tiny-yolo-v3/tiny-yolo-v3.json b/resources/nn/tiny-yolo-v3/tiny-yolo-v3.json new file mode 100644 index 000000000..c07c03497 --- /dev/null +++ b/resources/nn/tiny-yolo-v3/tiny-yolo-v3.json @@ -0,0 +1,107 @@ +{ + "NN_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [10,14, 23,27, 37,58, 81,82, 135,169, 344,319], + "anchor_masks" : + { + "side26" : [1,2,3], + "side13" : [3,4,5] + }, + "iou_threshold" : 0.5, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} + diff --git a/resources/nn/tiny-yolo/tiny-yolo.json b/resources/nn/tiny-yolo/tiny-yolo.json deleted file mode 100644 index 2475c7fe3..000000000 --- 
a/resources/nn/tiny-yolo/tiny-yolo.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 24, 26, 26], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": [], - "output_properties_type": "f16" - }, - { - "output_tensor_name": "out2", - "output_dimensions": [1, 24, 13, 13], - "output_entry_iteration_index": 0, - "output_properties_dimensions": [0], - "property_key_mapping": [], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "mask", - "no mask", - "no mask2" - ] - } -} - diff --git a/resources/nn/vehicle-detection-adas-0002/model.yml b/resources/nn/vehicle-detection-adas-0002/model.yml new file mode 100644 index 000000000..f5bc9553f --- /dev/null +++ b/resources/nn/vehicle-detection-adas-0002/model.yml @@ -0,0 +1,46 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +description: >- + Vehicle detector based on SSD + MobileNet with reduced number of channels and depthwise + head. +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_vehicle_detection_adas_0002_description_vehicle_detection_adas_0002.html +task_type: detection +files: + - name: FP32/vehicle-detection-adas-0002.xml + size: 216043 + sha256: b58c126cc2cf025442dd614159b0a6061c236eaecf8ae5f9d136b7b312499b31 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.xml + - name: FP32/vehicle-detection-adas-0002.bin + size: 4314552 + sha256: 6d0e114769bed9c8bd6666e098f891d7c158508bd7db6ba2bd4d09de17db1b69 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.bin + - name: FP16/vehicle-detection-adas-0002.xml + size: 215993 + sha256: 3f68133b27949aae53f2fa118be8d980c91a87f8112f8fd84ab2dc0c77e13754 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP16/vehicle-detection-adas-0002.xml + - name: FP16/vehicle-detection-adas-0002.bin + size: 2157328 + sha256: e119b3bec50bc836f2eb26a56a40a6d5e3c33bde819f3e6d00f829978092742b + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP16/vehicle-detection-adas-0002.bin + - name: FP32-INT8/vehicle-detection-adas-0002.xml + size: 438606 + sha256: b7b2531985b025f42e40251ae1151a204e38c8db83ae1d4b565a8ce729cf1156 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP32-INT8/vehicle-detection-adas-0002.xml + - name: FP32-INT8/vehicle-detection-adas-0002.bin + size: 4389736 + sha256: 9fe53648e0a377c00e8b985817cd25dad320008e8d0a1048af4ac88aabf1a929 + source: 
https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-detection-adas-0002/FP32-INT8/vehicle-detection-adas-0002.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002.json b/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002.json index d48c83c28..7c5796729 100644 --- a/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002.json +++ b/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git a/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002_depth.json b/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002_depth.json deleted file mode 100644 index 0338aadf6..000000000 --- a/resources/nn/vehicle-detection-adas-0002/vehicle-detection-adas-0002_depth.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "", - "vehicle" - ] - } -} - diff --git a/resources/nn/vehicle-license-plate-detection-barrier-0106/model.yml b/resources/nn/vehicle-license-plate-detection-barrier-0106/model.yml new file mode 100644 index 000000000..996d15db1 --- /dev/null +++ b/resources/nn/vehicle-license-plate-detection-barrier-0106/model.yml @@ -0,0 +1,45 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +description: >- + Multiclass (vehicle - license plates) detector based on MobileNetV2+SSD +documentation: https://docs.openvinotoolkit.org/2020.1/_models_intel_vehicle_license_plate_detection_barrier_0106_description_vehicle_license_plate_detection_barrier_0106.html +task_type: detection +files: + - name: FP32/vehicle-license-plate-detection-barrier-0106.xml + size: 204476 + sha256: 0c4abcf31945d04fc33459625988176ade8e0986a8e2b4499c1ec511177217a4 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.xml + - name: FP32/vehicle-license-plate-detection-barrier-0106.bin + size: 2573380 + sha256: 2a6f1cd0eb8731ef059a91299bce0be026d2cebb1ae42cbee37b6aef7da2764b + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.bin + - name: FP16/vehicle-license-plate-detection-barrier-0106.xml + size: 204408 + sha256: 6fc62d36f52ccfa34eced7c8e3c260e9870a8b5c42fe413a76a1321f10a637dc + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP16/vehicle-license-plate-detection-barrier-0106.xml + - name: FP16/vehicle-license-plate-detection-barrier-0106.bin + size: 1286758 + sha256: 57652e098f93ee712eb412267612cb7bf2713475f96ec1e18eca10e4bef69785 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP16/vehicle-license-plate-detection-barrier-0106.bin + - name: FP32-INT8/vehicle-license-plate-detection-barrier-0106.xml + size: 519016 + sha256: 0037685af2cc25f569c7db066ab6e9673a0faae204d8f2f19a003b28bd21bb28 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP32-INT8/vehicle-license-plate-detection-barrier-0106.xml + - name: FP32-INT8/vehicle-license-plate-detection-barrier-0106.bin + size: 2642612 + sha256: 2f81d5803d76e90f9000c17083536ece01e7127c407e5bec7a6caef07ac01082 + source: https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/vehicle-license-plate-detection-barrier-0106/FP32-INT8/vehicle-license-plate-detection-barrier-0106.bin +framework: dldt +license: https://raw.githubusercontent.com/opencv/open_model_zoo/master/LICENSE diff --git a/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106.json b/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106.json index 5bd430592..8a51a139c 100644 --- a/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106.json +++ b/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106.json @@ -1,21 +1,10 @@ { - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 7], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom"] - ], - "output_properties_type": "f16" - } - ], + "NN_config": + { + "output_format" : "detection", + "NN_family" : "mobilenet", + "confidence_threshold" : 0.5 + }, "mappings": { "labels": diff --git 
a/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106_depth.json b/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106_depth.json deleted file mode 100644 index 49d7edd67..000000000 --- a/resources/nn/vehicle-license-plate-detection-barrier-0106/vehicle-license-plate-detection-barrier-0106_depth.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "tensors": - [ - { - "output_tensor_name": "out", - "output_dimensions": [1, 1, 100, 10], - "output_entry_iteration_index": 2, - "output_properties_dimensions": [3], - "property_key_mapping": - [ - [], - [], - [], - ["id", "label", "confidence", "left", "top", "right", "bottom", "distance_x", "distance_y", "distance_z"] - ], - "output_properties_type": "f16" - } - ], - "mappings": - { - "labels": - [ - "", - "vehicle", - "license plate" - ] - } -} - diff --git a/resources/nn/yolo-v3/model.yml b/resources/nn/yolo-v3/model.yml new file mode 100644 index 000000000..ff0383dcf --- /dev/null +++ b/resources/nn/yolo-v3/model.yml @@ -0,0 +1,18 @@ +description: >- + Frozen darknet yolo v3. +task_type: detection +files: + - name: FP16/yolo-v3.xml + size: 233831 + sha256: 758a3bab6807de153bdc8201fb1abf22492489228f2bebce908be05afc226b22 + source: + $type: google_drive + id: 1GBHCB75hYopF4rRYyTrQxqx71mdqMNFd + - name: FP16/yolo-v3.bin + size: 123845726 + sha256: 3d383842e69d1c3e45a48d8eb2f9bf69efe5767a824c33db0fb9e517a405d46b + source: + $type: google_drive + id: 1laTzWexf6zX77zEDRTLHSl76Aw9d9xEO +framework: dldt +license: https://raw.githubusercontent.com/PINTO0309/PINTO_model_zoo/master/LICENSE \ No newline at end of file diff --git a/resources/nn/yolo-v3/yolo-v3.json b/resources/nn/yolo-v3/yolo-v3.json new file mode 100644 index 000000000..0f6fbb022 --- /dev/null +++ b/resources/nn/yolo-v3/yolo-v3.json @@ -0,0 +1,108 @@ +{ + "NN_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0,116.0, 90.0, 156.0,198.0,373.0, 326.0], + "anchor_masks" : + { + "side52" : [0,1,2], + "side26" : [3,4,5], + "side13" : [6,7,8] + }, + "iou_threshold" : 0.5, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} + diff --git a/setup.py b/setup.py deleted file mode 100644 index 248f91916..000000000 --- a/setup.py +++ /dev/null @@ -1,2 +0,0 @@ -from setuptools import setup 
-setup(name='depthai_extras')
diff --git a/test.py b/test.py
deleted file mode 120000
index 299430d57..000000000
--- a/test.py
+++ /dev/null
@@ -1 +0,0 @@
-depthai_supervisor.py
\ No newline at end of file
diff --git a/testsFunctional/runFunctionalTests.py b/testsFunctional/runFunctionalTests.py
new file mode 100644
index 000000000..bf9489d12
--- /dev/null
+++ b/testsFunctional/runFunctionalTests.py
@@ -0,0 +1,84 @@
+import os
+import consts.resource_paths
+
+class TestManager:
+    results = []
+
+    def printTestName(self, testName):
+        print("\033[1;34;40m----------------------------------------------------------------")
+        print("Running " + testName)
+        print("----------------------------------------------------------------\033[0m")
+
+
+    def printResult(self, exitCode, testName):
+        if exitCode == 0:
+            print("\033[1;32;40m----------------------------------------------------------------")
+            print(testName + " Passed!")
+            print("----------------------------------------------------------------\033[0m")
+        else:
+            print("\033[1;31;40m----------------------------------------------------------------")
+            print(testName + " Failed...")
+            print("----------------------------------------------------------------\033[0m")
+
+    def appendResult(self, result):
+        self.results.append(result)
+
+    def printSummary(self):
+        for result in self.results:
+            self.printResult(result[1], result[0])
+
+
+
+if __name__ == "__main__":
+    testMan = TestManager()
+    testPath = consts.resource_paths.tests_functional_path
+
+    # try data streams
+    #meta_d2h
+    result = os.system("python3 " + testPath + "test_meta_d2h.py -s " + "meta_d2h metaout")
+    testMan.printResult(result, "meta_d2h")
+    testMan.appendResult(["meta_d2h", result])
+
+    #jpegout - if we want to do a functional test here, we need some way to trigger and detect a jpeg capture from the test.
+    #result = os.system("python3 test_meta_d2h.py -s " + "previewout jpegout")
+    #testMan.printResult(result, "jpegout")
+
+    #object_tracker
+    result = os.system("python3 " + testPath + "test_object_tracker.py -s " + "metaout object_tracker previewout")
+    testMan.printResult(result, "object_tracker")
+    print(result)
+    testMan.appendResult(["object_tracker", result])
+
+
+    # try visual streams.
+    visualStreams = [
+        "previewout",
+        "left",
+        "right",
+        "depth",
+        "disparity",
+        "disparity_color"
+    ]
+
+    for stream in visualStreams:
+        testMan.printTestName(stream)
+        result = os.system("python3 " + testPath + "test_video.py -s " + stream)
+        testMan.printResult(result, stream)
+        testMan.appendResult([stream, result])
+
+
+    #try various -cnn options
+    cnndir = consts.resource_paths.nn_resource_path
+    cnnlist = os.listdir(cnndir)
+    for cnn in cnnlist:
+        print(cnndir+cnn)
+        if os.path.isdir(cnndir+cnn):
+            testMan.printTestName(cnn)
+            runStr = "python3 " + testPath + "test_video.py -cnn " + cnn
+            result = os.system(runStr)
+            testMan.printResult(result, cnn)
+            testMan.appendResult([cnn, result])
+
+
+    testMan.printSummary()
+
diff --git a/testsFunctional/test_meta_d2h.py b/testsFunctional/test_meta_d2h.py
new file mode 100644
index 000000000..f0aadbee1
--- /dev/null
+++ b/testsFunctional/test_meta_d2h.py
@@ -0,0 +1,37 @@
+from depthai_demo import DepthAI
+import threading
+import time
+import sys
+
+if __name__ == "__main__":
+    dai = DepthAI()
+    passed = 1 # 1 is failed, 0 is passed
+
+    thread = threading.Thread(target=dai.startLoop)
+    thread.start()
+
+    d2hLen = 0
+    nnetLen = 0
+
+    i=0
+    while i<8:
+        time.sleep(1)
+        if dai.nnet_packets is not None:
+            nnetLen += len(dai.nnet_packets)
+        if dai.data_packets is not None:
+            print("data packet in")
+            for packet in dai.data_packets:
+                print(packet.stream_name)
+                if packet.stream_name == "meta_d2h":
+                    d2hLen += 1
+        i += 1
+
+    if d2hLen > 0:
+        passed = 0
+    else:
+        passed = 1
+
+    dai.stopLoop()
+    thread.join()
+
+    sys.exit(passed)
diff --git a/testsFunctional/test_object_tracker.py b/testsFunctional/test_object_tracker.py
new file mode 100644
index 000000000..a97b40094
--- /dev/null
+++ b/testsFunctional/test_object_tracker.py
@@ -0,0 +1,36 @@
+from depthai_demo import DepthAI
+import threading
+import time
+import sys
+
+if __name__ == "__main__":
+    dai = DepthAI()
+    passed = 1 # 1 is failed, 0 is passed
+
+    thread = threading.Thread(target=dai.startLoop)
+    thread.start()
+
+    testLen = 0
+    nnetLen = 0
+
+    i=0
+    while i<8:
+        time.sleep(1)
+        if dai.nnet_packets is not None:
+            nnetLen += len(dai.nnet_packets)
+        if dai.data_packets is not None:
+            for packet in dai.data_packets:
+                print(packet.stream_name)
+                if packet.stream_name == "object_tracker":
+                    testLen += 1
+        i += 1
+
+    if testLen > 0:
+        passed = 0
+    else:
+        passed = 1
+
+    dai.stopLoop()
+    thread.join()
+
+    sys.exit(passed)
diff --git a/testsFunctional/test_video.py b/testsFunctional/test_video.py
new file mode 100644
index 000000000..a66f6fd23
--- /dev/null
+++ b/testsFunctional/test_video.py
@@ -0,0 +1,33 @@
+from depthai_demo import DepthAI
+import threading
+import time
+import sys
+
+if __name__ == "__main__":
+    dai = DepthAI()
+    passed = 1 # 1 is failed, 0 is passed
+
+    thread = threading.Thread(target=dai.startLoop)
+    thread.start()
+
+    dataLen = 0
+    nnetLen = 0
+
+    i=0
+    while i<8:
+        time.sleep(1)
+        if dai.nnet_packets is not None:
+            nnetLen += len(dai.nnet_packets)
+        if dai.data_packets is not None:
+            dataLen += len(dai.data_packets)
+        i += 1
+
+    if dataLen > 0:
+        passed = 0
+    else:
+        passed = 1
+
+    dai.stopLoop()
+    thread.join()
+
+    sys.exit(passed)
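
The testsFunctional/ harness added above is driven entirely through os.system(): runFunctionalTests.py launches test_video.py, test_meta_d2h.py and test_object_tracker.py as subprocesses, and each of those starts DepthAI.startLoop() on a background thread, counts the packets it receives for roughly eight seconds, then exits with status 0 on success so the parent can collect the result. With a device attached and the repository root on PYTHONPATH (so depthai_demo and consts.resource_paths resolve), the same commands the harness builds can also be run by hand, for example:

PYTHONPATH=. python3 testsFunctional/runFunctionalTests.py            # full sweep over streams and -cnn models
PYTHONPATH=. python3 testsFunctional/test_video.py -s previewout      # check a single stream
PYTHONPATH=. python3 testsFunctional/test_video.py -cnn mobilenet-ssd # check a single network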
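
The model JSONs in this diff replace the old per-tensor descriptors with a compact "NN_config" block: "output_format", "NN_family" and "confidence_threshold", plus an "NN_specific_metadata" object (classes, anchors, anchor masks, IoU and confidence thresholds) for the YOLO variants. As a rough illustration only, and not the parser actually used by depthai_helpers/config_manager.py, a host-side consumer could read such a file and pick decoding parameters by family along these lines (the helper names below are hypothetical):

import json

def load_nn_config(json_path):
    # Hypothetical helper: read a resources/nn/<model>/<model>.json file.
    with open(json_path) as f:
        data = json.load(f)
    return data.get("NN_config", {}), data.get("mappings", {}).get("labels", [])

def decoder_params(nn_config):
    # Pick decoding parameters from the NN_family field (sketch only).
    family = nn_config.get("NN_family")
    if family == "mobilenet":
        # SSD-style detections share a single confidence threshold.
        return {"type": "ssd",
                "confidence_threshold": nn_config.get("confidence_threshold", 0.5)}
    if family == "YOLO":
        meta = nn_config.get("NN_specific_metadata", {})
        # YOLO decoding additionally needs anchors, per-side anchor masks and an IoU threshold.
        return {"type": "yolo",
                "classes": meta.get("classes"),
                "anchors": meta.get("anchors"),
                "anchor_masks": meta.get("anchor_masks"),
                "iou_threshold": meta.get("iou_threshold", 0.5),
                "confidence_threshold": meta.get("confidence_threshold", 0.5)}
    raise ValueError("Unsupported NN_family: {}".format(family))

# Example usage (path assumed relative to the repository root):
# nn_config, labels = load_nn_config("resources/nn/yolo-v3/yolo-v3.json")
# params = decoder_params(nn_config)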
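
The mobilenet-ssd model.yml above describes the expected input as a single 1x3x300x300 BGR blob with mean values [127.5, 127.5, 127.5] and the scale factor 0.007843 (about 1/127.5). A minimal preprocessing sketch, assuming the usual Caffe MobileNet-SSD convention of subtracting the mean and then multiplying by the scale, might look like this; it is illustrative only, since the demo itself runs inference on-device from the compiled blob:

import cv2
import numpy as np

def preprocess_mobilenet_ssd(frame_bgr):
    # Resize to the 300x300 network input and normalize roughly into [-1, 1].
    resized = cv2.resize(frame_bgr, (300, 300)).astype(np.float32)
    normalized = (resized - 127.5) * 0.007843
    # HWC -> NCHW (1x3x300x300); FP16 to match the FP16 model files listed above.
    return np.transpose(normalized, (2, 0, 1))[np.newaxis, ...].astype(np.float16)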