From 41e6de08829f66220d8bab17e459f61e47acf527 Mon Sep 17 00:00:00 2001
From: Nuzhny007 <nuzhny@mail.ru>
Date: Sat, 16 Nov 2024 22:49:52 +0300
Subject: [PATCH] Sync with latest Darknet sources

---
 src/Detector/darknet/CMakeLists.txt           |  11 +-
 src/Detector/darknet/cmake/FindCUDNN.cmake    | 240 ++++++------------
 src/Detector/darknet/cmake/FindCUDNN_.cmake   | 180 +++++++++++++
 src/Detector/darknet/include/darknet.h        |   7 +-
 .../darknet/src/activation_kernels.cu         |  67 +++--
 src/Detector/darknet/src/activations.c        |   4 +-
 src/Detector/darknet/src/art.c                |   5 +-
 src/Detector/darknet/src/blas.c               |  40 ++-
 src/Detector/darknet/src/blas_kernels.cu      | 217 ++++++++--------
 src/Detector/darknet/src/box.c                |  18 +-
 src/Detector/darknet/src/box.h                |  20 +-
 src/Detector/darknet/src/captcha.c            |   2 +-
 src/Detector/darknet/src/cifar.c              |   4 +-
 src/Detector/darknet/src/classifier.c         |  22 +-
 src/Detector/darknet/src/coco.c               |  19 +-
 src/Detector/darknet/src/col2im_kernels.cu    |   6 +-
 src/Detector/darknet/src/compare.c            |   6 +-
 src/Detector/darknet/src/connected_layer.c    |  12 +-
 src/Detector/darknet/src/conv_lstm_layer.c    |   2 +-
 .../darknet/src/convolutional_kernels.cu      |  20 +-
 .../darknet/src/convolutional_layer.c         |  16 +-
 src/Detector/darknet/src/cost_layer.c         |   4 +-
 src/Detector/darknet/src/dark_cuda.c          |  80 +++---
 src/Detector/darknet/src/dark_cuda.h          |  28 +-
 src/Detector/darknet/src/darknet.c            |  25 +-
 src/Detector/darknet/src/data.c               |  37 +--
 src/Detector/darknet/src/data.h               |   6 +-
 src/Detector/darknet/src/demo.c               |  48 ++--
 src/Detector/darknet/src/demo.h               |   2 +-
 src/Detector/darknet/src/detection_layer.c    |  50 ++--
 src/Detector/darknet/src/detector.c           |  86 ++++---
 src/Detector/darknet/src/dice.c               |   2 +-
 src/Detector/darknet/src/dropout_layer.c      |   2 +-
 .../darknet/src/dropout_layer_kernels.cu      |  14 +-
 .../darknet/src/gaussian_yolo_layer.c         |   5 +-
 src/Detector/darknet/src/gemm.c               |   2 -
 src/Detector/darknet/src/getopt.c             | 132 +++++-----
 src/Detector/darknet/src/getopt.h             |  20 +-
 src/Detector/darknet/src/go.c                 |  14 +-
 src/Detector/darknet/src/http_stream.cpp      |   9 +-
 src/Detector/darknet/src/im2col_kernels.cu    |  61 +++--
 src/Detector/darknet/src/image.c              |  32 ++-
 src/Detector/darknet/src/image.h              |   1 +
 src/Detector/darknet/src/image_opencv.cpp     |  11 +-
 src/Detector/darknet/src/layer.c              |   6 +-
 src/Detector/darknet/src/layer.h              |  56 ++--
 src/Detector/darknet/src/maxpool_layer.c      |   4 +-
 .../darknet/src/maxpool_layer_kernels.cu      |  14 +-
 src/Detector/darknet/src/network.c            |  21 +-
 src/Detector/darknet/src/network.h            |  28 +-
 src/Detector/darknet/src/network_kernels.cu   |  93 ++++++-
 src/Detector/darknet/src/option_list.h        |   4 +-
 src/Detector/darknet/src/parser.c             |  84 +++---
 src/Detector/darknet/src/region_layer.c       |   1 -
 .../darknet/src/representation_layer.c        |   2 -
 src/Detector/darknet/src/rnn_vid.c            |   2 +-
 src/Detector/darknet/src/route_layer.c        |   3 +-
 src/Detector/darknet/src/softmax_layer.c      |  67 +++--
 src/Detector/darknet/src/tag.c                |   2 +-
 src/Detector/darknet/src/tree.h               |   2 +-
 src/Detector/darknet/src/utils.c              |  36 ++-
 src/Detector/darknet/src/utils.h              |   2 +-
 src/Detector/darknet/src/writing.c            |   2 +-
 src/Detector/darknet/src/yolo.c               |  22 +-
 src/Detector/darknet/src/yolo_console_dll.cpp |  12 +-
 src/Detector/darknet/src/yolo_layer.c         |   4 +-
 src/Detector/darknet/src/yolo_v2_class.cpp    |   6 +-
 .../tensorrt_yolo/cmake/FindCUDNN.cmake       | 240 ++++++------------
 .../tensorrt_yolo/cmake/FindCUDNN.cmake_      | 180 +++++++++++++
 69 files changed, 1397 insertions(+), 1085 deletions(-)
 create mode 100644 src/Detector/darknet/cmake/FindCUDNN_.cmake
 create mode 100644 src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_

diff --git a/src/Detector/darknet/CMakeLists.txt b/src/Detector/darknet/CMakeLists.txt
index 26c2185f9..dec1ff078 100644
--- a/src/Detector/darknet/CMakeLists.txt
+++ b/src/Detector/darknet/CMakeLists.txt
@@ -76,10 +76,6 @@ set(DARKNET_HEADER_FILES ${DARKNET_HEADER_FILES} include/darknet.h include/yolo_
 
 file(GLOB DARKNET_CUDA_FILES src/*.cu)
 
-add_definitions(-DYOLO_DETECTOR_EXPORTS)
-add_definitions(-DLIB_EXPORTS)
-add_definitions(-DGPU)
-add_definitions(-DCUDNN)
 # add_definitions(-DOPENCV)
 
 #if(NOT CMAKE_DEBUG_POSTFIX)
@@ -107,6 +103,13 @@ set(YOLO_LIBS
 
 target_link_libraries(${libname} ${YOLO_LIBS})
 
+target_compile_definitions(${libname} PRIVATE -DYOLO_DETECTOR_EXPORTS)
+target_compile_definitions(${libname} PRIVATE -DLIB_EXPORTS)
+target_compile_definitions(${libname} PRIVATE -DGPU)
+target_compile_definitions(${libname} PRIVATE -DCUDNN)
+target_compile_definitions(${libname} PRIVATE -DCUDNN_HALF)
+
+
 install(TARGETS ${libname}
         EXPORT MTTrackingExports
         ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
diff --git a/src/Detector/darknet/cmake/FindCUDNN.cmake b/src/Detector/darknet/cmake/FindCUDNN.cmake
index 37388d30b..7a692b055 100644
--- a/src/Detector/darknet/cmake/FindCUDNN.cmake
+++ b/src/Detector/darknet/cmake/FindCUDNN.cmake
@@ -1,180 +1,104 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
+# Distributed under the OSI-approved BSD 3-Clause License.
+# Copyright Stefano Sinigardi
+
 #.rst:
 # FindCUDNN
-# -------
-#
-# Find CUDNN library
-#
-# Valiables that affect result:
-# <VERSION>, <REQUIRED>, <QUIETLY>: as usual
-#
-# <EXACT> : as usual, plus we do find '5.1' version if you wanted '5' 
-#           (not if you wanted '5.0', as usual)   
+# --------
 #
-# Result variables
+# Result Variables
 # ^^^^^^^^^^^^^^^^
 #
-# This module will set the following variables in your project:
+# This module will set the following variables in your project::
 #
-# ``CUDNN_INCLUDE``
-#   where to find cudnn.h.
-# ``CUDNN_LIBRARY``
-#   the libraries to link against to use CUDNN.
-# ``CUDNN_FOUND``
-#   If false, do not try to use CUDNN.
-# ``CUDNN_VERSION``
-#   Version of the CUDNN library we looked for 
+#  ``CUDNN_FOUND``
+#    True if CUDNN found on the local system
 #
-# Exported functions
-# ^^^^^^^^^^^^^^^^
-# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir])
-#  This function will try to download and install CUDNN.
-#  CUDNN5 and CUDNN6 are supported.
+#  ``CUDNN_INCLUDE_DIRS``
+#    Location of CUDNN header files.
 #
+#  ``CUDNN_LIBRARIES``
+#    The CUDNN libraries.
+#
+#  ``CuDNN::CuDNN``
+#    The CUDNN target
 #
 
-function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir)
-  message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}")
-  string(REGEX REPLACE "-rc$" "" version_base "${version}")
-  set(tar_libdir cuda/lib64)
-  set(tar_incdir cuda/include)
-
-  if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
-    set(url_extension tgz)
-    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
-      set(url_arch_name linux-x64 )
-    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc")
-      set(url_arch_name linux-ppc64le ) 
-      #  TX1 has to be installed via JetPack
-    endif()
-  elseif  (APPLE)
-    set(url_extension tgz)
-    set(tar_libdir cuda/lib)
-    set(url_arch_name osx-x64)
-  elseif(WIN32)
-    set(url_extension zip)
-    set(tar_bindir cuda/bin)
-    set(tar_libdir cuda/lib/x64)
-    if(CMAKE_SYSTEM_VERSION MATCHES "10")
-      set(url_arch_name windows10-x64)
-    else()
-      set(url_arch_name windows7-x64)
-    endif()
-  endif()
-  
-  # Download and install CUDNN locally if not found on the system
-  if(url_arch_name) 
-    set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version})
-    file(MAKE_DIRECTORY ${download_dir})
-    set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension})
-    set(base_url http://developer.download.nvidia.com/compute/redist/cudnn)
-    set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename})
-    set(cudnn_file ${download_dir}/${cudnn_filename})
-    
-    if(NOT EXISTS ${cudnn_file})
-      message(STATUS "Downloading CUDNN library from NVIDIA...")
-      file(DOWNLOAD ${cudnn_url} ${cudnn_file}
-	SHOW_PROGRESS STATUS cudnn_status
-	)
-    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status)
-
-      if(NOT "${cudnn_status}" MATCHES "0")
-	message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN")
-      endif()
-    endif()
-    
-    if(dest_bindir AND tar_bindir)
-      file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir})
-    endif()
-
-    if(dest_incdir)
-      file(COPY ${download_dir}/${tar_incdir}/ DESTINATION  ${dest_incdir})
-    endif()
-
-    file(COPY ${download_dir}/${tar_libdir}/ DESTINATION  ${dest_libdir} )
-
-    get_filename_component(dest_dir ${dest_libdir} DIRECTORY)
-
-    set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE)
-    unset(CUDNN_LIBRARY CACHE)
-    unset(CUDNN_INCLUDE_DIR CACHE)
-
-  endif(url_arch_name)
-endfunction()
-
-#####################################################
-
-find_package(PkgConfig)
-pkg_check_modules(PC_CUDNN QUIET CUDNN)
-
-get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH)
-
-# We use major only in library search as major/minor is not entirely consistent among platforms.
-# Also, looking for exact minor version of .so is in general not a good idea.
-# More strict enforcement of minor/patch version is done if/when the header file is examined.
-if(CUDNN_FIND_VERSION_EXACT)
-  SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}")
-  SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR})
-else()
-  SET(__cudnn_lib_win_name cudnn64)
+include(FindPackageHandleStandardArgs)
+
+find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h cudnn_v8.h cudnn_v7.h
+  HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/include
+  PATH_SUFFIXES cuda/include include)
+find_library(CUDNN_LIBRARY NAMES cudnn cudnn8 cudnn7
+  HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/lib/x86_64-linux-gnu/
+  PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64 cuda/lib/x64)
+if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v8.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v8.h CUDNN_HEADER_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v7.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v7.h CUDNN_HEADER_CONTENTS)
 endif()
-
-find_library(CUDNN_LIBRARY 
-  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
-  PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
-  PATH_SUFFIXES lib lib64 bin
-  DOC "CUDNN library." )
-
-if(CUDNN_LIBRARY)
-  SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR})
-  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION})
-  get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH)
-  find_path(CUDNN_INCLUDE_DIR 
-    NAMES cudnn.h
-    HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
-    PATH_SUFFIXES include 
-    DOC "Path to CUDNN include directory." )
+if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
 endif()
-
-if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR)
-  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+if(CUDNN_HEADER_CONTENTS)
   string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
-    CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
-    CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
+               _CUDNN_VERSION_MAJOR "${_CUDNN_VERSION_MAJOR}")
   string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
-    CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
-    CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
+               _CUDNN_VERSION_MINOR "${_CUDNN_VERSION_MINOR}")
   string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
-    CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
-    CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")  
-  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION})
-endif()
-
-if(CUDNN_MAJOR_VERSION)
-  ## Fixing the case where 5.1 does not fit 'exact' 5.
-  if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR)
-    if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}")
-      set(CUDNN_VERSION ${CUDNN_FIND_VERSION})
-    endif()
+               _CUDNN_VERSION_PATCH "${_CUDNN_VERSION_PATCH}")
+  if(NOT _CUDNN_VERSION_MAJOR)
+    set(CUDNN_VERSION "?")
+  else()
+    set(CUDNN_VERSION "${_CUDNN_VERSION_MAJOR}.${_CUDNN_VERSION_MINOR}.${_CUDNN_VERSION_PATCH}")
   endif()
-else()
-  # Try to set CUDNN version from config file
-  set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER})
 endif()
 
-find_package_handle_standard_args(
-  CUDNN 
-  FOUND_VAR CUDNN_FOUND
-  REQUIRED_VARS CUDNN_LIBRARY 
-  VERSION_VAR   CUDNN_VERSION
-  )
+set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
+set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
+mark_as_advanced(CUDNN_LIBRARY CUDNN_INCLUDE_DIR)
 
-if(CUDNN_FOUND)
-  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
-  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
-  set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER})
-endif()  
+find_package_handle_standard_args(CUDNN
+      REQUIRED_VARS  CUDNN_INCLUDE_DIR CUDNN_LIBRARY
+      VERSION_VAR    CUDNN_VERSION
+)
+
+if(WIN32)
+  set(CUDNN_DLL_DIR ${CUDNN_INCLUDE_DIR})
+  list(TRANSFORM CUDNN_DLL_DIR APPEND "/../bin")
+  find_file(CUDNN_LIBRARY_DLL NAMES cudnn64_${CUDNN_VERSION_MAJOR}.dll PATHS ${CUDNN_DLL_DIR})
+endif()
+
+if( CUDNN_FOUND AND NOT TARGET CuDNN::CuDNN )
+  if( EXISTS "${CUDNN_LIBRARY_DLL}" )
+    add_library( CuDNN::CuDNN      SHARED IMPORTED )
+    set_target_properties( CuDNN::CuDNN PROPERTIES
+      IMPORTED_LOCATION                 "${CUDNN_LIBRARY_DLL}"
+      IMPORTED_IMPLIB                   "${CUDNN_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${CUDNN_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C" )
+  else()
+    add_library( CuDNN::CuDNN      UNKNOWN IMPORTED )
+    set_target_properties( CuDNN::CuDNN PROPERTIES
+      IMPORTED_LOCATION                 "${CUDNN_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${CUDNN_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C" )
+  endif()
+endif()
diff --git a/src/Detector/darknet/cmake/FindCUDNN_.cmake b/src/Detector/darknet/cmake/FindCUDNN_.cmake
new file mode 100644
index 000000000..37388d30b
--- /dev/null
+++ b/src/Detector/darknet/cmake/FindCUDNN_.cmake
@@ -0,0 +1,180 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+#.rst:
+# FindCUDNN
+# -------
+#
+# Find CUDNN library
+#
+# Valiables that affect result:
+# <VERSION>, <REQUIRED>, <QUIETLY>: as usual
+#
+# <EXACT> : as usual, plus we do find '5.1' version if you wanted '5' 
+#           (not if you wanted '5.0', as usual)   
+#
+# Result variables
+# ^^^^^^^^^^^^^^^^
+#
+# This module will set the following variables in your project:
+#
+# ``CUDNN_INCLUDE``
+#   where to find cudnn.h.
+# ``CUDNN_LIBRARY``
+#   the libraries to link against to use CUDNN.
+# ``CUDNN_FOUND``
+#   If false, do not try to use CUDNN.
+# ``CUDNN_VERSION``
+#   Version of the CUDNN library we looked for 
+#
+# Exported functions
+# ^^^^^^^^^^^^^^^^
+# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir])
+#  This function will try to download and install CUDNN.
+#  CUDNN5 and CUDNN6 are supported.
+#
+#
+
+function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir)
+  message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}")
+  string(REGEX REPLACE "-rc$" "" version_base "${version}")
+  set(tar_libdir cuda/lib64)
+  set(tar_incdir cuda/include)
+
+  if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(url_extension tgz)
+    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
+      set(url_arch_name linux-x64 )
+    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc")
+      set(url_arch_name linux-ppc64le ) 
+      #  TX1 has to be installed via JetPack
+    endif()
+  elseif  (APPLE)
+    set(url_extension tgz)
+    set(tar_libdir cuda/lib)
+    set(url_arch_name osx-x64)
+  elseif(WIN32)
+    set(url_extension zip)
+    set(tar_bindir cuda/bin)
+    set(tar_libdir cuda/lib/x64)
+    if(CMAKE_SYSTEM_VERSION MATCHES "10")
+      set(url_arch_name windows10-x64)
+    else()
+      set(url_arch_name windows7-x64)
+    endif()
+  endif()
+  
+  # Download and install CUDNN locally if not found on the system
+  if(url_arch_name) 
+    set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version})
+    file(MAKE_DIRECTORY ${download_dir})
+    set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension})
+    set(base_url http://developer.download.nvidia.com/compute/redist/cudnn)
+    set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename})
+    set(cudnn_file ${download_dir}/${cudnn_filename})
+    
+    if(NOT EXISTS ${cudnn_file})
+      message(STATUS "Downloading CUDNN library from NVIDIA...")
+      file(DOWNLOAD ${cudnn_url} ${cudnn_file}
+	SHOW_PROGRESS STATUS cudnn_status
+	)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status)
+
+      if(NOT "${cudnn_status}" MATCHES "0")
+	message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN")
+      endif()
+    endif()
+    
+    if(dest_bindir AND tar_bindir)
+      file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir})
+    endif()
+
+    if(dest_incdir)
+      file(COPY ${download_dir}/${tar_incdir}/ DESTINATION  ${dest_incdir})
+    endif()
+
+    file(COPY ${download_dir}/${tar_libdir}/ DESTINATION  ${dest_libdir} )
+
+    get_filename_component(dest_dir ${dest_libdir} DIRECTORY)
+
+    set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE)
+    unset(CUDNN_LIBRARY CACHE)
+    unset(CUDNN_INCLUDE_DIR CACHE)
+
+  endif(url_arch_name)
+endfunction()
+
+#####################################################
+
+find_package(PkgConfig)
+pkg_check_modules(PC_CUDNN QUIET CUDNN)
+
+get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH)
+
+# We use major only in library search as major/minor is not entirely consistent among platforms.
+# Also, looking for exact minor version of .so is in general not a good idea.
+# More strict enforcement of minor/patch version is done if/when the header file is examined.
+if(CUDNN_FIND_VERSION_EXACT)
+  SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}")
+  SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR})
+else()
+  SET(__cudnn_lib_win_name cudnn64)
+endif()
+
+find_library(CUDNN_LIBRARY 
+  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+  PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+  PATH_SUFFIXES lib lib64 bin
+  DOC "CUDNN library." )
+
+if(CUDNN_LIBRARY)
+  SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR})
+  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION})
+  get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH)
+  find_path(CUDNN_INCLUDE_DIR 
+    NAMES cudnn.h
+    HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
+    PATH_SUFFIXES include 
+    DOC "Path to CUDNN include directory." )
+endif()
+
+if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR)
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
+    CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
+    CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
+  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
+    CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
+    CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
+  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
+    CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
+    CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")  
+  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION})
+endif()
+
+if(CUDNN_MAJOR_VERSION)
+  ## Fixing the case where 5.1 does not fit 'exact' 5.
+  if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR)
+    if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}")
+      set(CUDNN_VERSION ${CUDNN_FIND_VERSION})
+    endif()
+  endif()
+else()
+  # Try to set CUDNN version from config file
+  set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER})
+endif()
+
+find_package_handle_standard_args(
+  CUDNN 
+  FOUND_VAR CUDNN_FOUND
+  REQUIRED_VARS CUDNN_LIBRARY 
+  VERSION_VAR   CUDNN_VERSION
+  )
+
+if(CUDNN_FOUND)
+  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
+  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
+  set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER})
+endif()  
diff --git a/src/Detector/darknet/include/darknet.h b/src/Detector/darknet/include/darknet.h
index d72027cc4..55ab50d5d 100644
--- a/src/Detector/darknet/include/darknet.h
+++ b/src/Detector/darknet/include/darknet.h
@@ -1032,6 +1032,11 @@ LIB_API void diounms_sort(detection *dets, int total, int classes, float thresh,
 // network.h
 LIB_API float *network_predict(network net, float *input);
 LIB_API float *network_predict_ptr(network *net, float *input);
+#ifdef CUDA_OPENGL_INTEGRATION
+LIB_API float *network_predict_gl_texture(network *net, uint32_t texture_id);
+#endif // CUDA_OPENGL_INTEGRATION
+
+LIB_API void set_batch_network(network *net, int b);
 LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter);
 LIB_API det_num_pair* network_predict_batch(network *net, image im, int batch_size, int w, int h, float thresh, float hier, int *map, int relative, int letter);
 LIB_API void free_detections(detection *dets, int n);
@@ -1047,7 +1052,7 @@ LIB_API void reset_rnn(network *net);
 LIB_API float *network_predict_image(network *net, image im);
 LIB_API float *network_predict_image_letterbox(network *net, image im);
 LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net);
-LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path);
+LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path, int mAP_epochs);
 LIB_API void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh,
     float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers);
 LIB_API int network_width(network *net);
diff --git a/src/Detector/darknet/src/activation_kernels.cu b/src/Detector/darknet/src/activation_kernels.cu
index 25b5cfdc8..d2dc77199 100644
--- a/src/Detector/darknet/src/activation_kernels.cu
+++ b/src/Detector/darknet/src/activation_kernels.cu
@@ -185,7 +185,7 @@ __global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s,
 
 extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
 {
-    binary_gradient_array_kernel << <cuda_gridsize(n / 2), BLOCK, 0, get_cuda_stream() >> >(x, dx, n / 2, size, a, y);
+    binary_gradient_array_kernel <<<cuda_gridsize(n / 2), BLOCK, 0, get_cuda_stream() >>>(x, dx, n / 2, size, a, y);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
@@ -200,7 +200,7 @@ __global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTI
 
 extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
 {
-    binary_activate_array_kernel << <cuda_gridsize(n / 2), BLOCK, 0, get_cuda_stream() >> >(x, n / 2, size, a, y);
+    binary_activate_array_kernel <<<cuda_gridsize(n / 2), BLOCK, 0, get_cuda_stream() >>>(x, n / 2, size, a, y);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -260,7 +260,7 @@ __global__ void activate_array_mish_kernel(float *x, int n, float *activation_in
 {
     int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
     if (i < n) {
-        const float MISH_THRESHOLD = 20;
+        //const float MISH_THRESHOLD = 20;
         float x_val = x[i];
         if (activation_input) activation_input[i] = x_val;    // store value before activation
         //output_gpu[i] = x_val * tanh_activate_kernel(logf(1 + expf(x_val)));
@@ -495,14 +495,14 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
     if (a == LINEAR) return;
-    else if (a == LEAKY || a == REVLEAKY) activate_array_leaky_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == LOGISTIC) activate_array_logistic_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == TANH) activate_array_tanh_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == HARDTAN) activate_array_hardtan_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == RELU) activate_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == RELU6) activate_array_relu6_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == SELU) activate_array_selu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
-    else if (a == GELU) activate_array_gelu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
+    else if (a == LEAKY || a == REVLEAKY) activate_array_leaky_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == LOGISTIC) activate_array_logistic_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == TANH) activate_array_tanh_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == HARDTAN) activate_array_hardtan_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == RELU) activate_array_relu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == RELU6) activate_array_relu6_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == SELU) activate_array_selu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
+    else if (a == GELU) activate_array_gelu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n);
     else
         activate_array_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(x, n, a);
     CHECK_CUDA(cudaPeekAtLastError());
@@ -511,21 +511,21 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
 extern "C" void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    activate_array_swish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(x, n, output_sigmoid_gpu, output_gpu);
+    activate_array_swish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(x, n, output_sigmoid_gpu, output_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
 extern "C" void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    activate_array_mish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(x, n, activation_input_gpu, output_gpu);
+    activate_array_mish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(x, n, activation_input_gpu, output_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
 extern "C" void activate_array_hard_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    activate_array_hard_mish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(x, n, activation_input_gpu, output_gpu);
+    activate_array_hard_mish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(x, n, activation_input_gpu, output_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -533,22 +533,21 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
     if (a == LINEAR) return;
-    else if (a == LEAKY) gradient_array_leaky_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == REVLEAKY) gradient_array_revleaky_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == LOGISTIC) gradient_array_logistic_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == TANH) gradient_array_tanh_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == HARDTAN) gradient_array_hardtan_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == RELU) gradient_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == RELU6) gradient_array_relu6_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    //else if (a == NORM_CHAN) gradient_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
+    else if (a == LEAKY) gradient_array_leaky_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == REVLEAKY) gradient_array_revleaky_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == LOGISTIC) gradient_array_logistic_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == TANH) gradient_array_tanh_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == HARDTAN) gradient_array_hardtan_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == RELU) gradient_array_relu_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    else if (a == RELU6) gradient_array_relu6_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, n, delta);
+    //else if (a == NORM_CHAN) gradient_array_relu_kernel <<< num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n, delta);
     else if (a == NORM_CHAN_SOFTMAX || a == NORM_CHAN) {
-        printf(" Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient \n");
-        exit(0);
+        error("Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient", DARKNET_LOC);
     }
-    else if (a == SELU) gradient_array_selu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
-    else if (a == GELU) gradient_array_gelu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
+    else if (a == SELU) gradient_array_selu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n, delta);
+    else if (a == GELU) gradient_array_gelu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(x, n, delta);
     else
-        gradient_array_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (x, n, a, delta);
+        gradient_array_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (x, n, a, delta);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -556,21 +555,21 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta
 extern "C" void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delta)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    gradient_array_swish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (x, n, sigmoid_gpu, delta);
+    gradient_array_swish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (x, n, sigmoid_gpu, delta);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
 extern "C" void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    gradient_array_mish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, activation_input_gpu, delta);
+    gradient_array_mish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, activation_input_gpu, delta);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
 extern "C" void gradient_array_hard_mish_ongpu(int n, float *activation_input_gpu, float *delta)
 {
     const int num_blocks = get_number_of_blocks(n, BLOCK);
-    gradient_array_hard_mish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, activation_input_gpu, delta);
+    gradient_array_hard_mish_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, activation_input_gpu, delta);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -607,7 +606,7 @@ extern "C" void activate_array_normalize_channels_ongpu(float *x, int n, int bat
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    activate_array_normalize_channels_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu);
+    activate_array_normalize_channels_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, size, batch, channels, wh_step, output_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -655,7 +654,7 @@ extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n,
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu, use_max_val);
+    activate_array_normalize_channels_softmax_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (x, size, batch, channels, wh_step, output_gpu, use_max_val);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -698,7 +697,7 @@ extern "C" void gradient_array_normalize_channels_softmax_ongpu(float *output_gp
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    gradient_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (output_gpu, size, batch, channels, wh_step, delta_gpu);
+    gradient_array_normalize_channels_softmax_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (output_gpu, size, batch, channels, wh_step, delta_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -741,6 +740,6 @@ extern "C" void gradient_array_normalize_channels_ongpu(float *output_gpu, int n
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    gradient_array_normalize_channels_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (output_gpu, size, batch, channels, wh_step, delta_gpu);
+    gradient_array_normalize_channels_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (output_gpu, size, batch, channels, wh_step, delta_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
diff --git a/src/Detector/darknet/src/activations.c b/src/Detector/darknet/src/activations.c
index 87ba1d9e3..9e8a49f72 100644
--- a/src/Detector/darknet/src/activations.c
+++ b/src/Detector/darknet/src/activations.c
@@ -323,9 +323,7 @@ float gradient(float x, ACTIVATION a)
         case NORM_CHAN_SOFTMAX_MAXVAL:
             //...
         case NORM_CHAN_SOFTMAX:
-            printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n");
-            exit(0);
-            return 0;
+            error("Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient", DARKNET_LOC);
         case ELU:
             return elu_gradient(x);
         case SELU:
diff --git a/src/Detector/darknet/src/art.c b/src/Detector/darknet/src/art.c
index 5fbcfcead..748e19038 100644
--- a/src/Detector/darknet/src/art.c
+++ b/src/Detector/darknet/src/art.c
@@ -40,8 +40,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
 
         float *p = network_predict(net, in_s.data);
 
-        printf("\033[2J");
-        printf("\033[1;1H");
+        printf("\033[H\033[J");
 
         float score = 0;
         for(i = 0; i < n; ++i){
@@ -51,7 +50,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
         score = score;
         printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100);
         printf("[");
-	int upper = 30;
+    int upper = 30;
         for(i = 0; i < upper; ++i){
             printf("%c", ((i+.5) < score*upper) ? 219 : ' ');
         }
diff --git a/src/Detector/darknet/src/blas.c b/src/Detector/darknet/src/blas.c
index 884d24af9..122bca0ce 100644
--- a/src/Detector/darknet/src/blas.c
+++ b/src/Detector/darknet/src/blas.c
@@ -342,32 +342,32 @@ void fill_cpu(int N, float ALPHA, float *X, int INCX)
     }
 }
 
-void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUTPUT)
 {
     int i, j;
     int index = 0;
     for(j = 0; j < B; ++j) {
         for(i = 0; i < NX; ++i){
-            if(X) X[j*NX + i] += OUT[index];
+            if(X) X[j*NX + i] += OUTPUT[index];
             ++index;
         }
         for(i = 0; i < NY; ++i){
-            if(Y) Y[j*NY + i] += OUT[index];
+            if(Y) Y[j*NY + i] += OUTPUT[index];
             ++index;
         }
     }
 }
 
-void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUTPUT)
 {
     int i, j;
     int index = 0;
     for(j = 0; j < B; ++j) {
         for(i = 0; i < NX; ++i){
-            OUT[index++] = X[j*NX + i];
+            OUTPUT[index++] = X[j*NX + i];
         }
         for(i = 0; i < NY; ++i){
-            OUT[index++] = Y[j*NY + i];
+            OUTPUT[index++] = Y[j*NY + i];
         }
     }
 }
@@ -511,7 +511,7 @@ void fix_nan_and_inf_cpu(float *input, size_t size)
     int i;
     for (i = 0; i < size; ++i) {
         float val = input[i];
-        if (i > 0 && (isnan(val) || isinf(val)))
+        if (isnan(val) || isinf(val))
             input[i] = 1.0f / i;  // pseudo random value
     }
 }
@@ -594,8 +594,8 @@ float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_
         if (contrast_p[z].i == i && contrast_p[z].j == j) break;
     }
     if (z == contrast_p_size) {
-        printf(" Error: find_sim(): sim isn't found: i = %d, j = %d, z = %d \n", i, j, z);
-        getchar();
+        printf(" Error: find_sim(): sim isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z);
+        error("Error!", DARKNET_LOC);
     }
 
     return contrast_p[z].sim;
@@ -608,8 +608,8 @@ float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, in
         if (contrast_p[z].i == i && contrast_p[z].j == j) break;
     }
     if (z == contrast_p_size) {
-        printf(" Error: find_P_constrastive(): P isn't found: i = %d, j = %d, z = %d \n", i, j, z);
-        getchar();
+        printf(" Error: find_P_constrastive(): P isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z);
+        error("Error!", DARKNET_LOC);
     }
 
     return contrast_p[z].P;
@@ -648,8 +648,8 @@ float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int featu
 float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size)
 {
     if (i == l) {
-        fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l);
-        getchar();
+        fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l);
+        error("Error!", DARKNET_LOC);
     }
 
     const float sim = find_sim(i, l, contrast_p, contrast_p_size); // cosine_similarity(z[i], z[l], feature_size);
@@ -686,8 +686,7 @@ void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, siz
     if (N == 0 || temperature == 0 || vec_len == 0) {
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n",
             N, temperature, vec_len, labels[i]);
-        getchar();
-        return;
+        error("Error!", DARKNET_LOC);
     }
     const float mult = 1 / ((N - 1) * temperature * vec_len);
 
@@ -732,8 +731,7 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz
     if (N == 0 || temperature == 0 || vec_len == 0) {
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n",
             N, temperature, vec_len, labels[i]);
-        getchar();
-        return;
+        error("Error!", DARKNET_LOC);
     }
     const float mult = 1 / ((N - 1) * temperature * vec_len);
 
@@ -782,8 +780,8 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz
 float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim)
 {
     if (i == l) {
-        fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l);
-        getchar();
+        fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l);
+        error("Error!", DARKNET_LOC);
     }
 
     //const float sim = cos_sim[i*num_of_samples + l]; // cosine_similarity(z[i], z[l], feature_size);
@@ -820,7 +818,7 @@ void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples
     }
     if (N == 0 || temperature == 0 || vec_len == 0) {
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
-        getchar();
+        error("Error!", DARKNET_LOC);
     }
     const float mult = 1 / ((N - 1) * temperature * vec_len);
 
@@ -860,7 +858,7 @@ void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples
     }
     if (N == 0 || temperature == 0 || vec_len == 0) {
         fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
-        getchar();
+        error("Error!", DARKNET_LOC);
     }
     const float mult = 1 / ((N - 1) * temperature * vec_len);
 
diff --git a/src/Detector/darknet/src/blas_kernels.cu b/src/Detector/darknet/src/blas_kernels.cu
index 85c55adfb..3bc0d90b5 100644
--- a/src/Detector/darknet/src/blas_kernels.cu
+++ b/src/Detector/darknet/src/blas_kernels.cu
@@ -34,7 +34,7 @@ void compare_2_arrays_gpu(float *one, float *two, int size)
 {
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    compare_2_arrays_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(one, two, size);
+    compare_2_arrays_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(one, two, size);
     CHECK_CUDA(cudaPeekAtLastError());
     CHECK_CUDA(cudaDeviceSynchronize());
 }
@@ -53,7 +53,7 @@ void mean_array_gpu(float *src, int size, float alpha, float *avg)
 {
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    mean_array_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(src, size, alpha, avg);
+    mean_array_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(src, size, alpha, avg);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -72,7 +72,7 @@ void scale_bias_gpu(float *output, float *scale, int batch, int filters, int spa
     const int current_size = batch * filters * spatial;
     const int num_blocks = get_number_of_blocks(current_size, BLOCK);
 
-    scale_bias_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(output, scale, batch, filters, spatial, current_size);
+    scale_bias_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(output, scale, batch, filters, spatial, current_size);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -117,7 +117,7 @@ void add_bias_gpu(float *output, float *biases, int batch, int filters, int spat
     const int current_size = batch * filters * spatial;
     const int num_blocks = get_number_of_blocks(current_size, BLOCK);
 
-    add_bias_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(output, biases, batch, filters, spatial, current_size);
+    add_bias_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(output, biases, batch, filters, spatial, current_size);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -202,7 +202,7 @@ __global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float
 
 extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
 {
-    adam_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(n, x, m, v, B1, B2, rate, eps, t);
+    adam_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, x, m, v, B1, B2, rate, eps, t);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -235,7 +235,7 @@ extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch,
     const int current_size = batch * filters * spatial;
     const int num_blocks = get_number_of_blocks(current_size, BLOCK);
 
-    normalize_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(current_size, x, mean, variance, batch, filters, spatial);
+    normalize_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(current_size, x, mean, variance, batch, filters, spatial);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -450,7 +450,7 @@ __global__ void constrain_weight_updates_kernel(int N, float coef, float *weight
 
 extern "C" void constrain_weight_updates_ongpu(int N, float coef, float *weights_gpu, float *weight_updates_gpu)
 {
-    constrain_weight_updates_kernel << <cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >> >(N, coef, weights_gpu, weight_updates_gpu);
+    constrain_weight_updates_kernel <<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, coef, weights_gpu, weight_updates_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -512,8 +512,8 @@ __global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
 
 __global__ void mask_kernel_new_api(int n, float *x, float mask_num, float *mask, float val)
 {
-	int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-	if (i < n && mask[i] == mask_num) x[i] = val;
+    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if (i < n && mask[i] == mask_num) x[i] = val;
 }
 
 __global__ void mask_kernel(int n, float *x, float mask_num, float *mask)
@@ -573,7 +573,7 @@ __global__ void  fast_mean_kernel(float *x, int batch, int filters, int spatial,
 
 extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
 {
-    fast_mean_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, batch, filters, spatial, mean);
+    fast_mean_kernel <<<filters, BLOCK, 0, get_cuda_stream() >>>(x, batch, filters, spatial, mean);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -669,7 +669,7 @@ __global__ void  fast_v_cbn_kernel(const float *x, float *mean, int batch, int f
 extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
     const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
 {
-    fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
+    fast_v_cbn_kernel <<<filters, BLOCK, 0, get_cuda_stream() >>>(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -683,7 +683,7 @@ __global__ void inverse_variance_kernel(int size, float *src, float *dst, float
 extern "C" void inverse_variance_ongpu(int size, float *src, float *dst, float epsilon)
 {
     const int num_blocks = size / BLOCK + 1;
-    inverse_variance_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(size, src, dst, epsilon);
+    inverse_variance_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(size, src, dst, epsilon);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -708,7 +708,7 @@ extern "C" void normalize_scale_bias_gpu(float *x, float *mean, float *variance,
     const int current_size = batch * filters * spatial;
     const int num_blocks = get_number_of_blocks(current_size, BLOCK);
 
-    normalize_scale_bias_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(current_size, x, mean, variance, scales, biases, batch, filters, spatial, inverse_variance, epsilon);
+    normalize_scale_bias_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(current_size, x, mean, variance, scales, biases, batch, filters, spatial, inverse_variance, epsilon);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -749,7 +749,7 @@ extern "C" void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY)
 extern "C" void simple_copy_ongpu(int size, float *src, float *dst)
 {
     const int num_blocks = size / BLOCK + 1;
-    simple_copy_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(size, src, dst);
+    simple_copy_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(size, src, dst);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -804,7 +804,7 @@ extern "C" void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride
 
 extern "C" void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val)
 {
-	mask_kernel_new_api <<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, X, mask_num, mask, val);
+    mask_kernel_new_api <<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, X, mask_num, mask, val);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -828,7 +828,7 @@ extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX)
 
 extern "C" void constrain_min_max_ongpu(int N, float MIN, float MAX, float * X, int INCX)
 {
-    constrain_min_max_kernel << <cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >> >(N, MIN, MAX, X, INCX);
+    constrain_min_max_kernel <<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, MIN, MAX, X, INCX);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -841,7 +841,7 @@ extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX)
 
 extern "C" void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX)
 {
-    scal_add_kernel << <cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >> >(N, ALPHA, BETA, X, INCX);
+    scal_add_kernel <<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream() >>>(N, ALPHA, BETA, X, INCX);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -855,7 +855,7 @@ extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
 {
     //fill_kernel<<<cuda_gridsize(N), BLOCK, 0, get_cuda_stream()>>>(N, ALPHA, X, INCX);
     //CHECK_CUDA(cudaPeekAtLastError());
-    fill_kernel << <get_number_of_blocks(N, BLOCK), BLOCK, 0, get_cuda_stream() >> >(N, ALPHA, X, INCX);
+    fill_kernel <<<get_number_of_blocks(N, BLOCK), BLOCK, 0, get_cuda_stream() >>>(N, ALPHA, X, INCX);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -884,7 +884,7 @@ extern "C" void gradient_centralization_gpu(int w, int h, int c, int f, float *i
     const int f_size = c * h * w;
     if (f_size % WARP_SIZE == 0) {
 
-        gradient_centralization_kernel << <get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >> > (f, f_size, in);
+        gradient_centralization_kernel <<<get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >>> (f, f_size, in);
         CHECK_CUDA(cudaPeekAtLastError());
     }
 }
@@ -1003,10 +1003,10 @@ extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *
     //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n);
     int size = batch * src_outputs;
     if (nweights == 0 && n == 1) {
-        shortcut_singlelayer_simple_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
+        shortcut_singlelayer_simple_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
     }
     else {
-        shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
+        shortcut_multilayer_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
     }
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -1083,7 +1083,6 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
         int add_outputs = outputs_of_layers_gpu[i];
         if (src_i < add_outputs) {
             int add_index = add_outputs*src_b + src_i;
-            int out_index = id;
 
             float *layer_delta = layers_delta_gpu[i];
             if (weights_gpu) {
@@ -1126,14 +1125,14 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
 extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu,
     float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization)
 {
-    const int layer_step = nweights / (n + 1);    // 1 or l.c or (l.c * l.h * l.w)
-    int step = 0;
-    if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
+    //const int layer_step = nweights / (n + 1);    // 1 or l.c or (l.c * l.h * l.w)
+    //int step = 0;
+    //if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1
     //printf(" nweights = %d, n = %d, layer_step = %d, step = %d \n", nweights, n, layer_step, step);
 
     //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n);
     int size = batch * src_outputs;
-    backward_shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu,
+    backward_shortcut_multilayer_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (size, src_outputs, batch, n, outputs_of_layers_gpu,
         layers_delta_gpu, delta_out, delta_in, weights_gpu, weight_updates_gpu, nweights, in, layers_output_gpu, weights_normalization);
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -1202,7 +1201,7 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1,
 {
     if (w1 == w2 && h1 == h2 && c1 == c2) {
         int size = batch * w1 * h1 * c1;
-        simple_input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, add, out);
+        simple_input_shortcut_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(in, size, add, out);
         CHECK_CUDA(cudaPeekAtLastError());
         return;
     }
@@ -1219,9 +1218,9 @@ extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1,
     if (sample < 1) sample = 1;
 
     int size = batch * minw * minh * minc;
-    //input_shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
+    //input_shortcut_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
     simple_copy_ongpu(w2 * h2 * c2 * batch, in, out);
-    shortcut_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
+    shortcut_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -1250,18 +1249,18 @@ extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, fl
 
 __global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error)
 {
-	int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-	if (i < n) {
-		float t = truth[i];
-		float p = pred[i];
-		error[i] = (t) ? -log(p) : 0;
-		delta[i] = t - p;
-	}
+    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if (i < n) {
+        float t = truth[i];
+        float p = pred[i];
+        error[i] = (t) ? -log(p) : 0;
+        delta[i] = t - p;
+    }
 }
 
 extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
 {
-	softmax_x_ent_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(n, pred, truth, delta, error);
+    softmax_x_ent_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, pred, truth, delta, error);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -1364,35 +1363,35 @@ extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float t
 
 __device__ void softmax_device_new_api(float *input, int n, float temp, int stride, float *output)
 {
-	int i;
-	float sum = 0;
-	float largest = -INFINITY;
-	for (i = 0; i < n; ++i) {
-		int val = input[i*stride];
-		largest = (val>largest) ? val : largest;
-	}
-	for (i = 0; i < n; ++i) {
-		float e = expf(input[i*stride] / temp - largest / temp);
-		sum += e;
-		output[i*stride] = e;
-	}
-	for (i = 0; i < n; ++i) {
-		output[i*stride] /= sum;
-	}
+    int i;
+    float sum = 0;
+    float largest = -INFINITY;
+    for (i = 0; i < n; ++i) {
+        int val = input[i*stride];
+        largest = (val>largest) ? val : largest;
+    }
+    for (i = 0; i < n; ++i) {
+        float e = expf(input[i*stride] / temp - largest / temp);
+        sum += e;
+        output[i*stride] = e;
+    }
+    for (i = 0; i < n; ++i) {
+        output[i*stride] /= sum;
+    }
 }
 
 __global__ void softmax_kernel_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 {
-	int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-	if (id >= batch*groups) return;
-	int b = id / groups;
-	int g = id % groups;
-	softmax_device_new_api(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if (id >= batch*groups) return;
+    int b = id / groups;
+    int g = id % groups;
+    softmax_device_new_api(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset);
 }
 
 extern "C" void softmax_gpu_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 {
-	softmax_kernel_new_api << <cuda_gridsize(batch*groups), BLOCK, 0, get_cuda_stream() >> >(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
+    softmax_kernel_new_api <<<cuda_gridsize(batch*groups), BLOCK, 0, get_cuda_stream() >>>(input, n, batch, batch_offset, groups, group_offset, stride, temp, output);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -1424,40 +1423,40 @@ __global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int bat
 extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
 {
     size_t size = w*h*c*batch*stride*stride;
-    upsample_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> >(size, in, w, h, c, batch, stride, forward, scale, out);
+    upsample_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(size, in, w, h, c, batch, stride, forward, scale, out);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
 __global__ void softmax_tree_kernel(float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
 {
-	int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-	if (id >= spatial*batch*groups) return;
-	int s = id % spatial;
-	id = id / spatial;
-	int g = id % groups;
-	int b = id / groups;
-	int goff = group_offset[g] * spatial;
-	int boff = b*stride;
-	softmax_device_new_api(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s);
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if (id >= spatial*batch*groups) return;
+    int s = id % spatial;
+    id = id / spatial;
+    int g = id % groups;
+    int b = id / groups;
+    int goff = group_offset[g] * spatial;
+    int boff = b*stride;
+    softmax_device_new_api(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s);
 }
 
 extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
 {
-	int *tree_groups_size = cuda_make_int_array_new_api(hier.group_size, hier.groups);
-	int *tree_groups_offset = cuda_make_int_array_new_api(hier.group_offset, hier.groups);
-	/*
-	static int *tree_groups_size = 0;
-	static int *tree_groups_offset = 0;
-	if(!tree_groups_size){
-	tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
-	tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
-	}
-	*/
-	int num = spatial*batch*hier.groups;
-	softmax_tree_kernel <<<cuda_gridsize(num), BLOCK, 0, get_cuda_stream() >>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
+    int *tree_groups_size = cuda_make_int_array_new_api(hier.group_size, hier.groups);
+    int *tree_groups_offset = cuda_make_int_array_new_api(hier.group_offset, hier.groups);
+    /*
+    static int *tree_groups_size = 0;
+    static int *tree_groups_offset = 0;
+    if(!tree_groups_size){
+    tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
+    tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
+    }
+    */
+    int num = spatial*batch*hier.groups;
+    softmax_tree_kernel <<<cuda_gridsize(num), BLOCK, 0, get_cuda_stream() >>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
     CHECK_CUDA(cudaPeekAtLastError());
-	cuda_free((float *)tree_groups_size);
-	cuda_free((float *)tree_groups_offset);
+    cuda_free((float *)tree_groups_size);
+    cuda_free((float *)tree_groups_offset);
 }
 
 
@@ -1476,7 +1475,7 @@ extern "C" void fix_nan_and_inf(float *input, size_t size)
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    fix_nan_and_inf_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, size);
+    fix_nan_and_inf_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(input, size);
     CHECK_CUDA(cudaPeekAtLastError());
     //CHECK_CUDA(cudaDeviceSynchronize());
 }
@@ -1497,7 +1496,7 @@ extern "C" void reset_nan_and_inf(float *input, size_t size)
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    reset_nan_and_inf_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, size);
+    reset_nan_and_inf_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(input, size);
     CHECK_CUDA(cudaPeekAtLastError());
     //CHECK_CUDA(cudaDeviceSynchronize());
 }
@@ -1522,7 +1521,7 @@ extern "C" int is_nan_or_inf(float *input, size_t size)
 
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    is_nan_or_inf_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, size, pinned_return);
+    is_nan_or_inf_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(input, size, pinned_return);
     CHECK_CUDA(cudaDeviceSynchronize());
     int ret_val = *pinned_return;
 
@@ -1550,10 +1549,9 @@ extern "C" void add_3_arrays_activate(float *a1, float *a2, float *a3, size_t si
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
     if (!(a == LOGISTIC || a == TANH || a == LEAKY || a == LINEAR)) {
-        printf(" add_3_arrays_activate() doesn't support activation %d, it supports only LOGISTIC and TANH \n", a);
-        exit(EXIT_FAILURE);
+        error("Error: add_3_arrays_activate() supports only LOGISTIC and TANH", DARKNET_LOC);
     }
-    add_3_arrays_activate_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(a1, a2, a3, size, a, dst);
+    add_3_arrays_activate_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(a1, a2, a3, size, a, dst);
 }
 
 
@@ -1569,7 +1567,7 @@ extern "C" void sum_of_mults(float *a1, float *a2, float *b1, float *b2,  size_t
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    sum_of_mults_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(a1, a2, b1, b2, size, dst);
+    sum_of_mults_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(a1, a2, b1, b2, size, dst);
 }
 
 
@@ -1589,10 +1587,9 @@ extern "C" void activate_and_mult(float *a1, float *a2, size_t size, ACTIVATION
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
     if (!(a == TANH || a == LEAKY || a == LINEAR)) {
-        printf(" activat_and_mult() doesn't support activation %d, it supports only TANH \n", a);
-        exit(EXIT_FAILURE);
+        error("Error: activat_and_mult() supports only TANH", DARKNET_LOC);
     }
-    activate_and_mult_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(a1, a2, size, a, dst);
+    activate_and_mult_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(a1, a2, size, a, dst);
 }
 
 
@@ -1616,7 +1613,7 @@ extern "C" void scale_channels_gpu(float *in_w_h_c, int size, int channel_size,
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    scale_channels_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out);
+    scale_channels_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -1673,7 +1670,7 @@ extern "C" void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    backward_scale_channels_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (in_w_h_c_delta, size, channel_size, batch_size, scale_wh,
+    backward_scale_channels_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (in_w_h_c_delta, size, channel_size, batch_size, scale_wh,
         in_scales_c, out_from_delta,
         in_from_output, out_state_delta);
 
@@ -1693,7 +1690,7 @@ extern "C" void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scal
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    sam_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(in_w_h_c, size, channel_size, scales_c, out);
+    sam_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>>(in_w_h_c, size, channel_size, scales_c, out);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -1718,7 +1715,7 @@ extern "C" void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_si
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    backward_sam_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (in_w_h_c_delta, size, channel_size,
+    backward_sam_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (in_w_h_c_delta, size, channel_size,
         in_scales_c, out_from_delta,
         in_from_output, out_state_delta);
 
@@ -1732,8 +1729,8 @@ __global__  void smooth_rotate_weights_kernel(const float *src_weight_gpu, float
     const int kernel_area = kernel_size * kernel_size;
     const int i = index * kernel_area;
 
-    const int stage_step = (nweights / kernel_area) / 4;  // 4 stages
-    const int stage_id = index / stage_step;
+    //const int stage_step = (nweights / kernel_area) / 4;  // 4 stages
+    //const int stage_id = index / stage_step;
 
     // nweights = (c / groups) * n * size * size;
     // kernel_area = size*size
@@ -1804,7 +1801,7 @@ extern "C" void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *we
     const int kernel_area = size*size;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size);
-    smooth_rotate_weights_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
+    smooth_rotate_weights_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -1902,7 +1899,7 @@ extern "C" void stretch_weights_gpu(const float *src_weight_gpu, float *weight_d
     const int kernel_area = size*size;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size);
-    stretch_weights_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_weight_gpu, weight_deform_gpu, nweights, n, size, scale, reverse);
+    stretch_weights_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, scale, reverse);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2008,7 +2005,7 @@ extern "C" void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *we
     const int kernel_area = size*size;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size);
-    sway_and_flip_weights_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
+    sway_and_flip_weights_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2091,7 +2088,7 @@ extern "C" void rotate_weights_gpu(const float *src_weight_gpu, float *weight_de
     const int kernel_area = size*size;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size);
-    rotate_weights_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_weight_gpu, weight_deform_gpu, nweights, n, size, reverse);
+    rotate_weights_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, reverse);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2258,7 +2255,7 @@ extern "C" void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float
     const int kernel_area = size*size;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size);
-    stretch_sway_flip_weights_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
+    stretch_sway_flip_weights_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2285,7 +2282,7 @@ extern "C" void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu
     const int current_size = size / groups;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(current_size, block_size);
-    reduce_and_expand_array_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_gpu, dst_gpu, current_size, groups);
+    reduce_and_expand_array_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_gpu, dst_gpu, current_size, groups);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2308,7 +2305,7 @@ extern "C" void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size,
     const int current_size = size / groups;
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(current_size, block_size);
-    expand_array_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_gpu, dst_gpu, current_size, groups);
+    expand_array_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_gpu, dst_gpu, current_size, groups);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2340,7 +2337,7 @@ extern "C" void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    mult_inverse_array_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (src_gpu, dst_gpu, size, eps, divider, clip, abs_add);
+    mult_inverse_array_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (src_gpu, dst_gpu, size, eps, divider, clip, abs_add);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2385,7 +2382,7 @@ extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size,
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(contrast_p_size, block_size);
-    P_constrastive_f_det_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (labels, feature_size, temperature, contrast_p, contrast_p_size);
+    P_constrastive_f_det_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (labels, feature_size, temperature, contrast_p, contrast_p_size);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2432,7 +2429,7 @@ extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int
 {
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
-    coord_conv_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> > (dst, w, h, chan, b, type);
+    coord_conv_kernel <<<num_blocks, block_size, 0, get_cuda_stream() >>> (dst, w, h, chan, b, type);
 
     CHECK_CUDA(cudaPeekAtLastError());
 }
@@ -2449,7 +2446,7 @@ __global__ void forward_implicit_kernel(int size, int batch, int nweights, float
 extern "C" void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu)
 {
     int size = batch * nweights;
-    forward_implicit_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, batch, nweights, weight_gpu, output_gpu);
+    forward_implicit_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (size, batch, nweights, weight_gpu, output_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -2468,6 +2465,6 @@ __global__ void backward_implicit_kernel(int size, int batch, int nweights, floa
 extern "C" void backward_implicit_gpu(int batch, int nweights, float *weight_updates_gpu, float *delta_gpu)
 {
     int size = nweights;
-    backward_implicit_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, batch, nweights, weight_updates_gpu, delta_gpu);
+    backward_implicit_kernel <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (size, batch, nweights, weight_updates_gpu, delta_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
diff --git a/src/Detector/darknet/src/box.c b/src/Detector/darknet/src/box.c
index 201ab1334..0ad1263e5 100644
--- a/src/Detector/darknet/src/box.c
+++ b/src/Detector/darknet/src/box.c
@@ -345,12 +345,12 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) {
         p_dl += ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C);
         p_dr += ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C);
       }
-	  if (Iw<=0||Ih<=0) {
-		p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C);
+      if (Iw<=0||Ih<=0) {
+        p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C);
         p_db = ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C);
         p_dl = ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C);
         p_dr = ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C);
-	  }
+      }
     }
 
     float Ct = fmin(pred.y - pred.h / 2,truth.y - truth.h / 2);
@@ -418,21 +418,21 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) {
             p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C);
             p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C);
         }
-	if (Iw<=0||Ih<=0){
+    if (Iw<=0||Ih<=0){
             p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C);
             p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C);
             p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C);
             p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C);
         }
     }
-	//The following codes are calculating the gradient of ciou.
+    //The following codes are calculating the gradient of ciou.
 
     if (iou_loss == CIOU) {
-	float ar_gt = truth.w / truth.h;
+    float ar_gt = truth.w / truth.h;
         float ar_pred = pred.w / pred.h;
         float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred));
-	float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001);
-	float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h;
+    float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001);
+    float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h;
         float ar_dh=-8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.w;
         if (C > 0) {
         // dar*
@@ -441,7 +441,7 @@ dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) {
             p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw;
             p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh;
         }
-	if (Iw<=0||Ih<=0){
+    if (Iw<=0||Ih<=0){
             p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C);
             p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C);
             p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw;
diff --git a/src/Detector/darknet/src/box.h b/src/Detector/darknet/src/box.h
index 4b720653c..f72e26c46 100644
--- a/src/Detector/darknet/src/box.h
+++ b/src/Detector/darknet/src/box.h
@@ -12,19 +12,19 @@ typedef struct{
 } dbox;
 
 //typedef struct detection {
-//	box bbox;
-//	int classes;
-//	float *prob;
-//	float *mask;
-//	float objectness;
-//	int sort_class;
+//    box bbox;
+//    int classes;
+//    float *prob;
+//    float *mask;
+//    float objectness;
+//    int sort_class;
 //} detection;
 
 typedef struct detection_with_class {
-	detection det;
-	// The most probable class id: the best class index in this->prob.
-	// Is filled temporary when processing results, otherwise not initialized
-	int best_class;
+    detection det;
+    // The most probable class id: the best class index in this->prob.
+    // Is filled temporary when processing results, otherwise not initialized
+    int best_class;
 } detection_with_class;
 
 #ifdef __cplusplus
diff --git a/src/Detector/darknet/src/captcha.c b/src/Detector/darknet/src/captcha.c
index 0cc159152..5fd565d04 100644
--- a/src/Detector/darknet/src/captcha.c
+++ b/src/Detector/darknet/src/captcha.c
@@ -85,7 +85,7 @@ void train_captcha(char *cfgfile, char *weightfile)
         float loss = train_network(net, train);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %" PRIu64 " images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if(i%100==0){
             char buff[256];
diff --git a/src/Detector/darknet/src/cifar.c b/src/Detector/darknet/src/cifar.c
index 1b87cb5f0..1ef221e87 100644
--- a/src/Detector/darknet/src/cifar.c
+++ b/src/Detector/darknet/src/cifar.c
@@ -29,7 +29,7 @@ void train_cifar(char *cfgfile, char *weightfile)
         float loss = train_network_sgd(net, train, 1);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.95 + loss*.05;
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         if(*net.seen/N > epoch){
             epoch = *net.seen/N;
             char buff[256];
@@ -85,7 +85,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile)
         float loss = train_network_sgd(net, train, 1);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.95 + loss*.05;
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         if(*net.seen/N > epoch){
             epoch = *net.seen/N;
             char buff[256];
diff --git a/src/Detector/darknet/src/classifier.c b/src/Detector/darknet/src/classifier.c
index e42b1ea3a..f01303327 100644
--- a/src/Detector/darknet/src/classifier.c
+++ b/src/Detector/darknet/src/classifier.c
@@ -74,7 +74,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
     if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
         printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
             l.outputs, classes);
-        getchar();
+        error("Error!", DARKNET_LOC);
     }
 
     char **labels = get_labels(label_list);
@@ -208,7 +208,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
         if (avg_time < 0) avg_time = time_remaining;
         else avg_time = alpha_time * time_remaining + (1 -  alpha_time) * avg_time;
         start = what_time_is_it_now();
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images, %f hours left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images, %f hours left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time);
 #ifdef OPENCV
         if (net.contrastive) {
             float cur_con_acc = -1;
@@ -845,7 +845,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
     if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
         printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
             l.outputs, classes);
-        getchar();
+        error("Error!", DARKNET_LOC);
     }
     if (top == 0) top = option_find_int(options, "top", 1);
     if (top > classes) top = classes;
@@ -1129,11 +1129,8 @@ void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_i
         sprintf(buff, "tmp/threat_%06d", count);
         //save_image(out, buff);
 
-#ifndef _WIN32
-        printf("\033[2J");
-        printf("\033[1;1H");
-#endif
-        printf("\nFPS:%.0f\n",fps);
+        printf("\033[H\033[J");
+        printf("\nFPS:%.0f\n", fps);
 
         for(i = 0; i < top; ++i){
             int index = indexes[i];
@@ -1208,8 +1205,7 @@ void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_inde
         float *predictions = network_predict(net, in_s.data);
         top_predictions(net, top, indexes);
 
-        printf("\033[2J");
-        printf("\033[1;1H");
+        printf("\033[H\033[J");
 
         int threat = 0;
         for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){
@@ -1308,11 +1304,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind
         if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1);
         top_predictions(net, top, indexes);
 
-#ifndef _WIN32
-        printf("\033[2J");
-        printf("\033[1;1H");
-#endif
-
+        printf("\033[H\033[J");
 
         if (!benchmark) {
             printf("\rFPS: %.2f  (use -benchmark command line flag for correct measurement)\n", fps);
diff --git a/src/Detector/darknet/src/coco.c b/src/Detector/darknet/src/coco.c
index 8c386a224..8ad13834b 100644
--- a/src/Detector/darknet/src/coco.c
+++ b/src/Detector/darknet/src/coco.c
@@ -291,7 +291,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
         if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh);
 
         char labelpath[4096];
-		replace_image_to_label(path, labelpath);
+        replace_image_to_label(path, labelpath);
 
         int num_labels = 0;
         box_label *truth = read_boxes(labelpath, &num_labels);
@@ -320,6 +320,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile)
 
         //if (fps) free(fps);
         if (id) free(id);
+        free(truth);
         free_image(orig);
         free_image(sized);
     }
@@ -373,10 +374,9 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
         show_image(im, "predictions");
         free_image(im);
         free_image(sized);
-
+        free_alphabet(alphabet);
         wait_until_press_key_cv();
         destroy_all_windows_cv();
-
         if (filename) break;
     }
     free(boxes);
@@ -388,16 +388,17 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
 
 void run_coco(int argc, char **argv)
 {
-	int dont_show = find_arg(argc, argv, "-dont_show");
-	int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
+    int dont_show = find_arg(argc, argv, "-dont_show");
+    int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
     int json_port = find_int_arg(argc, argv, "-json_port", -1);
-	char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
+    char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
     char *prefix = find_char_arg(argc, argv, "-prefix", 0);
     float thresh = find_float_arg(argc, argv, "-thresh", .2);
-	float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
+    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
     int cam_index = find_int_arg(argc, argv, "-c", 0);
     int frame_skip = find_int_arg(argc, argv, "-s", 0);
-	int ext_output = find_arg(argc, argv, "-ext_output");
+    int ext_output = find_arg(argc, argv, "-ext_output");
+    char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0);
 
     if(argc < 4){
         fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
@@ -412,5 +413,5 @@ void run_coco(int argc, char **argv)
     else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
     else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
     else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, coco_classes, 80, 1, frame_skip,
-		prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0);
+        prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0, json_file_output);
 }
diff --git a/src/Detector/darknet/src/col2im_kernels.cu b/src/Detector/darknet/src/col2im_kernels.cu
index 0e07bc37a..ae651c49b 100644
--- a/src/Detector/darknet/src/col2im_kernels.cu
+++ b/src/Detector/darknet/src/col2im_kernels.cu
@@ -126,11 +126,11 @@ void col2im_gpu_ext(const float* data_col, const int channels,
     // To avoid involving atomic operations, we will launch one kernel per
     // bottom dimension, and then in the kernel add up the top dimensions.
     // NOLINT_NEXT_LINE(whitespace/operators)
-    col2im_gpu_kernel_ext<< <CAFFE_GET_BLOCKS(num_kernels),
-        CAFFE_CUDA_NUM_THREADS >> >(
+    col2im_gpu_kernel_ext<<<CAFFE_GET_BLOCKS(num_kernels),
+        CAFFE_CUDA_NUM_THREADS >>>(
             num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
             pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
             height_col, width_col, data_im);
 
     CHECK_CUDA(cudaPeekAtLastError());
-}
\ No newline at end of file
+}
diff --git a/src/Detector/darknet/src/compare.c b/src/Detector/darknet/src/compare.c
index 62edabe96..4bacda4c2 100644
--- a/src/Detector/darknet/src/compare.c
+++ b/src/Detector/darknet/src/compare.c
@@ -1,5 +1,3 @@
-#include <stdio.h>
-
 #include "network.h"
 #include "detection_layer.h"
 #include "cost_layer.h"
@@ -7,6 +5,8 @@
 #include "parser.h"
 #include "box.h"
 
+#include <stdio.h>
+
 void train_compare(char *cfgfile, char *weightfile)
 {
     srand(time(0));
@@ -54,7 +54,7 @@ void train_compare(char *cfgfile, char *weightfile)
         float loss = train_network(net, train);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen);
+        printf("%.3f: %f, %f avg, %lf seconds, %" PRIu64 " images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if(i%100 == 0){
             char buff[256];
diff --git a/src/Detector/darknet/src/connected_layer.c b/src/Detector/darknet/src/connected_layer.c
index 25a5ffa1e..244e82fbb 100644
--- a/src/Detector/darknet/src/connected_layer.c
+++ b/src/Detector/darknet/src/connected_layer.c
@@ -363,12 +363,12 @@ void forward_connected_layer_gpu(connected_layer l, network_state state)
     gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
 #endif // CUDNN
 
-	if (l.batch_normalize) {
-		forward_batchnorm_layer_gpu(l, state);
-	}
-	else {
-		add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1);
-	}
+    if (l.batch_normalize) {
+        forward_batchnorm_layer_gpu(l, state);
+    }
+    else {
+        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1);
+    }
     //for(i = 0; i < l.batch; ++i) axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
     activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
 }
diff --git a/src/Detector/darknet/src/conv_lstm_layer.c b/src/Detector/darknet/src/conv_lstm_layer.c
index 72e7eac38..a7804e736 100644
--- a/src/Detector/darknet/src/conv_lstm_layer.c
+++ b/src/Detector/darknet/src/conv_lstm_layer.c
@@ -1454,7 +1454,7 @@ void backward_conv_lstm_layer_gpu(layer l, network_state state)
             axpy_ongpu(l.outputs*l.batch, 1, l.bottelneck_delta_gpu + l.outputs*l.batch, 1, state.delta, 1);    // lead to nan
         }
         else {
-            axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.temp3_gpu, 1, l.dh_gpu, 1);
+            // if (l.dh_gpu) axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.temp3_gpu, 1, l.dh_gpu, 1);
         }
 
         // c
diff --git a/src/Detector/darknet/src/convolutional_kernels.cu b/src/Detector/darknet/src/convolutional_kernels.cu
index 188399bb7..debd6bf15 100644
--- a/src/Detector/darknet/src/convolutional_kernels.cu
+++ b/src/Detector/darknet/src/convolutional_kernels.cu
@@ -65,7 +65,7 @@ __global__ void binarize_weights_kernel(float *weights, int n, int size, float *
 
 void binarize_weights_gpu(float *weights, int n, int size, float *binary)
 {
-    binarize_weights_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(weights, n, size, binary);
+    binarize_weights_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(weights, n, size, binary);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -113,9 +113,9 @@ void fast_binarize_weights_gpu(float *weights, int n, int size, float *binary, f
         size_t gridsize = n * size;
         const int num_blocks = get_number_of_blocks(gridsize, BLOCK);// gridsize / BLOCK + 1;
 
-        set_zero_kernel << <(n/BLOCK + 1), BLOCK, 0, get_cuda_stream() >> > (mean_arr_gpu, n);
-        reduce_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (weights, n, size, mean_arr_gpu);
-        binarize_weights_mean_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (weights, n, size, binary, mean_arr_gpu);
+        set_zero_kernel <<<(n/BLOCK + 1), BLOCK, 0, get_cuda_stream() >>> (mean_arr_gpu, n);
+        reduce_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (weights, n, size, mean_arr_gpu);
+        binarize_weights_mean_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (weights, n, size, binary, mean_arr_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
     }
     else {
@@ -351,7 +351,6 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
                     im2col_align_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, l.align_workspace_gpu, l.bit_align);
                     //cudaDeviceSynchronize();
                     //stop_timer_and_show_name("im2col_align_ongpu");
-                    //getchar();
 
                     // should be optimized
                     //start_timer();
@@ -383,7 +382,6 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
                 //}
                 //cudaDeviceSynchronize();
                 //check_error(status);
-                //getchar();
             }
 
 
@@ -533,11 +531,11 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
         /*
         int input_nan_inf = is_nan_or_inf(state.input, l.inputs * l.batch);
         printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf);
-        if (input_nan_inf) getchar();
+        if (input_nan_inf) error();
 
         int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights);
         printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf);
-        if (weights_nan_inf) getchar();
+        if (weights_nan_inf) error();
         */
 
         CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(),
@@ -997,7 +995,7 @@ void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int
 {
     const int num_blocks = get_number_of_blocks(size*batches, BLOCK);
 
-    calc_avg_activation_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (src, dst, size, channels, batches);
+    calc_avg_activation_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (src, dst, size, channels, batches);
 }
 
 
@@ -1021,7 +1019,7 @@ void assisted_activation_gpu(float alpha, float *output, float *gt_gpu, float *a
 {
     const int num_blocks = get_number_of_blocks(size*batches, BLOCK);
 
-    assisted_activation_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches);
+    assisted_activation_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches);
 }
 
 
@@ -1045,7 +1043,7 @@ void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float *
 {
     const int num_blocks = get_number_of_blocks(size*batches, BLOCK);
 
-    assisted_activation2_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches);
+    assisted_activation2_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches);
 }
 
 void assisted_excitation_forward_gpu(convolutional_layer l, network_state state)
diff --git a/src/Detector/darknet/src/convolutional_layer.c b/src/Detector/darknet/src/convolutional_layer.c
index 88bb260ab..18f9e8b7f 100644
--- a/src/Detector/darknet/src/convolutional_layer.c
+++ b/src/Detector/darknet/src/convolutional_layer.c
@@ -330,9 +330,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
     }
 
     if (!found_conv_algorithm) {
-        printf(" Error: cuDNN isn't found FWD algo for convolution.\n");
-        getchar();
-        exit(0);
+        error("Error: cuDNN hasn't found FWD algo for convolution", DARKNET_LOC);
     }
     //printf(" cuDNN FWD algo: %d, time = %f ms \n", l->fw_algo, min_time);
 
@@ -367,9 +365,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
     }
 
     if (!found_conv_algorithm) {
-        printf(" Error: cuDNN isn't found BWD-data algo for convolution.\n");
-        getchar();
-        exit(0);
+        error("Error: cuDNN hasn't found BWD-data algo for convolution", DARKNET_LOC);
     }
     //printf(" cuDNN BWD-data algo: %d \n", l->bd_algo);
 
@@ -404,9 +400,7 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
     }
 
     if (!found_conv_algorithm) {
-        printf(" Error: cuDNN isn't found BWD-filter algo for convolution.\n");
-        getchar();
-        exit(0);
+        error("Error: cuDNN hasn't found BWD-filter algo for convolution", DARKNET_LOC);
     }
     //printf(" cuDNN BWD-filter algo: %d \n", l->bf_algo);
 
@@ -550,8 +544,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
 
     if (l.share_layer) {
         if (l.size != l.share_layer->size || l.nweights != l.share_layer->nweights || l.c != l.share_layer->c || l.n != l.share_layer->n) {
-            printf(" Layer size, nweights, channels or filters don't match for the share_layer");
-            getchar();
+            error("Layer size, nweights, channels or filters don't match for the share_layer", DARKNET_LOC);
         }
 
         l.weights = l.share_layer->weights;
@@ -1688,4 +1681,3 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
     free_image(dc);
     return single_weights;
 }
-
diff --git a/src/Detector/darknet/src/cost_layer.c b/src/Detector/darknet/src/cost_layer.c
index ed1cc1344..60080b120 100644
--- a/src/Detector/darknet/src/cost_layer.c
+++ b/src/Detector/darknet/src/cost_layer.c
@@ -25,8 +25,8 @@ char *get_cost_string(COST_TYPE a)
             return "masked";
         case SMOOTH:
             return "smooth";
-		default:
-			return "sse";
+        default:
+            return "sse";
     }
 }
 
diff --git a/src/Detector/darknet/src/dark_cuda.c b/src/Detector/darknet/src/dark_cuda.c
index d155b4082..74f067724 100644
--- a/src/Detector/darknet/src/dark_cuda.c
+++ b/src/Detector/darknet/src/dark_cuda.c
@@ -54,7 +54,7 @@ void *cuda_get_context()
     return (void *)pctx;
 }
 
-void check_error(cudaError_t status)
+void check_error(cudaError_t status, const char * const filename, const char * const funcname, const int line)
 {
     cudaError_t status2 = cudaGetLastError();
     if (status != cudaSuccess)
@@ -63,10 +63,7 @@ void check_error(cudaError_t status)
         char buffer[256];
         printf("\n CUDA Error: %s\n", s);
         snprintf(buffer, 256, "CUDA Error: %s", s);
-#ifdef WIN32
-        getchar();
-#endif
-        error(buffer, DARKNET_LOC);
+        error(buffer, filename, funcname, line);
     }
     if (status2 != cudaSuccess)
     {
@@ -74,18 +71,15 @@ void check_error(cudaError_t status)
         char buffer[256];
         printf("\n CUDA Error Prev: %s\n", s);
         snprintf(buffer, 256, "CUDA Error Prev: %s", s);
-#ifdef WIN32
-        getchar();
-#endif
-        error(buffer, DARKNET_LOC);
+        error(buffer, filename, funcname, line);
     }
 }
 
-void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time)
+void check_error_extended(cudaError_t status, const char * const filename, const char * const funcname, const int line)
 {
     if (status != cudaSuccess) {
-        printf("CUDA status Error: file: %s() : line: %d : build time: %s \n", file, line, date_time);
-        check_error(status);
+        printf("CUDA status Error: file: %s: func: %s() line: %d\n", filename, funcname, line);
+        check_error(status, filename, funcname, line);
     }
 #if defined(DEBUG) || defined(CUDA_DEBUG)
     cuda_debug_sync = 1;
@@ -93,9 +87,9 @@ void check_error_extended(cudaError_t status, const char *file, int line, const
     if (cuda_debug_sync) {
         status = cudaDeviceSynchronize();
         if (status != cudaSuccess)
-            printf("CUDA status = cudaDeviceSynchronize() Error: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+            printf("CUDA status = cudaDeviceSynchronize() Error: file: %s: func: %s() line: %d\n", filename, funcname, line);
     }
-    check_error(status);
+    check_error(status, filename, funcname, line);
 }
 
 dim3 cuda_gridsize(size_t n){
@@ -180,7 +174,7 @@ cudnnHandle_t cudnn_handle()
 }
 
 
-void cudnn_check_error(cudnnStatus_t status)
+void cudnn_check_error(cudnnStatus_t status, const char * const filename, const char * const function, const int line)
 {
 #if defined(DEBUG) || defined(CUDA_DEBUG)
     cudaDeviceSynchronize();
@@ -198,10 +192,7 @@ void cudnn_check_error(cudnnStatus_t status)
         char buffer[256];
         printf("\n cuDNN Error: %s\n", s);
         snprintf(buffer, 256, "cuDNN Error: %s", s);
-#ifdef WIN32
-        getchar();
-#endif
-        error(buffer, DARKNET_LOC);
+        error(buffer, filename, function, line);
     }
     if (status2 != CUDNN_STATUS_SUCCESS)
     {
@@ -209,18 +200,15 @@ void cudnn_check_error(cudnnStatus_t status)
         char buffer[256];
         printf("\n cuDNN Error Prev: %s\n", s);
         snprintf(buffer, 256, "cuDNN Error Prev: %s", s);
-#ifdef WIN32
-        getchar();
-#endif
-        error(buffer, DARKNET_LOC);
+        error(buffer, filename, function, line);
     }
 }
 
-void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time)
+void cudnn_check_error_extended(cudnnStatus_t status, const char * const filename, const char * const function, const int line)
 {
     if (status != CUDNN_STATUS_SUCCESS) {
-        printf("\n cuDNN status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
-        cudnn_check_error(status);
+        printf("\n cuDNN status Error in: file: %s function: %s() line: %d\n", filename, function, line);
+        cudnn_check_error(status, filename, function, line);
     }
 #if defined(DEBUG) || defined(CUDA_DEBUG)
     cuda_debug_sync = 1;
@@ -228,9 +216,9 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line
     if (cuda_debug_sync) {
         cudaError_t status = cudaDeviceSynchronize();
         if (status != CUDNN_STATUS_SUCCESS)
-            printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+            printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, function, line);
     }
-    cudnn_check_error(status);
+    cudnn_check_error(status, filename, function, line);
 }
 
 static cudnnHandle_t switchCudnnHandle[16];
@@ -251,10 +239,10 @@ void cublas_check_error(cublasStatus_t status)
     }
 }
 
-void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time)
+void cublas_check_error_extended(cublasStatus_t status, const char * const filename, const char * const function, const int line)
 {
     if (status != CUBLAS_STATUS_SUCCESS) {
-      printf("\n cuBLAS status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+      printf("\n cuBLAS status Error in: file: %s function: %s() line: %d\n", filename, function, line);
     }
 #if defined(DEBUG) || defined(CUDA_DEBUG)
     cuda_debug_sync = 1;
@@ -262,7 +250,7 @@ void cublas_check_error_extended(cublasStatus_t status, const char *file, int li
     if (cuda_debug_sync) {
         cudaError_t status = cudaDeviceSynchronize();
       if (status != CUDA_SUCCESS)
-          printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time);
+          printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, function, line);
     }
     cublas_check_error(status);
 }
@@ -385,14 +373,14 @@ void free_pinned_memory()
 void pre_allocate_pinned_memory(const size_t size)
 {
     const size_t num_of_blocks = size / pinned_block_size + ((size % pinned_block_size) ? 1 : 0);
-    printf("pre_allocate... pinned_ptr = %p \n", pinned_ptr);
+    printf("pre_allocate... pinned_ptr = %p \n", (void *)pinned_ptr);
 
     pthread_mutex_lock(&mutex_pinned);
     if (!pinned_ptr) {
         pinned_ptr = (float **)calloc(num_of_blocks, sizeof(float *));
         if(!pinned_ptr) error("calloc failed in pre_allocate()", DARKNET_LOC);
 
-        printf("pre_allocate: size = %Iu MB, num_of_blocks = %Iu, block_size = %Iu MB \n",
+        printf("pre_allocate: size = %zu MB, num_of_blocks = %zu, block_size = %zu MB \n",
             size / (1024*1024), num_of_blocks, pinned_block_size / (1024 * 1024));
 
         int k;
@@ -402,7 +390,7 @@ void pre_allocate_pinned_memory(const size_t size)
             CHECK_CUDA(status);
             if (!pinned_ptr[k]) error("cudaHostAlloc failed", DARKNET_LOC);
             else {
-                printf(" Allocated %d pinned block \n", pinned_block_size);
+                printf(" Allocated %zu pinned block \n", pinned_block_size);
             }
         }
         pinned_num_of_blocks = num_of_blocks;
@@ -423,7 +411,7 @@ float *cuda_make_array_pinned_preallocated(float *x, size_t n)
     {
         if ((allocation_size + pinned_index) > pinned_block_size) {
             const float filled = (float)100 * pinned_index / pinned_block_size;
-            printf("\n Pinned block_id = %d, filled = %f %% \n", pinned_block_id, filled);
+            printf("\n Pinned block_id = %zu, filled = %f %% \n", pinned_block_id, filled);
             pinned_block_id++;
             pinned_index = 0;
         }
@@ -438,13 +426,13 @@ float *cuda_make_array_pinned_preallocated(float *x, size_t n)
 
     if(!x_cpu) {
         if (allocation_size > pinned_block_size / 2) {
-            printf("Try to allocate new pinned memory, size = %d MB \n", size / (1024 * 1024));
+            printf("Try to allocate new pinned memory, size = %zu MB \n", size / (1024 * 1024));
             cudaError_t status = cudaHostAlloc((void **)&x_cpu, size, cudaHostRegisterMapped);
             if (status != cudaSuccess) fprintf(stderr, " Can't allocate CUDA-pinned memory on CPU-RAM (pre-allocated memory is over too) \n");
             CHECK_CUDA(status);
         }
         else {
-            printf("Try to allocate new pinned BLOCK, size = %d MB \n", size / (1024 * 1024));
+            printf("Try to allocate new pinned BLOCK, size = %zu MB \n", size / (1024 * 1024));
             pinned_num_of_blocks++;
             pinned_block_id = pinned_num_of_blocks - 1;
             pinned_index = 0;
@@ -553,17 +541,17 @@ int *cuda_make_int_array(size_t n)
 
 int *cuda_make_int_array_new_api(int *x, size_t n)
 {
-	int *x_gpu;
-	size_t size = sizeof(int)*n;
-	cudaError_t status = cudaMalloc((void **)&x_gpu, size);
+    int *x_gpu;
+    size_t size = sizeof(int)*n;
+    cudaError_t status = cudaMalloc((void **)&x_gpu, size);
     CHECK_CUDA(status);
-	if (x) {
-		//status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
+    if (x) {
+        //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
         cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream());
         CHECK_CUDA(status);
-	}
-	if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC);
-	return x_gpu;
+    }
+    if (!x_gpu) error("Cuda malloc failed", DARKNET_LOC);
+    return x_gpu;
 }
 
 void cuda_free(float *x_gpu)
@@ -602,7 +590,7 @@ void cuda_pull_array_async(float *x_gpu, float *x, size_t n)
 {
     size_t size = sizeof(float)*n;
     cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDefault, get_cuda_stream());
-    check_error(status);
+    check_error(status, DARKNET_LOC);
     //cudaStreamSynchronize(get_cuda_stream());
 }
 
diff --git a/src/Detector/darknet/src/dark_cuda.h b/src/Detector/darknet/src/dark_cuda.h
index 9251e8776..ffe3836b6 100644
--- a/src/Detector/darknet/src/dark_cuda.h
+++ b/src/Detector/darknet/src/dark_cuda.h
@@ -25,6 +25,18 @@ extern int gpu_index;
 #include <curand.h>
 #include <cublas_v2.h>
 #include <cuda_runtime_api.h>
+
+#ifdef CUDA_OPENGL_INTEGRATION
+// On Windows, we need to include <windows.h> before
+// including OpenGL headers or else we will get various
+// compiler errors due to missing macros.
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif // _WIN32
+
+#include <cuda_gl_interop.h>
+#endif // CUDA_OPENGL_INTEGRATION
 //#include <driver_types.h>
 
 #ifdef CUDNN
@@ -54,11 +66,11 @@ extern int gpu_index;
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
-    void check_error(cudaError_t status);
-    void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time);
-    void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time);
-#define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
-#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
+    void check_error(cudaError_t status, const char * const filename, const char * const funcname, const int line);
+    void check_error_extended(cudaError_t status, const char * const filename, const char * const funcname, const int line);
+    void cublas_check_error_extended(cublasStatus_t status, const char * const filename, const char * const funcname, const int line);
+#define CHECK_CUDA(X) check_error_extended(X, __FILE__, __func__, __LINE__ );
+#define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );
 
     cublasHandle_t blas_handle();
     void free_pinned_memory();
@@ -68,7 +80,7 @@ extern "C" {
     float *cuda_make_array(float *x, size_t n);
     void **cuda_make_array_pointers(void **x, size_t n);
     int *cuda_make_int_array(size_t n);
-	int *cuda_make_int_array_new_api(int *x, size_t n);
+    int *cuda_make_int_array_new_api(int *x, size_t n);
     void cuda_push_array(float *x_gpu, float *x, size_t n);
     //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n);
     //LIB_API void cuda_set_device(int n);
@@ -92,8 +104,8 @@ extern "C" {
 cudnnHandle_t cudnn_handle();
 enum {cudnn_fastest, cudnn_smallest, cudnn_specify};
 
-void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time);
-#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
+void cudnn_check_error_extended(cudnnStatus_t status, const char * const filename, const char * const function, const int line);
+#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__, __func__, __LINE__);
 #endif
 
 #ifdef __cplusplus
diff --git a/src/Detector/darknet/src/darknet.c b/src/Detector/darknet/src/darknet.c
index 13ab75f3d..392f2e466 100644
--- a/src/Detector/darknet/src/darknet.c
+++ b/src/Detector/darknet/src/darknet.c
@@ -117,7 +117,7 @@ void operations(char *cfgfile)
             ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
         } else if(l.type == CONNECTED){
             ops += 2l * l.inputs * l.outputs;
-		} else if (l.type == RNN){
+        } else if (l.type == RNN){
             ops += 2l * l.input_layer->inputs * l.input_layer->outputs;
             ops += 2l * l.self_layer->inputs * l.self_layer->outputs;
             ops += 2l * l.output_layer->inputs * l.output_layer->outputs;
@@ -251,7 +251,7 @@ void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
             denormalize_connected_layer(*l.ui);
             denormalize_connected_layer(*l.ug);
             denormalize_connected_layer(*l.uo);
-		}
+        }
     }
     save_weights(net, outfile);
 }
@@ -412,7 +412,7 @@ void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
             l.ug->batch_normalize = 0;
             l.uo->batch_normalize = 0;
             net.layers[i].batch_normalize=0;
-		}
+        }
     }
     save_weights(net, outfile);
 }
@@ -432,7 +432,7 @@ void visualize(char *cfgfile, char *weightfile)
 int main(int argc, char **argv)
 {
 #ifdef _DEBUG
-	_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
+    _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
     printf(" _DEBUG is used \n");
 #endif
 
@@ -440,11 +440,11 @@ int main(int argc, char **argv)
     printf(" DEBUG=1 \n");
 #endif
 
-	int i;
-	for (i = 0; i < argc; ++i) {
-		if (!argv[i]) continue;
-		strip_args(argv[i]);
-	}
+    int i;
+    for (i = 0; i < argc; ++i) {
+        if (!argv[i]) continue;
+        strip_args(argv[i]);
+    }
 
     //test_resize("data/bad.jpg");
     //test_box();
@@ -454,11 +454,6 @@ int main(int argc, char **argv)
         return 0;
     }
     gpu_index = find_int_arg(argc, argv, "-i", 0);
-    if(find_arg(argc, argv, "-nogpu")) {
-        gpu_index = -1;
-        printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n");
-        exit(-1);
-    }
 
 #ifndef GPU
     gpu_index = -1;
@@ -493,7 +488,7 @@ int main(int argc, char **argv)
         run_detector(argc, argv);
     } else if (0 == strcmp(argv[1], "detect")){
         float thresh = find_float_arg(argc, argv, "-thresh", .24);
-		int ext_output = find_arg(argc, argv, "-ext_output");
+        int ext_output = find_arg(argc, argv, "-ext_output");
         char *filename = (argc > 4) ? argv[4]: 0;
         test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0);
     } else if (0 == strcmp(argv[1], "cifar")){
diff --git a/src/Detector/darknet/src/data.c b/src/Detector/darknet/src/data.c
index 7d2a46599..70e1b09b2 100644
--- a/src/Detector/darknet/src/data.c
+++ b/src/Detector/darknet/src/data.c
@@ -9,8 +9,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-extern int check_mistakes;
-
 #define NUMCHARS 37
 
 pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -206,10 +204,6 @@ box_label *read_boxes(char *filename, int *n)
         char *new_line = "\n";
         fwrite(new_line, sizeof(char), strlen(new_line), fw);
         fclose(fw);
-        if (check_mistakes) {
-            printf("\n Error in read_boxes() \n");
-            getchar();
-        }
 
         *n = 0;
         return boxes;
@@ -409,7 +403,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float
             printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d], file: %s \n", id, (classes-1), labelpath);
             sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1));
             system(buff);
-            if (check_mistakes) getchar();
             ++sub;
             continue;
         }
@@ -424,7 +417,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float
             sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath);
             system(buff);
             ++sub;
-            if (check_mistakes) getchar();
             continue;
         }
         if (x <= 0 || x > 1 || y <= 0 || y > 1) {
@@ -432,7 +424,6 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float
             sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y);
             system(buff);
             ++sub;
-            if (check_mistakes) getchar();
             continue;
         }
         if (w > 1) {
@@ -440,14 +431,12 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float
             sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w);
             system(buff);
             w = 1;
-            if (check_mistakes) getchar();
         }
         if (h > 1) {
             printf("\n Wrong annotation: h = %f, file: %s \n", h, labelpath);
             sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h);
             system(buff);
             h = 1;
-            if (check_mistakes) getchar();
         }
         if (x == 0) x += lowest_w;
         if (y == 0) y += lowest_h;
@@ -1057,14 +1046,11 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
 
     if (use_mixup == 2 || use_mixup == 4) {
         printf("\n cutmix=1 - isn't supported for Detector (use cutmix=1 only for Classifier) \n");
-        if (check_mistakes) getchar();
         if(use_mixup == 2) use_mixup = 0;
         else use_mixup = 3;
     }
     if (use_mixup == 3 && letter_box) {
-        //printf("\n Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters \n");
-        //if (check_mistakes) getchar();
-        //exit(0);
+        error("Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters", DARKNET_LOC);
     }
     if (random_gen() % 2 == 0) use_mixup = 0;
     int i;
@@ -1111,9 +1097,6 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
             if (src == NULL) {
                 printf("\n Error in load_data_detection() - OpenCV \n");
                 fflush(stdout);
-                if (check_mistakes) {
-                    getchar();
-                }
                 continue;
             }
 
@@ -1238,6 +1221,15 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
             int swidth = ow - pleft - pright;
             int sheight = oh - ptop - pbot;
 
+            if (swidth <=  0 || sheight <= 0 || (ow - pleft) <= 0 || (oh - ptop) <= 0 ) {
+                printf("\n WARNING: invalid resize. Resetting swidth: %d , sheight:  %d, pleft: %d, ptop: %d \n", dw, dh, 0 ,0);
+                printf("\n Original values: \n swidth = %d, sheight = %d, pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", swidth, sheight, pleft, pright, ptop, pbot, ow, oh);
+                swidth = ow;
+                sheight = oh;
+                pleft = 0;
+                ptop = 0;
+            }
+
             float sx = (float)swidth / ow;
             float sy = (float)sheight / oh;
 
@@ -1384,12 +1376,10 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
 
     //assert(use_mixup < 2);
     if (use_mixup == 2) {
-        printf("\n cutmix=1 - isn't supported for Detector \n");
-        exit(0);
+        error("cutmix=1 - isn't supported for Detector", DARKNET_LOC);
     }
     if (use_mixup == 3 || use_mixup == 4) {
-        printf("\n mosaic=1 - compile Darknet with OpenCV for using mosaic=1 \n");
-        exit(0);
+        error("mosaic=1 - compile Darknet with OpenCV for using mosaic=1", DARKNET_LOC);
     }
     int mixup = use_mixup ? random_gen() % 2 : 0;
     //printf("\n mixup = %d \n", mixup);
@@ -1558,8 +1548,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
                     show_image(sized, buff);
                     wait_until_press_key_cv();
                 }
-                printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. Press Enter: \n");
-                //getchar();
+                printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images\n");
             }
 
             free_image(orig);
diff --git a/src/Detector/darknet/src/data.h b/src/Detector/darknet/src/data.h
index ecbd0188e..9f12343a0 100644
--- a/src/Detector/darknet/src/data.h
+++ b/src/Detector/darknet/src/data.h
@@ -45,8 +45,8 @@ typedef struct load_args{
     char **labels;
     int h;
     int w;
-	int c; // color depth
-	int out_w;
+    int c; // color depth
+    int out_w;
     int out_h;
     int nh;
     int nw;
@@ -55,7 +55,7 @@ typedef struct load_args{
     int classes;
     int background;
     int scale;
-	int small_object;
+    int small_object;
     float jitter;
     int flip;
     float angle;
diff --git a/src/Detector/darknet/src/demo.c b/src/Detector/darknet/src/demo.c
index 0c4033351..5a01faf36 100644
--- a/src/Detector/darknet/src/demo.c
+++ b/src/Detector/darknet/src/demo.c
@@ -73,7 +73,6 @@ void *fetch_in_thread(void *ptr)
             printf("Stream closed.\n");
             custom_atomic_store_int(&flag_exit, 1);
             custom_atomic_store_int(&run_fetch_in_thread, 0);
-            //exit(EXIT_FAILURE);
             return 0;
         }
         //in_s = resize_image(in, net.w, net.h);
@@ -142,7 +141,7 @@ double get_wall_time()
 
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes,
     int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host,
-    int benchmark, int benchmark_layers)
+    int benchmark, int benchmark_layers, char *json_file_output)
 {
     if (avgframes < 1) avgframes = 1;
     avg_frames = avgframes;
@@ -157,6 +156,15 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
     demo_thresh = thresh;
     demo_ext_output = ext_output;
     demo_json_port = json_port;
+    char *json_buf = NULL;
+    FILE* json_file = NULL;
+
+    if (json_file_output) {
+        json_file = fopen(json_file_output, "wb");
+        char *tmp = "[\n";
+        fwrite(tmp, sizeof(char), strlen(tmp), json_file);
+    }
+
     printf("Demo\n");
     net = parse_network_cfg_custom(cfgfile, 1, 1);    // set batch=1
     if(weightfile){
@@ -201,8 +209,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
 
     if (l.classes != demo_classes) {
         printf("\n Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes);
-        getchar();
-        exit(0);
+        error("Error!", DARKNET_LOC);
     }
 
     flag_exit = 0;
@@ -280,8 +287,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
 
             if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show);
 
-            //printf("\033[2J");
-            //printf("\033[1;1H");
+            printf("\033[H\033[J");
             //printf("\nFPS:%.1f\n", fps);
             printf("Objects:\n\n");
 
@@ -291,6 +297,16 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
                 send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout);
             }
 
+            if (json_file_output) {
+                if (json_buf) {
+                    char *tmp = ", \n";
+                    fwrite(tmp, sizeof(char), strlen(tmp), json_file);
+                }
+                json_buf = detection_to_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, NULL);
+                fwrite(json_buf, sizeof(char), strlen(json_buf), json_file);
+                free(json_buf);
+            }
+
             //char *http_post_server = "webhook.site/898bbd9b-0ddd-49cf-b81d-1f56be98d870";
             if (http_post_host && !send_http_post_once) {
                 int timeout = 3;            // 3 seconds
@@ -344,12 +360,12 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
             }
 
             while (custom_atomic_load_int(&run_detect_in_thread)) {
-                if(avg_fps > 180) this_thread_yield();
+                if(avg_fps > 50) this_thread_yield();
                 else this_thread_sleep_for(thread_wait_ms);   // custom_join(detect_thread, 0);
             }
             if (!benchmark) {
                 while (custom_atomic_load_int(&run_fetch_in_thread)) {
-                    if (avg_fps > 180) this_thread_yield();
+                    if (avg_fps > 50) this_thread_yield();
                     else this_thread_sleep_for(thread_wait_ms);   // custom_join(fetch_thread, 0);
                 }
                 free_image(det_s);
@@ -397,6 +413,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
         printf("output_video_writer closed. \n");
     }
 
+    if (json_file_output) {
+        char *tmp = "\n]";
+        fwrite(tmp, sizeof(char), strlen(tmp), json_file);
+        fclose(json_file);
+    }
     this_thread_sleep_for(thread_wait_ms);
 
     custom_join(detect_thread, 0);
@@ -414,21 +435,14 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
 
     free_ptrs((void **)names, net.layers[net.n - 1].classes);
 
-    const int nsize = 8;
-    for (j = 0; j < nsize; ++j) {
-        for (i = 32; i < 127; ++i) {
-            free_image(alphabet[j][i]);
-        }
-        free(alphabet[j]);
-    }
-    free(alphabet);
+    free_alphabet(alphabet);
     free_network(net);
     //cudaProfilerStop();
 }
 #else
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes,
     int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host,
-    int benchmark, int benchmark_layers)
+    int benchmark, int benchmark_layers, char *json_file_output)
 {
     fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
 }
diff --git a/src/Detector/darknet/src/demo.h b/src/Detector/darknet/src/demo.h
index 380b72fed..15f359dd0 100644
--- a/src/Detector/darknet/src/demo.h
+++ b/src/Detector/darknet/src/demo.h
@@ -6,7 +6,7 @@
 extern "C" {
 #endif
 void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes,
-    int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers);
+    int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers, char *json_file_output);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/Detector/darknet/src/detection_layer.c b/src/Detector/darknet/src/detection_layer.c
index 3c6528a9b..d3b9af8a4 100644
--- a/src/Detector/darknet/src/detection_layer.c
+++ b/src/Detector/darknet/src/detection_layer.c
@@ -287,29 +287,29 @@ void backward_detection_layer_gpu(detection_layer l, network_state state)
 
 void get_detection_detections(layer l, int w, int h, float thresh, detection *dets)
 {
-	int i, j, n;
-	float *predictions = l.output;
-	//int per_cell = 5*num+classes;
-	for (i = 0; i < l.side*l.side; ++i) {
-		int row = i / l.side;
-		int col = i % l.side;
-		for (n = 0; n < l.n; ++n) {
-			int index = i*l.n + n;
-			int p_index = l.side*l.side*l.classes + i*l.n + n;
-			float scale = predictions[p_index];
-			int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4;
-			box b;
-			b.x = (predictions[box_index + 0] + col) / l.side * w;
-			b.y = (predictions[box_index + 1] + row) / l.side * h;
-			b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w;
-			b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h;
-			dets[index].bbox = b;
-			dets[index].objectness = scale;
-			for (j = 0; j < l.classes; ++j) {
-				int class_index = i*l.classes;
-				float prob = scale*predictions[class_index + j];
-				dets[index].prob[j] = (prob > thresh) ? prob : 0;
-			}
-		}
-	}
+    int i, j, n;
+    float *predictions = l.output;
+    //int per_cell = 5*num+classes;
+    for (i = 0; i < l.side*l.side; ++i) {
+        int row = i / l.side;
+        int col = i % l.side;
+        for (n = 0; n < l.n; ++n) {
+            int index = i*l.n + n;
+            int p_index = l.side*l.side*l.classes + i*l.n + n;
+            float scale = predictions[p_index];
+            int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4;
+            box b;
+            b.x = (predictions[box_index + 0] + col) / l.side * w;
+            b.y = (predictions[box_index + 1] + row) / l.side * h;
+            b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w;
+            b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h;
+            dets[index].bbox = b;
+            dets[index].objectness = scale;
+            for (j = 0; j < l.classes; ++j) {
+                int class_index = i*l.classes;
+                float prob = scale*predictions[class_index + j];
+                dets[index].prob[j] = (prob > thresh) ? prob : 0;
+            }
+        }
+    }
 }
diff --git a/src/Detector/darknet/src/detector.c b/src/Detector/darknet/src/detector.c
index 73b7bedc1..0fc361429 100644
--- a/src/Detector/darknet/src/detector.c
+++ b/src/Detector/darknet/src/detector.c
@@ -19,24 +19,22 @@ typedef __compar_fn_t comparison_fn_t;
 
 #include "http_stream.h"
 
-int check_mistakes = 0;
-
 static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 };
 
-void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path)
+void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, float thresh, float iou_thresh, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path, int mAP_epochs)
 {
     list *options = read_data_cfg(datacfg);
     char *train_images = option_find_str(options, "train", "data/train.txt");
     char *valid_images = option_find_str(options, "valid", train_images);
     char *backup_directory = option_find_str(options, "backup", "/backup/");
 
+
     network net_map;
     if (calc_map) {
         FILE* valid_file = fopen(valid_images, "r");
         if (!valid_file) {
             printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
-            getchar();
-            exit(-1);
+            error("Error!", DARKNET_LOC);
         }
         else fclose(valid_file);
 
@@ -55,7 +53,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
         if (net_classes != names_size) {
             printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
                 name_list, names_size, net_classes, cfgfile);
-            if (net_classes > names_size) getchar();
         }
         free_ptrs((void**)names, net_map.layers[net_map.n - 1].classes);
     }
@@ -91,13 +88,17 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
 
     const int actual_batch_size = net.batch * net.subdivisions;
     if (actual_batch_size == 1) {
-        printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n");
-        getchar();
+        error("Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64", DARKNET_LOC);
     }
     else if (actual_batch_size < 8) {
         printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size);
     }
 
+    int save_after_iterations = option_find_int(options, "saveweights", (net.max_batches < 10000) ? 1000 : 10000 );  // configure when to write weights. Very useful for smaller datasets!
+	int save_last_weights_after = option_find_int(options, "savelast", 100);
+    printf("Weights are saved after: %d iterations. Last weights (*_last.weight) are stored every %d iterations. \n", save_after_iterations, save_last_weights_after );
+
+
     int imgs = net.batch * net.subdivisions * ngpus;
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     data train, buffer;
@@ -298,7 +299,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
         const int iteration = get_current_iteration(net);
         //i = get_current_batch(net);
 
-        int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions);  // calculate mAP for each 4 Epochs
+        int calc_map_for_each = mAP_epochs * train_images_num / (net.batch * net.subdivisions);  // calculate mAP every mAP_epochs
         calc_map_for_each = fmax(calc_map_for_each, 100);
         int next_map_calc = iter_map + calc_map_for_each;
         next_map_calc = fmax(next_map_calc, net.burn_in);
@@ -308,6 +309,14 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
             if (mean_average_precision > 0) printf("\n Last accuracy mAP@%0.2f = %2.2f %%, best = %2.2f %% ", iou_thresh, mean_average_precision * 100, best_map * 100);
         }
 
+        printf("\033[H\033[J");
+        if (mean_average_precision > 0.0) {
+            printf("%d/%d: loss=%0.1f map=%0.2f best=%0.2f hours left=%0.1f\007", iteration, net.max_batches, loss, mean_average_precision, best_map, avg_time);
+        }
+        else {
+            printf("%d/%d: loss=%0.1f hours left=%0.1f\007", iteration, net.max_batches, loss, avg_time);
+        }
+
         if (net.cudnn_half) {
             if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.\n", 3 * net.burn_in);
             else fprintf(stderr, "\n Tensor Cores are used.\n");
@@ -355,7 +364,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
             iter_map = iteration;
             mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, thresh, iou_thresh, 0, net.letter_box, &net_map);// &net_combined);
             printf("\n mean_average_precision (mAP@%0.2f) = %f \n", iou_thresh, mean_average_precision);
-            if (mean_average_precision > best_map) {
+            if (mean_average_precision >= best_map) {
                 best_map = mean_average_precision;
                 printf("New best mAP!\n");
                 char buff[256];
@@ -380,10 +389,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
         draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, "mAP%", avg_contrastive_acc / 100, dont_show, mjpeg_port, avg_time);
 #endif    // OPENCV
 
-        //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) {
-        //if (i % 100 == 0) {
-        if ((iteration >= (iter_save + 10000) || iteration % 10000 == 0) ||
-            (iteration >= (iter_save + 1000) || iteration % 1000 == 0) && net.max_batches < 10000)
+        if ( (iteration >= (iter_save + save_after_iterations) || iteration % save_after_iterations == 0) )
         {
             iter_save = iteration;
 #ifdef GPU
@@ -394,7 +400,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
             save_weights(net, buff);
         }
 
-        if (iteration >= (iter_save_last + 100) || (iteration % 100 == 0 && iteration > 1)) {
+        if ( (save_after_iterations > save_last_weights_after) && (iteration >= (iter_save_last + save_last_weights_after) || (iteration % save_last_weights_after == 0 && iteration > 1))) {
             iter_save_last = iteration;
 #ifdef GPU
             if (ngpus != 1) sync_nets(nets, ngpus, 0);
@@ -483,7 +489,7 @@ static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_box
             if (dets[i].prob[j] > 0) {
                 char buff[1024];
                 sprintf(buff, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]);
-                fprintf(fp, buff);
+                fprintf(fp, "%s", buff);
                 //printf("%s", buff);
             }
         }
@@ -902,6 +908,7 @@ void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile)
         }
         //fprintf(stderr, " %s - %s - ", paths[i], labelpath);
         fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals / (i + 1), avg_iou * 100 / total, 100.*correct / total);
+        free(truth);
         free(id);
         free_image(orig);
         free_image(sized);
@@ -962,7 +969,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     if (net.layers[net.n - 1].classes != names_size) {
         printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
             name_list, names_size, net.layers[net.n - 1].classes, cfgfile);
-        getchar();
+        error("Error!", DARKNET_LOC);
     }
     srand(time(0));
     printf("\n calculation mAP (mean average precision)...\n");
@@ -970,13 +977,13 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     list *plist = get_paths(valid_images);
     char **paths = (char **)list_to_array(plist);
 
+    list *plist_dif = NULL;
     char **paths_dif = NULL;
     if (difficult_valid_images) {
-        list *plist_dif = get_paths(difficult_valid_images);
+        plist_dif = get_paths(difficult_valid_images);
         paths_dif = (char **)list_to_array(plist_dif);
     }
 
-
     layer l = net.layers[net.n - 1];
     int k;
     for (k = 0; k < net.n; ++k) {
@@ -1181,6 +1188,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
             //if(errors_in_this_image > 0) fwrite(buff, sizeof(char), strlen(buff), reinforcement_fd);
 
             free_detections(dets, nboxes);
+            free(truth);
+            free(truth_dif);
             free(id);
             free_image(val[t]);
             free_image(val_resized[t]);
@@ -1274,7 +1283,6 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
 
     free(truth_flags);
 
-
     double mean_average_precision = 0;
 
     for (i = 0; i < classes; ++i) {
@@ -1362,7 +1370,14 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa
     free(detections);
     free(truth_classes_count);
     free(detection_per_class_count);
-
+    free(paths);
+    free(paths_dif);
+    free_list_contents(plist);
+    free_list(plist);
+    if (plist_dif) {
+        free_list_contents(plist_dif);
+        free_list(plist_dif);
+    }
     free(avg_iou_per_class);
     free(tp_for_thresh_per_class);
     free(fp_for_thresh_per_class);
@@ -1468,7 +1483,6 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
                 sprintf(buff, "echo \"Wrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f\" >> bad_label.list",
                     labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h);
                 system(buff);
-                if (check_mistakes) getchar();
             }
             if (truth[j].id >= classes) {
                 classes = truth[j].id + 1;
@@ -1484,6 +1498,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
             printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes);
         }
         free(buff);
+        free(truth);
     }
     printf("\n all loaded. \n");
     printf("\n calculating k-means++ ...");
@@ -1599,8 +1614,6 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int
     }
     free(rel_width_height_array);
     free(counter_per_class);
-
-    getchar();
 }
 
 
@@ -1624,7 +1637,6 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
     if (net.layers[net.n - 1].classes != names_size) {
         printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
             name_list, names_size, net.layers[net.n - 1].classes, cfgfile);
-        if (net.layers[net.n - 1].classes > names_size) getchar();
     }
     srand(2222222);
     char buff[256];
@@ -1757,17 +1769,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
     free_ptrs((void**)names, net.layers[net.n - 1].classes);
     free_list_contents_kvp(options);
     free_list(options);
-
-    int i;
-    const int nsize = 8;
-    for (j = 0; j < nsize; ++j) {
-        for (i = 32; i < 127; ++i) {
-            free_image(alphabet[j][i]);
-        }
-        free(alphabet[j]);
-    }
-    free(alphabet);
-
+    free_alphabet(alphabet);
     free_network(net);
 }
 
@@ -1795,7 +1797,6 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
     if (net.layers[net.n - 1].classes != names_size) {
         printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
             name_list, names_size, net.layers[net.n - 1].classes, cfgfile);
-        if (net.layers[net.n - 1].classes > names_size) getchar();
     }
 
     srand(2222222);
@@ -1939,8 +1940,7 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
 void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show, int it_num,
     int letter_box, int benchmark_layers)
 {
-    printf(" ./darknet detector draw ... can't be used without OpenCV and CUDA! \n");
-    getchar();
+    error("darknet detector draw ... can't be used without OpenCV and CUDA", DARKNET_LOC);
 }
 #endif // defined(OPENCV) && defined(GPU)
 
@@ -1955,7 +1955,6 @@ void run_detector(int argc, char **argv)
     int letter_box = find_arg(argc, argv, "-letter_box");
     int calc_map = find_arg(argc, argv, "-map");
     int map_points = find_int_arg(argc, argv, "-points", 0);
-    check_mistakes = find_arg(argc, argv, "-check_mistakes");
     int show_imgs = find_arg(argc, argv, "-show_imgs");
     int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
     int avgframes = find_int_arg(argc, argv, "-avgframes", 3);
@@ -1964,6 +1963,7 @@ void run_detector(int argc, char **argv)
     char *http_post_host = find_char_arg(argc, argv, "-http_post_host", 0);
     int time_limit_sec = find_int_arg(argc, argv, "-time_limit_sec", 0);
     char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
+    char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0);
     char *outfile = find_char_arg(argc, argv, "-out", 0);
     char *prefix = find_char_arg(argc, argv, "-prefix", 0);
     float thresh = find_float_arg(argc, argv, "-thresh", .25);    // 0.24
@@ -1979,6 +1979,8 @@ void run_detector(int argc, char **argv)
     int ext_output = find_arg(argc, argv, "-ext_output");
     int save_labels = find_arg(argc, argv, "-save_labels");
     char* chart_path = find_char_arg(argc, argv, "-chart", 0);
+    // While training, decide after how many epochs mAP will be calculated. Default value is 4 which means the mAP will be calculated after each 4 epochs
+    int mAP_epochs = find_int_arg(argc, argv, "-mAP_epochs", 4);
     if (argc < 4) {
         fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
         return;
@@ -2017,7 +2019,7 @@ void run_detector(int argc, char **argv)
             if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
     char *filename = (argc > 6) ? argv[6] : 0;
     if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers);
-    else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, thresh, iou_thresh, mjpeg_port, show_imgs, benchmark_layers, chart_path);
+    else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, thresh, iou_thresh, mjpeg_port, show_imgs, benchmark_layers, chart_path, mAP_epochs);
     else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
     else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);
     else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL);
@@ -2035,7 +2037,7 @@ void run_detector(int argc, char **argv)
             if (strlen(filename) > 0)
                 if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0;
         demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename,
-            mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers);
+            mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers, json_file_output);
 
         free_list_contents_kvp(options);
         free_list(options);
diff --git a/src/Detector/darknet/src/dice.c b/src/Detector/darknet/src/dice.c
index 8a0393a8b..bb5d64379 100644
--- a/src/Detector/darknet/src/dice.c
+++ b/src/Detector/darknet/src/dice.c
@@ -33,7 +33,7 @@ void train_dice(char *cfgfile, char *weightfile)
         float loss = train_network(net, train);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %" PRIu64 " images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if((i % 100) == 0) net.learning_rate *= .1;
         if(i%100==0){
diff --git a/src/Detector/darknet/src/dropout_layer.c b/src/Detector/darknet/src/dropout_layer.c
index 3cc73ed24..744c708a4 100644
--- a/src/Detector/darknet/src/dropout_layer.c
+++ b/src/Detector/darknet/src/dropout_layer.c
@@ -19,7 +19,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability, int d
 
         if (l.w <= 0 || l.h <= 0 || l.c <= 0) {
             printf(" Error: DropBlock - there must be positive values for: l.w=%d, l.h=%d, l.c=%d \n", l.w, l.h, l.c);
-            exit(0);
+            error("Error!", DARKNET_LOC);
         }
     }
     l.inputs = inputs;
diff --git a/src/Detector/darknet/src/dropout_layer_kernels.cu b/src/Detector/darknet/src/dropout_layer_kernels.cu
index 05cde5949..3ed7a186b 100644
--- a/src/Detector/darknet/src/dropout_layer_kernels.cu
+++ b/src/Detector/darknet/src/dropout_layer_kernels.cu
@@ -165,11 +165,11 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state)
         //fill_ongpu(l.outputs * l.batch, 1, state.input, 1); // remove!!!
 
         int num_blocks = l.batch * l.c;
-        dropblock_fast_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (l.rand_gpu, block_prob, l.w, l.h, l.w*l.h, l.c, l.batch, block_size, l.drop_blocks_scale_gpu, state.input);
+        dropblock_fast_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (l.rand_gpu, block_prob, l.w, l.h, l.w*l.h, l.c, l.batch, block_size, l.drop_blocks_scale_gpu, state.input);
         CHECK_CUDA(cudaPeekAtLastError());
 
         num_blocks = get_number_of_blocks(l.batch, BLOCK);
-        set_scales_dropblock_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (l.drop_blocks_scale_gpu, block_size, block_size, l.outputs, l.batch);
+        set_scales_dropblock_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (l.drop_blocks_scale_gpu, block_size, block_size, l.outputs, l.batch);
         CHECK_CUDA(cudaPeekAtLastError());
 
         /*
@@ -205,7 +205,7 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state)
         */
 
         num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK);
-        scale_dropblock_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (state.input, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu);
+        scale_dropblock_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (state.input, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
 
     }
@@ -221,7 +221,7 @@ void forward_dropout_layer_gpu(dropout_layer l, network_state state)
         cuda_push_array(layer.rand_gpu, layer.rand, size);
         */
 
-        yoloswag420blazeit360noscope << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (state.input, size, l.rand_gpu, l.probability, l.scale);
+        yoloswag420blazeit360noscope <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (state.input, size, l.rand_gpu, l.probability, l.scale);
         CHECK_CUDA(cudaPeekAtLastError());
     }
 }
@@ -264,10 +264,10 @@ void backward_dropout_layer_gpu(dropout_layer l, network_state state)
         //fill_ongpu(l.outputs * l.batch, 1, state.delta, 1); // remove!!!
 
         int num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK);
-        backward_dropblock_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(l.rand_gpu, state.delta, l.outputs * l.batch);
+        backward_dropblock_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(l.rand_gpu, state.delta, l.outputs * l.batch);
         CHECK_CUDA(cudaPeekAtLastError());
 
-        scale_dropblock_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (state.delta, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu);
+        scale_dropblock_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (state.delta, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
 
         /*
@@ -305,7 +305,7 @@ void backward_dropout_layer_gpu(dropout_layer l, network_state state)
     }
     // dropout
     else {
-        yoloswag420blazeit360noscope << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (state.delta, size, l.rand_gpu, l.probability, l.scale);
+        yoloswag420blazeit360noscope <<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>> (state.delta, size, l.rand_gpu, l.probability, l.scale);
         CHECK_CUDA(cudaPeekAtLastError());
     }
 }
diff --git a/src/Detector/darknet/src/gaussian_yolo_layer.c b/src/Detector/darknet/src/gaussian_yolo_layer.c
index bd99a89dc..f94f4a6ce 100644
--- a/src/Detector/darknet/src/gaussian_yolo_layer.c
+++ b/src/Detector/darknet/src/gaussian_yolo_layer.c
@@ -20,8 +20,6 @@
 #define M_PI 3.141592
 #endif
 
-extern int check_mistakes;
-
 layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes)
 {
     int i;
@@ -51,7 +49,7 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m
     l.outputs = h*w*n*(classes + 8 + 1);
     l.inputs = l.outputs;
     l.max_boxes = max_boxes;
-	l.truth_size = 4 + 2;
+    l.truth_size = 4 + 2;
     l.truths = l.max_boxes*l.truth_size;
     l.delta = (float*)calloc(batch*l.outputs, sizeof(float));
     l.output = (float*)calloc(batch*l.outputs, sizeof(float));
@@ -470,7 +468,6 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
                         if (class_id >= l.classes) {
                             printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1);
                             printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id);
-                            if (check_mistakes) getchar();
                             continue; // if label contains class_id more than number of classes in the cfg-file
                         }
                         if(!truth.x) break;
diff --git a/src/Detector/darknet/src/gemm.c b/src/Detector/darknet/src/gemm.c
index 5f5c9689c..256061bbe 100644
--- a/src/Detector/darknet/src/gemm.c
+++ b/src/Detector/darknet/src/gemm.c
@@ -297,8 +297,6 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED,
     }
 
     free(count_arr);
-
-    //getchar();
 }
 */
 
diff --git a/src/Detector/darknet/src/getopt.c b/src/Detector/darknet/src/getopt.c
index 45d2b8e63..61aa096ab 100644
--- a/src/Detector/darknet/src/getopt.c
+++ b/src/Detector/darknet/src/getopt.c
@@ -64,8 +64,8 @@ permute_args(int panonopt_start, int panonopt_end, int opt_end,
   char* swap;
 
   /*
-	 * compute lengths of blocks and number and size of cycles
-	 */
+     * compute lengths of blocks and number and size of cycles
+     */
   nnonopts = panonopt_end - panonopt_start;
   nopts = opt_end - panonopt_end;
   ncycle = gcd(nnonopts, nopts);
@@ -91,7 +91,7 @@ permute_args(int panonopt_start, int panonopt_end, int opt_end,
 #ifdef REPLACE_GETOPT
 /*
  * getopt --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  *
  * [eventually this will replace the BSD getopt]
  */
@@ -99,13 +99,13 @@ int getopt(int nargc, char* const* nargv, const char* options)
 {
 
   /*
-	 * We don't pass FLAG_PERMUTE to getopt_internal() since
-	 * the BSD getopt(3) (unlike GNU) has never done this.
-	 *
-	 * Furthermore, since many privileged programs call getopt()
-	 * before dropping privileges it makes sense to keep things
-	 * as simple (and bug-free) as possible.
-	 */
+     * We don't pass FLAG_PERMUTE to getopt_internal() since
+     * the BSD getopt(3) (unlike GNU) has never done this.
+     *
+     * Furthermore, since many privileged programs call getopt()
+     * before dropping privileges it makes sense to keep things
+     * as simple (and bug-free) as possible.
+     */
   return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
 }
 #endif /* REPLACE_GETOPT */
@@ -129,24 +129,24 @@ int getopt(int nargc, char* const* nargv, const char* options)
 extern "C" {
 #endif
 
-struct option /* specification for a long form option...	*/
+struct option /* specification for a long form option...    */
 {
   const char* name; /* option name, without leading hyphens */
-  int has_arg; /* does it take an argument?		*/
-  int* flag; /* where to save its status, or NULL	*/
-  int val; /* its associated status value		*/
+  int has_arg; /* does it take an argument?        */
+  int* flag; /* where to save its status, or NULL    */
+  int val; /* its associated status value        */
 };
 
-enum /* permitted values for its `has_arg' field...	*/
+enum /* permitted values for its `has_arg' field...    */
 {
-  no_argument = 0, /* option never takes an argument	*/
-  required_argument, /* option always requires an argument	*/
-  optional_argument /* option may take an argument		*/
+  no_argument = 0, /* option never takes an argument    */
+  required_argument, /* option always requires an argument    */
+  optional_argument /* option may take an argument        */
 };
 
 /*
  * parse_long_options --
- *	Parse long options in argc/argv argument vector.
+ *    Parse long options in argc/argv argument vector.
  * Returns -1 if short_too is set and the option does not match long_options.
  */
 static int
@@ -186,9 +186,9 @@ parse_long_options(char* const* nargv, const char* options,
       break;
     }
     /*
-		 * If this is a known short option, don't allow
-		 * a partial match of a single character.
-		 */
+         * If this is a known short option, don't allow
+         * a partial match of a single character.
+         */
     if (short_too && current_argv_len == 1)
       continue;
 
@@ -212,8 +212,8 @@ parse_long_options(char* const* nargv, const char* options,
         warnx(noarg, (int)current_argv_len,
             current_argv);
       /*
-			 * XXX: GNU sets optopt to val regardless of flag
-			 */
+             * XXX: GNU sets optopt to val regardless of flag
+             */
       if (long_options[match].flag == NULL)
         optopt = long_options[match].val;
       else
@@ -225,23 +225,23 @@ parse_long_options(char* const* nargv, const char* options,
         optarg = has_equal;
       else if (long_options[match].has_arg == required_argument) {
         /*
-				 * optional argument doesn't use next nargv
-				 */
+                 * optional argument doesn't use next nargv
+                 */
         optarg = nargv[optind++];
       }
     }
     if ((long_options[match].has_arg == required_argument)
         && (optarg == NULL)) {
       /*
-			 * Missing argument; leading ':' indicates no error
-			 * should be generated.
-			 */
+             * Missing argument; leading ':' indicates no error
+             * should be generated.
+             */
       if (PRINT_ERROR)
         warnx(recargstring,
             current_argv);
       /*
-			 * XXX: GNU sets optopt to val regardless of flag
-			 */
+             * XXX: GNU sets optopt to val regardless of flag
+             */
       if (long_options[match].flag == NULL)
         optopt = long_options[match].val;
       else
@@ -271,7 +271,7 @@ parse_long_options(char* const* nargv, const char* options,
 
 /*
  * getopt_internal --
- *	Parse argc/argv argument vector.  Called by user level routines.
+ *    Parse argc/argv argument vector.  Called by user level routines.
  */
 static int
 getopt_internal(int nargc, char* const* nargv, const char* options,
@@ -285,19 +285,19 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
     return (-1);
 
   /*
-	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
-	 * XXX using optreset.  Work around this braindamage.
-	 */
+     * XXX Some GNU programs (like cvs) set optind to 0 instead of
+     * XXX using optreset.  Work around this braindamage.
+     */
   if (optind == 0)
     optind = optreset = 1;
 
   /*
-	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
-	 * string begins with a '+'.
-	 *
-	 * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
-	 *                 optreset != 0 for GNU compatibility.
-	 */
+     * Disable GNU extensions if POSIXLY_CORRECT is set or options
+     * string begins with a '+'.
+     *
+     * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
+     *                 optreset != 0 for GNU compatibility.
+     */
   if (posixly_correct == -1 || optreset != 0)
     posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
   if (*options == '-')
@@ -322,9 +322,9 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
         optind -= nonopt_end - nonopt_start;
       } else if (nonopt_start != -1) {
         /*
-				 * If we skipped non-options, set optind
-				 * to the first of them.
-				 */
+                 * If we skipped non-options, set optind
+                 * to the first of them.
+                 */
         optind = nonopt_start;
       }
       nonopt_start = nonopt_end = -1;
@@ -334,17 +334,17 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
       place = EMSG; /* found non-option */
       if (flags & FLAG_ALLARGS) {
         /*
-				 * GNU extension:
-				 * return non-option as argument to option 1
-				 */
+                 * GNU extension:
+                 * return non-option as argument to option 1
+                 */
         optarg = nargv[optind++];
         return (INORDER);
       }
       if (!(flags & FLAG_PERMUTE)) {
         /*
-				 * If no permutation wanted, stop parsing
-				 * at first non-option.
-				 */
+                 * If no permutation wanted, stop parsing
+                 * at first non-option.
+                 */
         return (-1);
       }
       /* do permutation */
@@ -364,15 +364,15 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
       nonopt_end = optind;
 
     /*
-		 * If we have "-" do nothing, if "--" we are done.
-		 */
+         * If we have "-" do nothing, if "--" we are done.
+         */
     if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
       optind++;
       place = EMSG;
       /*
-			 * We found an option (--), so if we skipped
-			 * non-options, we have to permute.
-			 */
+             * We found an option (--), so if we skipped
+             * non-options, we have to permute.
+             */
       if (nonopt_end != -1) {
         permute_args(nonopt_start, nonopt_end,
             optind, nargv);
@@ -384,11 +384,11 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
   }
 
   /*
-	 * Check long options if:
-	 *  1) we were passed some
-	 *  2) the arg is not just "-"
-	 *  3) either the arg starts with -- we are getopt_long_only()
-	 */
+     * Check long options if:
+     *  1) we were passed some
+     *  2) the arg is not just "-"
+     *  3) either the arg starts with -- we are getopt_long_only()
+     */
   if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) {
     short_too = 0;
     if (*place == '-')
@@ -406,10 +406,10 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
 
   if ((optchar = (int)*place++) == (int)':' || (optchar == (int)'-' && *place != '\0') || (oli = (char*)strchr(options, optchar)) == NULL) {
     /*
-		 * If the user specified "-" and  '-' isn't listed in
-		 * options, return -1 (non-option) as per POSIX.
-		 * Otherwise, it is an unknown option character (or ':').
-		 */
+         * If the user specified "-" and  '-' isn't listed in
+         * options, return -1 (non-option) as per POSIX.
+         * Otherwise, it is an unknown option character (or ':').
+         */
     if (optchar == (int)'-' && *place == '\0')
       return (-1);
     if (!*place)
@@ -462,7 +462,7 @@ getopt_internal(int nargc, char* const* nargv, const char* options,
 
 /*
  * getopt_long --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  */
 int getopt_long(int nargc, char* const* nargv, const char* options,
     const struct option* long_options, int* idx)
@@ -474,7 +474,7 @@ int getopt_long(int nargc, char* const* nargv, const char* options,
 
 /*
  * getopt_long_only --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  */
 int getopt_long_only(int nargc, char* const* nargv, const char* options,
     const struct option* long_options, int* idx)
diff --git a/src/Detector/darknet/src/getopt.h b/src/Detector/darknet/src/getopt.h
index 8266c734e..a9f913d4f 100644
--- a/src/Detector/darknet/src/getopt.h
+++ b/src/Detector/darknet/src/getopt.h
@@ -75,12 +75,12 @@ extern "C" {
 
 #define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
 
-//extern int optind;		/* index of first non-option in argv      */
-//extern int optopt;		/* single option character, as parsed     */
-//extern int opterr;		/* flag to enable built-in diagnostics... */
-//				/* (user may set to zero, to suppress)    */
+//extern int optind;        /* index of first non-option in argv      */
+//extern int optopt;        /* single option character, as parsed     */
+//extern int opterr;        /* flag to enable built-in diagnostics... */
+//                /* (user may set to zero, to suppress)    */
 //
-//extern char *optarg;		/* pointer to argument of current option  */
+//extern char *optarg;        /* pointer to argument of current option  */
 
 #define PRINT_ERROR ((opterr) && (*options != ':'))
 
@@ -145,7 +145,7 @@ static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char
 #ifdef REPLACE_GETOPT
 /*
  * getopt --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  *
  * [eventually this will replace the BSD getopt]
  */
@@ -187,26 +187,26 @@ extern "C" {
 
 /*
  * parse_long_options --
- *	Parse long options in argc/argv argument vector.
+ *    Parse long options in argc/argv argument vector.
  * Returns -1 if short_too is set and the option does not match long_options.
  */
 /* static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too); */
 
 /*
  * getopt_internal --
- *	Parse argc/argv argument vector.  Called by user level routines.
+ *    Parse argc/argv argument vector.  Called by user level routines.
  */
 /* static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags); */
 
 /*
  * getopt_long --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  */
 int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
 
 /*
  * getopt_long_only --
- *	Parse argc/argv argument vector.
+ *    Parse argc/argv argument vector.
  */
 int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx);
 
diff --git a/src/Detector/darknet/src/go.c b/src/Detector/darknet/src/go.c
index 1c7a1a083..5107125e7 100644
--- a/src/Detector/darknet/src/go.c
+++ b/src/Detector/darknet/src/go.c
@@ -142,7 +142,7 @@ void train_go(char *cfgfile, char *weightfile)
         float loss = train_network_datum(net, board, move) / net.batch;
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.95 + loss*.05;
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         if(*net.seen/N > epoch){
             epoch = *net.seen/N;
             char buff[256];
@@ -580,9 +580,9 @@ void engine_go(char *filename, char *weightfile, int multi)
                 fprintf(f, "final_status_list dead\n");
                 fclose(f);
 #ifdef _WIN32
-				FILE *p = _popen("./gnugo --mode gtp < game.txt", "r");
+                FILE *p = _popen("./gnugo --mode gtp < game.txt", "r");
 #else
-				FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
+                FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
 #endif
                 for(i = 0; i < count; ++i){
                     free(fgetl(p));
@@ -721,9 +721,9 @@ float score_game(float *board)
     fprintf(f, "final_score\n");
     fclose(f);
 #ifdef _WIN32
-	FILE *p = _popen("./gnugo --mode gtp < game.txt", "r");
+    FILE *p = _popen("./gnugo --mode gtp < game.txt", "r");
 #else
-	FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
+    FILE *p = popen("./gnugo --mode gtp < game.txt", "r");
 #endif
     for(i = 0; i < count; ++i){
         free(fgetl(p));
@@ -740,9 +740,9 @@ float score_game(float *board)
     }
     if(player == 'W') score = -score;
 #ifdef _WIN32
-	_pclose(p);
+    _pclose(p);
 #else
-	pclose(p);
+    pclose(p);
 #endif
     return score;
 }
diff --git a/src/Detector/darknet/src/http_stream.cpp b/src/Detector/darknet/src/http_stream.cpp
index 72cc1c6e9..cac00a09d 100644
--- a/src/Detector/darknet/src/http_stream.cpp
+++ b/src/Detector/darknet/src/http_stream.cpp
@@ -508,7 +508,7 @@ class MJPG_sender
                 }
 
                 char head[400];
-                sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen);
+                sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %d\r\n\r\n", outlen);
                 _write(s, head, 0);
                 int n = _write(s, (char*)(&outbuf[0]), outlen);
                 cerr << "known client: " << s << ", sent = " << n << ", must be sent outlen = " << outlen << endl;
@@ -782,8 +782,7 @@ int check_prob(detection det, float thresh)
 int check_classes_id(detection det1, detection det2, float thresh)
 {
     if (det1.classes != det2.classes) {
-        printf(" Error: det1.classes != det2.classes \n");
-        getchar();
+        error("Error: det1.classes != det2.classes", DARKNET_LOC);
     }
 
     int det1_id = -1;
@@ -832,7 +831,7 @@ float *make_float_array(float* src, size_t size)
 }
 
 struct detection_t : detection {
-    int det_count = 0;
+    int det_count;
     detection_t(detection det) : detection(det), det_count(0)
     {
         if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size);
@@ -840,7 +839,7 @@ struct detection_t : detection {
         if (uc) uc = make_float_array(det.uc, 4);
     }
 
-    detection_t(detection_t const& det) : detection(det), det_count(0)
+    detection_t(detection_t const& det) : detection(det)
     {
         if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size);
         if (prob) prob = make_float_array(det.prob, classes);
diff --git a/src/Detector/darknet/src/im2col_kernels.cu b/src/Detector/darknet/src/im2col_kernels.cu
index f924b3e5c..ac7ccc84c 100644
--- a/src/Detector/darknet/src/im2col_kernels.cu
+++ b/src/Detector/darknet/src/im2col_kernels.cu
@@ -206,8 +206,8 @@ void im2col_align_ongpu(float *im,
     int height_col = (height + 2 * pad - ksize) / stride + 1;
     int width_col = (width + 2 * pad - ksize) / stride + 1;
     int num_kernels = channels * height_col * width_col;
-    im2col_align_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK,
-        BLOCK, 0, get_cuda_stream() >> >(
+    im2col_align_gpu_kernel <<<(num_kernels + BLOCK - 1) / BLOCK,
+        BLOCK, 0, get_cuda_stream() >>>(
             num_kernels, im, height, width, ksize, pad,
             stride, height_col,
             width_col, data_col, bit_align);
@@ -334,9 +334,9 @@ void im2col_align_bin_ongpu(float *im,
     int num_kernels = channels * ksize * ksize;
     int num_blocks = num_kernels / BLOCK + 1;
 
-    //im2col_align_bin_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK,
-    im2col_align_bin_gpu_kernel << <num_blocks,
-        BLOCK, 0, get_cuda_stream() >> >(
+    //im2col_align_bin_gpu_kernel <<<(num_kernels + BLOCK - 1) / BLOCK,
+    im2col_align_bin_gpu_kernel <<<num_blocks,
+        BLOCK, 0, get_cuda_stream() >>>(
             num_kernels, im, height, width, ksize, channels, pad,
             stride, height_col,
             width_col, data_col, bit_align);
@@ -500,7 +500,7 @@ __device__ void transpose8rS32_reversed_diagonale(unsigned char* A, unsigned cha
     B[7 * n] = reverse_byte_CUDA(x >> 24);  B[6 * n] = reverse_byte_CUDA(x >> 16);  B[5 * n] = reverse_byte_CUDA(x >> 8);  B[4 * n] = reverse_byte_CUDA(x);
     B[3 * n] = reverse_byte_CUDA(y >> 24);  B[2 * n] = reverse_byte_CUDA(y >> 16);  B[1 * n] = reverse_byte_CUDA(y >> 8);  B[0 * n] = reverse_byte_CUDA(y);
 
-    //__device__ ​ unsigned int 	__brev(unsigned int  x)
+    //__device__ ​ unsigned int     __brev(unsigned int  x)
     //Reverse the bit order of a 32 bit unsigned integer.
     // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html
 }
@@ -648,8 +648,8 @@ void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const in
     int size32 = n*m / (32*32) + 1;
     //const int num_blocks = size / BLOCK + 1;
     const int num_blocks32 = size32 / BLOCK_TRANSPOSE32 + 1;
-    transpose_bin_gpu_kernel_32 << <num_blocks32, BLOCK_TRANSPOSE32, 0, get_cuda_stream() >> >((uint32_t *)A, (uint32_t *)B, n, m, lda, ldb, block_size);
-    //transpose_bin_gpu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(A, B, n, m, lda, ldb, block_size);
+    transpose_bin_gpu_kernel_32 <<<num_blocks32, BLOCK_TRANSPOSE32, 0, get_cuda_stream() >>>((uint32_t *)A, (uint32_t *)B, n, m, lda, ldb, block_size);
+    //transpose_bin_gpu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(A, B, n, m, lda, ldb, block_size);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 // --------------------------------
@@ -676,7 +676,7 @@ void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, in
 {
     int size = src_w * src_h;
     const int num_blocks = size / BLOCK + 1;
-    transpose_uint32_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(src, dst, src_h, src_w, src_align, dst_align);
+    transpose_uint32_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(src, dst, src_h, src_w, src_align, dst_align);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 // --------------------------------
@@ -742,7 +742,7 @@ void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w,
 
     int size = src_w_align * src_h_align;
     int num_blocks = size / TRANS_BLOCK;
-    transpose_uint32_kernel_2 << <num_blocks, TRANS_BLOCK, 0, get_cuda_stream() >> >(src, dst, src_h, src_w, src_align, dst_align);
+    transpose_uint32_kernel_2 <<<num_blocks, TRANS_BLOCK, 0, get_cuda_stream() >>>(src, dst, src_h, src_w, src_align, dst_align);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 // --------------------------------
@@ -780,7 +780,7 @@ void repack_input_gpu(float *input, float *re_packed_input, int w, int h, int c)
 {
     int size = w * h * c;
     const int num_blocks = size / BLOCK + 1;
-    repack_input_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(input, re_packed_input, w, h, c);
+    repack_input_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(input, re_packed_input, w, h, c);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 // --------------------------------
@@ -820,7 +820,7 @@ void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int
 {
     int size = w * h * c;
     const int num_blocks = size / BLOCK + 1;
-    repack_input_kernel_2 << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(input, re_packed_input, w, h, c);
+    repack_input_kernel_2 <<<num_blocks, BLOCK, 0, get_cuda_stream() >>>(input, re_packed_input, w, h, c);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 // --------------------------------
@@ -870,7 +870,7 @@ void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, in
     const int block_size = BLOCK;
     const int num_blocks = get_number_of_blocks(size, block_size);
     //printf("\n num_blocks = %d, num_blocks/32 = %d,  block_size = %d \n", num_blocks, num_blocks / 32, block_size);
-    repack_input_kernel_bin << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, re_packed_input_bin, w, h, c);
+    repack_input_kernel_bin <<<num_blocks, block_size, 0, get_cuda_stream() >>>(input, re_packed_input_bin, w, h, c);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -921,7 +921,7 @@ void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, in
     const int block_size = 256;// 128;
     const int num_blocks = get_number_of_blocks(size, block_size);
     printf("\n num_blocks = %d, num_blocks/32 = %d,  block_size = %d \n", num_blocks, num_blocks/32, block_size);
-    repack_input_kernel_bin << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, re_packed_input_bin, w, h, c);
+    repack_input_kernel_bin <<<num_blocks, block_size, 0, get_cuda_stream() >>>(input, re_packed_input_bin, w, h, c);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 */
@@ -1351,17 +1351,17 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i
 
                     // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag);
                     int32_t c_val[8];  // 8 x 32 threads = 256
-                    #pragma UNROLL
+                    #pragma unroll
                     for (int local_j = 0; local_j < 8; ++local_j)
                     {
                         uint32_t b_val_cur = __shfl_custom(b_val, local_j * 4 + k_d);
                         c_val[local_j] = __popc(xor_int32(a_val, b_val_cur));
                     }
 
-                    #pragma UNROLL
+                    #pragma unroll
                     for (int local_j = 0; local_j < 8; ++local_j)
                     {
-                        #pragma UNROLL
+                        #pragma unroll
                         for (int local_k = 0; local_k < 4; ++local_k) {
                             accum_c_val[local_j + c_x*8] += __shfl_custom(c_val[local_j], i_d * 4 + local_k);
                         }
@@ -1386,7 +1386,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i
             {
                 int j_d = lane_id % WMMA_N;
                 {
-                    #pragma UNROLL
+                    #pragma unroll
                     for (int i_d = lane_id / WMMA_N; i_d < WMMA_M; i_d += WMMA_M / 2)
                     {
                         int count = C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + j_d + WMMA_M*WMMA_N*32*c_x];
@@ -1506,17 +1506,17 @@ __global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, i
 
                 // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag);
                 int32_t c_val[8];  // 8 x 32 threads = 256
-                #pragma UNROLL
+                #pragma unroll
                 for (int local_j = 0; local_j < 8; ++local_j)
                 {
                     uint32_t b_val_cur = __shfl_custom(b_val, local_j *4 + k_d);
                     c_val[local_j] = __popc(xor_int32(a_val, b_val_cur));
                 }
 
-                #pragma UNROLL
+                #pragma unroll
                 for (int local_j = 0; local_j < 8; ++local_j)
                 {
-                    #pragma UNROLL
+                    #pragma unroll
                     for (int local_k = 0; local_k < 4; ++local_k) {
                         accum_c_val[local_j] += __shfl_custom(c_val[local_j], i_d * 4 + local_k);
                     }
@@ -1750,8 +1750,8 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
 // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-subbyte
 // nvcuda::wmma::col_major ->  cutlass::MatrixLayout::kColumnMajor (matrix is not transposed)
 
-// Matrix A	Matrix B	Accumulator	Matrix Size (m-n-k)
-// precision::b1	precision::b1	int	8x8x128
+// Matrix A    Matrix B    Accumulator    Matrix Size (m-n-k)
+// precision::b1    precision::b1    int    8x8x128
 
 // The only dimensions currently supported by WMMA for XNOR
 // const int WMMA_M = 8;
@@ -1791,7 +1791,7 @@ void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K,
 
         //printf(" lda = %d, ldb = %d, ldc = %d, lda/32 = %d, ldb/32 = %d, ldc/32 = %d \n", lda, ldb, ldc, lda / 32, ldb / 32, ldc / 32);
         //printf("  l.c (K/9) = %d, M (l.n) = %d \n", (K%9 == 0)? K / 9: K, M);
-        gemm_nn_custom_bin_mean_transposed_tensor_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (
+        gemm_nn_custom_bin_mean_transposed_tensor_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (
             M, N, K,
             A, lda,
             B, ldb,
@@ -1800,12 +1800,11 @@ void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K,
             shortcut_in_gpu, shortcut_out_gpu);
 
         //cudaDeviceSynchronize();
-        //getchar();
     }
     else
 #endif  //# CUDART_VERSION >= 10000
     {
-        gemm_nn_custom_bin_mean_transposed_gpu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (
+        gemm_nn_custom_bin_mean_transposed_gpu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (
             M, N, K,
             A, lda,
             B, ldb,
@@ -1993,7 +1992,7 @@ void convolve_gpu(float *input, float *weights, float *output, int in_w, int in_
     const int num_blocks = array_size / BLOCK + 1;
     //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad);
 
-    convolve_gpu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (input, weights, output, in_w, in_h, in_c, n, size, pad);
+    convolve_gpu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (input, weights, output, in_w, in_h, in_c, n, size, pad);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -2209,7 +2208,7 @@ void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int
     const int num_blocks = array_size / BLOCK + 1;
     //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad);
 
-    convolve_bin_gpu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (input, weights, output, in_w, in_h, in_c, n, size, pad, new_lda, mean_arr_gpu);
+    convolve_bin_gpu_kernel <<<num_blocks, BLOCK, 0, get_cuda_stream() >>> (input, weights, output, in_w, in_h, in_c, n, size, pad, new_lda, mean_arr_gpu);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
@@ -2278,11 +2277,11 @@ void im2col_gpu_ext(const float* data_im, const int channels,
         (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
     int num_kernels = channels * height_col * width_col;
     // NOLINT_NEXT_LINE(whitespace/operators)
-    im2col_gpu_kernel_ext << <CAFFE_GET_BLOCKS(num_kernels),
-        CAFFE_CUDA_NUM_THREADS >> >(
+    im2col_gpu_kernel_ext <<<CAFFE_GET_BLOCKS(num_kernels),
+        CAFFE_CUDA_NUM_THREADS >>>(
             num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h,
             pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col,
             width_col, data_col);
 
     CHECK_CUDA(cudaPeekAtLastError());
-}
\ No newline at end of file
+}
diff --git a/src/Detector/darknet/src/image.c b/src/Detector/darknet/src/image.c
index 8ef026d33..b238db352 100644
--- a/src/Detector/darknet/src/image.c
+++ b/src/Detector/darknet/src/image.c
@@ -13,16 +13,13 @@
 
 #ifndef STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
+#include <stb_image.h>
 #endif
 #ifndef STB_IMAGE_WRITE_IMPLEMENTATION
 #define STB_IMAGE_WRITE_IMPLEMENTATION
-#include "stb_image_write.h"
+#include <stb_image_write.h>
 #endif
 
-extern int check_mistakes;
-//int windows = 0;
-
 float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
 
 float get_color(int c, int x, int max)
@@ -280,6 +277,19 @@ image **load_alphabet()
     return alphabets;
 }
 
+void free_alphabet(image **alphabet)
+{
+    int i, j;
+    const int nsize = 8;
+    for (j = 0; j < nsize; ++j) {
+        for (i = 32; i < 127; ++i) {
+            free_image(alphabet[j][i]);
+        }
+        free(alphabet[j]);
+    }
+    free(alphabet);
+}
+
 
 
 // Creates array of detections with prob > thresh and fills best_class for them
@@ -705,7 +715,7 @@ void show_image(image p, const char *name)
 #ifdef OPENCV
     show_image_cv(p, name);
 #else
-    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
+    fprintf(stderr, "Not compiled with OpenCV, saving to %s.jpg instead\n", name);
     save_image(p, name);
 #endif  // OPENCV
 }
@@ -1498,12 +1508,7 @@ image load_image_stb(char *filename, int channels)
         char *new_line = "\n";
         fwrite(new_line, sizeof(char), strlen(new_line), fw);
         fclose(fw);
-        if (check_mistakes) {
-            printf("\n Error in load_image_stb() \n");
-            getchar();
-        }
         return make_image(10, 10, 3);
-        //exit(EXIT_FAILURE);
     }
     if(channels) c = channels;
     int i,j,k;
@@ -1523,7 +1528,7 @@ image load_image_stb(char *filename, int channels)
 
 image load_image_stb_resize(char *filename, int w, int h, int c)
 {
-    image out = load_image_stb(filename, c);    // without OpenCV
+    image out = load_image_stb(filename, c);
 
     if ((h && w) && (h != out.h || w != out.w)) {
         image resized = resize_image(out, w, h);
@@ -1536,10 +1541,9 @@ image load_image_stb_resize(char *filename, int w, int h, int c)
 image load_image(char *filename, int w, int h, int c)
 {
 #ifdef OPENCV
-    //image out = load_image_stb(filename, c);
     image out = load_image_cv(filename, c);
 #else
-    image out = load_image_stb(filename, c);    // without OpenCV
+    image out = load_image_stb(filename, c);
 #endif  // OPENCV
 
     if((h && w) && (h != out.h || w != out.w)){
diff --git a/src/Detector/darknet/src/image.h b/src/Detector/darknet/src/image.h
index 90e6a0486..65ccb7c71 100644
--- a/src/Detector/darknet/src/image.h
+++ b/src/Detector/darknet/src/image.h
@@ -88,6 +88,7 @@ image load_image(char *filename, int w, int h, int c);
 image load_image_stb_resize(char *filename, int w, int h, int c);
 //LIB_API image load_image_color(char *filename, int w, int h);
 image **load_alphabet();
+void free_alphabet(image **alphabet);
 
 //float get_pixel(image m, int x, int y, int c);
 //float get_pixel_extend(image m, int x, int y, int c);
diff --git a/src/Detector/darknet/src/image_opencv.cpp b/src/Detector/darknet/src/image_opencv.cpp
index c72b0efe4..974a5144d 100644
--- a/src/Detector/darknet/src/image_opencv.cpp
+++ b/src/Detector/darknet/src/image_opencv.cpp
@@ -113,7 +113,6 @@ extern "C" mat_cv *load_image_mat_cv(const char *filename, int flag)
             cerr << "Cannot load image " << shrinked_filename << std::endl;
             std::ofstream bad_list("bad.list", std::ios::out | std::ios::app);
             bad_list << shrinked_filename << std::endl;
-            //if (check_mistakes) getchar();
             return NULL;
         }
         cv::Mat dst;
@@ -1161,12 +1160,12 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
             if (iteration_old == 0)
                 cv::putText(img, accuracy_name, cv::Point(10, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA);
 
-	        if (iteration_old != 0){
-            	    cv::line(img,
+            if (iteration_old != 0){
+                    cv::line(img,
                         cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)),
                         cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)),
                         CV_RGB(255, 0, 0), 1, 8, 0);
-	        }
+            }
 
             sprintf(char_buff, "%2.1f%% ", precision * 100);
             cv::putText(img, char_buff, cv::Point(10, 28), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 5, CV_AA);
@@ -1347,8 +1346,8 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
         // Mat -> image
         out = mat_to_image(sized);
     }
-    catch (...) {
-        cerr << "OpenCV can't augment image: " << w << " x " << h << " \n";
+    catch (const std::exception& e) {
+        cerr << "OpenCV can't augment image: " << w << " x " << h << " \n" << e.what() << " \n";
         out = mat_to_image(*(cv::Mat*)mat);
     }
     return out;
diff --git a/src/Detector/darknet/src/layer.c b/src/Detector/darknet/src/layer.c
index 032a24e0f..758644b56 100644
--- a/src/Detector/darknet/src/layer.c
+++ b/src/Detector/darknet/src/layer.c
@@ -92,9 +92,9 @@ void free_layer_custom(layer l, int keep_cudnn_desc)
     if (l.bias_updates)       free(l.bias_updates), l.bias_updates = NULL;
     if (l.scales)             free(l.scales), l.scales = NULL;
     if (l.scale_updates)      free(l.scale_updates), l.scale_updates = NULL;
-    if (l.biases_ema)         free(l.biases_ema), l.biases = NULL;
-    if (l.scales_ema)         free(l.scales_ema), l.scales = NULL;
-    if (l.weights_ema)        free(l.weights_ema), l.weights = NULL;
+    if (l.biases_ema)         free(l.biases_ema), l.biases_ema = NULL;
+    if (l.scales_ema)         free(l.scales_ema), l.scales_ema = NULL;
+    if (l.weights_ema)        free(l.weights_ema), l.weights_ema = NULL;
     if (l.weights)            free(l.weights), l.weights = NULL;
     if (l.weight_updates)     free(l.weight_updates), l.weight_updates = NULL;
     if (l.align_bit_weights)  free(l.align_bit_weights);
diff --git a/src/Detector/darknet/src/layer.h b/src/Detector/darknet/src/layer.h
index e92d3b4a9..5b3d228b1 100644
--- a/src/Detector/darknet/src/layer.h
+++ b/src/Detector/darknet/src/layer.h
@@ -36,10 +36,10 @@ extern "C" {
 //    NETWORK,
 //    XNOR,
 //    REGION,
-//	YOLO,
+//    YOLO,
 //    REORG,
-//	UPSAMPLE,
-//	REORG_OLD,
+//    UPSAMPLE,
+//    REORG_OLD,
 //    BLANK
 //} LAYER_TYPE;
 
@@ -48,15 +48,15 @@ extern "C" {
 //} COST_TYPE;
 
 //typedef struct {
-//	int batch;
-//	float learning_rate;
-//	float momentum;
-//	float decay;
-//	int adam;
-//	float B1;
-//	float B2;
-//	float eps;
-//	int t;
+//    int batch;
+//    float learning_rate;
+//    float momentum;
+//    float decay;
+//    int adam;
+//    float B1;
+//    float B2;
+//    float eps;
+//    int t;
 //} update_args;
 
 /*
@@ -87,7 +87,7 @@ struct layer{
     int side;
     int stride;
     int reverse;
-	int spatial;
+    int spatial;
     int pad;
     int sqrt;
     int flip;
@@ -105,8 +105,8 @@ struct layer{
     float shift;
     float ratio;
     float learning_rate_scale;
-	int focal_loss;
-	int noloss;
+    int focal_loss;
+    int noloss;
     int softmax;
     int classes;
     int coords;
@@ -118,10 +118,10 @@ struct layer{
     int noadjust;
     int reorg;
     int log;
-	int tanh;
-	int *mask;
-	int total;
-	float bflops;
+    int tanh;
+    int *mask;
+    int total;
+    float bflops;
 
     int adam;
     float B1;
@@ -146,14 +146,14 @@ struct layer{
     float coord_scale;
     float object_scale;
     float noobject_scale;
-	float mask_scale;
+    float mask_scale;
     float class_scale;
     int bias_match;
     int random;
-	float ignore_thresh;
-	float truth_thresh;
+    float ignore_thresh;
+    float truth_thresh;
     float thresh;
-	float focus;
+    float focus;
     int classfix;
     int absolute;
 
@@ -208,7 +208,7 @@ struct layer{
     int   * input_sizes;
     float * delta;
     float * output;
-	float * loss;
+    float * loss;
     float * squared;
     float * norms;
 
@@ -298,8 +298,8 @@ struct layer{
     float * weights_gpu;
     float * weight_updates_gpu;
 
-	float * weights_gpu16;
-	float * weight_updates_gpu16;
+    float * weights_gpu16;
+    float * weight_updates_gpu16;
 
     float * biases_gpu;
     float * bias_updates_gpu;
@@ -308,7 +308,7 @@ struct layer{
     float * scale_updates_gpu;
 
     float * output_gpu;
-	float * loss_gpu;
+    float * loss_gpu;
     float * delta_gpu;
     float * rand_gpu;
     float * squared_gpu;
@@ -318,7 +318,7 @@ struct layer{
     cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16;
     cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
     cudnnTensorDescriptor_t dsrcTensorDesc16, ddstTensorDesc16;
-	cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16;
+    cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16;
     cudnnFilterDescriptor_t weightDesc, weightDesc16;
     cudnnFilterDescriptor_t dweightDesc, dweightDesc16;
     cudnnConvolutionDescriptor_t convDesc;
diff --git a/src/Detector/darknet/src/maxpool_layer.c b/src/Detector/darknet/src/maxpool_layer.c
index 89ae55d46..0e7dfd5b5 100644
--- a/src/Detector/darknet/src/maxpool_layer.c
+++ b/src/Detector/darknet/src/maxpool_layer.c
@@ -142,7 +142,7 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
     else cudnn_maxpool_setup(&l);
 
 #endif  // GPU
-	l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
+    l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
     if (avgpool) {
         if (stride_x == stride_y)
             fprintf(stderr, "avg               %2dx%2d/%2d   %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops);
@@ -411,4 +411,4 @@ void backward_local_avgpool_layer(const maxpool_layer l, network_state state)
         }
     }
 
-}
\ No newline at end of file
+}
diff --git a/src/Detector/darknet/src/maxpool_layer_kernels.cu b/src/Detector/darknet/src/maxpool_layer_kernels.cu
index ab39d6b57..1d0d1bd9d 100644
--- a/src/Detector/darknet/src/maxpool_layer_kernels.cu
+++ b/src/Detector/darknet/src/maxpool_layer_kernels.cu
@@ -155,7 +155,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
 
         size_t n = h*w*c*layer.batch;
 
-        forward_maxpool_depth_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(
+        forward_maxpool_depth_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(
             n, layer.w, layer.h, layer.c, layer.out_c, layer.batch, state.input, layer.output_gpu, layer.indexes_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
 
@@ -192,11 +192,11 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
 
         size_t n = h*w*c*layer.batch;
 
-        forward_maxpool_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
+        forward_maxpool_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
 
         if (layer.maxpool_zero_nonmax) {
-            forward_zero_nonmax_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, state.input, layer.output_gpu);
+            forward_zero_nonmax_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, state.input, layer.output_gpu);
             CHECK_CUDA(cudaPeekAtLastError());
         }
     }
@@ -237,7 +237,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
 
         size_t n = h * w * c * layer.batch;
 
-        backward_maxpool_depth_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu);
+        backward_maxpool_depth_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
         return;
     }
@@ -248,7 +248,7 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
     CHECK_CUDA(cudaPeekAtLastError());
 
     if (layer.maxpool_zero_nonmax) {
-        backward_zero_nonmax_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, layer.indexes_gpu, state.delta);
+        backward_zero_nonmax_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, layer.indexes_gpu, state.delta);
         CHECK_CUDA(cudaPeekAtLastError());
     }
 }
@@ -373,7 +373,7 @@ extern "C" void forward_local_avgpool_layer_gpu(maxpool_layer layer, network_sta
 
         size_t n = h*w*c*layer.batch;
 
-        forward_local_avgpool_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu);
+        forward_local_avgpool_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>> (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu);
         CHECK_CUDA(cudaPeekAtLastError());
     }
 }
@@ -382,6 +382,6 @@ extern "C" void backward_local_avgpool_layer_gpu(maxpool_layer layer, network_st
 {
     size_t n = layer.h*layer.w*layer.c*layer.batch;
 
-    backward_local_avgpool_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta);
+    backward_local_avgpool_layer_kernel <<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta);
     CHECK_CUDA(cudaPeekAtLastError());
 }
diff --git a/src/Detector/darknet/src/network.c b/src/Detector/darknet/src/network.c
index 92379f053..40c1cbcb7 100644
--- a/src/Detector/darknet/src/network.c
+++ b/src/Detector/darknet/src/network.c
@@ -776,6 +776,21 @@ float *network_predict(network net, float *input)
     return out;
 }
 
+#ifdef CUDA_OPENGL_INTEGRATION
+float *network_predict_gl_texture(network *net, uint32_t texture_id)
+{
+    if(net->batch != 1) {
+        set_batch_network(net, 1);
+    }
+
+    if(gpu_index >= 0) {
+        return network_predict_gpu_gl_texture(*net, texture_id);
+    }
+
+    return NULL;
+}
+#endif // CUDA_OPENGL_INTEGRATION
+
 int num_detections(network *net, float thresh)
 {
     int i;
@@ -889,8 +904,8 @@ void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, f
         dets[j].best_class_idx = -1;
         for (i = 0; i < l.classes; ++i) {
             if (probs[j][i] > highest_prob) {
-            	highest_prob = probs[j][i];
-            	dets[j].best_class_idx = i;
+                highest_prob = probs[j][i];
+                dets[j].best_class_idx = i;
             }
             dets[j].prob[i] = probs[j][i];
         }
@@ -1033,7 +1048,7 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names,
                 send_buf = (char *)realloc(send_buf, total_len * sizeof(char));
                 if (!send_buf) {
                     if (buf) free(buf);
-                    return 0;// exit(-1);
+                    return 0;
                 }
                 strcat(send_buf, buf);
                 free(buf);
diff --git a/src/Detector/darknet/src/network.h b/src/Detector/darknet/src/network.h
index 7661c8ef8..c8a770642 100644
--- a/src/Detector/darknet/src/network.h
+++ b/src/Detector/darknet/src/network.h
@@ -1,6 +1,17 @@
 // Oh boy, why am I about to do this....
 #ifndef NETWORK_H
 #define NETWORK_H
+
+/*
+ * Necessary in C++ to get format macros out of inttypes.h
+ */
+#ifdef __cplusplus
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS 1
+#endif
+#endif
+#include <inttypes.h>
+
 #include "darknet.h"
 
 #include <stdint.h>
@@ -23,7 +34,7 @@ typedef struct network{
     float *workspace;
     int n;
     int batch;
-	uint64_t *seen;
+    uint64_t *seen;
     float epoch;
     int subdivisions;
     float momentum;
@@ -61,7 +72,7 @@ typedef struct network{
     float exposure;
     float saturation;
     float hue;
-	int small_object;
+    int small_object;
 
     int gpu_index;
     tree *hierarchy;
@@ -71,11 +82,11 @@ typedef struct network{
 
     float **input_gpu;
     float **truth_gpu;
-	float **input16_gpu;
-	float **output16_gpu;
-	size_t *max_input16_size;
-	size_t *max_output16_size;
-	int wait_stream;
+    float **input16_gpu;
+    float **output16_gpu;
+    size_t *max_input16_size;
+    size_t *max_output16_size;
+    int wait_stream;
     #endif
 } network;
 
@@ -96,6 +107,7 @@ float train_networks(network *nets, int n, data d, int interval);
 void sync_nets(network *nets, int n, int interval);
 float train_network_datum_gpu(network net, float *x, float *y);
 float *network_predict_gpu(network net, float *input);
+float *network_predict_gpu_gl_texture(network net, uint32_t texture_id);
 float * get_network_output_gpu_layer(network net, int i);
 float * get_network_delta_gpu_layer(network net, int i);
 float *get_network_output_gpu(network net);
@@ -144,7 +156,7 @@ int get_predicted_class_network(network net);
 void print_network(network net);
 void visualize_network(network net);
 int resize_network(network *net, int w, int h);
-void set_batch_network(network *net, int b);
+//LIB_API void set_batch_network(network *net, int b);
 int get_network_input_size(network net);
 float get_network_cost(network net);
 //LIB_API layer* get_network_layer(network* net, int i);
diff --git a/src/Detector/darknet/src/network_kernels.cu b/src/Detector/darknet/src/network_kernels.cu
index 7fbc03cf5..3b3d4e6fe 100644
--- a/src/Detector/darknet/src/network_kernels.cu
+++ b/src/Detector/darknet/src/network_kernels.cu
@@ -222,7 +222,7 @@ void backward_network_gpu(network net, network_state state)
             int state_input_nan_inf = is_nan_or_inf(state.input, l.outputs * l.batch);
             printf("\n i - %d  is_nan_or_inf(s.delta) = %d \n", i, state_delta_nan_inf);
             printf(" i - %d  is_nan_or_inf(s.input) = %d \n", i, state_input_nan_inf);
-            if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); getchar(); }
+            if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); }
         }
         */
     }
@@ -376,7 +376,7 @@ float train_network_datum_gpu(network net, float *x, float *y)
         float scale = (get_current_iteration(net) / ((float)net.max_batches));
         //scale = sin(scale * M_PI);
         net.learning_rate = net.adversarial_lr * scale;
-        layer l = net.layers[net.n - 1];
+        //layer l = net.layers[net.n - 1];
         int y_size = get_network_output_size(net)*net.batch;
         if (net.layers[net.n - 1].truths) y_size = net.layers[net.n - 1].truths*net.batch;
         float *truth_cpu = (float *)xcalloc(y_size, sizeof(float));
@@ -747,3 +747,92 @@ float *network_predict_gpu(network net, float *input)
     //cuda_free(state.input);   // will be freed in the free_network()
     return out;
 }
+
+#ifdef CUDA_OPENGL_INTEGRATION
+float *network_predict_gpu_gl_texture(network net, uint32_t texture_id)
+{
+    if (net.gpu_index != cuda_get_device())
+        cuda_set_device(net.gpu_index);
+    int size = get_network_input_size(net) * net.batch;
+
+    // Map the OpenGL texture resource so CUDA can access it.
+    cudaGraphicsResource_t graphics_resource = NULL;
+    unsigned int flags = cudaGraphicsRegisterFlagsReadOnly;
+    CHECK_CUDA(cudaGraphicsGLRegisterImage(&graphics_resource, texture_id, GL_TEXTURE_2D, flags));
+    CHECK_CUDA(cudaGraphicsMapResources(1, &graphics_resource, 0));
+
+    //void* dev_ptr = NULL;
+    cudaArray_t dev_array = NULL;
+    CHECK_CUDA(cudaGraphicsSubResourceGetMappedArray(&dev_array, graphics_resource, 0, 0));
+
+    size_t width = net.w;
+    size_t height = net.h;
+    size_t pitch = width * sizeof(float);
+
+    CHECK_CUDA(cudaMemcpy2DFromArray(
+            net.input_state_gpu,     // dst
+            pitch,                   // dst_pitch
+            dev_array,               // src
+            0,                       // width offset
+            0,                       // height offset
+            width * sizeof(float),   // width (in bytes)
+            height * net.c,          // height (in rows)
+            cudaMemcpyDeviceToDevice // Transfer type
+    ));
+
+    network_state state;
+    state.index = 0;
+    state.net = net;
+    state.input = net.input_state_gpu;
+    state.truth = 0;
+    state.train = 0;
+    state.delta = 0;
+
+    //cudaGraphExec_t instance = (cudaGraphExec_t)net.cuda_graph_exec;
+    static cudaGraphExec_t instance;
+
+    if ((*net.cuda_graph_ready) == 0) {
+        static cudaGraph_t graph;
+        if (net.use_cuda_graph == 1) {
+            int i;
+            for (i = 0; i < 16; ++i) switch_stream(i);
+
+            cudaStream_t stream0 = switch_stream(0);
+            CHECK_CUDA(cudaDeviceSynchronize());
+            printf("Try to capture graph... \n");
+            //cudaGraph_t graph = (cudaGraph_t)net.cuda_graph;
+            CHECK_CUDA(cudaStreamBeginCapture(stream0, cudaStreamCaptureModeGlobal));
+        }
+
+        // cuda_push_array(state.input, net.input_pinned_cpu, size);
+        forward_network_gpu(net, state);
+
+        if (net.use_cuda_graph == 1) {
+            cudaStream_t stream0 = switch_stream(0);
+            CHECK_CUDA(cudaStreamEndCapture(stream0, &graph));
+            CHECK_CUDA(cudaGraphInstantiate(&instance, graph, NULL, NULL, 0));
+            (*net.cuda_graph_ready) = 1;
+            printf(" graph is captured... \n");
+            CHECK_CUDA(cudaDeviceSynchronize());
+        }
+        CHECK_CUDA(cudaStreamSynchronize(get_cuda_stream()));
+    }
+    else {
+        cudaStream_t stream0 = switch_stream(0);
+        //printf(" cudaGraphLaunch \n");
+        CHECK_CUDA( cudaGraphLaunch(instance, stream0) );
+        CHECK_CUDA( cudaStreamSynchronize(stream0) );
+        //printf(" ~cudaGraphLaunch \n");
+    }
+
+    float *out = get_network_output_gpu(net);
+    reset_wait_stream_events();
+    //cuda_free(state.input);   // will be freed in the free_network()
+
+    // Unmap the OpenGL texture.
+    cudaGraphicsUnmapResources(1, &graphics_resource, 0);
+    cudaGraphicsUnregisterResource(graphics_resource);
+
+    return out;
+}
+#endif // CUDA_OPENGL_INTEGRATION
diff --git a/src/Detector/darknet/src/option_list.h b/src/Detector/darknet/src/option_list.h
index 726b559aa..3dd66e445 100644
--- a/src/Detector/darknet/src/option_list.h
+++ b/src/Detector/darknet/src/option_list.h
@@ -26,8 +26,8 @@ float option_find_float_quiet(list *l, char *key, float def);
 void option_unused(list *l);
 
 //typedef struct {
-//	int classes;
-//	char **names;
+//    int classes;
+//    char **names;
 //} metadata;
 
 //LIB_API metadata get_metadata(char *file);
diff --git a/src/Detector/darknet/src/parser.c b/src/Detector/darknet/src/parser.c
index 91b67c2d5..65606de6e 100644
--- a/src/Detector/darknet/src/parser.c
+++ b/src/Detector/darknet/src/parser.c
@@ -217,13 +217,11 @@ convolutional_layer parse_convolutional(list *options, size_params params)
     int stretch = option_find_int_quiet(options, "stretch", 0);
     int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0);
     if ((sway + rotate + stretch + stretch_sway) > 1) {
-        printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n");
-        exit(0);
+        error("Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer", DARKNET_LOC);
     }
     int deform = sway || rotate || stretch || stretch_sway;
     if (deform && size == 1) {
-        printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n");
-        exit(0);
+        error("Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer", DARKNET_LOC);
     }
 
     convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, deform, params.train);
@@ -362,17 +360,17 @@ connected_layer parse_connected(list *options, size_params params)
 
 softmax_layer parse_softmax(list *options, size_params params)
 {
-	int groups = option_find_int_quiet(options, "groups", 1);
-	softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
-	layer.temperature = option_find_float_quiet(options, "temperature", 1);
-	char *tree_file = option_find_str(options, "tree", 0);
-	if (tree_file) layer.softmax_tree = read_tree(tree_file);
-	layer.w = params.w;
-	layer.h = params.h;
-	layer.c = params.c;
-	layer.spatial = option_find_float_quiet(options, "spatial", 0);
-	layer.noloss = option_find_int_quiet(options, "noloss", 0);
-	return layer;
+    int groups = option_find_int_quiet(options, "groups", 1);
+    softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
+    layer.temperature = option_find_float_quiet(options, "temperature", 1);
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) layer.softmax_tree = read_tree(tree_file);
+    layer.w = params.w;
+    layer.h = params.h;
+    layer.c = params.c;
+    layer.spatial = option_find_float_quiet(options, "spatial", 0);
+    layer.noloss = option_find_int_quiet(options, "noloss", 0);
+    return layer;
 }
 
 contrastive_layer parse_contrastive(list *options, size_params params)
@@ -384,8 +382,7 @@ contrastive_layer parse_contrastive(list *options, size_params params)
     if(yolo_layer_id != 0) yolo_layer = params.net.layers + yolo_layer_id;
     if (yolo_layer->type != YOLO) {
         printf(" Error: [contrastive] layer should point to the [yolo] layer instead of %d layer! \n", yolo_layer_id);
-        getchar();
-        exit(0);
+        error("Error!", DARKNET_LOC);
     }
 
     contrastive_layer layer = make_contrastive_layer(params.batch, params.w, params.h, params.c, classes, params.inputs, yolo_layer);
@@ -427,7 +424,7 @@ float *get_classes_multipliers(char *cpc, const int classes, const float max_del
         int *counters_per_class = parse_yolo_mask(cpc, &classes_counters);
         if (classes_counters != classes) {
             printf(" number of values in counters_per_class = %d doesn't match with classes = %d \n", classes_counters, classes);
-            exit(0);
+            error("Error!", DARKNET_LOC);
         }
         float max_counter = 0;
         int i;
@@ -458,12 +455,8 @@ layer parse_yolo(list *options, size_params params)
     int max_boxes = option_find_int_quiet(options, "max", 200);
     layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
     if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n");
-        exit(EXIT_FAILURE);
+        error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer", DARKNET_LOC);
     }
-    //assert(l.outputs == params.inputs);
-
     l.show_details = option_find_int_quiet(options, "show_details", 1);
     l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
     char *cpc = option_find_str(options, "counters_per_class", 0);
@@ -532,7 +525,6 @@ layer parse_yolo(list *options, size_params params)
         printf(" embedding_size = %d \n", l.embedding_size);
         if (le.n % l.n != 0) {
             printf(" Warning: filters=%d number in embedding_layer=%d isn't divisable by number of anchors %d \n", le.n, embedding_layer_id, l.n);
-            getchar();
         }
     }
 
@@ -592,11 +584,8 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
     int *mask = parse_gaussian_yolo_mask(a, &num);
     layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
     if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n");
-        exit(EXIT_FAILURE);
+        error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer", DARKNET_LOC);
     }
-    //assert(l.outputs == params.inputs);
     l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
     char *cpc = option_find_str(options, "counters_per_class", 0);
     l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta);
@@ -683,9 +672,7 @@ layer parse_region(list *options, size_params params)
 
     layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
     if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n");
-        exit(EXIT_FAILURE);
+        error("Error: l.outputs == params.inputs, filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer", DARKNET_LOC);
     }
     //assert(l.outputs == params.inputs);
 
@@ -936,8 +923,7 @@ layer parse_shortcut(list *options, size_params params, network net)
     else if (strcmp(weights_type_str, "per_channel") == 0) weights_type = PER_CHANNEL;
     else if (strcmp(weights_type_str, "none") != 0) {
         printf("Error: Incorrect weights_type = %s \n Use one of: none, per_feature, per_channel \n", weights_type_str);
-        getchar();
-        exit(0);
+        error("Error!", DARKNET_LOC);
     }
 
     char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none");
@@ -946,8 +932,7 @@ layer parse_shortcut(list *options, size_params params, network net)
     else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION;
     else if (strcmp(weights_type_str, "none") != 0) {
         printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str);
-        getchar();
-        exit(0);
+        error("Error!", DARKNET_LOC);
     }
 
     char *l = option_find(options, "from");
@@ -1227,8 +1212,7 @@ void parse_net_options(list *options, network *net)
     net->contrastive_color = option_find_int_quiet(options, "contrastive_color", 0);
     net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
     if (net->contrastive && mini_batch < 2) {
-        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
-        exit(0);
+        error("Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss!", DARKNET_LOC);
     }
     net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
     net->resize_step = option_find_float_quiet(options, "resize_step", 32);
@@ -1837,7 +1821,7 @@ list *read_cfg(char *filename)
                 free(line);
                 break;
             default:
-                if(!current || (current && !read_option(line, current->options))){
+                if(!read_option(line, current->options)){
                     fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
                     free(line);
                 }
@@ -2006,12 +1990,12 @@ void save_weights_upto(network net, char *filename, int cutoff, int save_ema)
     FILE *fp = fopen(filename, "wb");
     if(!fp) file_error(filename);
 
-    int major = MAJOR_VERSION;
-    int minor = MINOR_VERSION;
-    int revision = PATCH_VERSION;
-    fwrite(&major, sizeof(int), 1, fp);
-    fwrite(&minor, sizeof(int), 1, fp);
-    fwrite(&revision, sizeof(int), 1, fp);
+    int32_t major = MAJOR_VERSION;
+    int32_t minor = MINOR_VERSION;
+    int32_t revision = PATCH_VERSION;
+    fwrite(&major, sizeof(int32_t), 1, fp);
+    fwrite(&minor, sizeof(int32_t), 1, fp);
+    fwrite(&revision, sizeof(int32_t), 1, fp);
     (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration
     fwrite(net.seen, sizeof(uint64_t), 1, fp);
 
@@ -2267,12 +2251,12 @@ void load_weights_upto(network *net, char *filename, int cutoff)
     FILE *fp = fopen(filename, "rb");
     if(!fp) file_error(filename);
 
-    int major;
-    int minor;
-    int revision;
-    fread(&major, sizeof(int), 1, fp);
-    fread(&minor, sizeof(int), 1, fp);
-    fread(&revision, sizeof(int), 1, fp);
+    int32_t major;
+    int32_t minor;
+    int32_t revision;
+    fread(&major, sizeof(int32_t), 1, fp);
+    fread(&minor, sizeof(int32_t), 1, fp);
+    fread(&revision, sizeof(int32_t), 1, fp);
     if ((major * 10 + minor) >= 2) {
         printf("\n seen 64");
         uint64_t iseen = 0;
diff --git a/src/Detector/darknet/src/region_layer.c b/src/Detector/darknet/src/region_layer.c
index b7aba32e1..506cc9fc6 100644
--- a/src/Detector/darknet/src/region_layer.c
+++ b/src/Detector/darknet/src/region_layer.c
@@ -306,7 +306,6 @@ void forward_region_layer(const region_layer l, network_state state)
             int class_id = state.truth[t * l.truth_size + b*l.truths + 4];
             if (class_id >= l.classes) {
                 printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1);
-                getchar();
                 continue; // if label contains class_id more than number of classes in the cfg-file
             }
 
diff --git a/src/Detector/darknet/src/representation_layer.c b/src/Detector/darknet/src/representation_layer.c
index fe7741fa9..3efe74f58 100644
--- a/src/Detector/darknet/src/representation_layer.c
+++ b/src/Detector/darknet/src/representation_layer.c
@@ -155,5 +155,3 @@ void push_implicit_layer(layer l)
     CHECK_CUDA(cudaPeekAtLastError());
 }
 #endif
-
-
diff --git a/src/Detector/darknet/src/rnn_vid.c b/src/Detector/darknet/src/rnn_vid.c
index a5ff52784..c521c7588 100644
--- a/src/Detector/darknet/src/rnn_vid.c
+++ b/src/Detector/darknet/src/rnn_vid.c
@@ -160,7 +160,7 @@ void generate_vid_rnn(char *cfgfile, char *weightfile)
     //CvCapture* cap = cvCaptureFromFile("extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4");
     float *feat;
     float *next;
-	next = NULL;
+    next = NULL;
     image last;
     for(i = 0; i < 25; ++i){
         image im = get_image_from_stream_cpp(cap);
diff --git a/src/Detector/darknet/src/route_layer.c b/src/Detector/darknet/src/route_layer.c
index c92321018..23dfa0473 100644
--- a/src/Detector/darknet/src/route_layer.c
+++ b/src/Detector/darknet/src/route_layer.c
@@ -59,8 +59,7 @@ void resize_route_layer(route_layer *l, network *net)
             l->out_c += next.out_c;
         }else{
             printf("Error: Different size of input layers: %d x %d, %d x %d\n", next.out_w, next.out_h, first.out_w, first.out_h);
-            l->out_h = l->out_w = l->out_c = 0;
-            exit(EXIT_FAILURE);
+            error("Error!", DARKNET_LOC);
         }
     }
     l->out_c = l->out_c / l->groups;
diff --git a/src/Detector/darknet/src/softmax_layer.c b/src/Detector/darknet/src/softmax_layer.c
index c0693f019..6535c5780 100644
--- a/src/Detector/darknet/src/softmax_layer.c
+++ b/src/Detector/darknet/src/softmax_layer.c
@@ -14,16 +14,16 @@
 
 void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
 {
-	int b;
-	for (b = 0; b < batch; ++b) {
-		int i;
-		int count = 0;
-		for (i = 0; i < hierarchy->groups; ++i) {
-			int group_size = hierarchy->group_size[i];
-			softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count, 1);
-			count += group_size;
-		}
-	}
+    int b;
+    for (b = 0; b < batch; ++b) {
+        int i;
+        int count = 0;
+        for (i = 0; i < hierarchy->groups; ++i) {
+            int group_size = hierarchy->group_size[i];
+            softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count, 1);
+            count += group_size;
+        }
+    }
 }
 
 softmax_layer make_softmax_layer(int batch, int inputs, int groups)
@@ -89,28 +89,28 @@ void pull_softmax_layer_output(const softmax_layer layer)
 void forward_softmax_layer_gpu(const softmax_layer l, network_state net)
 {
     if(l.softmax_tree){
-		softmax_tree_gpu(net.input, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree);
-		/*
-		int i;
-		int count = 0;
-		for (i = 0; i < l.softmax_tree->groups; ++i) {
-		int group_size = l.softmax_tree->group_size[i];
-		softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
-		count += group_size;
-		}
-		*/
+        softmax_tree_gpu(net.input, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree);
+        /*
+        int i;
+        int count = 0;
+        for (i = 0; i < l.softmax_tree->groups; ++i) {
+        int group_size = l.softmax_tree->group_size[i];
+        softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count);
+        count += group_size;
+        }
+        */
     } else {
         if(l.spatial){
-			softmax_gpu_new_api(net.input, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu);
+            softmax_gpu_new_api(net.input, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu);
         }else{
-			softmax_gpu_new_api(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
+            softmax_gpu_new_api(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu);
         }
     }
     if(net.truth && !l.noloss){
         softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth, l.delta_gpu, l.loss_gpu);
         if(l.softmax_tree){
-			mask_gpu_new_api(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth, 0);
-			mask_gpu_new_api(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth, 0);
+            mask_gpu_new_api(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth, 0);
+            mask_gpu_new_api(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth, 0);
         }
         cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs);
         l.cost[0] = sum_array(l.loss, l.batch*l.inputs);
@@ -119,7 +119,7 @@ void forward_softmax_layer_gpu(const softmax_layer l, network_state net)
 
 void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
 {
-	axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1);
+    axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1);
 }
 
 #endif
@@ -151,12 +151,10 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int c, int cla
         l.truths = yolo_layer->truths;
         if (l.embedding_size != yolo_layer->embedding_size) {
             printf(" Error: [contrastive] embedding_size=%d isn't equal to [yolo] embedding_size=%d. They should use the same [convolutional] layer \n", l.embedding_size, yolo_layer->embedding_size);
-            getchar();
-            exit(0);
+            error("Error!", DARKNET_LOC);
         }
         if (l.inputs % (l.n*l.h*l.w) != 0) {
             printf(" Warning: filters= number in the previous (embedding) layer isn't divisable by number of anchors %d \n", l.n);
-            getchar();
         }
     }
     else {
@@ -200,7 +198,7 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int c, int cla
     printf(" max_contr_size = %d MB \n", max_contr_size / (1024*1024));
     l.contrast_p_gpu = (contrastive_params *)cuda_make_array(NULL, max_contr_size);
 #endif
-    fprintf(stderr, "contrastive %4d x%4d x%4d x emb_size %4d x batch: %4d  classes = %4d, step = %4d \n", w, h, l.n, l.embedding_size, batch, l.classes, step);
+    fprintf(stderr, "contrastive %4d x%4d x%4d x emb_size %4d x batch: %4d  classes = %4d, step = %4zu \n", w, h, l.n, l.embedding_size, batch, l.classes, step);
     if(l.detection) fprintf(stderr, "detection \n");
     return l;
 }
@@ -350,7 +348,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state)
                                     }
 
                                     if (sim > 1.001 || sim < -1.001) {
-                                        printf(" sim = %f, ", sim); getchar();
+                                        printf(" sim = %f, ", sim);
                                     }
                                 }
                             }
@@ -406,7 +404,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state)
                 const float P = P_constrastive(b, b2, l.labels, l.batch, z, l.embedding_size, l.temperature, l.cos_sim);
                 l.p_constrastive[b*l.batch + b2] = P;
                 if (P > 1 || P < -1) {
-                    printf(" p = %f, ", P); getchar();
+                    printf(" p = %f, ", P);
                 }
             }
         }
@@ -421,7 +419,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state)
         const int max_contr_size = (l.max_boxes*l.batch)*(l.max_boxes*l.batch);
         if (max_contr_size < contr_size) {
             printf(" Error: too large number of bboxes: contr_size = %d > max_contr_size  = %d \n", contr_size, max_contr_size);
-            exit(0);
+            error("Error!", DARKNET_LOC);
         }
         int *labels = NULL;
         if (contr_size > 2) {
@@ -480,11 +478,8 @@ void forward_contrastive_layer(contrastive_layer l, network_state state)
                                                 break;
                                             }
 
-                                        //if (q == contr_size) getchar();
-
-
                                         //if (P > 1 || P < -1) {
-                                        //    printf(" p = %f, z_index = %d, z_index2 = %d ", P, z_index, z_index2); getchar();
+                                        //    printf(" p = %f, z_index = %d, z_index2 = %d ", P, z_index, z_index2);
                                         //}
                                     }
                                 }
diff --git a/src/Detector/darknet/src/tag.c b/src/Detector/darknet/src/tag.c
index d7e1349a2..c1e031b2c 100644
--- a/src/Detector/darknet/src/tag.c
+++ b/src/Detector/darknet/src/tag.c
@@ -63,7 +63,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear)
         float loss = train_network(net, train);
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         free_data(train);
         if(*net.seen/N > epoch){
             epoch = *net.seen/N;
diff --git a/src/Detector/darknet/src/tree.h b/src/Detector/darknet/src/tree.h
index 863797387..c5a09aa97 100644
--- a/src/Detector/darknet/src/tree.h
+++ b/src/Detector/darknet/src/tree.h
@@ -6,7 +6,7 @@
 //    int *leaf;
 //    int n;
 //    int *parent;
-//	int *child;
+//    int *child;
 //    int *group;
 //    char **name;
 //
diff --git a/src/Detector/darknet/src/utils.c b/src/Detector/darknet/src/utils.c
index 0c271b7bf..2ad33d5e5 100644
--- a/src/Detector/darknet/src/utils.c
+++ b/src/Detector/darknet/src/utils.c
@@ -18,6 +18,7 @@
 #else
 #include <sys/time.h>
 #include <sys/stat.h>
+#include <execinfo.h>
 #endif
 
 
@@ -239,12 +240,23 @@ void trim(char *str)
     free(buffer);
 }
 
+char *strlaststr(char *haystack, char *needle)
+{
+    char *p = strstr(haystack, needle), *r = NULL;
+    while (p != NULL)
+    {
+        r = p;
+        p = strstr(p + 1, needle);
+    }
+    return r;
+}
+
 void find_replace_extension(char *str, char *orig, char *rep, char *output)
 {
     char* buffer = (char*)calloc(8192, sizeof(char));
 
     sprintf(buffer, "%s", str);
-    char *p = strstr(buffer, orig);
+    char *p = strlaststr(buffer, orig);
     int offset = (p - buffer);
     int chars_from_end = strlen(buffer) - offset;
     if (!p || chars_from_end != strlen(orig)) {  // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'?
@@ -325,10 +337,30 @@ void top_k(float *a, int n, int k, int *index)
     }
 }
 
+
+void log_backtrace()
+{
+#ifndef WIN32
+    void * buffer[50];
+    int count = backtrace(buffer, sizeof(buffer));
+    char **symbols = backtrace_symbols(buffer, count);
+
+    fprintf(stderr, "backtrace (%d entries)\n", count);
+
+    for (int idx = 0; idx < count; idx ++)
+    {
+        fprintf(stderr, "%d/%d: %s\n", idx + 1, count, symbols[idx]);
+    }
+
+    free(symbols);
+#endif
+}
+
 void error(const char * const msg, const char * const filename, const char * const funcname, const int line)
 {
-    fprintf(stderr, "Darknet error location: %s, %s, line #%d\n", filename, funcname, line);
+    fprintf(stderr, "Darknet error location: %s, %s(), line #%d\n", filename, funcname, line);
     perror(msg);
+    log_backtrace();
     exit(EXIT_FAILURE);
 }
 
diff --git a/src/Detector/darknet/src/utils.h b/src/Detector/darknet/src/utils.h
index f84054b8b..6a4ea8a35 100644
--- a/src/Detector/darknet/src/utils.h
+++ b/src/Detector/darknet/src/utils.h
@@ -8,7 +8,7 @@
 #include <stdbool.h>
 
 #ifndef M_PI
-#define M_PI       3.14159265358979323846   // pi
+#define M_PI       3.14159265358979323846
 #endif
 
 #ifdef __cplusplus
diff --git a/src/Detector/darknet/src/writing.c b/src/Detector/darknet/src/writing.c
index 29785b7b7..1fed538fa 100644
--- a/src/Detector/darknet/src/writing.c
+++ b/src/Detector/darknet/src/writing.c
@@ -65,7 +65,7 @@ void train_writing(char *cfgfile, char *weightfile)
 
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %" PRIu64 " images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         free_data(train);
         if(get_current_batch(net)%100 == 0){
             char buff[256];
diff --git a/src/Detector/darknet/src/yolo.c b/src/Detector/darknet/src/yolo.c
index 92b5f1e6e..ef68acabc 100644
--- a/src/Detector/darknet/src/yolo.c
+++ b/src/Detector/darknet/src/yolo.c
@@ -198,7 +198,7 @@ void validate_yolo(char *cfgfile, char *weightfile)
 
     fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
     if (fps) {
-        for (j = 0; j < classes; ++j) {
+        for(j = 0; j < classes; ++j){
             fclose(fps[j]);
         }
         free(fps);
@@ -251,7 +251,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
         if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);
 
         char labelpath[4096];
-		replace_image_to_label(path, labelpath);
+        replace_image_to_label(path, labelpath);
 
         int num_labels = 0;
         box_label *truth = read_boxes(labelpath, &num_labels);
@@ -278,6 +278,7 @@ void validate_yolo_recall(char *cfgfile, char *weightfile)
 
         fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
         free(id);
+        free(truth);
         free_image(orig);
         free_image(sized);
     }
@@ -327,11 +328,11 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
 
         free_image(im);
         free_image(sized);
-
+        free_alphabet(alphabet);
         wait_until_press_key_cv();
         destroy_all_windows_cv();
 
-        if (filename) break;
+      if (filename) break;
     }
     free(boxes);
     for(j = 0; j < l.side*l.side*l.n; ++j) {
@@ -342,16 +343,17 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
 
 void run_yolo(int argc, char **argv)
 {
-	int dont_show = find_arg(argc, argv, "-dont_show");
-	int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
+    int dont_show = find_arg(argc, argv, "-dont_show");
+    int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
     int json_port = find_int_arg(argc, argv, "-json_port", -1);
-	char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
+    char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
     char *prefix = find_char_arg(argc, argv, "-prefix", 0);
     float thresh = find_float_arg(argc, argv, "-thresh", .2);
-	float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
+    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
     int cam_index = find_int_arg(argc, argv, "-c", 0);
     int frame_skip = find_int_arg(argc, argv, "-s", 0);
-	int ext_output = find_arg(argc, argv, "-ext_output");
+    int ext_output = find_arg(argc, argv, "-ext_output");
+    char *json_file_output = find_char_arg(argc, argv, "-json_file_output", 0);
     if(argc < 4){
         fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
         return;
@@ -365,5 +367,5 @@ void run_yolo(int argc, char **argv)
     else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
     else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
     else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, voc_names, 20, 1, frame_skip,
-		prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0);
+        prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0, json_file_output);
 }
diff --git a/src/Detector/darknet/src/yolo_console_dll.cpp b/src/Detector/darknet/src/yolo_console_dll.cpp
index 27dca6661..a87574c51 100644
--- a/src/Detector/darknet/src/yolo_console_dll.cpp
+++ b/src/Detector/darknet/src/yolo_console_dll.cpp
@@ -354,9 +354,7 @@ int main(int argc, char *argv[])
                 if (filename == "zed_camera" || file_ext == "svo") {
                     std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl;
                     if (!zed.isOpened()) {
-                        std::cout << " Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed. \n";
-                        getchar();
-                        return 0;
+                        error("Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed", DARKNET_LOC);
                     }
                     cur_frame = zed_capture_rgb(zed);
                     use_zed_camera = true;
@@ -696,8 +694,12 @@ int main(int argc, char *argv[])
             show_console_result(result_vec, obj_names);
 #endif  // OPENCV
         }
-        catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); }
-        catch (...) { std::cerr << "unknown exception \n"; getchar(); }
+        catch (std::exception &e) {
+            std::cerr << "exception: " << e.what() << "\n";
+        }
+        catch (...) {
+            std::cerr << "unknown exception \n";
+        }
         filename.clear();
     }
 
diff --git a/src/Detector/darknet/src/yolo_layer.c b/src/Detector/darknet/src/yolo_layer.c
index 0eae2fc8f..de9d09904 100644
--- a/src/Detector/darknet/src/yolo_layer.c
+++ b/src/Detector/darknet/src/yolo_layer.c
@@ -11,8 +11,6 @@
 #include <string.h>
 #include <stdlib.h>
 
-extern int check_mistakes;
-
 layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes)
 {
     int i;
@@ -31,6 +29,7 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
     l.classes = classes;
     l.cost = (float*)xcalloc(1, sizeof(float));
     l.biases = (float*)xcalloc(total * 2, sizeof(float));
+    l.nbiases = total * 2;
     if(mask) l.mask = mask;
     else{
         l.mask = (int*)xcalloc(n, sizeof(int));
@@ -433,7 +432,6 @@ void *process_batch(void* ptr)
                         if (class_id >= l.classes || class_id < 0) {
                             printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1);
                             printf("\n truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id);
-                            if (check_mistakes) getchar();
                             continue; // if label contains class_id more than number of classes in the cfg-file and class_id check garbage value
                         }
 
diff --git a/src/Detector/darknet/src/yolo_v2_class.cpp b/src/Detector/darknet/src/yolo_v2_class.cpp
index df3f7acf7..6c0681909 100644
--- a/src/Detector/darknet/src/yolo_v2_class.cpp
+++ b/src/Detector/darknet/src/yolo_v2_class.cpp
@@ -13,7 +13,7 @@ extern "C" {
 #include "image.h"
 #include "demo.h"
 #include "option_list.h"
-#include "stb_image.h"
+#include <stb_image.h>
 }
 //#include <sys/time.h>
 
@@ -69,7 +69,7 @@ int get_device_count() {
     return count;
 #else
     return -1;
-#endif	// GPU
+#endif    // GPU
 }
 
 bool built_with_cuda(){
@@ -106,7 +106,7 @@ int get_device_name(int gpu, char* deviceName) {
     return 1;
 #else
     return -1;
-#endif	// GPU
+#endif    // GPU
 }
 
 #ifdef GPU
diff --git a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake
index 37388d30b..7a692b055 100644
--- a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake
+++ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake
@@ -1,180 +1,104 @@
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
+# Distributed under the OSI-approved BSD 3-Clause License.
+# Copyright Stefano Sinigardi
+
 #.rst:
 # FindCUDNN
-# -------
-#
-# Find CUDNN library
-#
-# Valiables that affect result:
-# <VERSION>, <REQUIRED>, <QUIETLY>: as usual
-#
-# <EXACT> : as usual, plus we do find '5.1' version if you wanted '5' 
-#           (not if you wanted '5.0', as usual)   
+# --------
 #
-# Result variables
+# Result Variables
 # ^^^^^^^^^^^^^^^^
 #
-# This module will set the following variables in your project:
+# This module will set the following variables in your project::
 #
-# ``CUDNN_INCLUDE``
-#   where to find cudnn.h.
-# ``CUDNN_LIBRARY``
-#   the libraries to link against to use CUDNN.
-# ``CUDNN_FOUND``
-#   If false, do not try to use CUDNN.
-# ``CUDNN_VERSION``
-#   Version of the CUDNN library we looked for 
+#  ``CUDNN_FOUND``
+#    True if CUDNN found on the local system
 #
-# Exported functions
-# ^^^^^^^^^^^^^^^^
-# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir])
-#  This function will try to download and install CUDNN.
-#  CUDNN5 and CUDNN6 are supported.
+#  ``CUDNN_INCLUDE_DIRS``
+#    Location of CUDNN header files.
 #
+#  ``CUDNN_LIBRARIES``
+#    The CUDNN libraries.
+#
+#  ``CuDNN::CuDNN``
+#    The CUDNN target
 #
 
-function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir)
-  message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}")
-  string(REGEX REPLACE "-rc$" "" version_base "${version}")
-  set(tar_libdir cuda/lib64)
-  set(tar_incdir cuda/include)
-
-  if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
-    set(url_extension tgz)
-    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
-      set(url_arch_name linux-x64 )
-    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc")
-      set(url_arch_name linux-ppc64le ) 
-      #  TX1 has to be installed via JetPack
-    endif()
-  elseif  (APPLE)
-    set(url_extension tgz)
-    set(tar_libdir cuda/lib)
-    set(url_arch_name osx-x64)
-  elseif(WIN32)
-    set(url_extension zip)
-    set(tar_bindir cuda/bin)
-    set(tar_libdir cuda/lib/x64)
-    if(CMAKE_SYSTEM_VERSION MATCHES "10")
-      set(url_arch_name windows10-x64)
-    else()
-      set(url_arch_name windows7-x64)
-    endif()
-  endif()
-  
-  # Download and install CUDNN locally if not found on the system
-  if(url_arch_name) 
-    set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version})
-    file(MAKE_DIRECTORY ${download_dir})
-    set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension})
-    set(base_url http://developer.download.nvidia.com/compute/redist/cudnn)
-    set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename})
-    set(cudnn_file ${download_dir}/${cudnn_filename})
-    
-    if(NOT EXISTS ${cudnn_file})
-      message(STATUS "Downloading CUDNN library from NVIDIA...")
-      file(DOWNLOAD ${cudnn_url} ${cudnn_file}
-	SHOW_PROGRESS STATUS cudnn_status
-	)
-    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status)
-
-      if(NOT "${cudnn_status}" MATCHES "0")
-	message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN")
-      endif()
-    endif()
-    
-    if(dest_bindir AND tar_bindir)
-      file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir})
-    endif()
-
-    if(dest_incdir)
-      file(COPY ${download_dir}/${tar_incdir}/ DESTINATION  ${dest_incdir})
-    endif()
-
-    file(COPY ${download_dir}/${tar_libdir}/ DESTINATION  ${dest_libdir} )
-
-    get_filename_component(dest_dir ${dest_libdir} DIRECTORY)
-
-    set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE)
-    unset(CUDNN_LIBRARY CACHE)
-    unset(CUDNN_INCLUDE_DIR CACHE)
-
-  endif(url_arch_name)
-endfunction()
-
-#####################################################
-
-find_package(PkgConfig)
-pkg_check_modules(PC_CUDNN QUIET CUDNN)
-
-get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH)
-
-# We use major only in library search as major/minor is not entirely consistent among platforms.
-# Also, looking for exact minor version of .so is in general not a good idea.
-# More strict enforcement of minor/patch version is done if/when the header file is examined.
-if(CUDNN_FIND_VERSION_EXACT)
-  SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}")
-  SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR})
-else()
-  SET(__cudnn_lib_win_name cudnn64)
+include(FindPackageHandleStandardArgs)
+
+find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h cudnn_v8.h cudnn_v7.h
+  HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/include
+  PATH_SUFFIXES cuda/include include)
+find_library(CUDNN_LIBRARY NAMES cudnn cudnn8 cudnn7
+  HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/lib/x86_64-linux-gnu/
+  PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64 cuda/lib/x64)
+if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v8.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v8.h CUDNN_HEADER_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v7.h")
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v7.h CUDNN_HEADER_CONTENTS)
 endif()
-
-find_library(CUDNN_LIBRARY 
-  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
-  PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
-  PATH_SUFFIXES lib lib64 bin
-  DOC "CUDNN library." )
-
-if(CUDNN_LIBRARY)
-  SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR})
-  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION})
-  get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH)
-  find_path(CUDNN_INCLUDE_DIR 
-    NAMES cudnn.h
-    HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
-    PATH_SUFFIXES include 
-    DOC "Path to CUDNN include directory." )
+if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
+elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h")
+  file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h" CUDNN_VERSION_H_CONTENTS)
+  string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}")
+  unset(CUDNN_VERSION_H_CONTENTS)
 endif()
-
-if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR)
-  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+if(CUDNN_HEADER_CONTENTS)
   string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
-    CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
-    CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
+               _CUDNN_VERSION_MAJOR "${_CUDNN_VERSION_MAJOR}")
   string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
-    CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
-    CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
+               _CUDNN_VERSION_MINOR "${_CUDNN_VERSION_MINOR}")
   string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
-    CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+               _CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
   string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
-    CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")  
-  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION})
-endif()
-
-if(CUDNN_MAJOR_VERSION)
-  ## Fixing the case where 5.1 does not fit 'exact' 5.
-  if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR)
-    if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}")
-      set(CUDNN_VERSION ${CUDNN_FIND_VERSION})
-    endif()
+               _CUDNN_VERSION_PATCH "${_CUDNN_VERSION_PATCH}")
+  if(NOT _CUDNN_VERSION_MAJOR)
+    set(CUDNN_VERSION "?")
+  else()
+    set(CUDNN_VERSION "${_CUDNN_VERSION_MAJOR}.${_CUDNN_VERSION_MINOR}.${_CUDNN_VERSION_PATCH}")
   endif()
-else()
-  # Try to set CUDNN version from config file
-  set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER})
 endif()
 
-find_package_handle_standard_args(
-  CUDNN 
-  FOUND_VAR CUDNN_FOUND
-  REQUIRED_VARS CUDNN_LIBRARY 
-  VERSION_VAR   CUDNN_VERSION
-  )
+set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
+set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
+mark_as_advanced(CUDNN_LIBRARY CUDNN_INCLUDE_DIR)
 
-if(CUDNN_FOUND)
-  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
-  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
-  set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER})
-endif()  
+find_package_handle_standard_args(CUDNN
+      REQUIRED_VARS  CUDNN_INCLUDE_DIR CUDNN_LIBRARY
+      VERSION_VAR    CUDNN_VERSION
+)
+
+if(WIN32)
+  set(CUDNN_DLL_DIR ${CUDNN_INCLUDE_DIR})
+  list(TRANSFORM CUDNN_DLL_DIR APPEND "/../bin")
+  find_file(CUDNN_LIBRARY_DLL NAMES cudnn64_${CUDNN_VERSION_MAJOR}.dll PATHS ${CUDNN_DLL_DIR})
+endif()
+
+if( CUDNN_FOUND AND NOT TARGET CuDNN::CuDNN )
+  if( EXISTS "${CUDNN_LIBRARY_DLL}" )
+    add_library( CuDNN::CuDNN      SHARED IMPORTED )
+    set_target_properties( CuDNN::CuDNN PROPERTIES
+      IMPORTED_LOCATION                 "${CUDNN_LIBRARY_DLL}"
+      IMPORTED_IMPLIB                   "${CUDNN_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${CUDNN_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C" )
+  else()
+    add_library( CuDNN::CuDNN      UNKNOWN IMPORTED )
+    set_target_properties( CuDNN::CuDNN PROPERTIES
+      IMPORTED_LOCATION                 "${CUDNN_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${CUDNN_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C" )
+  endif()
+endif()
diff --git a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_
new file mode 100644
index 000000000..37388d30b
--- /dev/null
+++ b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake_
@@ -0,0 +1,180 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+#.rst:
+# FindCUDNN
+# -------
+#
+# Find CUDNN library
+#
+# Valiables that affect result:
+# <VERSION>, <REQUIRED>, <QUIETLY>: as usual
+#
+# <EXACT> : as usual, plus we do find '5.1' version if you wanted '5' 
+#           (not if you wanted '5.0', as usual)   
+#
+# Result variables
+# ^^^^^^^^^^^^^^^^
+#
+# This module will set the following variables in your project:
+#
+# ``CUDNN_INCLUDE``
+#   where to find cudnn.h.
+# ``CUDNN_LIBRARY``
+#   the libraries to link against to use CUDNN.
+# ``CUDNN_FOUND``
+#   If false, do not try to use CUDNN.
+# ``CUDNN_VERSION``
+#   Version of the CUDNN library we looked for 
+#
+# Exported functions
+# ^^^^^^^^^^^^^^^^
+# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir])
+#  This function will try to download and install CUDNN.
+#  CUDNN5 and CUDNN6 are supported.
+#
+#
+
+function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir)
+  message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}")
+  string(REGEX REPLACE "-rc$" "" version_base "${version}")
+  set(tar_libdir cuda/lib64)
+  set(tar_incdir cuda/include)
+
+  if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(url_extension tgz)
+    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
+      set(url_arch_name linux-x64 )
+    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc")
+      set(url_arch_name linux-ppc64le ) 
+      #  TX1 has to be installed via JetPack
+    endif()
+  elseif  (APPLE)
+    set(url_extension tgz)
+    set(tar_libdir cuda/lib)
+    set(url_arch_name osx-x64)
+  elseif(WIN32)
+    set(url_extension zip)
+    set(tar_bindir cuda/bin)
+    set(tar_libdir cuda/lib/x64)
+    if(CMAKE_SYSTEM_VERSION MATCHES "10")
+      set(url_arch_name windows10-x64)
+    else()
+      set(url_arch_name windows7-x64)
+    endif()
+  endif()
+  
+  # Download and install CUDNN locally if not found on the system
+  if(url_arch_name) 
+    set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version})
+    file(MAKE_DIRECTORY ${download_dir})
+    set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension})
+    set(base_url http://developer.download.nvidia.com/compute/redist/cudnn)
+    set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename})
+    set(cudnn_file ${download_dir}/${cudnn_filename})
+    
+    if(NOT EXISTS ${cudnn_file})
+      message(STATUS "Downloading CUDNN library from NVIDIA...")
+      file(DOWNLOAD ${cudnn_url} ${cudnn_file}
+	SHOW_PROGRESS STATUS cudnn_status
+	)
+    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status)
+
+      if(NOT "${cudnn_status}" MATCHES "0")
+	message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN")
+      endif()
+    endif()
+    
+    if(dest_bindir AND tar_bindir)
+      file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir})
+    endif()
+
+    if(dest_incdir)
+      file(COPY ${download_dir}/${tar_incdir}/ DESTINATION  ${dest_incdir})
+    endif()
+
+    file(COPY ${download_dir}/${tar_libdir}/ DESTINATION  ${dest_libdir} )
+
+    get_filename_component(dest_dir ${dest_libdir} DIRECTORY)
+
+    set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE)
+    unset(CUDNN_LIBRARY CACHE)
+    unset(CUDNN_INCLUDE_DIR CACHE)
+
+  endif(url_arch_name)
+endfunction()
+
+#####################################################
+
+find_package(PkgConfig)
+pkg_check_modules(PC_CUDNN QUIET CUDNN)
+
+get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH)
+
+# We use major only in library search as major/minor is not entirely consistent among platforms.
+# Also, looking for exact minor version of .so is in general not a good idea.
+# More strict enforcement of minor/patch version is done if/when the header file is examined.
+if(CUDNN_FIND_VERSION_EXACT)
+  SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}")
+  SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR})
+else()
+  SET(__cudnn_lib_win_name cudnn64)
+endif()
+
+find_library(CUDNN_LIBRARY 
+  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+  PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+  PATH_SUFFIXES lib lib64 bin
+  DOC "CUDNN library." )
+
+if(CUDNN_LIBRARY)
+  SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR})
+  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION})
+  get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH)
+  find_path(CUDNN_INCLUDE_DIR 
+    NAMES cudnn.h
+    HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
+    PATH_SUFFIXES include 
+    DOC "Path to CUDNN include directory." )
+endif()
+
+if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR)
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
+    CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
+    CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
+  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
+    CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
+    CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
+  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
+    CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
+    CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")  
+  set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION})
+endif()
+
+if(CUDNN_MAJOR_VERSION)
+  ## Fixing the case where 5.1 does not fit 'exact' 5.
+  if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR)
+    if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}")
+      set(CUDNN_VERSION ${CUDNN_FIND_VERSION})
+    endif()
+  endif()
+else()
+  # Try to set CUDNN version from config file
+  set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER})
+endif()
+
+find_package_handle_standard_args(
+  CUDNN 
+  FOUND_VAR CUDNN_FOUND
+  REQUIRED_VARS CUDNN_LIBRARY 
+  VERSION_VAR   CUDNN_VERSION
+  )
+
+if(CUDNN_FOUND)
+  set(CUDNN_LIBRARIES ${CUDNN_LIBRARY})
+  set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
+  set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER})
+endif()