From 41c216a1ad7b6052b66d5fdd1c24ec7818901232 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 12:42:04 +0100 Subject: [PATCH 01/11] Add initial on host implementations --- CMakeLists.txt | 3 +- .../src/pipeline/node/BenchmarkBindings.cpp | 6 +- examples/python/HostNodes/benchmark.py | 34 +++++ .../pipeline/datatype/BenchmarkReport.hpp | 14 +- include/depthai/pipeline/node/BenchmarkIn.hpp | 37 ++++- .../depthai/pipeline/node/BenchmarkOut.hpp | 20 ++- .../properties/BenchmarkInProperties.hpp | 32 +++++ ...tiesOut.hpp => BenchmarkOutProperties.hpp} | 6 +- .../properties/BenchmarkProperties.hpp | 22 --- .../properties/BenchmarkPropertiesIn.hpp | 22 --- src/pipeline/node/Benchmark.cpp | 24 ---- src/pipeline/node/BenchmarkIn.cpp | 133 ++++++++++++++++++ src/pipeline/node/BenchmarkOut.cpp | 66 +++++++++ 13 files changed, 328 insertions(+), 91 deletions(-) create mode 100644 examples/python/HostNodes/benchmark.py create mode 100644 include/depthai/properties/BenchmarkInProperties.hpp rename include/depthai/properties/{BenchmarkPropertiesOut.hpp => BenchmarkOutProperties.hpp} (71%) delete mode 100644 include/depthai/properties/BenchmarkProperties.hpp delete mode 100644 include/depthai/properties/BenchmarkPropertiesIn.hpp delete mode 100644 src/pipeline/node/Benchmark.cpp create mode 100644 src/pipeline/node/BenchmarkIn.cpp create mode 100644 src/pipeline/node/BenchmarkOut.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cafae167f..ed2dd190d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -368,7 +368,8 @@ set(TARGET_CORE_SOURCES src/pipeline/node/DetectionNetwork.cpp src/pipeline/node/Script.cpp src/pipeline/node/Pool.cpp - src/pipeline/node/Benchmark.cpp + src/pipeline/node/BenchmarkIn.cpp + src/pipeline/node/BenchmarkOut.cpp src/pipeline/node/SpatialDetectionNetwork.cpp src/pipeline/node/SystemLogger.cpp src/pipeline/node/SpatialLocationCalculator.cpp diff --git a/bindings/python/src/pipeline/node/BenchmarkBindings.cpp 
b/bindings/python/src/pipeline/node/BenchmarkBindings.cpp index 781fbc1bc..79e6d23cc 100644 --- a/bindings/python/src/pipeline/node/BenchmarkBindings.cpp +++ b/bindings/python/src/pipeline/node/BenchmarkBindings.cpp @@ -30,9 +30,13 @@ void bind_benchmark(pybind11::module& m, void* pCallstack) { benchmarkOut.def_readonly("out", &BenchmarkOut::out, DOC(dai, node, BenchmarkOut, out)) .def_readonly("input", &BenchmarkOut::input, DOC(dai, node, BenchmarkOut, input)) .def("setNumMessagesToSend", &BenchmarkOut::setNumMessagesToSend, py::arg("num"), DOC(dai, node, BenchmarkOut, setNumMessagesToSend)) + .def("setRunOnHost", &BenchmarkOut::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, BenchmarkOut, setRunOnHost)) .def("setFps", &BenchmarkOut::setFps, py::arg("fps"), DOC(dai, node, BenchmarkOut, setFps)); benchmarkIn.def_readonly("input", &BenchmarkIn::input, DOC(dai, node, BenchmarkIn, input)) .def_readonly("report", &BenchmarkIn::report, DOC(dai, node, BenchmarkIn, report)) .def_readonly("passthrough", &BenchmarkIn::passthrough, DOC(dai, node, BenchmarkIn, passthrough)) - .def("setNumMessagesToGet", &BenchmarkIn::setNumMessagesToGet, py::arg("num"), DOC(dai, node, BenchmarkIn, setNumMessagesToGet)); + .def("setRunOnHost", &BenchmarkIn::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, BenchmarkIn, setRunOnHost)) + .def("logReportsAsWarnings", &BenchmarkIn::logReportsAsWarnings, py::arg("logReportsAsWarnings"), DOC(dai, node, BenchmarkIn, logReportsAsWarnings)) + .def("measureIndividualLatencies", &BenchmarkIn::measureIndividualLatencies, py::arg("attachLatencies"), DOC(dai, node, BenchmarkIn, measureIndividualLatencies)) + .def("sendReportEveryNMessages", &BenchmarkIn::sendReportEveryNMessages, py::arg("num"), DOC(dai, node, BenchmarkIn, sendReportEveryNMessages)); } diff --git a/examples/python/HostNodes/benchmark.py b/examples/python/HostNodes/benchmark.py new file mode 100644 index 000000000..701f15298 --- /dev/null +++ b/examples/python/HostNodes/benchmark.py @@ 
-0,0 +1,34 @@ +import depthai as dai +import time + +class TestSource(dai.node.ThreadedHostNode): + def __init__(self, name: str): + super().__init__() + self.name = name + self.output = self.createOutput() + + def run(self): + while self.isRunning(): + buffer = dai.Buffer() + print(f"{self.name} node is sending a buffer!") + self.output.send(buffer) + time.sleep(1) + +with dai.Pipeline(createImplicitDevice=False) as p: + # Create nodes + source = TestSource("source") + benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(True) + benchmarkIn.sendReportEveryNMessages(100) + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(True) + benchmarkOut.setFps(30) + benchmarkOut.out.link(benchmarkIn.input) + outputQueue = benchmarkIn.report.createOutputQueue() + source.output.link(benchmarkOut.input) + + p.start() + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") \ No newline at end of file diff --git a/include/depthai/pipeline/datatype/BenchmarkReport.hpp b/include/depthai/pipeline/datatype/BenchmarkReport.hpp index 4d496449f..b70e53ca9 100644 --- a/include/depthai/pipeline/datatype/BenchmarkReport.hpp +++ b/include/depthai/pipeline/datatype/BenchmarkReport.hpp @@ -3,22 +3,20 @@ #include "depthai/pipeline/datatype/Buffer.hpp" namespace dai { -// TODO(before mainline) - API not supported on RVC2 /** * BenchmarkReport message. 
*/ class BenchmarkReport : public Buffer { public: BenchmarkReport() = default; - virtual ~BenchmarkReport() = default; - float fps; - float timeTotal; // seconds - float numMessagesReceived; - float averageLatency; + float fps = 0.0f; + float timeTotal = 0.0f; // seconds + float numMessagesReceived = 0; + float averageLatency = 0.0f; // seconds + + // Only filled if measureIndividualLatencies is set to true std::vector latencies; - // TODO Add jitter, timestamps for start/end, possibly a vector of timestamps for all messages - // TODO BEFORE MAINLINE add setters and getters void serialize(std::vector& metadata, DatatypeEnum& datatype) const override { metadata = utility::serialize(*this); diff --git a/include/depthai/pipeline/node/BenchmarkIn.hpp b/include/depthai/pipeline/node/BenchmarkIn.hpp index 80166c1b8..6b6aa9e94 100644 --- a/include/depthai/pipeline/node/BenchmarkIn.hpp +++ b/include/depthai/pipeline/node/BenchmarkIn.hpp @@ -3,13 +3,12 @@ #include // shared -#include +#include namespace dai { namespace node { -// TODO(before mainline) - API not supported on RVC2 -class BenchmarkIn : public DeviceNodeCRTP { +class BenchmarkIn : public DeviceNodeCRTP, public HostRunnable { public: constexpr static const char* NAME = "BenchmarkIn"; using DeviceNodeCRTP::DeviceNodeCRTP; @@ -30,11 +29,35 @@ class BenchmarkIn : public DeviceNodeCRTP // shared -#include +#include namespace dai { namespace node { -class BenchmarkOut : public DeviceNodeCRTP { +class BenchmarkOut : public DeviceNodeCRTP, public HostRunnable{ public: constexpr static const char* NAME = "BenchmarkOut"; using DeviceNodeCRTP::DeviceNodeCRTP; @@ -34,7 +34,21 @@ class BenchmarkOut : public DeviceNodeCRTP { + /** + * Specify how many messages to measure for each report + */ + uint32_t reportEveryNMessages = 50; + + /** + * Specify whether the latenices are attached to the report individually + */ + bool attachLatencies = false; + + /** + * Send the reports also as logger warnings + */ + bool 
logReportsAsWarnings = true; +}; + +DEPTHAI_SERIALIZE_EXT(BenchmarkInProperties, reportEveryNMessages); + +} // namespace dai diff --git a/include/depthai/properties/BenchmarkPropertiesOut.hpp b/include/depthai/properties/BenchmarkOutProperties.hpp similarity index 71% rename from include/depthai/properties/BenchmarkPropertiesOut.hpp rename to include/depthai/properties/BenchmarkOutProperties.hpp index 19ee57c22..def89ad74 100644 --- a/include/depthai/properties/BenchmarkPropertiesOut.hpp +++ b/include/depthai/properties/BenchmarkOutProperties.hpp @@ -10,11 +10,11 @@ namespace dai { /** * Specify benchmark properties (number of messages to send/receive) */ -struct BenchmarkPropertiesOut : PropertiesSerializable { +struct BenchmarkOutProperties : PropertiesSerializable { /** * Number of messages to send */ - int numMessages = 50; + int numMessages = -1; /** * FPS for sending, 0 means as fast as possible @@ -22,6 +22,6 @@ struct BenchmarkPropertiesOut : PropertiesSerializable { - /** - * Number of messages to send - */ - int numMessages = 50; -}; - -DEPTHAI_SERIALIZE_EXT(BenchmarkProperties, numMessages); - -} // namespace dai diff --git a/include/depthai/properties/BenchmarkPropertiesIn.hpp b/include/depthai/properties/BenchmarkPropertiesIn.hpp deleted file mode 100644 index 8bbe350f3..000000000 --- a/include/depthai/properties/BenchmarkPropertiesIn.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "depthai/common/ProcessorType.hpp" -#include "depthai/common/optional.hpp" -#include "depthai/pipeline/datatype/DatatypeEnum.hpp" -#include "depthai/properties/Properties.hpp" - -namespace dai { - -/** - * Specify benchmark properties (number of messages to send/receive) - */ -struct BenchmarkPropertiesIn : PropertiesSerializable { - /** - * Number of messages to send - */ - int numMessages = 50; -}; - -DEPTHAI_SERIALIZE_EXT(BenchmarkPropertiesIn, numMessages); - -} // namespace dai diff --git a/src/pipeline/node/Benchmark.cpp b/src/pipeline/node/Benchmark.cpp 
deleted file mode 100644 index f4af2492e..000000000 --- a/src/pipeline/node/Benchmark.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "depthai/pipeline/node/BenchmarkIn.hpp" -#include "depthai/pipeline/node/BenchmarkOut.hpp" - -namespace dai { -namespace node { - -void BenchmarkOut::setNumMessagesToSend(int num) { - properties.numMessages = num; -} - -void BenchmarkOut::setFps(float fps) { - properties.fps = fps; -} - -void BenchmarkIn::setNumMessagesToGet(int num) { - properties.numMessages = num; -} - -void BenchmarkOut::buildInternal() { - properties.numMessages = -1; // By default send messages indefinitely -} - -} // namespace node -} // namespace dai diff --git a/src/pipeline/node/BenchmarkIn.cpp b/src/pipeline/node/BenchmarkIn.cpp new file mode 100644 index 000000000..414950840 --- /dev/null +++ b/src/pipeline/node/BenchmarkIn.cpp @@ -0,0 +1,133 @@ +#include "depthai/pipeline/node/BenchmarkIn.hpp" +#include "depthai/pipeline/datatype/BenchmarkReport.hpp" +#include + +namespace dai { +namespace node { + +void BenchmarkIn::sendReportEveryNMessages(uint32_t num) { + properties.reportEveryNMessages = num; +} + +void BenchmarkIn::setRunOnHost(bool runOnHost) { + runOnHostVar = runOnHost; +} + +bool BenchmarkIn::runOnHost() const { + return runOnHostVar; +} + +void BenchmarkIn::logReportsAsWarnings(bool logReportsAsWarnings) { + properties.logReportsAsWarnings = logReportsAsWarnings; +} + +void BenchmarkIn::measureIndividualLatencies(bool attachLatencies) { + properties.attachLatencies = attachLatencies; +} + +void BenchmarkIn::run() { + using namespace std::chrono; + + uint32_t numMessages = properties.reportEveryNMessages; + + // Decide if we will store latencies or not + bool storeLatencies = false; + if(properties.attachLatencies) { + if(numMessages <= 1000) { + // We'll store latencies for this batch + storeLatencies = true; + } else { + // Warn upfront if user wanted latencies but # messages is too high + logger->warn("Number of messages > 1000, latencies not 
individually attached."); + } + } + + uint32_t messageCount = 0; + float totalLatency = 0.0f; + + std::vector latencies; + if(storeLatencies) { + latencies.reserve(numMessages); + } + + auto start = steady_clock::now(); + + while(isRunning()) { + auto inMessage = input.get(); + + // If this is the first message of the batch, reset counters + if(messageCount == 0) { + start = steady_clock::now(); + totalLatency = 0.0f; + + // Clear vector if we are storing latencies + if(storeLatencies) { + latencies.clear(); + latencies.reserve(numMessages); + } + } + + if(messageCount < numMessages) { + auto currentTs = steady_clock::now(); + // If the message itself has a real timestamp, use that instead: + // auto messageTs = inMessage->getTimestamp(); + auto messageTs = currentTs; // In example, just use currentTs + + duration diff = currentTs - messageTs; + logger->trace("Frame latency: {} s", diff.count()); + + // Accumulate for average + totalLatency += diff.count(); + + // Optionally store individual latencies + if(storeLatencies) { + latencies.push_back(diff.count()); + } + + messageCount++; + } + else { + // We reached our batch size, so time to compute and send the report + auto stop = steady_clock::now(); + duration durationS = stop - start; + + auto reportMessage = std::make_shared(); + reportMessage->numMessagesReceived = numMessages; + reportMessage->timeTotal = durationS.count(); + reportMessage->fps = numMessages / durationS.count(); + reportMessage->averageLatency = totalLatency / numMessages; + + // Attach latencies only if we're storing them + if(storeLatencies) { + reportMessage->latencies = latencies; + } + + // Decide how to log (warn or info) once, then do all the logs + auto logFunc = [&](auto fmt, auto... 
args) { + if(properties.logReportsAsWarnings) { + logger->warn(fmt, std::forward(args)...); + } else { + logger->trace(fmt, std::forward(args)...); + } + }; + + // Unconditional logs, using chosen severity + logFunc("FPS: {}", reportMessage->fps); + logFunc("Messages took {} s", reportMessage->timeTotal); + logFunc("Average latency: {} s", reportMessage->averageLatency); + + // Send out the report + report.send(reportMessage); + logger->trace("Sent report message"); + + // Reset for next batch + messageCount = 0; + } + + // Passthrough the message + passthrough.send(inMessage); + } +} + +} // namespace node +} // namespace dai diff --git a/src/pipeline/node/BenchmarkOut.cpp b/src/pipeline/node/BenchmarkOut.cpp new file mode 100644 index 000000000..98a6bc63f --- /dev/null +++ b/src/pipeline/node/BenchmarkOut.cpp @@ -0,0 +1,66 @@ +#include "depthai/pipeline/node/BenchmarkOut.hpp" + +namespace dai { +namespace node { + +void BenchmarkOut::setNumMessagesToSend(int num) { + properties.numMessages = num; +} + +void BenchmarkOut::setFps(float fps) { + properties.fps = fps; +} + +void BenchmarkOut::setRunOnHost(bool runOnHost) { + runOnHostVar = runOnHost; +} + +bool BenchmarkOut::runOnHost() const { + return runOnHostVar; +} + +void BenchmarkOut::run() { + using namespace std::chrono; + + logger->trace("Wait for the input message."); + auto inMessage = input.get(); + + bool useTiming = (properties.fps > 0); + + auto frameDurationDouble = std::chrono::duration(1.0 / properties.fps); + auto frameDuration = std::chrono::duration_cast(frameDurationDouble); + + auto nextFrameTime = steady_clock::now(); + for(int i = 0; (i < properties.numMessages || properties.numMessages == -1) && isRunning(); i++) { + auto imgMessage = std::dynamic_pointer_cast(inMessage); + if(imgMessage != nullptr) { + logger->trace("Sending img message with id {}", i); + + // Copying metadata and pointing to same data + auto newMessage = std::make_shared(); + newMessage->setMetadata(imgMessage); + 
newMessage->data = imgMessage->data; + newMessage->setTimestampDevice(steady_clock::now()); + out.send(newMessage); + } else { + logger->trace("Sending message with id {}", i); + out.send(inMessage); + } + + if(useTiming) { + nextFrameTime += frameDuration; + + auto now = steady_clock::now(); + if(nextFrameTime > now) { + auto sleepTime = nextFrameTime - now; + std::this_thread::sleep_for(sleepTime); + } + } + } + + logger->trace("Benchmark out sent all messages!"); +} + + +} // namespace node +} // namespace dai From cd9f3371e1a3f081a8d3b44cccabae44258edf22 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 12:56:08 +0100 Subject: [PATCH 02/11] Change which timestamp is used based on where the node runs --- src/pipeline/node/BenchmarkIn.cpp | 9 ++++++--- src/pipeline/node/BenchmarkOut.cpp | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pipeline/node/BenchmarkIn.cpp b/src/pipeline/node/BenchmarkIn.cpp index 414950840..40bbf4bf1 100644 --- a/src/pipeline/node/BenchmarkIn.cpp +++ b/src/pipeline/node/BenchmarkIn.cpp @@ -69,9 +69,12 @@ void BenchmarkIn::run() { if(messageCount < numMessages) { auto currentTs = steady_clock::now(); - // If the message itself has a real timestamp, use that instead: - // auto messageTs = inMessage->getTimestamp(); - auto messageTs = currentTs; // In example, just use currentTs + auto messageTs = steady_clock::now(); + if(runOnHostVar) { + messageTs = inMessage->getTimestamp(); + } else { + messageTs = inMessage->getTimestampDevice(); + } duration diff = currentTs - messageTs; logger->trace("Frame latency: {} s", diff.count()); diff --git a/src/pipeline/node/BenchmarkOut.cpp b/src/pipeline/node/BenchmarkOut.cpp index 98a6bc63f..afbed9634 100644 --- a/src/pipeline/node/BenchmarkOut.cpp +++ b/src/pipeline/node/BenchmarkOut.cpp @@ -40,7 +40,11 @@ void BenchmarkOut::run() { auto newMessage = std::make_shared(); newMessage->setMetadata(imgMessage); newMessage->data = imgMessage->data; - 
newMessage->setTimestampDevice(steady_clock::now()); + if(runOnHostVar) { + newMessage->setTimestamp(steady_clock::now()); + } else { + newMessage->setTimestampDevice(steady_clock::now()); + } out.send(newMessage); } else { logger->trace("Sending message with id {}", i); From eaf27df37532aa46d99578e79863322f74b7ebb5 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 14:58:32 +0100 Subject: [PATCH 03/11] Bump RVC4 FW to use host implementations of benchmark node --- cmake/Depthai/DepthaiDeviceRVC4Config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake index 982160495..407a52c43 100644 --- a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake +++ b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake @@ -4,4 +4,4 @@ set(DEPTHAI_DEVICE_RVC4_MATURITY "snapshot") # "version if applicable" # set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+93f7b75a885aa32f44c5e9f53b74470c49d2b1af") -set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+81617bcfe7b7da9eda9654b5b3d3d3254b59a47d") +set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+7f850f7d36329e0e994af1a1cd83de723a88edef") From 575f08bedda6648f3aad0f79ee89902a73de2090 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 19:46:38 +0100 Subject: [PATCH 04/11] Add missing serializations --- include/depthai/properties/BenchmarkInProperties.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/depthai/properties/BenchmarkInProperties.hpp b/include/depthai/properties/BenchmarkInProperties.hpp index 2497ed959..93a9349b8 100644 --- a/include/depthai/properties/BenchmarkInProperties.hpp +++ b/include/depthai/properties/BenchmarkInProperties.hpp @@ -12,7 +12,7 @@ namespace dai { */ struct BenchmarkInProperties : PropertiesSerializable { /** - * Specify how many messages to measure for each report + * Specify how many messages to measure for each report */ uint32_t reportEveryNMessages = 50; @@ -27,6 +27,6 @@ struct 
BenchmarkInProperties : PropertiesSerializable Date: Sun, 29 Dec 2024 20:08:42 +0100 Subject: [PATCH 05/11] [RVC4 FW] Bump FW with properties serialization --- cmake/Depthai/DepthaiDeviceRVC4Config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake index 407a52c43..bda073a99 100644 --- a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake +++ b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake @@ -4,4 +4,4 @@ set(DEPTHAI_DEVICE_RVC4_MATURITY "snapshot") # "version if applicable" # set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+93f7b75a885aa32f44c5e9f53b74470c49d2b1af") -set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+7f850f7d36329e0e994af1a1cd83de723a88edef") +set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+19b67f81b54c146d079d2cbd4485fa153337dc5a") From ae6af8212f7f631e30128bbf22cf5028401c54fa Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 20:16:45 +0100 Subject: [PATCH 06/11] [RVC2] Complete BenchmarkIn implementation --- cmake/Depthai/DepthaiDeviceSideConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceSideConfig.cmake b/cmake/Depthai/DepthaiDeviceSideConfig.cmake index 9a0146539..23835ccbb 100644 --- a/cmake/Depthai/DepthaiDeviceSideConfig.cmake +++ b/cmake/Depthai/DepthaiDeviceSideConfig.cmake @@ -2,7 +2,7 @@ set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot") # "full commit hash of device side binary" -set(DEPTHAI_DEVICE_SIDE_COMMIT "c3e98b39b6a5445b2187b4109d03a146c6df37dd") +set(DEPTHAI_DEVICE_SIDE_COMMIT "6b5e087c440e3833a0128d686fe3b6926681f113") # "version if applicable" set(DEPTHAI_DEVICE_SIDE_VERSION "") From 1a0c8f637ef648d48f5935ae54706e50992ae7cf Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 22:27:37 +0100 Subject: [PATCH 07/11] [RVC2] Add BenchmarkOut implementation --- cmake/Depthai/DepthaiDeviceSideConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cmake/Depthai/DepthaiDeviceSideConfig.cmake b/cmake/Depthai/DepthaiDeviceSideConfig.cmake index 23835ccbb..20e2509ad 100644 --- a/cmake/Depthai/DepthaiDeviceSideConfig.cmake +++ b/cmake/Depthai/DepthaiDeviceSideConfig.cmake @@ -2,7 +2,7 @@ set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot") # "full commit hash of device side binary" -set(DEPTHAI_DEVICE_SIDE_COMMIT "6b5e087c440e3833a0128d686fe3b6926681f113") +set(DEPTHAI_DEVICE_SIDE_COMMIT "5e016a328ac84324fb3c6bd8904141191f29dc2e") # "version if applicable" set(DEPTHAI_DEVICE_SIDE_VERSION "") From 45a0397f2285b3338d74d42ef19ef6dfb97190b6 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 11:21:54 +0100 Subject: [PATCH 08/11] Add examples and tests --- examples/python/Benchmark/benchmark_camera.py | 17 ++++ examples/python/Benchmark/benchmark_nn.py | 50 ++++++++++++ examples/python/Benchmark/benchmark_simple.py | 29 +++++++ examples/python/CMakeLists.txt | 12 +++ examples/python/HostNodes/benchmark.py | 34 -------- tests/CMakeLists.txt | 4 + .../pipeline/node/benchmark_test.cpp | 80 +++++++++++++++++++ .../regression/camera_concurrency.cpp | 2 +- 8 files changed, 193 insertions(+), 35 deletions(-) create mode 100644 examples/python/Benchmark/benchmark_camera.py create mode 100644 examples/python/Benchmark/benchmark_nn.py create mode 100644 examples/python/Benchmark/benchmark_simple.py delete mode 100644 examples/python/HostNodes/benchmark.py create mode 100644 tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp diff --git a/examples/python/Benchmark/benchmark_camera.py b/examples/python/Benchmark/benchmark_camera.py new file mode 100644 index 000000000..97d7adc6f --- /dev/null +++ b/examples/python/Benchmark/benchmark_camera.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +import depthai as dai +import time + +# Create pipeline +with dai.Pipeline() as pipeline: + # Define source and output + cam = pipeline.create(dai.node.Camera).build() + benchmarkIn = pipeline.create(dai.node.BenchmarkIn) + # 
benchmarkIn.setRunOnHost(True) # The node can also run on host and include the transfer limitation, default is False + output = cam.requestFullResolutionOutput() + output.link(benchmarkIn.input) + + # Connect to device and start pipeline + pipeline.start() + while pipeline.isRunning(): + time.sleep(1) # Let the logger print out the FPS diff --git a/examples/python/Benchmark/benchmark_nn.py b/examples/python/Benchmark/benchmark_nn.py new file mode 100644 index 000000000..b327460ab --- /dev/null +++ b/examples/python/Benchmark/benchmark_nn.py @@ -0,0 +1,50 @@ +import depthai as dai +import cv2 +import numpy as np + +device = dai.Device() +modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString())) +modelArhive = dai.NNArchive(modelPath) +inputSize = modelArhive.getInputSize() +type = modelArhive.getConfig().model.inputs[0].preprocessing.daiType + +if type: + try: + frameType = dai.ImgFrame.Type.__getattribute__(type) + except AttributeError: + type = None + +if not type: + if device.getPlatform() == dai.Platform.RVC2: + frameType = dai.ImgFrame.Type.BGR888p + else: + frameType = dai.ImgFrame.Type.BGR888i + + +# Construct the input (white) image for benchmarking +img = np.ones((inputSize[1], inputSize[0], 3), np.uint8) * 255 +inputFrame = dai.ImgFrame() +inputFrame.setCvFrame(img, frameType) + +with dai.Pipeline(device) as p: + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(False) # The node can run on host or on device + benchmarkOut.setFps(-1) # As fast as possible + + neuralNetwork = p.create(dai.node.NeuralNetwork).build(benchmarkOut.out, modelArhive) + + benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(False) # The node can run on host or on device + benchmarkIn.sendReportEveryNMessages(100) + benchmarkIn.logReportsAsWarnings(False) + neuralNetwork.out.link(benchmarkIn.input) + + outputQueue = benchmarkIn.report.createOutputQueue() + inputQueue = 
benchmarkOut.input.createInputQueue() + + p.start() + inputQueue.send(inputFrame) + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") diff --git a/examples/python/Benchmark/benchmark_simple.py b/examples/python/Benchmark/benchmark_simple.py new file mode 100644 index 000000000..9afe70b6d --- /dev/null +++ b/examples/python/Benchmark/benchmark_simple.py @@ -0,0 +1,29 @@ +import depthai as dai + +with dai.Pipeline(createImplicitDevice=False) as p: + # Create a BenchmarkOut node + # It will listen on the input to get the first message and then send it out at a specified rate + # The node sends the same message out (creates new pointers), not deep copies. + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(True) # The node can run on host or on device + benchmarkOut.setFps(30) + + # Create a BenchmarkIn node + # Thisn node is receiving the messages on the input and measuring the FPS and latency. + # In the case that the input is with BenchmarkOut, the latency measurement is not always possible, as the message is not deep copied, + # which means that the timestamps stay the same. 
+ benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(True) # The node can run on host or on device + benchmarkIn.sendReportEveryNMessages(100) + + benchmarkOut.out.link(benchmarkIn.input) + outputQueue = benchmarkIn.report.createOutputQueue() + inputQueue = benchmarkOut.input.createInputQueue() + + p.start() + imgFrame = dai.ImgFrame() + inputQueue.send(imgFrame) + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") diff --git a/examples/python/CMakeLists.txt b/examples/python/CMakeLists.txt index 2578d1b27..5c8b5ed0d 100644 --- a/examples/python/CMakeLists.txt +++ b/examples/python/CMakeLists.txt @@ -223,3 +223,15 @@ set_tests_properties(py_script_simple PROPERTIES FAIL_REGULAR_EXPRESSION "\\[err add_python_example(script_all_cameras Script/script_switch_all_cameras.py) dai_set_example_test_labels(script_all_cameras ondevice rvc2_all rvc4 ci) + +## Benchmark node +add_python_example(benchmark_node Benchmark/benchmark_simple.py) +dai_set_example_test_labels(benchmark_node ondevice rvc2_all rvc4 ci) +set_tests_properties(py_benchmark_node PROPERTIES FAIL_REGULAR_EXPRESSION "\\[error\\];\\[critical\\]") + +add_python_example(benchmark_cameras Benchmark/benchmark_camera.py) +dai_set_example_test_labels(benchmark_cameras ondevice rvc2_all rvc4 ci) +set_tests_properties(py_benchmark_cameras PROPERTIES FAIL_REGULAR_EXPRESSION "\\[error\\];\\[critical\\]") + +add_python_example(benchmark_nn Benchmark/benchmark_nn.py) +dai_set_example_test_labels(benchmark_nn ondevice rvc2_all rvc4 ci) diff --git a/examples/python/HostNodes/benchmark.py b/examples/python/HostNodes/benchmark.py deleted file mode 100644 index 701f15298..000000000 --- a/examples/python/HostNodes/benchmark.py +++ /dev/null @@ -1,34 +0,0 @@ -import depthai as dai -import time - -class TestSource(dai.node.ThreadedHostNode): - def __init__(self, name: str): - super().__init__() - 
self.name = name - self.output = self.createOutput() - - def run(self): - while self.isRunning(): - buffer = dai.Buffer() - print(f"{self.name} node is sending a buffer!") - self.output.send(buffer) - time.sleep(1) - -with dai.Pipeline(createImplicitDevice=False) as p: - # Create nodes - source = TestSource("source") - benchmarkIn = p.create(dai.node.BenchmarkIn) - benchmarkIn.setRunOnHost(True) - benchmarkIn.sendReportEveryNMessages(100) - benchmarkOut = p.create(dai.node.BenchmarkOut) - benchmarkOut.setRunOnHost(True) - benchmarkOut.setFps(30) - benchmarkOut.out.link(benchmarkIn.input) - outputQueue = benchmarkIn.report.createOutputQueue() - source.output.link(benchmarkOut.input) - - p.start() - while p.isRunning(): - benchmarkReport = outputQueue.get() - assert isinstance(benchmarkReport, dai.BenchmarkReport) - print(f"FPS is {benchmarkReport.fps}") \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c83c632fd..20017176f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -400,3 +400,7 @@ dai_set_test_labels(stereo_depth_node_test ondevice rvc2_all rvc4 ci) # ImageManipV2 test dai_add_test(image_manip_v2_node_test src/ondevice_tests/pipeline/node/image_manip_v2_test.cpp) dai_set_test_labels(image_manip_v2_node_test ondevice rvc2_all rvc4 ci) + +# Benchmark tests +dai_add_test(benchmark_test src/ondevice_tests/pipeline/node/benchmark_test.cpp) +dai_set_test_labels(benchmark_test ondevice rvc2_all rvc4 ci) diff --git a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp new file mode 100644 index 000000000..f5eda08a8 --- /dev/null +++ b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp @@ -0,0 +1,80 @@ +#include +#include +#include "depthai/capabilities/ImgFrameCapability.hpp" +#include "depthai/common/CameraBoardSocket.hpp" +#include "depthai/depthai.hpp" +#include "depthai/pipeline/MessageQueue.hpp" +#include 
"depthai/pipeline/datatype/ImgFrame.hpp" +#include "depthai/pipeline/node/Camera.hpp" + +void testBenchmarkIn(bool benchmarkInRunOnHost, bool benchmarkOutRunOnHost, float fps, bool passthrough) { + // Create pipeline + dai::Pipeline p; + auto benchmarkIn = p.create(); + benchmarkIn->setRunOnHost(benchmarkInRunOnHost); + auto benchmarkOut = p.create(); + benchmarkOut->setRunOnHost(benchmarkOutRunOnHost); + benchmarkOut->setFps(fps); + benchmarkOut->out.link(benchmarkIn->input); + + auto inputQueue = benchmarkOut->input.createInputQueue(); + auto reportQueue = benchmarkIn->report.createOutputQueue(); + std::shared_ptr passthroughQueue; + if(passthrough) { + passthroughQueue = benchmarkIn->passthrough.createOutputQueue(10, false); + } + p.start(); + auto inputFrame = std::make_shared(); + inputQueue->send(inputFrame); + for(int i = 0; i < 10; i++) { + if(passthrough) { + auto passthroughFrame = passthroughQueue->get(); + REQUIRE(passthroughFrame != nullptr); + } + auto reportData = reportQueue->get(); + REQUIRE(reportData != nullptr); + REQUIRE(reportData->numMessagesReceived > 1); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + } +} + +void testCameraBenchmarking(float fps) { + dai::Pipeline p; + auto cam = p.create()->build(dai::CameraBoardSocket::CAM_A); + auto* output = cam->requestOutput(std::pair(640, 400), std::nullopt, dai::ImgResizeMode::CROP, fps); + REQUIRE(output != nullptr); + auto benchmarkIn = p.create(); + output->link(benchmarkIn->input); + auto reportQueue = benchmarkIn->report.createOutputQueue(); + p.start(); + for(int i = 0; i < 10; i++) { + auto reportData = reportQueue->get(); + REQUIRE(reportData != nullptr); + REQUIRE(reportData->numMessagesReceived > 1); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + } +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on device") { + testBenchmarkIn(false, false, 30.0f, true); +} + +TEST_CASE("BenchmarkIn run on host, BenchmarkOut run on device") { + testBenchmarkIn(true, 
false, 30.0f, true); +} + +TEST_CASE("BenchmarkIn run on device, BenchmarkOut run on host") { + testBenchmarkIn(false, true, 30.0f, true); +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on host") { + testBenchmarkIn(true, true, 30.0f, true); +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on device - high FPS") { + testBenchmarkIn(false, false, 1000.0f, false); +} + +TEST_CASE("Camera benchmarking") { + testCameraBenchmarking(30.0f); +} diff --git a/tests/src/ondevice_tests/regression/camera_concurrency.cpp b/tests/src/ondevice_tests/regression/camera_concurrency.cpp index a3ab0f25f..fbe09ddd7 100644 --- a/tests/src/ondevice_tests/regression/camera_concurrency.cpp +++ b/tests/src/ondevice_tests/regression/camera_concurrency.cpp @@ -27,7 +27,7 @@ TEST_CASE("camera_concurrency") { for(auto* output : cameraOutputs) { auto node = pipeline.create<dai::node::BenchmarkIn>(); output->link(node->input); - node->setNumMessagesToGet(numMessagesToGet); + node->sendReportEveryNMessages(numMessagesToGet); queues.push_back(node->report.createOutputQueue()); benchmarkNodes.push_back(node); } From e74550b0a5f5e71644ece81d24f78fd9dd23eb14 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 14:22:30 +0100 Subject: [PATCH 09/11] Bump the epsilon in benchmark test --- tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp index f5eda08a8..cdb3688ed 100644 --- a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp +++ b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp @@ -34,7 +34,7 @@ void testBenchmarkIn(bool benchmarkInRunOnHost, bool benchmarkOutRunOnHost, floa auto reportData = reportQueue->get<dai::BenchmarkReport>(); REQUIRE(reportData != nullptr); REQUIRE(reportData->numMessagesReceived > 1); - REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + REQUIRE(reportData->fps ==
Catch::Approx(fps).epsilon(0.1)); } } @@ -51,7 +51,7 @@ void testCameraBenchmarking(float fps) { auto reportData = reportQueue->get<dai::BenchmarkReport>(); REQUIRE(reportData != nullptr); REQUIRE(reportData->numMessagesReceived > 1); - REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.1)); } } From 0b6ce61c6ebddbdd13bd5fa83bad9a50665db7aa Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 18:49:28 +0100 Subject: [PATCH 10/11] Add bigger timeout for connecting to the device --- examples/python/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/python/CMakeLists.txt b/examples/python/CMakeLists.txt index 5c8b5ed0d..7ac0f52e6 100644 --- a/examples/python/CMakeLists.txt +++ b/examples/python/CMakeLists.txt @@ -40,6 +40,7 @@ function(add_python_example example_name python_script_path) # Python path (to find compiled module) "PYTHONPATH=$${SYS_PATH_SEPARATOR}$ENV{PYTHONPATH}" "DEPTHAI_SEARCH_TIMEOUT=15000" + "DEPTHAI_CONNECT_TIMEOUT=15000" "DEPTHAI_RECONNECT_TIMEOUT=0" # ASAN in case of sanitizers "${ASAN_ENVIRONMENT_VARS}" @@ -60,6 +61,7 @@ function(add_python_example example_name python_script_path) # Python path (to find compiled module) "PYTHONPATH=$${SYS_PATH_SEPARATOR}$ENV{PYTHONPATH}" "DEPTHAI_SEARCH_TIMEOUT=30000" + "DEPTHAI_CONNECT_TIMEOUT=30000" "DEPTHAI_RECONNECT_TIMEOUT=0" # ASAN in case of sanitizers ${ASAN_ENVIRONMENT_VARS} From c773172c56ae7f42197a5074d9279365a8cb6510 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Tue, 31 Dec 2024 00:20:36 +0100 Subject: [PATCH 11/11] Update comments in the benchmark examples --- examples/python/Benchmark/benchmark_camera.py | 3 +-- examples/python/Benchmark/benchmark_nn.py | 5 +++-- examples/python/Benchmark/benchmark_simple.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/python/Benchmark/benchmark_camera.py b/examples/python/Benchmark/benchmark_camera.py index 97d7adc6f..e89398673 100644 ---
a/examples/python/Benchmark/benchmark_camera.py +++ b/examples/python/Benchmark/benchmark_camera.py @@ -4,14 +4,13 @@ # Create pipeline with dai.Pipeline() as pipeline: - # Define source and output + # Create the nodes cam = pipeline.create(dai.node.Camera).build() benchmarkIn = pipeline.create(dai.node.BenchmarkIn) # benchmarkIn.setRunOnHost(True) # The node can also run on host and include the transfer limitation, default is False output = cam.requestFullResolutionOutput() output.link(benchmarkIn.input) - # Connect to device and start pipeline pipeline.start() while pipeline.isRunning(): time.sleep(1) # Let the logger print out the FPS diff --git a/examples/python/Benchmark/benchmark_nn.py b/examples/python/Benchmark/benchmark_nn.py index b327460ab..154584086 100644 --- a/examples/python/Benchmark/benchmark_nn.py +++ b/examples/python/Benchmark/benchmark_nn.py @@ -1,7 +1,8 @@ import depthai as dai -import cv2 import numpy as np + +# First prepare the model for benchmarking device = dai.Device() modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString())) modelArhive = dai.NNArchive(modelPath) @@ -43,7 +44,7 @@ inputQueue = benchmarkOut.input.createInputQueue() p.start() - inputQueue.send(inputFrame) + inputQueue.send(inputFrame) # Send the input image only once while p.isRunning(): benchmarkReport = outputQueue.get() assert isinstance(benchmarkReport, dai.BenchmarkReport) diff --git a/examples/python/Benchmark/benchmark_simple.py b/examples/python/Benchmark/benchmark_simple.py index 9afe70b6d..ac377c07b 100644 --- a/examples/python/Benchmark/benchmark_simple.py +++ b/examples/python/Benchmark/benchmark_simple.py @@ -9,9 +9,9 @@ benchmarkOut.setFps(30) # Create a BenchmarkIn node - # Thisn node is receiving the messages on the input and measuring the FPS and latency. + # This node is receiving the messages on the input and measuring the FPS and latency. 
# In the case that the input is with BenchmarkOut, the latency measurement is not always possible, as the message is not deep copied, - # which means that the timestamps stay the same. + # which means that the timestamps stay the same and latency virtually increases over time. benchmarkIn = p.create(dai.node.BenchmarkIn) benchmarkIn.setRunOnHost(True) # The node can run on host or on device benchmarkIn.sendReportEveryNMessages(100)