From 41c216a1ad7b6052b66d5fdd1c24ec7818901232 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 12:42:04 +0100 Subject: [PATCH 01/11] Add initial on host implementations --- CMakeLists.txt | 3 +- .../src/pipeline/node/BenchmarkBindings.cpp | 6 +- examples/python/HostNodes/benchmark.py | 34 +++++ .../pipeline/datatype/BenchmarkReport.hpp | 14 +- include/depthai/pipeline/node/BenchmarkIn.hpp | 37 ++++- .../depthai/pipeline/node/BenchmarkOut.hpp | 20 ++- .../properties/BenchmarkInProperties.hpp | 32 +++++ ...tiesOut.hpp => BenchmarkOutProperties.hpp} | 6 +- .../properties/BenchmarkProperties.hpp | 22 --- .../properties/BenchmarkPropertiesIn.hpp | 22 --- src/pipeline/node/Benchmark.cpp | 24 ---- src/pipeline/node/BenchmarkIn.cpp | 133 ++++++++++++++++++ src/pipeline/node/BenchmarkOut.cpp | 66 +++++++++ 13 files changed, 328 insertions(+), 91 deletions(-) create mode 100644 examples/python/HostNodes/benchmark.py create mode 100644 include/depthai/properties/BenchmarkInProperties.hpp rename include/depthai/properties/{BenchmarkPropertiesOut.hpp => BenchmarkOutProperties.hpp} (71%) delete mode 100644 include/depthai/properties/BenchmarkProperties.hpp delete mode 100644 include/depthai/properties/BenchmarkPropertiesIn.hpp delete mode 100644 src/pipeline/node/Benchmark.cpp create mode 100644 src/pipeline/node/BenchmarkIn.cpp create mode 100644 src/pipeline/node/BenchmarkOut.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cafae167f..ed2dd190d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -368,7 +368,8 @@ set(TARGET_CORE_SOURCES src/pipeline/node/DetectionNetwork.cpp src/pipeline/node/Script.cpp src/pipeline/node/Pool.cpp - src/pipeline/node/Benchmark.cpp + src/pipeline/node/BenchmarkIn.cpp + src/pipeline/node/BenchmarkOut.cpp src/pipeline/node/SpatialDetectionNetwork.cpp src/pipeline/node/SystemLogger.cpp src/pipeline/node/SpatialLocationCalculator.cpp diff --git a/bindings/python/src/pipeline/node/BenchmarkBindings.cpp 
b/bindings/python/src/pipeline/node/BenchmarkBindings.cpp index 781fbc1bc..79e6d23cc 100644 --- a/bindings/python/src/pipeline/node/BenchmarkBindings.cpp +++ b/bindings/python/src/pipeline/node/BenchmarkBindings.cpp @@ -30,9 +30,13 @@ void bind_benchmark(pybind11::module& m, void* pCallstack) { benchmarkOut.def_readonly("out", &BenchmarkOut::out, DOC(dai, node, BenchmarkOut, out)) .def_readonly("input", &BenchmarkOut::input, DOC(dai, node, BenchmarkOut, input)) .def("setNumMessagesToSend", &BenchmarkOut::setNumMessagesToSend, py::arg("num"), DOC(dai, node, BenchmarkOut, setNumMessagesToSend)) + .def("setRunOnHost", &BenchmarkOut::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, BenchmarkOut, setRunOnHost)) .def("setFps", &BenchmarkOut::setFps, py::arg("fps"), DOC(dai, node, BenchmarkOut, setFps)); benchmarkIn.def_readonly("input", &BenchmarkIn::input, DOC(dai, node, BenchmarkIn, input)) .def_readonly("report", &BenchmarkIn::report, DOC(dai, node, BenchmarkIn, report)) .def_readonly("passthrough", &BenchmarkIn::passthrough, DOC(dai, node, BenchmarkIn, passthrough)) - .def("setNumMessagesToGet", &BenchmarkIn::setNumMessagesToGet, py::arg("num"), DOC(dai, node, BenchmarkIn, setNumMessagesToGet)); + .def("setRunOnHost", &BenchmarkIn::setRunOnHost, py::arg("runOnHost"), DOC(dai, node, BenchmarkIn, setRunOnHost)) + .def("logReportsAsWarnings", &BenchmarkIn::logReportsAsWarnings, py::arg("logReportsAsWarnings"), DOC(dai, node, BenchmarkIn, logReportsAsWarnings)) + .def("measureIndividualLatencies", &BenchmarkIn::measureIndividualLatencies, py::arg("attachLatencies"), DOC(dai, node, BenchmarkIn, measureIndividualLatencies)) + .def("sendReportEveryNMessages", &BenchmarkIn::sendReportEveryNMessages, py::arg("num"), DOC(dai, node, BenchmarkIn, sendReportEveryNMessages)); } diff --git a/examples/python/HostNodes/benchmark.py b/examples/python/HostNodes/benchmark.py new file mode 100644 index 000000000..701f15298 --- /dev/null +++ b/examples/python/HostNodes/benchmark.py @@ 
-0,0 +1,34 @@ +import depthai as dai +import time + +class TestSource(dai.node.ThreadedHostNode): + def __init__(self, name: str): + super().__init__() + self.name = name + self.output = self.createOutput() + + def run(self): + while self.isRunning(): + buffer = dai.Buffer() + print(f"{self.name} node is sending a buffer!") + self.output.send(buffer) + time.sleep(1) + +with dai.Pipeline(createImplicitDevice=False) as p: + # Create nodes + source = TestSource("source") + benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(True) + benchmarkIn.sendReportEveryNMessages(100) + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(True) + benchmarkOut.setFps(30) + benchmarkOut.out.link(benchmarkIn.input) + outputQueue = benchmarkIn.report.createOutputQueue() + source.output.link(benchmarkOut.input) + + p.start() + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") \ No newline at end of file diff --git a/include/depthai/pipeline/datatype/BenchmarkReport.hpp b/include/depthai/pipeline/datatype/BenchmarkReport.hpp index 4d496449f..b70e53ca9 100644 --- a/include/depthai/pipeline/datatype/BenchmarkReport.hpp +++ b/include/depthai/pipeline/datatype/BenchmarkReport.hpp @@ -3,22 +3,20 @@ #include "depthai/pipeline/datatype/Buffer.hpp" namespace dai { -// TODO(before mainline) - API not supported on RVC2 /** * BenchmarkReport message. 
*/ class BenchmarkReport : public Buffer { public: BenchmarkReport() = default; - virtual ~BenchmarkReport() = default; - float fps; - float timeTotal; // seconds - float numMessagesReceived; - float averageLatency; + float fps = 0.0f; + float timeTotal = 0.0f; // seconds + float numMessagesReceived = 0; + float averageLatency = 0.0f; // seconds + + // Only filled if measureIndividualLatencies is set to true std::vector latencies; - // TODO Add jitter, timestamps for start/end, possibly a vector of timestamps for all messages - // TODO BEFORE MAINLINE add setters and getters void serialize(std::vector& metadata, DatatypeEnum& datatype) const override { metadata = utility::serialize(*this); diff --git a/include/depthai/pipeline/node/BenchmarkIn.hpp b/include/depthai/pipeline/node/BenchmarkIn.hpp index 80166c1b8..6b6aa9e94 100644 --- a/include/depthai/pipeline/node/BenchmarkIn.hpp +++ b/include/depthai/pipeline/node/BenchmarkIn.hpp @@ -3,13 +3,12 @@ #include // shared -#include +#include namespace dai { namespace node { -// TODO(before mainline) - API not supported on RVC2 -class BenchmarkIn : public DeviceNodeCRTP { +class BenchmarkIn : public DeviceNodeCRTP, public HostRunnable { public: constexpr static const char* NAME = "BenchmarkIn"; using DeviceNodeCRTP::DeviceNodeCRTP; @@ -30,11 +29,35 @@ class BenchmarkIn : public DeviceNodeCRTP // shared -#include +#include namespace dai { namespace node { -class BenchmarkOut : public DeviceNodeCRTP { +class BenchmarkOut : public DeviceNodeCRTP, public HostRunnable{ public: constexpr static const char* NAME = "BenchmarkOut"; using DeviceNodeCRTP::DeviceNodeCRTP; @@ -34,7 +34,21 @@ class BenchmarkOut : public DeviceNodeCRTP { + /** + * Specify how many messages to measure for each report + */ + uint32_t reportEveryNMessages = 50; + + /** + * Specify whether the latenices are attached to the report individually + */ + bool attachLatencies = false; + + /** + * Send the reports also as logger warnings + */ + bool 
logReportsAsWarnings = true; +}; + +DEPTHAI_SERIALIZE_EXT(BenchmarkInProperties, reportEveryNMessages); + +} // namespace dai diff --git a/include/depthai/properties/BenchmarkPropertiesOut.hpp b/include/depthai/properties/BenchmarkOutProperties.hpp similarity index 71% rename from include/depthai/properties/BenchmarkPropertiesOut.hpp rename to include/depthai/properties/BenchmarkOutProperties.hpp index 19ee57c22..def89ad74 100644 --- a/include/depthai/properties/BenchmarkPropertiesOut.hpp +++ b/include/depthai/properties/BenchmarkOutProperties.hpp @@ -10,11 +10,11 @@ namespace dai { /** * Specify benchmark properties (number of messages to send/receive) */ -struct BenchmarkPropertiesOut : PropertiesSerializable { +struct BenchmarkOutProperties : PropertiesSerializable { /** * Number of messages to send */ - int numMessages = 50; + int numMessages = -1; /** * FPS for sending, 0 means as fast as possible @@ -22,6 +22,6 @@ struct BenchmarkPropertiesOut : PropertiesSerializable { - /** - * Number of messages to send - */ - int numMessages = 50; -}; - -DEPTHAI_SERIALIZE_EXT(BenchmarkProperties, numMessages); - -} // namespace dai diff --git a/include/depthai/properties/BenchmarkPropertiesIn.hpp b/include/depthai/properties/BenchmarkPropertiesIn.hpp deleted file mode 100644 index 8bbe350f3..000000000 --- a/include/depthai/properties/BenchmarkPropertiesIn.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "depthai/common/ProcessorType.hpp" -#include "depthai/common/optional.hpp" -#include "depthai/pipeline/datatype/DatatypeEnum.hpp" -#include "depthai/properties/Properties.hpp" - -namespace dai { - -/** - * Specify benchmark properties (number of messages to send/receive) - */ -struct BenchmarkPropertiesIn : PropertiesSerializable { - /** - * Number of messages to send - */ - int numMessages = 50; -}; - -DEPTHAI_SERIALIZE_EXT(BenchmarkPropertiesIn, numMessages); - -} // namespace dai diff --git a/src/pipeline/node/Benchmark.cpp b/src/pipeline/node/Benchmark.cpp 
deleted file mode 100644 index f4af2492e..000000000 --- a/src/pipeline/node/Benchmark.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "depthai/pipeline/node/BenchmarkIn.hpp" -#include "depthai/pipeline/node/BenchmarkOut.hpp" - -namespace dai { -namespace node { - -void BenchmarkOut::setNumMessagesToSend(int num) { - properties.numMessages = num; -} - -void BenchmarkOut::setFps(float fps) { - properties.fps = fps; -} - -void BenchmarkIn::setNumMessagesToGet(int num) { - properties.numMessages = num; -} - -void BenchmarkOut::buildInternal() { - properties.numMessages = -1; // By default send messages indefinitely -} - -} // namespace node -} // namespace dai diff --git a/src/pipeline/node/BenchmarkIn.cpp b/src/pipeline/node/BenchmarkIn.cpp new file mode 100644 index 000000000..414950840 --- /dev/null +++ b/src/pipeline/node/BenchmarkIn.cpp @@ -0,0 +1,133 @@ +#include "depthai/pipeline/node/BenchmarkIn.hpp" +#include "depthai/pipeline/datatype/BenchmarkReport.hpp" +#include + +namespace dai { +namespace node { + +void BenchmarkIn::sendReportEveryNMessages(uint32_t num) { + properties.reportEveryNMessages = num; +} + +void BenchmarkIn::setRunOnHost(bool runOnHost) { + runOnHostVar = runOnHost; +} + +bool BenchmarkIn::runOnHost() const { + return runOnHostVar; +} + +void BenchmarkIn::logReportsAsWarnings(bool logReportsAsWarnings) { + properties.logReportsAsWarnings = logReportsAsWarnings; +} + +void BenchmarkIn::measureIndividualLatencies(bool attachLatencies) { + properties.attachLatencies = attachLatencies; +} + +void BenchmarkIn::run() { + using namespace std::chrono; + + uint32_t numMessages = properties.reportEveryNMessages; + + // Decide if we will store latencies or not + bool storeLatencies = false; + if(properties.attachLatencies) { + if(numMessages <= 1000) { + // We'll store latencies for this batch + storeLatencies = true; + } else { + // Warn upfront if user wanted latencies but # messages is too high + logger->warn("Number of messages > 1000, latencies not 
individually attached."); + } + } + + uint32_t messageCount = 0; + float totalLatency = 0.0f; + + std::vector latencies; + if(storeLatencies) { + latencies.reserve(numMessages); + } + + auto start = steady_clock::now(); + + while(isRunning()) { + auto inMessage = input.get(); + + // If this is the first message of the batch, reset counters + if(messageCount == 0) { + start = steady_clock::now(); + totalLatency = 0.0f; + + // Clear vector if we are storing latencies + if(storeLatencies) { + latencies.clear(); + latencies.reserve(numMessages); + } + } + + if(messageCount < numMessages) { + auto currentTs = steady_clock::now(); + // If the message itself has a real timestamp, use that instead: + // auto messageTs = inMessage->getTimestamp(); + auto messageTs = currentTs; // In example, just use currentTs + + duration diff = currentTs - messageTs; + logger->trace("Frame latency: {} s", diff.count()); + + // Accumulate for average + totalLatency += diff.count(); + + // Optionally store individual latencies + if(storeLatencies) { + latencies.push_back(diff.count()); + } + + messageCount++; + } + else { + // We reached our batch size, so time to compute and send the report + auto stop = steady_clock::now(); + duration durationS = stop - start; + + auto reportMessage = std::make_shared(); + reportMessage->numMessagesReceived = numMessages; + reportMessage->timeTotal = durationS.count(); + reportMessage->fps = numMessages / durationS.count(); + reportMessage->averageLatency = totalLatency / numMessages; + + // Attach latencies only if we're storing them + if(storeLatencies) { + reportMessage->latencies = latencies; + } + + // Decide how to log (warn or info) once, then do all the logs + auto logFunc = [&](auto fmt, auto... 
args) { + if(properties.logReportsAsWarnings) { + logger->warn(fmt, std::forward(args)...); + } else { + logger->trace(fmt, std::forward(args)...); + } + }; + + // Unconditional logs, using chosen severity + logFunc("FPS: {}", reportMessage->fps); + logFunc("Messages took {} s", reportMessage->timeTotal); + logFunc("Average latency: {} s", reportMessage->averageLatency); + + // Send out the report + report.send(reportMessage); + logger->trace("Sent report message"); + + // Reset for next batch + messageCount = 0; + } + + // Passthrough the message + passthrough.send(inMessage); + } +} + +} // namespace node +} // namespace dai diff --git a/src/pipeline/node/BenchmarkOut.cpp b/src/pipeline/node/BenchmarkOut.cpp new file mode 100644 index 000000000..98a6bc63f --- /dev/null +++ b/src/pipeline/node/BenchmarkOut.cpp @@ -0,0 +1,66 @@ +#include "depthai/pipeline/node/BenchmarkOut.hpp" + +namespace dai { +namespace node { + +void BenchmarkOut::setNumMessagesToSend(int num) { + properties.numMessages = num; +} + +void BenchmarkOut::setFps(float fps) { + properties.fps = fps; +} + +void BenchmarkOut::setRunOnHost(bool runOnHost) { + runOnHostVar = runOnHost; +} + +bool BenchmarkOut::runOnHost() const { + return runOnHostVar; +} + +void BenchmarkOut::run() { + using namespace std::chrono; + + logger->trace("Wait for the input message."); + auto inMessage = input.get(); + + bool useTiming = (properties.fps > 0); + + auto frameDurationDouble = std::chrono::duration(1.0 / properties.fps); + auto frameDuration = std::chrono::duration_cast(frameDurationDouble); + + auto nextFrameTime = steady_clock::now(); + for(int i = 0; (i < properties.numMessages || properties.numMessages == -1) && isRunning(); i++) { + auto imgMessage = std::dynamic_pointer_cast(inMessage); + if(imgMessage != nullptr) { + logger->trace("Sending img message with id {}", i); + + // Copying metadata and pointing to same data + auto newMessage = std::make_shared(); + newMessage->setMetadata(imgMessage); + 
newMessage->data = imgMessage->data; + newMessage->setTimestampDevice(steady_clock::now()); + out.send(newMessage); + } else { + logger->trace("Sending message with id {}", i); + out.send(inMessage); + } + + if(useTiming) { + nextFrameTime += frameDuration; + + auto now = steady_clock::now(); + if(nextFrameTime > now) { + auto sleepTime = nextFrameTime - now; + std::this_thread::sleep_for(sleepTime); + } + } + } + + logger->trace("Benchmark out sent all messages!"); +} + + +} // namespace node +} // namespace dai From cd9f3371e1a3f081a8d3b44cccabae44258edf22 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 12:56:08 +0100 Subject: [PATCH 02/11] Change which timestamp is used based on where the node runs --- src/pipeline/node/BenchmarkIn.cpp | 9 ++++++--- src/pipeline/node/BenchmarkOut.cpp | 6 +++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pipeline/node/BenchmarkIn.cpp b/src/pipeline/node/BenchmarkIn.cpp index 414950840..40bbf4bf1 100644 --- a/src/pipeline/node/BenchmarkIn.cpp +++ b/src/pipeline/node/BenchmarkIn.cpp @@ -69,9 +69,12 @@ void BenchmarkIn::run() { if(messageCount < numMessages) { auto currentTs = steady_clock::now(); - // If the message itself has a real timestamp, use that instead: - // auto messageTs = inMessage->getTimestamp(); - auto messageTs = currentTs; // In example, just use currentTs + auto messageTs = steady_clock::now(); + if(runOnHostVar) { + messageTs = inMessage->getTimestamp(); + } else { + messageTs = inMessage->getTimestampDevice(); + } duration diff = currentTs - messageTs; logger->trace("Frame latency: {} s", diff.count()); diff --git a/src/pipeline/node/BenchmarkOut.cpp b/src/pipeline/node/BenchmarkOut.cpp index 98a6bc63f..afbed9634 100644 --- a/src/pipeline/node/BenchmarkOut.cpp +++ b/src/pipeline/node/BenchmarkOut.cpp @@ -40,7 +40,11 @@ void BenchmarkOut::run() { auto newMessage = std::make_shared(); newMessage->setMetadata(imgMessage); newMessage->data = imgMessage->data; - 
newMessage->setTimestampDevice(steady_clock::now()); + if(runOnHostVar) { + newMessage->setTimestamp(steady_clock::now()); + } else { + newMessage->setTimestampDevice(steady_clock::now()); + } out.send(newMessage); } else { logger->trace("Sending message with id {}", i); From eaf27df37532aa46d99578e79863322f74b7ebb5 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 14:58:32 +0100 Subject: [PATCH 03/11] Bump RVC4 FW to use host implementations of benchmark node --- cmake/Depthai/DepthaiDeviceRVC4Config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake index 982160495..407a52c43 100644 --- a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake +++ b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake @@ -4,4 +4,4 @@ set(DEPTHAI_DEVICE_RVC4_MATURITY "snapshot") # "version if applicable" # set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+93f7b75a885aa32f44c5e9f53b74470c49d2b1af") -set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+81617bcfe7b7da9eda9654b5b3d3d3254b59a47d") +set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+7f850f7d36329e0e994af1a1cd83de723a88edef") From 575f08bedda6648f3aad0f79ee89902a73de2090 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 19:46:38 +0100 Subject: [PATCH 04/11] Add missing serializations --- include/depthai/properties/BenchmarkInProperties.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/depthai/properties/BenchmarkInProperties.hpp b/include/depthai/properties/BenchmarkInProperties.hpp index 2497ed959..93a9349b8 100644 --- a/include/depthai/properties/BenchmarkInProperties.hpp +++ b/include/depthai/properties/BenchmarkInProperties.hpp @@ -12,7 +12,7 @@ namespace dai { */ struct BenchmarkInProperties : PropertiesSerializable { /** - * Specify how many messages to measure for each report + * Specify how many messages to measure for each report */ uint32_t reportEveryNMessages = 50; @@ -27,6 +27,6 @@ struct 
BenchmarkInProperties : PropertiesSerializable Date: Sun, 29 Dec 2024 20:08:42 +0100 Subject: [PATCH 05/11] [RVC4 FW] Bump FW with properties serialization --- cmake/Depthai/DepthaiDeviceRVC4Config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake index 407a52c43..bda073a99 100644 --- a/cmake/Depthai/DepthaiDeviceRVC4Config.cmake +++ b/cmake/Depthai/DepthaiDeviceRVC4Config.cmake @@ -4,4 +4,4 @@ set(DEPTHAI_DEVICE_RVC4_MATURITY "snapshot") # "version if applicable" # set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+93f7b75a885aa32f44c5e9f53b74470c49d2b1af") -set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+7f850f7d36329e0e994af1a1cd83de723a88edef") +set(DEPTHAI_DEVICE_RVC4_VERSION "0.0.1+19b67f81b54c146d079d2cbd4485fa153337dc5a") From ae6af8212f7f631e30128bbf22cf5028401c54fa Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 20:16:45 +0100 Subject: [PATCH 06/11] [RVC2] Complete BenchmarkIn implementation --- cmake/Depthai/DepthaiDeviceSideConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Depthai/DepthaiDeviceSideConfig.cmake b/cmake/Depthai/DepthaiDeviceSideConfig.cmake index 9a0146539..23835ccbb 100644 --- a/cmake/Depthai/DepthaiDeviceSideConfig.cmake +++ b/cmake/Depthai/DepthaiDeviceSideConfig.cmake @@ -2,7 +2,7 @@ set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot") # "full commit hash of device side binary" -set(DEPTHAI_DEVICE_SIDE_COMMIT "c3e98b39b6a5445b2187b4109d03a146c6df37dd") +set(DEPTHAI_DEVICE_SIDE_COMMIT "6b5e087c440e3833a0128d686fe3b6926681f113") # "version if applicable" set(DEPTHAI_DEVICE_SIDE_VERSION "") From 1a0c8f637ef648d48f5935ae54706e50992ae7cf Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Sun, 29 Dec 2024 22:27:37 +0100 Subject: [PATCH 07/11] [RVC2] Add BenchmarkOut implementation --- cmake/Depthai/DepthaiDeviceSideConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cmake/Depthai/DepthaiDeviceSideConfig.cmake b/cmake/Depthai/DepthaiDeviceSideConfig.cmake index 23835ccbb..20e2509ad 100644 --- a/cmake/Depthai/DepthaiDeviceSideConfig.cmake +++ b/cmake/Depthai/DepthaiDeviceSideConfig.cmake @@ -2,7 +2,7 @@ set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot") # "full commit hash of device side binary" -set(DEPTHAI_DEVICE_SIDE_COMMIT "6b5e087c440e3833a0128d686fe3b6926681f113") +set(DEPTHAI_DEVICE_SIDE_COMMIT "5e016a328ac84324fb3c6bd8904141191f29dc2e") # "version if applicable" set(DEPTHAI_DEVICE_SIDE_VERSION "") From 45a0397f2285b3338d74d42ef19ef6dfb97190b6 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 11:21:54 +0100 Subject: [PATCH 08/11] Add examples and tests --- examples/python/Benchmark/benchmark_camera.py | 17 ++++ examples/python/Benchmark/benchmark_nn.py | 50 ++++++++++++ examples/python/Benchmark/benchmark_simple.py | 29 +++++++ examples/python/CMakeLists.txt | 12 +++ examples/python/HostNodes/benchmark.py | 34 -------- tests/CMakeLists.txt | 4 + .../pipeline/node/benchmark_test.cpp | 80 +++++++++++++++++++ .../regression/camera_concurrency.cpp | 2 +- 8 files changed, 193 insertions(+), 35 deletions(-) create mode 100644 examples/python/Benchmark/benchmark_camera.py create mode 100644 examples/python/Benchmark/benchmark_nn.py create mode 100644 examples/python/Benchmark/benchmark_simple.py delete mode 100644 examples/python/HostNodes/benchmark.py create mode 100644 tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp diff --git a/examples/python/Benchmark/benchmark_camera.py b/examples/python/Benchmark/benchmark_camera.py new file mode 100644 index 000000000..97d7adc6f --- /dev/null +++ b/examples/python/Benchmark/benchmark_camera.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +import depthai as dai +import time + +# Create pipeline +with dai.Pipeline() as pipeline: + # Define source and output + cam = pipeline.create(dai.node.Camera).build() + benchmarkIn = pipeline.create(dai.node.BenchmarkIn) + # 
benchmarkIn.setRunOnHost(True) # The node can also run on host and include the transfer limitation, default is False + output = cam.requestFullResolutionOutput() + output.link(benchmarkIn.input) + + # Connect to device and start pipeline + pipeline.start() + while pipeline.isRunning(): + time.sleep(1) # Let the logger print out the FPS diff --git a/examples/python/Benchmark/benchmark_nn.py b/examples/python/Benchmark/benchmark_nn.py new file mode 100644 index 000000000..b327460ab --- /dev/null +++ b/examples/python/Benchmark/benchmark_nn.py @@ -0,0 +1,50 @@ +import depthai as dai +import cv2 +import numpy as np + +device = dai.Device() +modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString())) +modelArhive = dai.NNArchive(modelPath) +inputSize = modelArhive.getInputSize() +type = modelArhive.getConfig().model.inputs[0].preprocessing.daiType + +if type: + try: + frameType = dai.ImgFrame.Type.__getattribute__(type) + except AttributeError: + type = None + +if not type: + if device.getPlatform() == dai.Platform.RVC2: + frameType = dai.ImgFrame.Type.BGR888p + else: + frameType = dai.ImgFrame.Type.BGR888i + + +# Construct the input (white) image for benchmarking +img = np.ones((inputSize[1], inputSize[0], 3), np.uint8) * 255 +inputFrame = dai.ImgFrame() +inputFrame.setCvFrame(img, frameType) + +with dai.Pipeline(device) as p: + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(False) # The node can run on host or on device + benchmarkOut.setFps(-1) # As fast as possible + + neuralNetwork = p.create(dai.node.NeuralNetwork).build(benchmarkOut.out, modelArhive) + + benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(False) # The node can run on host or on device + benchmarkIn.sendReportEveryNMessages(100) + benchmarkIn.logReportsAsWarnings(False) + neuralNetwork.out.link(benchmarkIn.input) + + outputQueue = benchmarkIn.report.createOutputQueue() + inputQueue = 
benchmarkOut.input.createInputQueue() + + p.start() + inputQueue.send(inputFrame) + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") diff --git a/examples/python/Benchmark/benchmark_simple.py b/examples/python/Benchmark/benchmark_simple.py new file mode 100644 index 000000000..9afe70b6d --- /dev/null +++ b/examples/python/Benchmark/benchmark_simple.py @@ -0,0 +1,29 @@ +import depthai as dai + +with dai.Pipeline(createImplicitDevice=False) as p: + # Create a BenchmarkOut node + # It will listen on the input to get the first message and then send it out at a specified rate + # The node sends the same message out (creates new pointers), not deep copies. + benchmarkOut = p.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(True) # The node can run on host or on device + benchmarkOut.setFps(30) + + # Create a BenchmarkIn node + # Thisn node is receiving the messages on the input and measuring the FPS and latency. + # In the case that the input is with BenchmarkOut, the latency measurement is not always possible, as the message is not deep copied, + # which means that the timestamps stay the same. 
+ benchmarkIn = p.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(True) # The node can run on host or on device + benchmarkIn.sendReportEveryNMessages(100) + + benchmarkOut.out.link(benchmarkIn.input) + outputQueue = benchmarkIn.report.createOutputQueue() + inputQueue = benchmarkOut.input.createInputQueue() + + p.start() + imgFrame = dai.ImgFrame() + inputQueue.send(imgFrame) + while p.isRunning(): + benchmarkReport = outputQueue.get() + assert isinstance(benchmarkReport, dai.BenchmarkReport) + print(f"FPS is {benchmarkReport.fps}") diff --git a/examples/python/CMakeLists.txt b/examples/python/CMakeLists.txt index 2578d1b27..5c8b5ed0d 100644 --- a/examples/python/CMakeLists.txt +++ b/examples/python/CMakeLists.txt @@ -223,3 +223,15 @@ set_tests_properties(py_script_simple PROPERTIES FAIL_REGULAR_EXPRESSION "\\[err add_python_example(script_all_cameras Script/script_switch_all_cameras.py) dai_set_example_test_labels(script_all_cameras ondevice rvc2_all rvc4 ci) + +## Benchmark node +add_python_example(benchmark_node Benchmark/benchmark_simple.py) +dai_set_example_test_labels(benchmark_node ondevice rvc2_all rvc4 ci) +set_tests_properties(py_benchmark_node PROPERTIES FAIL_REGULAR_EXPRESSION "\\[error\\];\\[critical\\]") + +add_python_example(benchmark_cameras Benchmark/benchmark_camera.py) +dai_set_example_test_labels(benchmark_cameras ondevice rvc2_all rvc4 ci) +set_tests_properties(py_benchmark_cameras PROPERTIES FAIL_REGULAR_EXPRESSION "\\[error\\];\\[critical\\]") + +add_python_example(benchmark_nn Benchmark/benchmark_nn.py) +dai_set_example_test_labels(benchmark_nn ondevice rvc2_all rvc4 ci) diff --git a/examples/python/HostNodes/benchmark.py b/examples/python/HostNodes/benchmark.py deleted file mode 100644 index 701f15298..000000000 --- a/examples/python/HostNodes/benchmark.py +++ /dev/null @@ -1,34 +0,0 @@ -import depthai as dai -import time - -class TestSource(dai.node.ThreadedHostNode): - def __init__(self, name: str): - super().__init__() - 
self.name = name - self.output = self.createOutput() - - def run(self): - while self.isRunning(): - buffer = dai.Buffer() - print(f"{self.name} node is sending a buffer!") - self.output.send(buffer) - time.sleep(1) - -with dai.Pipeline(createImplicitDevice=False) as p: - # Create nodes - source = TestSource("source") - benchmarkIn = p.create(dai.node.BenchmarkIn) - benchmarkIn.setRunOnHost(True) - benchmarkIn.sendReportEveryNMessages(100) - benchmarkOut = p.create(dai.node.BenchmarkOut) - benchmarkOut.setRunOnHost(True) - benchmarkOut.setFps(30) - benchmarkOut.out.link(benchmarkIn.input) - outputQueue = benchmarkIn.report.createOutputQueue() - source.output.link(benchmarkOut.input) - - p.start() - while p.isRunning(): - benchmarkReport = outputQueue.get() - assert isinstance(benchmarkReport, dai.BenchmarkReport) - print(f"FPS is {benchmarkReport.fps}") \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c83c632fd..20017176f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -400,3 +400,7 @@ dai_set_test_labels(stereo_depth_node_test ondevice rvc2_all rvc4 ci) # ImageManipV2 test dai_add_test(image_manip_v2_node_test src/ondevice_tests/pipeline/node/image_manip_v2_test.cpp) dai_set_test_labels(image_manip_v2_node_test ondevice rvc2_all rvc4 ci) + +# Benchmark tests +dai_add_test(benchmark_test src/ondevice_tests/pipeline/node/benchmark_test.cpp) +dai_set_test_labels(benchmark_test ondevice rvc2_all rvc4 ci) diff --git a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp new file mode 100644 index 000000000..f5eda08a8 --- /dev/null +++ b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp @@ -0,0 +1,80 @@ +#include +#include +#include "depthai/capabilities/ImgFrameCapability.hpp" +#include "depthai/common/CameraBoardSocket.hpp" +#include "depthai/depthai.hpp" +#include "depthai/pipeline/MessageQueue.hpp" +#include 
"depthai/pipeline/datatype/ImgFrame.hpp" +#include "depthai/pipeline/node/Camera.hpp" + +void testBenchmarkIn(bool benchmarkInRunOnHost, bool benchmarkOutRunOnHost, float fps, bool passthrough) { + // Create pipeline + dai::Pipeline p; + auto benchmarkIn = p.create(); + benchmarkIn->setRunOnHost(benchmarkInRunOnHost); + auto benchmarkOut = p.create(); + benchmarkOut->setRunOnHost(benchmarkOutRunOnHost); + benchmarkOut->setFps(fps); + benchmarkOut->out.link(benchmarkIn->input); + + auto inputQueue = benchmarkOut->input.createInputQueue(); + auto reportQueue = benchmarkIn->report.createOutputQueue(); + std::shared_ptr passthroughQueue; + if(passthrough) { + passthroughQueue = benchmarkIn->passthrough.createOutputQueue(10, false); + } + p.start(); + auto inputFrame = std::make_shared(); + inputQueue->send(inputFrame); + for(int i = 0; i < 10; i++) { + if(passthrough) { + auto passthroughFrame = passthroughQueue->get(); + REQUIRE(passthroughFrame != nullptr); + } + auto reportData = reportQueue->get(); + REQUIRE(reportData != nullptr); + REQUIRE(reportData->numMessagesReceived > 1); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + } +} + +void testCameraBenchmarking(float fps) { + dai::Pipeline p; + auto cam = p.create()->build(dai::CameraBoardSocket::CAM_A); + auto* output = cam->requestOutput(std::pair(640, 400), std::nullopt, dai::ImgResizeMode::CROP, fps); + REQUIRE(output != nullptr); + auto benchmarkIn = p.create(); + output->link(benchmarkIn->input); + auto reportQueue = benchmarkIn->report.createOutputQueue(); + p.start(); + for(int i = 0; i < 10; i++) { + auto reportData = reportQueue->get(); + REQUIRE(reportData != nullptr); + REQUIRE(reportData->numMessagesReceived > 1); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + } +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on device") { + testBenchmarkIn(false, false, 30.0f, true); +} + +TEST_CASE("BenchmarkIn run on host, BenchmarkOut run on device") { + testBenchmarkIn(true, 
false, 30.0f, true); +} + +TEST_CASE("BenchmarkIn run on device, BenchmarkOut run on host") { + testBenchmarkIn(false, true, 30.0f, true); +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on host") { + testBenchmarkIn(true, true, 30.0f, true); +} + +TEST_CASE("BenchmarkIn and BenchmarkOut run on device - high FPS") { + testBenchmarkIn(false, false, 1000.0f, false); +} + +TEST_CASE("Camera benchmarking") { + testCameraBenchmarking(30.0f); +} diff --git a/tests/src/ondevice_tests/regression/camera_concurrency.cpp b/tests/src/ondevice_tests/regression/camera_concurrency.cpp index a3ab0f25f..fbe09ddd7 100644 --- a/tests/src/ondevice_tests/regression/camera_concurrency.cpp +++ b/tests/src/ondevice_tests/regression/camera_concurrency.cpp @@ -27,7 +27,7 @@ TEST_CASE("camera_concurrency") { for(auto* output : cameraOutputs) { auto node = pipeline.create<dai::node::BenchmarkIn>(); output->link(node->input); - node->setNumMessagesToGet(numMessagesToGet); + node->sendReportEveryNMessages(numMessagesToGet); queues.push_back(node->report.createOutputQueue()); benchmarkNodes.push_back(node); } From e74550b0a5f5e71644ece81d24f78fd9dd23eb14 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 14:22:30 +0100 Subject: [PATCH 09/11] Bump the epsilon in benchmark test --- tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp index f5eda08a8..cdb3688ed 100644 --- a/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp +++ b/tests/src/ondevice_tests/pipeline/node/benchmark_test.cpp @@ -34,7 +34,7 @@ void testBenchmarkIn(bool benchmarkInRunOnHost, bool benchmarkOutRunOnHost, floa auto reportData = reportQueue->get<dai::BenchmarkReport>(); REQUIRE(reportData != nullptr); REQUIRE(reportData->numMessagesReceived > 1); - REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + REQUIRE(reportData->fps ==
Catch::Approx(fps).epsilon(0.1)); } } @@ -51,7 +51,7 @@ void testCameraBenchmarking(float fps) { auto reportData = reportQueue->get<dai::BenchmarkReport>(); REQUIRE(reportData != nullptr); REQUIRE(reportData->numMessagesReceived > 1); - REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.05)); + REQUIRE(reportData->fps == Catch::Approx(fps).epsilon(0.1)); } } From 0b6ce61c6ebddbdd13bd5fa83bad9a50665db7aa Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Mon, 30 Dec 2024 18:49:28 +0100 Subject: [PATCH 10/11] Add bigger timeout for connecting to the device --- examples/python/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/python/CMakeLists.txt b/examples/python/CMakeLists.txt index 5c8b5ed0d..7ac0f52e6 100644 --- a/examples/python/CMakeLists.txt +++ b/examples/python/CMakeLists.txt @@ -40,6 +40,7 @@ function(add_python_example example_name python_script_path) # Python path (to find compiled module) "PYTHONPATH=$${SYS_PATH_SEPARATOR}$ENV{PYTHONPATH}" "DEPTHAI_SEARCH_TIMEOUT=15000" + "DEPTHAI_CONNECT_TIMEOUT=15000" "DEPTHAI_RECONNECT_TIMEOUT=0" # ASAN in case of sanitizers "${ASAN_ENVIRONMENT_VARS}" @@ -60,6 +61,7 @@ function(add_python_example example_name python_script_path) # Python path (to find compiled module) "PYTHONPATH=$${SYS_PATH_SEPARATOR}$ENV{PYTHONPATH}" "DEPTHAI_SEARCH_TIMEOUT=30000" + "DEPTHAI_CONNECT_TIMEOUT=30000" "DEPTHAI_RECONNECT_TIMEOUT=0" # ASAN in case of sanitizers ${ASAN_ENVIRONMENT_VARS} From c773172c56ae7f42197a5074d9279365a8cb6510 Mon Sep 17 00:00:00 2001 From: Matevz Morato Date: Tue, 31 Dec 2024 00:20:36 +0100 Subject: [PATCH 11/11] Update comments in the benchmark examples --- examples/python/Benchmark/benchmark_camera.py | 3 +-- examples/python/Benchmark/benchmark_nn.py | 5 +++-- examples/python/Benchmark/benchmark_simple.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/python/Benchmark/benchmark_camera.py b/examples/python/Benchmark/benchmark_camera.py index 97d7adc6f..e89398673 100644 ---
a/examples/python/Benchmark/benchmark_camera.py +++ b/examples/python/Benchmark/benchmark_camera.py @@ -4,14 +4,13 @@ # Create pipeline with dai.Pipeline() as pipeline: - # Define source and output + # Create the nodes cam = pipeline.create(dai.node.Camera).build() benchmarkIn = pipeline.create(dai.node.BenchmarkIn) # benchmarkIn.setRunOnHost(True) # The node can also run on host and include the transfer limitation, default is False output = cam.requestFullResolutionOutput() output.link(benchmarkIn.input) - # Connect to device and start pipeline pipeline.start() while pipeline.isRunning(): time.sleep(1) # Let the logger print out the FPS diff --git a/examples/python/Benchmark/benchmark_nn.py b/examples/python/Benchmark/benchmark_nn.py index b327460ab..154584086 100644 --- a/examples/python/Benchmark/benchmark_nn.py +++ b/examples/python/Benchmark/benchmark_nn.py @@ -1,7 +1,8 @@ import depthai as dai -import cv2 import numpy as np + +# First prepare the model for benchmarking device = dai.Device() modelPath = dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano", platform=device.getPlatformAsString())) modelArhive = dai.NNArchive(modelPath) @@ -43,7 +44,7 @@ inputQueue = benchmarkOut.input.createInputQueue() p.start() - inputQueue.send(inputFrame) + inputQueue.send(inputFrame) # Send the input image only once while p.isRunning(): benchmarkReport = outputQueue.get() assert isinstance(benchmarkReport, dai.BenchmarkReport) diff --git a/examples/python/Benchmark/benchmark_simple.py b/examples/python/Benchmark/benchmark_simple.py index 9afe70b6d..ac377c07b 100644 --- a/examples/python/Benchmark/benchmark_simple.py +++ b/examples/python/Benchmark/benchmark_simple.py @@ -9,9 +9,9 @@ benchmarkOut.setFps(30) # Create a BenchmarkIn node - # Thisn node is receiving the messages on the input and measuring the FPS and latency. + # This node is receiving the messages on the input and measuring the FPS and latency. 
# In the case that the input is with BenchmarkOut, the latency measurement is not always possible, as the message is not deep copied, - # which means that the timestamps stay the same. + # which means that the timestamps stay the same and latency virtually increases over time. benchmarkIn = p.create(dai.node.BenchmarkIn) benchmarkIn.setRunOnHost(True) # The node can run on host or on device benchmarkIn.sendReportEveryNMessages(100)