From 74d27767c31abf548fa5cfa70d07c1df8e4b5f46 Mon Sep 17 00:00:00 2001
From: Naushir Patuck
Date: Fri, 6 Sep 2024 09:50:33 +0100
Subject: [PATCH 1/4] core: Add release version number to version string

Signed-off-by: Naushir Patuck
---
 core/meson.build |  2 +-
 utils/version.py | 68 +++++++++++++++++++++++++-----------------------
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/core/meson.build b/core/meson.build
index 0de2f99f..fbc23068 100644
--- a/core/meson.build
+++ b/core/meson.build
@@ -39,7 +39,7 @@ if fs.is_file(dist_version_file)
     version_cmd += fs.read(dist_version_file)
 endif
 
-version_cpp = vcs_tag(command : version_cmd,
+version_cpp = vcs_tag(command : [version_cmd, meson.project_version()],
                       replace_string: '@VER@',
                       input : 'version.cpp.in',
                       output : 'version.cpp',
diff --git a/utils/version.py b/utils/version.py
index 48d7e059..ed508f83 100755
--- a/utils/version.py
+++ b/utils/version.py
@@ -11,42 +11,46 @@
 digits = 12
 
 
-def generate_version():
+def generate_version(git_sha):
+    if git_sha is None:
+        # Check if this is a git directory
+        r = subprocess.run(['git', 'rev-parse', '--git-dir'],
+                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True)
+        if r.returncode:
+            raise RuntimeError('Invalid git directory!')
+
+        # Get commit id
+        r = subprocess.run(['git', 'rev-parse', '--verify', 'HEAD'],
+                           stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
+        if r.returncode:
+            raise RuntimeError('Invalid git commit!')
+
+        commit = r.stdout.strip('\n')[0:digits] + '-intree'
+
+        # Check dirty status
+        r = subprocess.run(['git', 'diff-index', '--quiet', 'HEAD'],
+                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True)
+        if r.returncode:
+            commit = commit + '-dirty'
+    else:
+        commit = git_sha.lower().strip()
+        if any(c not in hexdigits for c in commit):
+            raise RuntimeError('Invalid git sha!')
+
+        commit = commit[0:digits]
+
+    return commit
+
+
+if __name__ == "__main__":
     try:
-        if len(sys.argv) == 1:
-            # Check if this is a git directory
-            r = subprocess.run(['git', 'rev-parse', '--git-dir'],
-                               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True)
-            if r.returncode:
-                raise RuntimeError('Invalid git directory!')
-
-            # Get commit id
-            r = subprocess.run(['git', 'rev-parse', '--verify', 'HEAD'],
-                               stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
-            if r.returncode:
-                raise RuntimeError('Invalid git commit!')
-
-            commit = r.stdout.strip('\n')[0:digits] + '-intree'
-
-            # Check dirty status
-            r = subprocess.run(['git', 'diff-index', '--quiet', 'HEAD'],
-                               stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True)
-            if r.returncode:
-                commit = commit + '-dirty'
-        else:
-            commit = sys.argv[1].lower().strip()
-            if any(c not in hexdigits for c in commit):
-                raise RuntimeError('Invalid git sha!')
-
-            commit = commit[0:digits]
+        rpicam_apps_ver = '0.0.0' if len(sys.argv) < 2 else sys.argv[1]
+        git_sha = None if len(sys.argv) < 3 else sys.argv[2]
+        commit = generate_version(git_sha)
     except RuntimeError as e:
         print(f'ERR: {e}', file=sys.stderr)
         commit = '0' * digits + '-invalid'
     finally:
-        print(f'{commit} {datetime.now().strftime("%d-%m-%Y (%H:%M:%S)")}', end="")
-
-
-if __name__ == "__main__":
-    generate_version()
+        print(f'v{rpicam_apps_ver}-{commit} {datetime.now().strftime("%d-%m-%Y (%H:%M:%S)")}', end="")

From 9341f751d01136045cf75f048a351f68ce8f6b5f Mon Sep 17 00:00:00 2001
From: Naushir Patuck
Date: Tue, 10 Sep 2024 13:07:51 +0100
Subject: [PATCH 2/4] postprocessing: plot_pose_cv_stage: Support multiple
 poses

Switch to using
vectors for the list of points/confidences so that multiple poses can
be drawn.

Signed-off-by: Naushir Patuck
---
 post_processing_stages/plot_pose_cv_stage.cpp    | 32 +++++++++++--------
 .../pose_estimation_tf_stage.cpp                 |  7 ++--
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/post_processing_stages/plot_pose_cv_stage.cpp b/post_processing_stages/plot_pose_cv_stage.cpp
index c3ccabda..8ef6bced 100644
--- a/post_processing_stages/plot_pose_cv_stage.cpp
+++ b/post_processing_stages/plot_pose_cv_stage.cpp
@@ -95,25 +95,31 @@ bool PlotPoseCvStage::Process(CompletedRequestPtr &completed_request)
 		uint32_t *ptr = (uint32_t *)buffer.data();
 		StreamInfo info = app_->GetStreamInfo(stream_);
 
-		std::vector<Rect> rects;
-		std::vector<libcamera::Point> lib_locations;
-		std::vector<Point> cv_locations;
-		std::vector<float> confidences;
-
+		std::vector<std::vector<libcamera::Point>> lib_locations;
+		std::vector<std::vector<float>> confidences;
 		completed_request->post_process_metadata.Get("pose_estimation.locations", lib_locations);
 		completed_request->post_process_metadata.Get("pose_estimation.confidences", confidences);
 
-		if (!confidences.empty() && !lib_locations.empty())
+		unsigned int i = 0;
+		for (auto const &loc : lib_locations)
 		{
-			Mat image(info.height, info.width, CV_8U, ptr, info.stride);
-			for (libcamera::Point lib_location : lib_locations)
+			std::vector<Rect> rects;
+			std::vector<Point> cv_locations;
+
+			std::vector<float> &conf = confidences[i++];
+
+			if (!conf.empty() && !loc.empty())
 			{
-				Point cv_location;
-				cv_location.x = lib_location.x;
-				cv_location.y = lib_location.y;
-				cv_locations.push_back(cv_location);
+				Mat image(info.height, info.width, CV_8U, ptr, info.stride);
+				for (libcamera::Point lib_location : loc)
+				{
+					Point cv_location;
+					cv_location.x = lib_location.x;
+					cv_location.y = lib_location.y;
+					cv_locations.push_back(cv_location);
+				}
+				drawFeatures(image, cv_locations, conf);
 			}
-			drawFeatures(image, cv_locations, confidences);
 		}
 	}
 	return false;
 }
diff --git a/post_processing_stages/pose_estimation_tf_stage.cpp b/post_processing_stages/pose_estimation_tf_stage.cpp
index 00429547..43c86907 100644
--- a/post_processing_stages/pose_estimation_tf_stage.cpp
+++ b/post_processing_stages/pose_estimation_tf_stage.cpp
@@ -55,8 +55,11 @@ void PoseEstimationTfStage::checkConfiguration()
 
 void PoseEstimationTfStage::applyResults(CompletedRequestPtr &completed_request)
 {
-	completed_request->post_process_metadata.Set("pose_estimation.locations", locations_);
-	completed_request->post_process_metadata.Set("pose_estimation.confidences", confidences_);
+	std::vector<std::vector<libcamera::Point>> lib_locations { locations_ };
+	std::vector<std::vector<float>> confidences { confidences_ };
+
+	completed_request->post_process_metadata.Set("pose_estimation.locations", lib_locations);
+	completed_request->post_process_metadata.Set("pose_estimation.confidences", confidences);
 }
 
 void PoseEstimationTfStage::interpretOutputs()

From 650e472ed616a1a28c46f50cf9558d10f5f16470 Mon Sep 17 00:00:00 2001
From: Naushir Patuck
Date: Fri, 6 Sep 2024 12:44:34 +0100
Subject: [PATCH 3/4] postprocessing: Add initial support for IMX500
 postprocessing

Add postprocessing stages using the IMX500 inference output. MobileNet
SSD object detection and PoseNet pose estimation software stages have
been implemented in this commit.
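
As a usage sketch (rpicam-hello and its --post-process-file option are
existing rpicam-apps features; the JSON paths assume the assets added by
this patch are installed in their default locations), the new stages can
be exercised with something like:

  rpicam-hello -t 0 --post-process-file assets/imx500_mobilenet_ssd.json

Substitute assets/imx500_posenet.json to run the pose estimation stage
instead.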
Signed-off-by: Naushir Patuck --- assets/imx500_mobilenet_ssd.json | 123 +++ assets/imx500_posenet.json | 29 + meson.build | 1 + meson_options.txt | 10 + .../imx500/imx500_object_detection.cpp | 337 +++++++ .../imx500/imx500_posenet.cpp | 861 ++++++++++++++++++ .../imx500/imx500_post_processing_stage.cpp | 280 ++++++ .../imx500/imx500_post_processing_stage.hpp | 78 ++ post_processing_stages/imx500/meson.build | 31 + post_processing_stages/meson.build | 5 + .../post_processing_stage.hpp | 21 + utils/download-imx500-models.sh | 42 + 12 files changed, 1818 insertions(+) create mode 100644 assets/imx500_mobilenet_ssd.json create mode 100644 assets/imx500_posenet.json create mode 100644 post_processing_stages/imx500/imx500_object_detection.cpp create mode 100644 post_processing_stages/imx500/imx500_posenet.cpp create mode 100644 post_processing_stages/imx500/imx500_post_processing_stage.cpp create mode 100644 post_processing_stages/imx500/imx500_post_processing_stage.hpp create mode 100644 post_processing_stages/imx500/meson.build create mode 100755 utils/download-imx500-models.sh diff --git a/assets/imx500_mobilenet_ssd.json b/assets/imx500_mobilenet_ssd.json new file mode 100644 index 00000000..0a45abf4 --- /dev/null +++ b/assets/imx500_mobilenet_ssd.json @@ -0,0 +1,123 @@ +{ + "imx500_object_detection": + { + "max_detections" : 5, + "threshold" : 0.6, + "network_file": "/usr/share/imx500-models/imx500_network_ssd_mobilenetv2_fpnlite_320x320_pp.rpk", + + "save_input_tensor": + { + "filename": "/home/pi/input_tensor.raw", + "num_tensors": 10, + "norm_val": [384, 384, 384, 0], + "norm_shift": [0, 0, 0, 0] + }, + + "temporal_filter": + { + "tolerance": 0.1, + "factor": 0.2, + "visible_frames": 4, + "hidden_frames": 2 + }, + + "classes": + [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "-", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "-", + "backpack", + "umbrella", + "-", + "-", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "-", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "-", + "dining table", + "-", + "-", + "toilet", + "-", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "-", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + + "object_detect_draw_cv": + { + "line_thickness" : 2 + } +} diff --git a/assets/imx500_posenet.json b/assets/imx500_posenet.json new file mode 100644 index 00000000..c008f2a5 --- /dev/null +++ b/assets/imx500_posenet.json @@ -0,0 +1,29 @@ +{ + "imx500_posenet": + { + "max_detections" : 5, + "threshold" : 0.4, + "offset_refinement_steps": 5, + "nms_radius": 10.0, + "network_file": "/usr/share/imx500-models/imx500_network_posenet.rpk", + + "save_input_tensor": + { + "filename": "/home/pi/posenet_input_tensor.raw", + "num_tensors": 10 + }, + + "temporal_filter": + { + "tolerance": 0.3, + "factor": 0.3, + "visible_frames": 8, + "hidden_frames": 2 + } + }, + + "plot_pose_cv": + { + 
"confidence_threshold" : 0.2 + } +} diff --git a/meson.build b/meson.build index 7d4c61c8..1a742df3 100644 --- a/meson.build +++ b/meson.build @@ -80,5 +80,6 @@ summary({ 'OpenCV postprocessing' : enable_opencv, 'TFLite postprocessing' : enable_tflite, 'Hailo postprocessing' : enable_hailo, + 'IMX500 postprocessing' : get_option('enable_imx500'), }, bool_yn : true, section : 'Build configuration') diff --git a/meson_options.txt b/meson_options.txt index 13b6de16..0f24d546 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -43,3 +43,13 @@ option('download_hailo_models', type : 'boolean', value : true, description : 'Download and install the Hailo postprocessing models') + +option('enable_imx500', + type : 'boolean', + value : true, + description : 'Enable IMX500 postprocessing support') + +option('download_imx500_models', + type : 'boolean', + value : true, + description : 'Download and install the IMX500 postprocessing models') diff --git a/post_processing_stages/imx500/imx500_object_detection.cpp b/post_processing_stages/imx500/imx500_object_detection.cpp new file mode 100644 index 00000000..310340f9 --- /dev/null +++ b/post_processing_stages/imx500/imx500_object_detection.cpp @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (C) 2024, Raspberry Pi Ltd + * + * imx500_object_detection.cpp - IMX500 object detection for various networks + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "core/rpicam_app.hpp" +#include "post_processing_stages/object_detect.hpp" +#include "post_processing_stages/post_processing_stage.hpp" + +#include "imx500_post_processing_stage.hpp" + +using Rectangle = libcamera::Rectangle; +using Size = libcamera::Size; +namespace controls = libcamera::controls; + +#define NAME "imx500_object_detection" + +struct Bbox +{ + float x0; + float y0; + float x1; + float y1; +}; + +struct ObjectDetectionOutput +{ + unsigned int num_detections = 0; + std::vector bboxes; + std::vector scores; + std::vector classes; +}; + +class ObjectDetection : public IMX500PostProcessingStage +{ +public: + ObjectDetection(RPiCamApp *app) : IMX500PostProcessingStage(app) {} + + char const *Name() const override; + + void Read(boost::property_tree::ptree const ¶ms) override; + + void Configure() override; + + bool Process(CompletedRequestPtr &completed_request) override; + +private: + int processOutputTensor(std::vector &objects, const std::vector &output_tensor, + const CnnOutputTensorInfo &output_tensor_info, const Rectangle &scaler_crop) const; + void filterOutputObjects(std::vector &objects); + + struct LtObject + { + Detection params; + unsigned int visible; + unsigned int hidden; + bool matched; + }; + + std::vector lt_objects_; + std::mutex lt_lock_; + + // Config params + unsigned int max_detections_; + float threshold_; + std::vector classes_; + bool temporal_filtering_; + float tolerance_; + float factor_; + unsigned int visible_frames_; + unsigned int hidden_frames_; +}; + +char const *ObjectDetection::Name() const +{ + return NAME; +} + +void ObjectDetection::Read(boost::property_tree::ptree const ¶ms) +{ + max_detections_ = params.get("max_detections"); + threshold_ = params.get("threshold", 0.5f); + classes_ = PostProcessingStage::GetJsonArray(params, "classes"); + + if (params.find("temporal_filter") != params.not_found()) + { + temporal_filtering_ = true; + tolerance_ = params.get("temporal_filter.tolerance", 0.05); + factor_ = params.get("temporal_filter.factor", 0.2); + visible_frames_ = 
params.get("temporal_filter.visible_frames", 5); + hidden_frames_ = params.get("temporal_filter.hidden_frames", 2); + } + else + temporal_filtering_ = false; + + IMX500PostProcessingStage::Read(params); +} + +void ObjectDetection::Configure() +{ + IMX500PostProcessingStage::Configure(); + IMX500PostProcessingStage::ShowFwProgressBar(); +} + +bool ObjectDetection::Process(CompletedRequestPtr &completed_request) +{ + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + if (!raw_stream_ || !scaler_crop) + { + LOG_ERROR("Must have RAW stream and scaler crop available to get sensor dimensions!"); + return false; + } + + auto output = completed_request->metadata.get(controls::rpi::CnnOutputTensor); + auto info = completed_request->metadata.get(controls::rpi::CnnOutputTensorInfo); + std::vector objects; + + // Process() can be concurrently called through different threads for consecutive CompletedRequests if + // things are running behind. So protect access to the lt_objects_ state object. + std::scoped_lock l(lt_lock_); + + if (output && info) + { + std::vector output_tensor(output->data(), output->data() + output->size()); + CnnOutputTensorInfo output_tensor_info = *reinterpret_cast(info->data()); + + processOutputTensor(objects, output_tensor, output_tensor_info, *scaler_crop); + + if (temporal_filtering_) + { + filterOutputObjects(objects); + if (lt_objects_.size()) + { + objects.clear(); + for (auto const &obj : lt_objects_) + { + if (!obj.hidden) + objects.push_back(obj.params); + } + } + } + else + { + // If no temporal filtering, make sure we fill lt_objects_ with the current result as it may be used if + // there is no output tensor on subsequent frames. + for (auto const &obj : objects) + lt_objects_.push_back({ obj, 0, 0, 0 }); + } + } + else + { + // No output tensor, so simply reuse the results from the lt_objects_. 
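+			// Entries whose hidden counter has not yet run down are skipped here
+			// as well, so a stale detection cannot pop back into view.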
+ for (auto const &obj : lt_objects_) + { + if (!obj.hidden) + objects.push_back(obj.params); + } + } + + if (objects.size()) + completed_request->post_process_metadata.Set("object_detect.results", objects); + + return IMX500PostProcessingStage::Process(completed_request); +} + +static int createObjectDetectionData(ObjectDetectionOutput &output, const std::vector &data, + unsigned int total_detections) +{ + unsigned int count = 0; + + // Extract bounding box co-ordinates + for (unsigned int i = 0; i < total_detections; i++) + { + Bbox bbox; + bbox.y0 = data.at(count + i); + bbox.x0 = data.at(count + i + (1 * total_detections)); + bbox.y1 = data.at(count + i + (2 * total_detections)); + bbox.x1 = data.at(count + i + (3 * total_detections)); + output.bboxes.push_back(bbox); + } + count += (total_detections * 4); + + // Extract scores + for (unsigned int i = 0; i < total_detections; i++) + { + output.scores.push_back(data.at(count)); + count++; + } + + // Extract class indices + for (unsigned int i = 0; i < total_detections; i++) + { + output.classes.push_back(data.at(count)); + count++; + } + + // Extract number of detections + unsigned int num_detections = data.at(count); + if (num_detections > total_detections) + { + LOG(1, "Unexpected value for num_detections: " << num_detections << ", setting it to " << total_detections); + num_detections = total_detections; + } + + output.num_detections = num_detections; + return 0; +} + +int ObjectDetection::processOutputTensor(std::vector &objects, const std::vector &output_tensor, + const CnnOutputTensorInfo &output_tensor_info, + const Rectangle &scaler_crop) const +{ + if (output_tensor_info.num_tensors != 4) + { + LOG_ERROR("Invalid number of tensors " << output_tensor_info.num_tensors << ", expected 4"); + return -1; + } + + const unsigned int total_detections = output_tensor_info.info[0].tensor_data_num / 4; + ObjectDetectionOutput output; + + // 4x coords + 1x labels + 1x confidences + 1 total detections + if (output_tensor.size() != 6 * total_detections + 1) + { + LOG_ERROR("Invalid tensor size " << output_tensor.size() << ", expected " << 6 * total_detections + 1); + return -1; + } + + int ret = createObjectDetectionData(output, output_tensor, total_detections); + if (ret) + { + LOG_ERROR("Failed to create object detection data"); + return -1; + } + + for (unsigned int i = 0; i < std::min(output.num_detections, max_detections_); i++) + { + uint8_t class_index = (uint8_t)output.classes[i]; + + // Filter detections + if (output.scores[i] < threshold_ || class_index >= classes_.size()) + continue; + + // Extract bounding box co-ordinates in the inference image co-ordinates and convert to the final ISP output + // co-ordinates. 
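+		// The tensor holds normalised [0..1] corner positions (x0, y0, x1, y1),
+		// while ConvertInferenceCoordinates() takes (x, y, width, height)
+		// fractions, hence the subtractions below.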
+ std::vector coords{ output.bboxes[i].x0, output.bboxes[i].y0, + output.bboxes[i].x1 - output.bboxes[i].x0, + output.bboxes[i].y1 - output.bboxes[i].y0 }; + const Rectangle obj_scaled = ConvertInferenceCoordinates(coords, scaler_crop); + + objects.emplace_back(class_index, classes_[class_index], output.scores[i], + obj_scaled.x, obj_scaled.y, obj_scaled.width, obj_scaled.height); + } + + LOG(2, "Number of objects detected: " << objects.size()); + for (unsigned i = 0; i < objects.size(); i++) + LOG(2, "[" << i << "] : " << objects[i].toString()); + + return 0; +} + +void ObjectDetection::filterOutputObjects(std::vector &objects) +{ + const Size isp_output_size = output_stream_->configuration().size; + + for (auto <_obj : lt_objects_) + lt_obj.matched = false; + + for (auto const &object : objects) + { + bool matched = false; + for (auto <_obj : lt_objects_) + { + // Try and match a detected object in our long term list. + if (object.category == lt_obj.params.category && + std::abs(object.box.x - lt_obj.params.box.x) < tolerance_ * isp_output_size.width && + std::abs(object.box.y - lt_obj.params.box.y) < tolerance_ * isp_output_size.height && + std::abs((int)object.box.width - (int)lt_obj.params.box.width) < tolerance_ * isp_output_size.width && + std::abs((int)object.box.height - (int)lt_obj.params.box.height) < tolerance_ * isp_output_size.height) + { + lt_obj.matched = matched = true; + lt_obj.params.confidence = object.confidence; + lt_obj.params.box.x = factor_ * object.box.x + (1 - factor_) * lt_obj.params.box.x; + lt_obj.params.box.y = factor_ * object.box.y + (1 - factor_) * lt_obj.params.box.y; + lt_obj.params.box.width = factor_ * object.box.width + (1 - factor_) * lt_obj.params.box.width; + lt_obj.params.box.height = factor_ * object.box.height + (1 - factor_) * lt_obj.params.box.height; + // Reset the visibility counter for when the object next disappears. + lt_obj.visible = visible_frames_; + // Decrement the hidden counter until the object becomes visible in the list. + lt_obj.hidden = std::max(0, (int)lt_obj.hidden - 1); + break; + } + } + + // Add the object to the long term list if not found. This object will remain hidden for hidden_frames_ + // consecutive frames. + if (!matched) + lt_objects_.push_back({ object, visible_frames_, hidden_frames_, 1 }); + } + + for (auto <_obj : lt_objects_) + { + if (!lt_obj.matched) + { + // If a non matched object in the long term list is still hidden, set visible count to 0 so that it must be + // matched for hidden_frames_ consecutive frames before becoming visible. Otherwise, decrement the visible + // count of unmatched objects in the long term list. + if (lt_obj.hidden) + lt_obj.visible = 0; + else + lt_obj.visible--; + } + } + + // Remove now invisible objects from the long term list. 
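+	// (Erase-remove idiom: an object is dropped once it is both unmatched in
+	// this frame and its visible counter has reached zero.)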
+ lt_objects_.erase(std::remove_if(lt_objects_.begin(), lt_objects_.end(), + [] (const LtObject &obj) { return !obj.matched && !obj.visible; }), + lt_objects_.end()); +} + +static PostProcessingStage *Create(RPiCamApp *app) +{ + return new ObjectDetection(app); +} + +static RegisterStage reg(NAME, &Create); diff --git a/post_processing_stages/imx500/imx500_posenet.cpp b/post_processing_stages/imx500/imx500_posenet.cpp new file mode 100644 index 00000000..a965bd20 --- /dev/null +++ b/post_processing_stages/imx500/imx500_posenet.cpp @@ -0,0 +1,861 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (C) 2024, Raspberry Pi Ltd + * + * imx500_posenet.cpp - IMX500 inference for PoseNet + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "core/rpicam_app.hpp" +#include "post_processing_stages/post_processing_stage.hpp" + +#include "imx500_post_processing_stage.hpp" + +using Rectangle = libcamera::Rectangle; +using Size = libcamera::Size; +namespace controls = libcamera::controls; + +namespace +{ + +constexpr Size INPUT_TENSOR_SIZE = { 481, 353 }; +constexpr Size MAP_SIZE = { 31, 23 }; +constexpr unsigned int NUM_KEYPOINTS = 17; +// These 16 edges allow traversing of the pose graph along the mid_offsets (see +// paper for details). +static constexpr int NUM_EDGES = 16; +constexpr unsigned int STRIDE = 16; +constexpr unsigned int NUM_HEATMAPS = NUM_KEYPOINTS * MAP_SIZE.width * MAP_SIZE.height; +constexpr unsigned int NUM_SHORT_OFFSETS = 2 * NUM_KEYPOINTS * MAP_SIZE.width * MAP_SIZE.height; +constexpr unsigned int NUM_MID_OFFSETS = 64 * MAP_SIZE.width * MAP_SIZE.height; + +enum KeypointType +{ + Nose, + LeftEye, + RightEye, + LeftEar, + RightEar, + LeftShoulder, + RightShoulder, + LeftElbow, + RightElbow, + LeftWrist, + RightWrist, + LeftHip, + RightHip, + LeftKnee, + RightKnee, + LeftAnkle, + RightAnkle +}; + +const std::array, 32> EdgeList = { { + // Forward edges + { Nose, LeftEye }, + { LeftEye, LeftEar }, + { Nose, RightEye }, + { RightEye, RightEar }, + { Nose, LeftShoulder }, + { LeftShoulder, LeftElbow }, + { LeftElbow, LeftWrist }, + { LeftShoulder, LeftHip }, + { LeftHip, LeftKnee }, + { LeftKnee, LeftAnkle }, + { Nose, RightShoulder }, + { RightShoulder, RightElbow }, + { RightElbow, RightWrist }, + { RightShoulder, RightHip }, + { RightHip, RightKnee }, + { RightKnee, RightAnkle }, + + // Backward edges + { LeftEye, Nose }, + { LeftEar, LeftEye }, + { RightEye, Nose }, + { RightEar, RightEye }, + { LeftShoulder, Nose }, + { LeftElbow, LeftShoulder }, + { LeftWrist, LeftElbow }, + { LeftHip, LeftShoulder }, + { LeftKnee, LeftHip }, + { LeftAnkle, LeftKnee }, + { RightShoulder, Nose }, + { RightElbow, RightShoulder }, + { RightWrist, RightElbow }, + { RightHip, RightShoulder }, + { RightKnee, RightHip }, + { RightAnkle, RightKnee }, +} }; + +struct Point +{ + float y, x; +}; + +// An adjacency list representing the directed edges connecting keypoints. +struct AdjacencyList +{ + explicit AdjacencyList(const int num_nodes) : child_ids(num_nodes), edge_ids(num_nodes) {} + + // child_ids[i] is a vector holding the node ids of all children of the i-th + // node and edge_ids[i] is a vector holding the edge ids of all edges stemming + // from the i-th node. If the k-th edge in the graph starts at the i-th node + // and ends at the j-th node, then child_ids[i] and edge_ids will contain j + // and k, respectively, at corresponding positions. 
+ std::vector> child_ids; + std::vector> edge_ids; +}; + +// Defines a 2-D keypoint with (x, y) float coordinates and its type id. +struct KeypointWithScore +{ + KeypointWithScore(const Point &_pt, const int _id, const float _score) : point(_pt), id(_id), score(_score) {} + + [[maybe_unused]] friend std::ostream &operator<<(std::ostream &ost, const KeypointWithScore &keypoint) + { + return ost << keypoint.point.y << ", " << keypoint.point.x << ", " << keypoint.id << ", " << keypoint.score; + } + + bool operator<(const KeypointWithScore &other) const { return score < other.score; } + bool operator>(const KeypointWithScore &other) const { return score > other.score; } + + Point point; + int id; + float score; +}; + +using KeypointQueue = std::priority_queue>; +using PoseKeypoints = std::array; +using PoseKeypointScores = std::array; + +struct PoseResults +{ + float pose_score; + PoseKeypoints pose_keypoints; + PoseKeypointScores pose_keypoint_scores; +}; + +float sigmoid(const float x) +{ + return 1.0f / (1.0f + std::exp(-x)); +} + +float log_odds(const float x) +{ + return -std::log(1.0f / (x + 1E-6) - 1.0f); +} + +// Computes the squared distance between a pair of 2-D points. +float squared_distance(const Point &a, const Point &b) +{ + const float dy = b.y - a.y; + const float dx = b.x - a.x; + return dy * dy + dx * dx; +} + +std::vector format_tensor(const float *data, unsigned int size, unsigned int div) +{ + std::vector tensor(size * MAP_SIZE.width * MAP_SIZE.height); + + for (unsigned int i = 0; i < size; i++) + { + for (unsigned int j = 0; j < MAP_SIZE.width; j++) + { + for (unsigned int k = 0; k < MAP_SIZE.height; k++) + { + tensor[(size * MAP_SIZE.width * k) + (size * j) + i] = + data[(MAP_SIZE.width * MAP_SIZE.height * i) + (j * MAP_SIZE.height) + k] / div; + } + } + } + + return tensor; +} + +// Build an adjacency list of the pose graph. +AdjacencyList build_agency_list() +{ + AdjacencyList adjacency_list(NUM_KEYPOINTS); + + for (unsigned int k = 0; k < EdgeList.size(); ++k) + { + const int parent_id = EdgeList[k].first; + const int child_id = EdgeList[k].second; + adjacency_list.child_ids[parent_id].push_back(child_id); + adjacency_list.edge_ids[parent_id].push_back(k); + } + + return adjacency_list; +} + +bool pass_keypoint_nms(std::vector poses, const size_t num_poses, const KeypointWithScore &keypoint, + const float squared_nms_radius) +{ + for (unsigned int i = 0; i < num_poses; ++i) + { + if (squared_distance(keypoint.point, poses[i][keypoint.id]) <= squared_nms_radius) + return false; + } + + return true; +} + +// Finds the indices of the scores if we sort them in decreasing order. +void decreasing_arg_sort(std::vector &indices, const float *scores, unsigned int num_scores) +{ + indices.resize(num_scores); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&scores](const int i, const int j) { return scores[i] > scores[j]; }); +} + +// Finds the indices of the scores if we sort them in decreasing order. +void decreasing_arg_sort(std::vector &indices, const std::vector &scores) +{ + decreasing_arg_sort(indices, scores.data(), scores.size()); +} + +// Helper function for 1-D linear interpolation. 
It computes the floor and the +// ceiling of the input coordinate, as well as the weighting factor between the +// two interpolation endpoints, such that: +// y = (1 - x_lerp) * vec[x_floor] + x_lerp * vec[x_ceil] +void build_linear_interpolation(const float x, const int n, int *x_floor, int *x_ceil, float *x_lerp) +{ + const float x_proj = std::clamp(x, 0.0f, n - 1.0f); + *x_floor = static_cast(std::floor(x_proj)); + *x_ceil = static_cast(std::ceil(x_proj)); + *x_lerp = x - (*x_floor); +} + +// Helper function for 2-D bilinear interpolation. It computes the four corners +// of the 2x2 cell that contain the input coordinates (x, y), as well as the +// weighting factor between the four interpolation endpoints, such that: +// y = +// (1 - y_lerp) * ((1 - x_lerp) * vec[top_left] + x_lerp * vec[top_right]) + +// y_lerp * ((1 - x_lerp) * tensor[bottom_left] + x_lerp * vec[bottom_right]) +void build_bilinear_interpolation(const Point point, const unsigned int num_channels, int *top_left, int *top_right, + int *bottom_left, int *bottom_right, float *y_lerp, float *x_lerp) +{ + int y_floor; + int y_ceil; + build_linear_interpolation(point.y, MAP_SIZE.height, &y_floor, &y_ceil, y_lerp); + int x_floor; + int x_ceil; + build_linear_interpolation(point.x, MAP_SIZE.width, &x_floor, &x_ceil, x_lerp); + *top_left = (y_floor * MAP_SIZE.width + x_floor) * num_channels; + *top_right = (y_floor * MAP_SIZE.width + x_ceil) * num_channels; + *bottom_left = (y_ceil * MAP_SIZE.width + x_floor) * num_channels; + *bottom_right = (y_ceil * MAP_SIZE.width + x_ceil) * num_channels; +} + +// Sample the input tensor values at position (x, y) and at multiple channels. +// The input tensor has shape [height, width, num_channels]. We bilinearly +// sample its value at tensor(y, x, c), for c in the channels specified. This +// is faster than calling the single channel interpolation function multiple +// times because the computation of the positions needs to be done only once. +std::vector sample_tensor_at_multiple_channels(const std::vector &tensor, const Point &point, + const std::vector &result_channels, + unsigned int num_channels) +{ + int top_left, top_right, bottom_left, bottom_right; + float y_lerp, x_lerp; + + build_bilinear_interpolation(point, num_channels, &top_left, &top_right, &bottom_left, &bottom_right, &y_lerp, + &x_lerp); + + std::vector result; + for (auto &c : result_channels) + { + result.push_back((1 - y_lerp) * ((1 - x_lerp) * tensor[top_left + c] + x_lerp * tensor[top_right + c]) + + y_lerp * ((1 - x_lerp) * tensor[bottom_left + c] + x_lerp * tensor[bottom_right + c])); + } + + return result; +} + +// Sample the input tensor values at position (x, y) and at a single channel. +// The input tensor has shape [height, width, num_channels]. We bilinearly +// sample its value at tensor(y, x, channel). 
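+// (A convenience wrapper around the multi-channel sampler above, with a
+// single result channel.)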
+float sample_tensor_at_single_channel(const std::vector &tensor, const Point &point, unsigned int num_channels, + const int c) +{ + std::vector result = sample_tensor_at_multiple_channels(tensor, point, std::vector { c }, num_channels); + return result[0]; +} + +KeypointQueue build_keypoint_queue(const std::vector &scores, const std::vector &short_offsets, + const float score_threshold) +{ + constexpr unsigned int local_maximum_radius = 1; + unsigned int score_index = 0; + KeypointQueue queue; + + for (unsigned int y = 0; y < MAP_SIZE.height; ++y) + { + for (unsigned int x = 0; x < MAP_SIZE.width; ++x) + { + unsigned int offset_index = 2 * score_index; + for (unsigned int j = 0; j < NUM_KEYPOINTS; ++j) + { + const float score = scores[score_index]; + if (score >= score_threshold) + { + // Only consider keypoints whose score is maximum in a local window. + bool local_maximum = true; + const unsigned int y_start = std::max((int)y - (int)local_maximum_radius, 0); + const unsigned int y_end = std::min(y + local_maximum_radius + 1, MAP_SIZE.height); + for (unsigned int y_current = y_start; y_current < y_end; ++y_current) + { + const unsigned int x_start = std::max((int)x - (int)local_maximum_radius, 0); + const unsigned int x_end = std::min(x + local_maximum_radius + 1, MAP_SIZE.width); + for (unsigned int x_current = x_start; x_current < x_end; ++x_current) + { + if (scores[y_current * MAP_SIZE.width * NUM_KEYPOINTS + x_current * NUM_KEYPOINTS + j] > + score) + { + local_maximum = false; + break; + } + } + if (!local_maximum) + break; + } + if (local_maximum) + { + const float dy = short_offsets[offset_index]; + const float dx = short_offsets[offset_index + NUM_KEYPOINTS]; + const float y_refined = std::clamp(y + dy, 0.0f, MAP_SIZE.height - 1.0f); + const float x_refined = std::clamp(x + dx, 0.0f, MAP_SIZE.width - 1.0f); + queue.emplace(Point { y_refined, x_refined }, j, score); + } + } + + ++score_index; + ++offset_index; + } + } + } + + return queue; +} + +// Follows the mid-range offsets, and then refines the position by the short- +// range offsets for a fixed number of steps. +Point find_displaced_position(const std::vector &short_offsets, const std::vector &mid_offsets, + const Point &source, const int edge_id, const int target_id, + const int offset_refinement_steps) +{ + float y = source.y, x = source.x; + + // Follow the mid-range offsets. + std::vector channels = { edge_id, NUM_EDGES + edge_id }; + + // Total size of mid_offsets is height x width x 2*2*num_edges + std::vector offsets = sample_tensor_at_multiple_channels(mid_offsets, source, channels, 2 * 2 * NUM_EDGES); + y = std::clamp(y + offsets[0], 0.0f, MAP_SIZE.height - 1.0f); + x = std::clamp(x + offsets[1], 0.0f, MAP_SIZE.width - 1.0f); + + // Refine by the short-range offsets. 
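+	// short_offsets packs all Y components first, then all X components, so
+	// the target keypoint's offsets live in channels target_id and
+	// NUM_KEYPOINTS + target_id.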
+ channels[0] = target_id; + channels[1] = NUM_KEYPOINTS + target_id; + for (int i = 0; i < offset_refinement_steps; ++i) + { + offsets = sample_tensor_at_multiple_channels(short_offsets, Point { y, x }, channels, 2 * NUM_KEYPOINTS); + y = std::clamp(y + offsets[0], 0.0f, MAP_SIZE.height - 1.0f); + x = std::clamp(x + offsets[1], 0.0f, MAP_SIZE.width - 1.0f); + } + + return Point { y, x }; +} + +void backtrack_decode_pose(const std::vector &scores, const std::vector &short_offsets, + const std::vector &mid_offsets, const KeypointWithScore &root, + const AdjacencyList &adjacency_list, PoseKeypoints &pose_keypoints, + PoseKeypointScores &keypoint_scores, unsigned int offset_refinement_steps) +{ + const float root_score = sample_tensor_at_single_channel(scores, root.point, root.id, NUM_KEYPOINTS); + + // Used in order to put candidate keypoints in a priority queue w.r.t. their + // score. Keypoints with higher score have higher priority and will be + // decoded/processed first. + KeypointQueue decode_queue; + decode_queue.push(KeypointWithScore(root.point, root.id, root_score)); + + // Keeps track of the keypoints whose position has already been decoded. + std::vector keypoint_decoded(NUM_KEYPOINTS, false); + + while (!decode_queue.empty()) + { + // The top element in the queue is the next keypoint to be processed. + const KeypointWithScore current_keypoint = decode_queue.top(); + decode_queue.pop(); + + if (keypoint_decoded[current_keypoint.id]) + continue; + + pose_keypoints[current_keypoint.id] = current_keypoint.point; + keypoint_scores[current_keypoint.id] = current_keypoint.score; + + keypoint_decoded[current_keypoint.id] = true; + + // Add the children of the current keypoint that have not been decoded yet + // to the priority queue. + const int num_children = adjacency_list.child_ids[current_keypoint.id].size(); + for (int j = 0; j < num_children; ++j) + { + const int child_id = adjacency_list.child_ids[current_keypoint.id][j]; + int edge_id = adjacency_list.edge_ids[current_keypoint.id][j]; + if (keypoint_decoded[child_id]) + continue; + + // The mid-offsets block is organized as 4 blocks of NUM_EDGES: + // [fwd Y offsets][fwd X offsets][bwd Y offsets][bwd X offsets] + // OTOH edge_id is [0,NUM_EDGES) for forward edges and + // [NUM_EDGES, 2*NUM_EDGES) for backward edges. + // Thus if the edge is a backward edge (>NUM_EDGES) then we need + // to start 16 indices later to be correctly aligned with the mid-offsets. + if (edge_id > NUM_EDGES) + edge_id += NUM_EDGES; + + const Point child_point = find_displaced_position(short_offsets, mid_offsets, current_keypoint.point, + edge_id, child_id, offset_refinement_steps); + const float child_score = sample_tensor_at_single_channel(scores, child_point, NUM_KEYPOINTS, child_id); + decode_queue.emplace(child_point, child_id, child_score); + } + } +} + +void find_overlapping_keypoints(std::vector &mask, const PoseKeypoints &pose1, const PoseKeypoints &pose2, + const float squared_radius) +{ + for (unsigned int k = 0; k < mask.size(); ++k) + { + if (squared_distance(pose1[k], pose2[k]) <= squared_radius) + mask[k] = true; + } +} + +void perform_soft_keypoint_NMS(std::vector &all_instance_scores, const std::vector &decreasing_indices, + const std::vector &all_keypoint_coords, + const std::vector &all_keypoint_scores, + const float squared_nms_radius) +{ + const int num_instances = decreasing_indices.size(); + + all_instance_scores.resize(num_instances); + // Indicates the occlusion status of the keypoints of the active instance. 
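+	// (This is the "soft" part of the NMS: an occluded keypoint only loses its
+	// contribution to the instance score; the instance itself is never
+	// discarded at this point.)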
+ std::vector keypoint_occluded(NUM_KEYPOINTS); + // Indices of the keypoints of the active instance in decreasing score value. + std::vector indices(NUM_KEYPOINTS); + for (int i = 0; i < num_instances; ++i) + { + const int current_index = decreasing_indices[i]; + // Find the keypoints of the current instance which are overlapping with + // the corresponding keypoints of the higher-scoring instances and + // zero-out their contribution to the score of the current instance. + std::fill(keypoint_occluded.begin(), keypoint_occluded.end(), false); + for (int j = 0; j < i; ++j) + { + const int previous_index = decreasing_indices[j]; + find_overlapping_keypoints(keypoint_occluded, all_keypoint_coords[current_index], + all_keypoint_coords[previous_index], squared_nms_radius); + } + // We compute the argsort keypoint indices based on the original keypoint + // scores, but we do not let them contribute to the instance score if they + // have been non-maximum suppressed. + decreasing_arg_sort(indices, all_keypoint_scores[current_index].data(), + all_keypoint_scores[current_index].size()); + float total_score = 0.0f; + for (unsigned int k = 0; k < NUM_KEYPOINTS; ++k) + { + if (!keypoint_occluded[indices[k]]) + total_score += all_keypoint_scores[current_index][indices[k]]; + } + all_instance_scores[current_index] = total_score / NUM_KEYPOINTS; + } +} + +} // namespace + +#define NAME "imx500_posenet" + +class PoseNet : public IMX500PostProcessingStage +{ +public: + PoseNet(RPiCamApp *app) : IMX500PostProcessingStage(app) {} + + char const *Name() const override; + + void Read(boost::property_tree::ptree const ¶ms) override; + + void Configure() override; + + bool Process(CompletedRequestPtr &completed_request) override; + +private: + std::vector decodeAllPoses(const std::vector &scores, const std::vector &short_offsets, + const std::vector &mid_offsets); + void translateCoordinates(std::vector &results, const Rectangle &scaler_crop) const; + void filterOutputObjects(const std::vector &results); + + struct LtResults + { + PoseResults results; + unsigned int visible; + unsigned int hidden; + bool matched; + }; + + std::vector lt_results_; + std::mutex lt_lock_; + + // Config params: + float threshold_; + unsigned int max_detections_; + unsigned int offset_refinement_steps_; + float nms_radius_; + bool temporal_filtering_; + + float tolerance_; + float factor_; + unsigned int visible_frames_; + unsigned int hidden_frames_; +}; + +char const *PoseNet::Name() const +{ + return NAME; +} + +void PoseNet::Read(boost::property_tree::ptree const ¶ms) +{ + max_detections_ = params.get("max_detections", 10); + threshold_ = params.get("threshold", 0.5f); + offset_refinement_steps_ = params.get("offset_refinement_steps", 5); + nms_radius_ = params.get("nms_radius", 10) / STRIDE; + + if (params.find("temporal_filter") != params.not_found()) + { + temporal_filtering_ = true; + tolerance_ = params.get("temporal_filter.tolerance", 0.05); + factor_ = params.get("temporal_filter.factor", 0.2); + visible_frames_ = params.get("temporal_filter.visible_frames", 5); + hidden_frames_ = params.get("temporal_filter.hidden_frames", 2); + } + else + temporal_filtering_ = false; + + IMX500PostProcessingStage::Read(params); +} + +void PoseNet::Configure() +{ + IMX500PostProcessingStage::Configure(); + IMX500PostProcessingStage::ShowFwProgressBar(); +} + +bool PoseNet::Process(CompletedRequestPtr &completed_request) +{ + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + if (!raw_stream_ || !scaler_crop) + { 
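+		// Without the RAW stream configuration and the ScalerCrop metadata the
+		// keypoints cannot be mapped back onto ISP output co-ordinates, so drop
+		// this frame.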
+ LOG_ERROR("Must have RAW stream and scaler crop available to get sensor dimensions!"); + return false; + } + + auto output = completed_request->metadata.get(controls::rpi::CnnOutputTensor); + if (!output) + { + LOG_ERROR("No output tensor found in metadata!"); + return false; + } + + if (output->size() < NUM_HEATMAPS + NUM_SHORT_OFFSETS + NUM_MID_OFFSETS) + { + LOG_ERROR("Unexpected output tensor size: " << output->size()); + return false; + } + + std::vector scores = + format_tensor((float *)output->data(), NUM_HEATMAPS / (MAP_SIZE.width * MAP_SIZE.height), 1); + std::vector short_offsets = format_tensor((float *)output->data() + NUM_HEATMAPS, + NUM_SHORT_OFFSETS / (MAP_SIZE.width * MAP_SIZE.height), STRIDE); + std::vector mid_offsets = format_tensor((float *)output->data() + NUM_HEATMAPS + NUM_SHORT_OFFSETS, + NUM_MID_OFFSETS / (MAP_SIZE.width * MAP_SIZE.height), STRIDE); + + std::vector results = decodeAllPoses(scores, short_offsets, mid_offsets); + translateCoordinates(results, *scaler_crop); + + std::vector> locations; + std::vector> confidences; + + if (temporal_filtering_) + { + // Process() can be concurrently called through different threads for consecutive CompletedRequests if + // things are running behind. So protect access to the lt_results_ state object. + std::scoped_lock l(lt_lock_); + + filterOutputObjects(results); + for (auto const <_r : lt_results_) + { + if (lt_r.hidden) + continue; + + locations.push_back({}); + confidences.push_back({}); + + for (auto const &s : lt_r.results.pose_keypoint_scores) + confidences.back().push_back(s); + for (auto const &k : lt_r.results.pose_keypoints) + locations.back().emplace_back(k.x, k.y); + } + } + else if (results.size()) + { + for (auto const &result : results) + { + locations.push_back({}); + confidences.push_back({}); + + for (auto const &s : result.pose_keypoint_scores) + confidences.back().push_back(s); + for (auto const &k : result.pose_keypoints) + locations.back().emplace_back(k.x, k.y); + } + } + + if (locations.size() && locations.size() == confidences.size()) + { + completed_request->post_process_metadata.Set("pose_estimation.locations", locations); + completed_request->post_process_metadata.Set("pose_estimation.confidences", confidences); + } + + return IMX500PostProcessingStage::Process(completed_request); +} + +// Decodes poses from the score map, the short and mid offsets. +// "Block space" refers to the output y and z size of the network. +// For example if the network that takes a (353,481) (y,x) input image will have +// an output field of 23, 31. Thus the sizes of the input vectors to this +// functions will be +// scores: 23 x 31 x NUM_KEYPOINTS +// short_offsets 23 x 31 x 2 x NUM_KEYPOINTS (x and y per keypoint) +// mid_offsets 23 x 31 x 2 x 2 2 NUM_EDGES (x and y for each fwd and bwd edge) +// Thus height and width need to be set to 23 and 31 respectively. +// nms_radius must also be given in these units. +// The output coordinates will be in pixel coordinates. 
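+// (For the 481x353 input used here that is 23 x 31 x 17 heatmap logits,
+// 23 x 31 x 34 short offsets and 23 x 31 x 64 mid offsets, matching
+// NUM_HEATMAPS, NUM_SHORT_OFFSETS and NUM_MID_OFFSETS above.)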
+// +// For details see https://arxiv.org/abs/1803.08225 +// PersonLab: Person Pose Estimation and Instance Segmentation with a +// Bottom-Up, Part-Based, Geometric Embedding Model +// George Papandreou, Tyler Zhu, Liang-Chieh Chen, Spyros Gidaris, +// Jonathan Tompson, Kevin Murphy +std::vector PoseNet::decodeAllPoses(const std::vector &scores, + const std::vector &short_offsets, + const std::vector &mid_offsets) +{ + const float min_score_logit = log_odds(threshold_); + + KeypointQueue queue = build_keypoint_queue(scores, short_offsets, min_score_logit); + AdjacencyList adjacency_list = build_agency_list(); + + std::vector indices(NUM_KEYPOINTS); + + // Generate at most max_detections object instances per image in decreasing + // root part score order. + std::vector all_instance_scores; + + std::vector scratch_poses(NUM_KEYPOINTS); + std::vector scratch_keypoint_scores(max_detections_); + + unsigned int pose_counter = 0; + while (pose_counter < max_detections_ && !queue.empty()) + { + // The top element in the queue is the next root candidate. + const KeypointWithScore root = queue.top(); + queue.pop(); + + // Reject a root candidate if it is within a disk of nms_radius_ pixels + // from the corresponding part of a previously detected instance. + if (!pass_keypoint_nms(scratch_poses, pose_counter, root, nms_radius_ * nms_radius_)) + continue; + + auto &next_pose = scratch_poses[pose_counter]; + auto &next_scores = scratch_keypoint_scores[pose_counter]; + for (unsigned int k = 0; k < NUM_KEYPOINTS; ++k) + { + next_pose[k].x = -1.0f; + next_pose[k].y = -1.0f; + next_scores[k] = -1E5; + } + + backtrack_decode_pose(scores, short_offsets, mid_offsets, root, adjacency_list, next_pose, next_scores, + offset_refinement_steps_); + + // Convert keypoint-level scores from log-odds to probabilities and compute + // an initial instance-level score as the average of the scores of the top-k + // scoring keypoints. + for (unsigned int k = 0; k < NUM_KEYPOINTS; ++k) + next_scores[k] = sigmoid(next_scores[k]); + + decreasing_arg_sort(indices, next_scores.data(), next_scores.size()); + + float instance_score = 0.0f; + for (unsigned int j = 0; j < NUM_KEYPOINTS; ++j) + instance_score += next_scores[indices[j]]; + + instance_score /= NUM_KEYPOINTS; + + if (instance_score >= threshold_) + { + pose_counter++; + all_instance_scores.push_back(instance_score); + } + } + + // Sort the detections in decreasing order of their instance-level scores. + std::vector decreasing_indices; + decreasing_arg_sort(decreasing_indices, all_instance_scores); + + // Keypoint-level soft non-maximum suppression and instance-level rescoring as + // the average of the top-k keypoints in terms of their keypoint-level scores. + perform_soft_keypoint_NMS(all_instance_scores, decreasing_indices, scratch_poses, scratch_keypoint_scores, + nms_radius_ * nms_radius_); + + // Sort the detections in decreasing order of their final instance-level + // scores. Usually the order does not change but this is not guaranteed. + decreasing_arg_sort(decreasing_indices, all_instance_scores); + + std::vector results; + for (int index : decreasing_indices) + { + if (all_instance_scores[index] < threshold_) + break; + + // New result. + results.push_back({}); + + // Rescale keypoint coordinates into pixel space (much more useful for user). 
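+		// (Block-space positions multiplied by the stride of 16 give positions
+		// on the 481x353 input tensor; translateCoordinates() then maps these
+		// onto the ISP output.)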
+ for (unsigned int k = 0; k < NUM_KEYPOINTS; ++k) + { + results.back().pose_keypoints[k].y = scratch_poses[index][k].y * STRIDE; + results.back().pose_keypoints[k].x = scratch_poses[index][k].x * STRIDE; + } + + std::copy(scratch_keypoint_scores[index].begin(), scratch_keypoint_scores[index].end(), + results.back().pose_keypoint_scores.begin()); + results.back().pose_score = all_instance_scores[index]; + } + + return results; +} + +void PoseNet::translateCoordinates(std::vector &results, const Rectangle &scaler_crop) const +{ + for (auto &r : results) + { + for (auto &keypoint : r.pose_keypoints) + { + std::vector coords{ keypoint.x / (INPUT_TENSOR_SIZE.width - 1), + keypoint.y / (INPUT_TENSOR_SIZE.height - 1), + 0, 0 }; + Rectangle translated = ConvertInferenceCoordinates(coords, scaler_crop); + keypoint.x = translated.x; + keypoint.y = translated.y; + } + } +} + +void PoseNet::filterOutputObjects(const std::vector &results) +{ + const Size isp_output_size = output_stream_->configuration().size; + + for (auto <_r : lt_results_) + lt_r.matched = false; + + for (auto const &r : results) + { + bool matched = false; + for (auto <_r : lt_results_) + { + // Try and match a detected object in our long term list. + bool found = true; + for (unsigned int i = 0; i < NUM_KEYPOINTS; i++) + { + if (std::abs(lt_r.results.pose_keypoints[i].x - r.pose_keypoints[i].x) > + tolerance_ * isp_output_size.width || + std::abs(lt_r.results.pose_keypoints[i].y - r.pose_keypoints[i].y) > + tolerance_ * isp_output_size.height) + { + found = false; + break; + } + } + + if (found) + { + lt_r.matched = matched = true; + lt_r.results.pose_score = r.pose_score; + + for (unsigned int i = 0; i < NUM_KEYPOINTS; i++) + { + lt_r.results.pose_keypoint_scores[i] = + factor_ * r.pose_keypoint_scores[i] + (1 - factor_) * lt_r.results.pose_keypoint_scores[i]; + lt_r.results.pose_keypoints[i].x = + factor_ * r.pose_keypoints[i].x + (1 - factor_) * lt_r.results.pose_keypoints[i].x; + lt_r.results.pose_keypoints[i].y = + factor_ * r.pose_keypoints[i].y + (1 - factor_) * lt_r.results.pose_keypoints[i].y; + } + + // Reset the visibility counter for when the result next disappears. + lt_r.visible = visible_frames_; + // Decrement the hidden counter until the result becomes visible in the list. + lt_r.hidden = std::max(0, (int)lt_r.hidden - 1); + break; + } + } + + // Add the result to the long term list if not found. This result will remain hidden for hidden_frames_ + // consecutive frames. + if (!matched) + lt_results_.push_back({ r, visible_frames_, hidden_frames_, true }); + } + + for (auto <_r : lt_results_) + { + if (!lt_r.matched) + { + // If a non matched result in the long term list is still hidden, set visible count to 0 so that it must be + // matched for hidden_frames_ consecutive frames before becoming visible. Otherwise, decrement the visible + // count of unmatched objects in the long term list. + if (lt_r.hidden) + lt_r.visible = 0; + else + lt_r.visible--; + } + } + + // Remove now invisible objects from the long term list. 
+ lt_results_.erase(std::remove_if(lt_results_.begin(), lt_results_.end(), + [] (const LtResults &obj) { return !obj.matched && !obj.visible; }), + lt_results_.end()); +} + +PostProcessingStage *Create(RPiCamApp *app) +{ + return new PoseNet(app); +} + +RegisterStage reg(NAME, &Create); diff --git a/post_processing_stages/imx500/imx500_post_processing_stage.cpp b/post_processing_stages/imx500/imx500_post_processing_stage.cpp new file mode 100644 index 00000000..5114871f --- /dev/null +++ b/post_processing_stages/imx500/imx500_post_processing_stage.cpp @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright (C) 2024, Raspberry Pi Ltd + * + * imx500_post_rpocessing_stage.cpp - IMX500 post processing stage base class + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "imx500_post_processing_stage.hpp" + +#include + +using Stream = libcamera::Stream; +using Rectangle = libcamera::Rectangle; +using Size = libcamera::Size; + +using namespace std::chrono_literals; + +namespace fs = std::filesystem; + +namespace +{ + +const unsigned int ROI_CTRL_ID = 0x00982900; +const unsigned int NETWORK_FW_CTRL_ID = 0x00982901; + +inline int16_t conv_reg_signed(int16_t reg) +{ + constexpr unsigned int ROT_DNN_NORM_SIGNED_SHT = 8; + constexpr unsigned int ROT_DNN_NORM_MASK = 0x01FF; + + if (!((reg >> ROT_DNN_NORM_SIGNED_SHT) & 1)) + return reg; + else + return -((~reg + 1) & ROT_DNN_NORM_MASK); +} + +} // namespace + + +IMX500PostProcessingStage::IMX500PostProcessingStage(RPiCamApp *app) + : PostProcessingStage(app), device_fd_(-1) +{ + for (unsigned int i = 0; i < 16; i++) + { + const fs::path test_dir { "/sys/class/video4linux/v4l-subdev" + std::to_string(i) + "/device" }; + const fs::path module_dir { test_dir.string() + "/driver/module" }; + const fs::path id_dir { test_dir.string() + "/of_node" }; + + if (fs::exists(module_dir) && fs::is_symlink(module_dir)) + { + fs::path ln = fs::read_symlink(module_dir); + if (ln.string().find("imx500") != std::string::npos) + { + const std::string dev_node { "/dev/v4l-subdev" + std::to_string(i) }; + device_fd_ = open(dev_node.c_str(), O_RDONLY, 0); + + /* Find the progress indicator sysfs dev nodes. 
*/ + const std::string test_dir_str = fs::read_symlink(test_dir).string(); + const std::size_t pos = test_dir_str.find_last_of("/") + 1; + assert(pos != std::string::npos); + + const std::string imx500_device_id = test_dir_str.substr(pos); + std::string spi_device_id = imx500_device_id; + const std::size_t rep = spi_device_id.find("001a"); + spi_device_id.replace(rep, 4, "0040"); + + const fs::path imx500_progress { "/sys/kernel/debug/imx500-fw:" + imx500_device_id + "/fw_progress" }; + const fs::path spi_progress { "/sys/kernel/debug/rp2040-spi:" + spi_device_id + "/transfer_progress" }; + + fw_progress_.open(imx500_progress.c_str(), std::ios_base::in); + fw_progress_chunk_.open(spi_progress.c_str(), std::ios_base::in); + + break; + } + } + } + + if (device_fd_ < 0) + throw std::runtime_error("Cannot open imx500 device node"); + + SetInferenceRoiAbs({ 0, 0, 4056, 3040 }); +} + +IMX500PostProcessingStage::~IMX500PostProcessingStage() +{ + if (device_fd_ >= 0) + close(device_fd_); +} + +void IMX500PostProcessingStage::Read(boost::property_tree::ptree const ¶ms) +{ + if (params.find("save_input_tensor") != params.not_found()) + { + auto const &pt = params.get_child("save_input_tensor"); + + std::string filename = pt.get("filename"); + num_input_tensors_saved_ = pt.get("num_tensors", 1); + input_tensor_file_ = std::ofstream(filename, std::ios::out | std::ios::binary); + + norm_val_ = PostProcessingStage::GetJsonArray(pt, "norm_val", { 0, 0, 0, 0 }); + norm_shift_ = PostProcessingStage::GetJsonArray(pt, "norm_shift", { 0, 0, 0, 0 }); + div_val_ = PostProcessingStage::GetJsonArray(pt, "div_val", { 1, 1, 1, 1 }); + div_shift_ = pt.get("div_shift", 0); + } + + /* Load the network firmware. */ + std::string network_file = params.get("network_file"); + if (!fs::exists(network_file)) + throw std::runtime_error(network_file + " not found!"); + + int fd = open(network_file.c_str(), O_RDONLY, 0); + + v4l2_control ctrl { NETWORK_FW_CTRL_ID, fd }; + int ret = ioctl(device_fd_, VIDIOC_S_CTRL, &ctrl); + if (ret) + throw std::runtime_error("failed to set network fw ioctl"); + + close(fd); + + LOG(1, "\n------------------------------------------------------------------------------------------------------------------\n" + "NOTE: Loading network firmware onto the IMX500 can take several minutes, please do not close down the application." + "\n------------------------------------------------------------------------------------------------------------------\n"); +} + +void IMX500PostProcessingStage::Configure() +{ + output_stream_ = app_->GetMainStream(); + raw_stream_ = app_->RawStream(); + save_frames_ = num_input_tensors_saved_; +} + +bool IMX500PostProcessingStage::Process(CompletedRequestPtr &completed_request) +{ + auto input = completed_request->metadata.get(controls::rpi::CnnInputTensor); + + if (input && input_tensor_file_.is_open()) + { + // There is a chance that this may be called through multiple threads, so serialize the file access. + std::scoped_lock l(lock_); + + for (unsigned int i = 0; i < input->size(); i++) + { + const unsigned int channel = i % 3; // Assume RGB interleaved format. 
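+			// Each byte is renormalised as (s << norm_shift) - norm_val, then
+			// scaled by (s << div_shift) / div_val and masked back to 8 bits
+			// before being written out.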
+ int16_t sample = static_cast(input->data()[i]); + sample = (sample << norm_shift_[channel]) - conv_reg_signed(norm_val_[channel]); + sample = ((sample << div_shift_) / div_val_[channel]) & 0xFF; + input_tensor_file_.put(static_cast(sample)); + } + + if (--save_frames_ == 0) + input_tensor_file_.close(); + } + + return false; +} + +Rectangle IMX500PostProcessingStage::ConvertInferenceCoordinates(const std::vector &coords, + const Rectangle &scaler_crop) const +{ + // Convert the inference image co-ordinates into the final ISP output co-ordinates. + const Size &isp_output_size = output_stream_->configuration().size; + const Size &sensor_output_size = raw_stream_->configuration().size; + const Rectangle sensor_crop = scaler_crop.scaledBy(sensor_output_size, full_sensor_resolution_.size()); + + if (coords.size() != 4) + return {}; + + // Object scaled to the full sensor resolution + Rectangle obj; + obj.x = std::round(coords[0] * (full_sensor_resolution_.width - 1)); + obj.y = std::round(coords[1] * (full_sensor_resolution_.height - 1)); + obj.width = std::round(coords[2] * (full_sensor_resolution_.width - 1)); + obj.height = std::round(coords[3] * (full_sensor_resolution_.height - 1)); + + // Object on inference image -> sensor image + const Rectangle obj_sensor = obj.scaledBy(sensor_output_size, full_sensor_resolution_.size()); + // -> bounded to the ISP crop on the sensor image + const Rectangle obj_bound = obj_sensor.boundedTo(sensor_crop); + // -> translated by the start of the crop offset + const Rectangle obj_translated = obj_bound.translatedBy(-sensor_crop.topLeft()); + // -> and finally scaled to the ISP output. + const Rectangle obj_scaled = obj_translated.scaledBy(isp_output_size, sensor_crop.size()); + + LOG(2, obj << " -> (sensor) " << obj_sensor << " -> (bound) " << obj_bound + << " -> (translate) " << obj_translated << " -> (scaled) " << obj_scaled); + + return obj_scaled; +} + +void IMX500PostProcessingStage::SetInferenceRoiAbs(const Rectangle &roi_) const +{ + Rectangle roi = roi_.boundedTo(full_sensor_resolution_); + const uint32_t roi_array[4] = { (uint32_t)roi.x, (uint32_t)roi.y, (uint32_t)roi.width, (uint32_t)roi.height }; + + v4l2_ext_control roi_ctrl; + roi_ctrl.id = ROI_CTRL_ID; + roi_ctrl.p_u32 = (unsigned int *)&roi_array; + roi_ctrl.size = 16; + + v4l2_ext_controls ctrls; + ctrls.count = 1; + ctrls.controls = (v4l2_ext_control *)&roi_ctrl; + + int ret = ioctl(device_fd_, VIDIOC_S_EXT_CTRLS, &ctrls); + if (ret) + LOG_ERROR("IMX500: Unable to set absolute ROI"); +} + +void IMX500PostProcessingStage::SetInferenceRoiAuto(const unsigned int width, const unsigned int height) const +{ + Size s = full_sensor_resolution_.size().boundedToAspectRatio(Size(width, height)); + Rectangle r = s.centeredTo(full_sensor_resolution_.center()).enclosedIn(full_sensor_resolution_); + SetInferenceRoiAbs(r); +} + +void IMX500PostProcessingStage::ShowFwProgressBar() +{ + if (fw_progress_.is_open() && fw_progress_chunk_.is_open()) + { + std::thread progress_thread { &IMX500PostProcessingStage::doProgressBar, this }; + progress_thread.detach(); + } +} + +std::vector split(std::stringstream &stream) +{ + std::vector result; + unsigned int n; + + while (stream >> n) + result.push_back(n); + + return result; +} + +void IMX500PostProcessingStage::doProgressBar() +{ + while (1) + { + std::stringstream fw_progress_str, block_str; + unsigned int block_progress; + + fw_progress_.seekg(0); + fw_progress_chunk_.seekg(0); + fw_progress_str << fw_progress_.rdbuf(); + block_str << 
+
+void IMX500PostProcessingStage::SetInferenceRoiAbs(const Rectangle &roi_) const
+{
+	Rectangle roi = roi_.boundedTo(full_sensor_resolution_);
+	const uint32_t roi_array[4] = { (uint32_t)roi.x, (uint32_t)roi.y, (uint32_t)roi.width, (uint32_t)roi.height };
+
+	// Zero-initialise so that reserved fields in the v4l2 structures are cleared.
+	v4l2_ext_control roi_ctrl {};
+	roi_ctrl.id = ROI_CTRL_ID;
+	roi_ctrl.p_u32 = (unsigned int *)&roi_array;
+	roi_ctrl.size = 16;
+
+	v4l2_ext_controls ctrls {};
+	ctrls.count = 1;
+	ctrls.controls = &roi_ctrl;
+
+	int ret = ioctl(device_fd_, VIDIOC_S_EXT_CTRLS, &ctrls);
+	if (ret)
+		LOG_ERROR("IMX500: Unable to set absolute ROI");
+}
+
+void IMX500PostProcessingStage::SetInferenceRoiAuto(const unsigned int width, const unsigned int height) const
+{
+	// Largest crop of the requested aspect ratio, centred on the full sensor.
+	Size s = full_sensor_resolution_.size().boundedToAspectRatio(Size(width, height));
+	Rectangle r = s.centeredTo(full_sensor_resolution_.center()).enclosedIn(full_sensor_resolution_);
+	SetInferenceRoiAbs(r);
+}
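+
+/*
+ * Example (illustrative): for a network with a square 320x320 input tensor,
+ * SetInferenceRoiAuto(320, 320) picks the largest centred square on the
+ * 4056x3040 sensor, i.e. a 3040x3040 rectangle at (508, 0).
+ */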
+
+void IMX500PostProcessingStage::ShowFwProgressBar()
+{
+	if (fw_progress_.is_open() && fw_progress_chunk_.is_open())
+	{
+		std::thread progress_thread { &IMX500PostProcessingStage::doProgressBar, this };
+		progress_thread.detach();
+	}
+}
+
+std::vector<unsigned int> split(std::stringstream &stream)
+{
+	std::vector<unsigned int> result;
+	unsigned int n;
+
+	while (stream >> n)
+		result.push_back(n);
+
+	return result;
+}
+
+void IMX500PostProcessingStage::doProgressBar()
+{
+	while (1)
+	{
+		std::stringstream fw_progress_str, block_str;
+		unsigned int block_progress = 0;
+
+		fw_progress_.seekg(0);
+		fw_progress_chunk_.seekg(0);
+		fw_progress_str << fw_progress_.rdbuf();
+		block_str << fw_progress_chunk_.rdbuf();
+
+		std::vector<unsigned int> progress = split(fw_progress_str);
+		block_str >> block_progress;
+
+		// [0] == FW state, [1] == current size, [2] == total size.
+		if (progress.size() == 3 && progress[0] == 2 && progress[2])
+		{
+			unsigned int current = progress[1] + block_progress;
+			std::cerr << "Network Firmware Upload: " << current * 100 / progress[2] << "% (" << current / 1024 << "/"
+					  << progress[2] / 1024 << " KB)\r";
+			std::cerr.flush();
+
+			if (progress[1] == progress[2])
+			{
+				std::cerr << std::endl;
+				break;
+			}
+		}
+
+		std::this_thread::sleep_for(500ms);
+	}
+}
diff --git a/post_processing_stages/imx500/imx500_post_processing_stage.hpp b/post_processing_stages/imx500/imx500_post_processing_stage.hpp
new file mode 100644
index 00000000..4fcd76b5
--- /dev/null
+++ b/post_processing_stages/imx500/imx500_post_processing_stage.hpp
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * Copyright (C) 2024, Raspberry Pi Ltd
+ *
+ * imx500_post_processing_stage.hpp - IMX500 post processing stage base class
+ */
+
+#pragma once
+
+#include <fstream>
+#include <mutex>
+
+#include <boost/property_tree/ptree.hpp>
+
+#include <libcamera/geometry.h>
+#include <libcamera/stream.h>
+
+#include "core/completed_request.hpp"
+#include "core/rpicam_app.hpp"
+#include "post_processing_stages/post_processing_stage.hpp"
+
+class IMX500PostProcessingStage : public PostProcessingStage
+{
+public:
+	static constexpr unsigned int Max_Num_Tensors = 8;
+	static constexpr unsigned int Network_Name_Len = 64;
+
+	struct OutputTensorInfo
+	{
+		uint32_t tensor_data_num;
+		uint16_t size;
+		uint8_t ordinal;
+		uint8_t serialization_index;
+	};
+
+	struct CnnOutputTensorInfo
+	{
+		char network_name[Network_Name_Len];
+		uint32_t num_tensors;
+		OutputTensorInfo info[Max_Num_Tensors];
+	};
+
+	IMX500PostProcessingStage(RPiCamApp *app);
+	~IMX500PostProcessingStage();
+
+	void Read(boost::property_tree::ptree const &params) override;
+
+	void Configure() override;
+
+	bool Process(CompletedRequestPtr &completed_request) override;
+
+	libcamera::Rectangle ConvertInferenceCoordinates(const std::vector<float> &coords,
+													 const libcamera::Rectangle &scaler_crop) const;
+	void SetInferenceRoiAbs(const libcamera::Rectangle &roi_) const;
+	void SetInferenceRoiAuto(const unsigned int width, const unsigned int height) const;
+	void ShowFwProgressBar();
+
+protected:
+	libcamera::Rectangle full_sensor_resolution_ = libcamera::Rectangle(0, 0, 4056, 3040);
+	libcamera::Stream *output_stream_;
+	libcamera::Stream *raw_stream_;
+
+private:
+	void doProgressBar();
+
+	int device_fd_;
+	std::ifstream fw_progress_;
+	std::ifstream fw_progress_chunk_;
+
+	std::ofstream input_tensor_file_;
+	unsigned int num_input_tensors_saved_;
+	unsigned int save_frames_;
+	std::vector<uint32_t> norm_val_;
+	std::vector<uint32_t> norm_shift_;
+	std::vector<uint32_t> div_val_;
+	unsigned int div_shift_;
+	std::mutex lock_;
+};
diff --git a/post_processing_stages/imx500/meson.build b/post_processing_stages/imx500/meson.build
new file mode 100644
index 00000000..985fbc8f
--- /dev/null
+++ b/post_processing_stages/imx500/meson.build
@@ -0,0 +1,31 @@
+imx500_postprocessing_src = files([
+    # Base stage
+    'imx500_post_processing_stage.cpp',
+    # Object detection
+    'imx500_object_detection.cpp',
+    # Posenet
+    'imx500_posenet.cpp',
+])
+
+postproc_assets += files([
+    assets_dir / 'imx500_mobilenet_ssd.json',
+    assets_dir / 'imx500_posenet.json',
+])
+
+imx500_postprocessing_lib = shared_module('imx500-postproc', imx500_postprocessing_src,
+                                          dependencies : libcamera_dep,
+                                          include_directories : '../..',
+                                          install : true,
+                                          install_dir : posproc_libdir,
+                                          name_prefix : '',
+)
+
+if get_option('download_imx500_models')
+    download_script = meson.project_source_root() / 'utils' / 'download-imx500-models.sh'
+    custom_target('imx500-models',
+                  command : [ download_script, '@OUTPUT@' ],
+                  output : 'imx500-models',
+                  install : true,
+                  install_dir : get_option('datadir'),
+    )
+endif
diff --git a/post_processing_stages/meson.build b/post_processing_stages/meson.build
index 1dd6edf7..aa101413 100644
--- a/post_processing_stages/meson.build
+++ b/post_processing_stages/meson.build
@@ -109,6 +109,11 @@ if hailort_dep.found() and hailo_tappas_dep.found()
     enable_hailo = true
 endif
 
+# IMX500 postprocessing stages.
+if get_option('enable_imx500')
+    subdir('imx500')
+endif
+
 post_processing_headers = files([
     'histogram.hpp',
     'object_detect.hpp',
diff --git a/post_processing_stages/post_processing_stage.hpp b/post_processing_stages/post_processing_stage.hpp
index e745a491..30237ba5 100644
--- a/post_processing_stages/post_processing_stage.hpp
+++ b/post_processing_stages/post_processing_stage.hpp
@@ -5,9 +5,12 @@
  * post_processing_stage.hpp - Post processing stage base class definition.
  */
 
+#pragma once
+
 #include <chrono>
 #include <map>
 #include <string>
+#include <vector>
 
 // Prevents compiler warnings in Boost headers with more recent versions of GCC.
 #define BOOST_BIND_GLOBAL_PLACEHOLDERS
@@ -71,6 +74,24 @@ class PostProcessingStage
 		return std::chrono::duration<double, std::milli>(t2 - t1);
 	}
 
+	template <typename T>
+	static std::vector<T> GetJsonArray(const boost::property_tree::ptree &pt, const std::string &key,
+									   const std::vector<T> &default_value = {})
+	{
+		std::vector<T> vec;
+
+		if (pt.find(key) != pt.not_found())
+		{
+			for (auto &v : pt.get_child(key))
+				vec.push_back(v.second.get_value<T>());
+		}
+
+		// Pad any missing trailing entries from the default value.
+		for (unsigned int i = vec.size(); i < default_value.size(); i++)
+			vec.push_back(default_value[i]);
+
+		return vec;
+	}
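+
+	// Usage sketch (illustrative values): read "norm_val" as four numbers,
+	// padding with zeros if the JSON array has fewer entries:
+	//
+	//     auto norm = GetJsonArray<uint32_t>(pt, "norm_val", { 0, 0, 0, 0 });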
+
 	RPiCamApp *app_;
 };
diff --git a/utils/download-imx500-models.sh b/utils/download-imx500-models.sh
new file mode 100755
index 00000000..04e2ef3f
--- /dev/null
+++ b/utils/download-imx500-models.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+set -e
+
+networks=(
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_deeplabv3plus.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientdet_lite0_pp.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientnet_bo.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientnet_lite0.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientnetv2_b0.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientnetv2_b1.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_efficientnetv2_b2.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_higherhrnet_coco.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_inputtensoronly.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_levit_128s.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_mnasnet1.0.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_mobilenet_v2.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_mobilevit_xs.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_mobilevit_xxs.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_nanodet_plus_416x416_pp.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_nanodet_plus_416x416.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_posenet.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_regnetx_002.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_regnety_002.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_regnety_004.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_resnet18.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_shufflenet_v2_x1_5.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_squeezenet1.0.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_ssd_mobilenetv2_fpnlite_320x320_pp.rpk
+    https://github.com/raspberrypi/imx500-models/raw/main/imx500_network_yolov8n_pp.rpk
+)
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <output directory>"
+    exit 1
+fi
+
+dir=$1
+mkdir -p "$dir"
+
+for url in "${networks[@]}"; do
+    wget -nv -N -P "$dir" "$url"
+done

From fe7e364f6ab3bdc5e59962ee815edd9c5e38dc6e Mon Sep 17 00:00:00 2001
From: Serge Schneider
Date: Wed, 18 Sep 2024 15:00:02 +0100
Subject: [PATCH 4/4] Update gen_orig.yml workflow

Use Debian packaging tarball names
Update to action-gh-release v2
Generate IMX500 models tarball
Check that project and tag versions match
---
 .github/workflows/gen_orig.yml | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/gen_orig.yml b/.github/workflows/gen_orig.yml
index 8155a64f..d60d961d 100644
--- a/.github/workflows/gen_orig.yml
+++ b/.github/workflows/gen_orig.yml
@@ -19,18 +19,26 @@ jobs:
         sudo cp /usr/lib/x86_64-linux-gnu/pkgconfig/{,lib}camera.pc ||:
     - name: Check out repository code
       uses: actions/checkout@v4
-    - name: Generate tarball
+    - name: Generate tarballs
       run: |
        PATH="${HOME}/.local/bin:${PATH}"
+       TARBALL="rpicam-apps-${GITHUB_REF_NAME:1}.tar.xz"
        meson setup build
        meson dist --no-tests --include-subprojects -C build
+       if ! [ -f "build/meson-dist/$TARBALL" ]; then
+         echo "Expected tarball not found - $TARBALL"
+         echo "Does 'version' in meson.build match the tag?"
+         exit 1
+       fi
+       mv "build/meson-dist/$TARBALL" rpicam-apps_${GITHUB_REF_NAME:1}.orig.tar.xz
        ./utils/download-hailo-models.sh hailo-models
-       XZ_OPT=-9 tar -cJf hailo-models-${GITHUB_REF_NAME:1}.tar.xz hailo-models
+       XZ_OPT=-9 tar -cJf rpicam-apps_${GITHUB_REF_NAME:1}.orig-hailo-models.tar.xz hailo-models
+       ./utils/download-imx500-models.sh imx500-models
+       XZ_OPT=-9 tar -cJf rpicam-apps_${GITHUB_REF_NAME:1}.orig-imx500-models.tar.xz imx500-models
-    - name: Release tarball
-      uses: softprops/action-gh-release@v1
+    - name: Release tarballs
+      uses: softprops/action-gh-release@v2
       with:
         files: |
-          build/meson-dist/*.tar.xz
-          hailo-models-*.tar.xz
+          *.tar.xz
     - if: failure()
       run: cat build/meson-logs/meson-log.txt