From a85012ce979045c30e02d8599bfdb939b19ab4fe Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 16:46:50 +0000
Subject: [PATCH 01/50] wip: basic format mapping

---
 src/core/frame/frame_factory.h              | 14 ++++++++++++++
 src/modules/ffmpeg/producer/av_producer.cpp |  8 ++++++++
 src/modules/ffmpeg/util/av_util.cpp         | 14 ++++++++------
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 965b7e9b40..4c609f02a4 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -35,4 +35,18 @@ class frame_factory
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 };
 
+class frame_converter {
+  public:
+    frame_converter()                                = default;
+    frame_converter& operator=(const frame_converter&) = delete;
+    virtual ~frame_converter()                       = default;
+
+    frame_converter(const frame_converter&) = delete;
+
+    virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
+
+    virtual class draw_frame convert_frame(const class mutable_frame) = 0;
+
+};
+
 }} // namespace caspar::core
diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index b28ed4f307..749d1aa56b 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -508,6 +508,14 @@ struct Filter
                                               AV_PIX_FMT_YUVA422P,
                                               AV_PIX_FMT_YUVA420P,
                                               AV_PIX_FMT_UYVY422,
+                                              AV_PIX_FMT_YUV444P10LE,
+                                              AV_PIX_FMT_YUV422P10LE,
+                                              AV_PIX_FMT_YUV420P10LE,
+                                              // AV_PIX_FMT_YUV410P10LE,
+                                              AV_PIX_FMT_YUVA444P10LE,
+                                              AV_PIX_FMT_YUVA422P10LE,
+                                              AV_PIX_FMT_YUVA420P10LE,
+                                              // AV_PIX_FMT_UYVY42210LE,
                                               AV_PIX_FMT_NONE};
             FF(av_opt_set_int_list(sink, "pix_fmts", pix_fmts, -1, AV_OPT_SEARCH_CHILDREN));
 #ifdef _MSC_VER
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index 6fd3743b98..1721c64862 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -102,23 +102,25 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_ABGR:
             return core::pixel_format::abgr;
         case AV_PIX_FMT_YUV444P:
-            return core::pixel_format::ycbcr;
         case AV_PIX_FMT_YUV422P:
-            return core::pixel_format::ycbcr;
         case AV_PIX_FMT_YUV420P:
-            return core::pixel_format::ycbcr;
         case AV_PIX_FMT_YUV411P:
-            return core::pixel_format::ycbcr;
         case AV_PIX_FMT_YUV410P:
             return core::pixel_format::ycbcr;
         case AV_PIX_FMT_YUVA420P:
-            return core::pixel_format::ycbcra;
         case AV_PIX_FMT_YUVA422P:
-            return core::pixel_format::ycbcra;
         case AV_PIX_FMT_YUVA444P:
             return core::pixel_format::ycbcra;
         case AV_PIX_FMT_UYVY422:
             return core::pixel_format::uyvy;
+        case AV_PIX_FMT_YUV444P10LE:
+        case AV_PIX_FMT_YUV422P10LE:
+        case AV_PIX_FMT_YUV420P10LE:
+            return core::pixel_format::ycbcr; // TODO 10bit
+        case AV_PIX_FMT_YUVA444P10LE:
+        case AV_PIX_FMT_YUVA422P10LE:
+        case AV_PIX_FMT_YUVA420P10LE:
+            return core::pixel_format::ycbcra; // TODO 10bit
         default:
             return core::pixel_format::invalid;
     }

From 8cd4cfc47ef66a0baa7f58a00f5a6e7a45230c70 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 16:54:46 +0000
Subject: [PATCH 02/50] wip: add frame_factory::create_frame_converter() and use it in AVProducer

---
 src/core/frame/frame_factory.h              | 29 ++++++++++++---------
 src/modules/ffmpeg/producer/av_producer.cpp |  4 +--
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 4c609f02a4..57b660dfb8 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -23,18 +23,6 @@
 
 namespace caspar { namespace core {
 
-class frame_factory
-{
-  public:
-    frame_factory()                                = default;
-    frame_factory& operator=(const frame_factory&) = delete;
-    virtual ~frame_factory()                       = default;
-
-    frame_factory(const frame_factory&) = delete;
-
-    virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
-};
-
 class frame_converter {
   public:
     frame_converter()                                = default;
@@ -45,8 +33,23 @@ class frame_converter {
 
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
-    virtual class draw_frame convert_frame(const class mutable_frame) = 0;
+    virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0;
+
+};
 
+class frame_factory
+{
+  public:
+    frame_factory()                                = default;
+    frame_factory& operator=(const frame_factory&) = delete;
+    virtual ~frame_factory()                       = default;
+
+    frame_factory(const frame_factory&) = delete;
+
+    virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
+
+    virtual std::shared_ptr<frame_converter> create_frame_converter() = 0;
 };
 
+
 }} // namespace caspar::core
diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index 749d1aa56b..1491f67850 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -602,7 +602,7 @@ struct AVProducer::Impl
 
     spl::shared_ptr<diagnostics::graph> graph_;
 
-    const std::shared_ptr<core::frame_factory> frame_factory_;
+    const std::shared_ptr<core::frame_converter> frame_factory_;
     const core::video_format_desc              format_desc_;
     const AVRational                           format_tb_;
     const std::string                          name_;
@@ -655,7 +655,7 @@ struct AVProducer::Impl
          boost::optional<int64_t>             duration,
          bool                                 loop,
          int                                  seekable)
-        : frame_factory_(frame_factory)
+        : frame_factory_(frame_factory->create_frame_converter())
         , format_desc_(format_desc)
         , format_tb_({format_desc.duration, format_desc.time_scale * format_desc.field_count})
         , name_(name)

From bf121208b785201dbc3c924edd8e7a4f7f9cde91 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 17:22:51 +0000
Subject: [PATCH 03/50] wip: add ogl_frame_converter and make_frame2

---
 src/accelerator/CMakeLists.txt                |  2 +
 src/accelerator/ogl/image/frame_converter.cpp | 67 +++++++++++++++++++
 src/accelerator/ogl/image/frame_converter.h   | 52 ++++++++++++++
 src/accelerator/ogl/image/image_mixer.cpp     | 10 +++
 src/accelerator/ogl/image/image_mixer.h       |  2 +
 src/core/mixer/image/image_mixer.h            |  2 +
 src/modules/ffmpeg/producer/av_producer.cpp   |  2 +-
 src/modules/ffmpeg/util/av_util.cpp           | 49 ++++++++++----
 src/modules/ffmpeg/util/av_util.h             |  5 ++
 9 files changed, 178 insertions(+), 13 deletions(-)
 create mode 100644 src/accelerator/ogl/image/frame_converter.cpp
 create mode 100644 src/accelerator/ogl/image/frame_converter.h

diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt
index 3a1b576af4..f6a1ca74a0 100644
--- a/src/accelerator/CMakeLists.txt
+++ b/src/accelerator/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SOURCES
 	ogl/image/image_kernel.cpp
 	ogl/image/image_mixer.cpp
 	ogl/image/image_shader.cpp
+	ogl/image/frame_converter.cpp
 
 	ogl/util/buffer.cpp
 	ogl/util/device.cpp
@@ -17,6 +18,7 @@ set(HEADERS
 	ogl/image/image_kernel.h
 	ogl/image/image_mixer.h
 	ogl/image/image_shader.h
+	ogl/image/frame_converter.h
 
 	ogl/util/buffer.h
 	ogl/util/device.h
diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
new file mode 100644
index 0000000000..4abc7bb6ba
--- /dev/null
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Julian Waller, julian@superfly.tv
+ */
+#include "frame_converter.h"
+
+namespace caspar::accelerator::ogl {
+
+ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr<device>& ogl)
+    : ogl_(ogl)
+{
+}
+
+core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc)
+{
+
+    std::vector<array<std::uint8_t>> image_data;
+    for (auto& plane : desc.planes) {
+        image_data.push_back(ogl_->create_array(plane.size));
+    }
+
+    using future_texture = std::shared_future<std::shared_ptr<texture>>;
+
+    std::weak_ptr<ogl_frame_converter> weak_self = shared_from_this();
+    return core::mutable_frame(
+        tag,
+        std::move(image_data),
+        array<int32_t>{},
+        desc,
+        [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
+            // TODO - replace this
+            auto self = weak_self.lock();
+            if (!self) {
+                return boost::any{};
+            }
+            std::vector<future_texture> textures;
+            for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+                textures.emplace_back(self->ogl_->copy_async(
+                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+            }
+            return std::make_shared<decltype(textures)>(std::move(textures));
+        });
+}
+
+core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame)
+{
+    // TODO
+    return core::draw_frame{};
+}
+
+} // namespace caspar::accelerator::ogl
\ No newline at end of file
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
new file mode 100644
index 0000000000..1a9ec1f80b
--- /dev/null
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Julian Waller, julian@superfly.tv
+ */
+
+#pragma once
+
+#include <core/frame/draw_frame.h>
+#include <core/frame/frame.h>
+#include <core/frame/frame_factory.h>
+
+#include "../util/device.h"
+
+namespace caspar::accelerator::ogl {
+
+class ogl_frame_converter
+    : public core::frame_converter
+    , public std::enable_shared_from_this<ogl_frame_converter>
+{
+  public:
+    ogl_frame_converter(const spl::shared_ptr<device>& ogl);
+    ogl_frame_converter(const ogl_frame_converter&) = delete;
+
+    ~ogl_frame_converter() override = default;
+
+    ogl_frame_converter& operator=(const ogl_frame_converter&) = delete;
+
+    core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override;
+
+    core::draw_frame convert_frame(const core::mutable_frame& frame) override;
+
+  private:
+    const spl::shared_ptr<device> ogl_;
+};
+
+} // namespace caspar::accelerator::ogl
\ No newline at end of file
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 56212e239f..01fb752681 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -25,6 +25,7 @@
 #include "../util/buffer.h"
 #include "../util/device.h"
 #include "../util/texture.h"
+#include "frame_converter.h"
 
 #include <common/array.h>
 #include <common/future.h>
@@ -327,6 +328,11 @@ struct image_mixer::impl
                 return std::make_shared<decltype(textures)>(std::move(textures));
             });
     }
+
+    std::shared_ptr<core::frame_converter> create_frame_converter() override
+    {
+       return std::make_shared<ogl_frame_converter>(ogl_);
+    }
 };
 
 image_mixer::image_mixer(const spl::shared_ptr<device>& ogl, const int channel_id, const size_t max_frame_size)
@@ -345,5 +351,9 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel
 {
     return impl_->create_frame(tag, desc);
 }
+std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter()
+{
+    return impl_->create_frame_converter();
+}
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index c9034238d8..19be36225d 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -46,6 +46,8 @@ class image_mixer final : public core::image_mixer
     std::future<array<const std::uint8_t>> operator()(const core::video_format_desc& format_desc) override;
     core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
 
+    std::shared_ptr<core::frame_converter> create_frame_converter() override;
+
     // core::image_mixer
 
     void push(const core::frame_transform& frame) override;
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index 0922e38261..dcff0d1b79 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -48,6 +48,8 @@ class image_mixer
     virtual std::future<array<const uint8_t>> operator()(const struct video_format_desc& format_desc) = 0;
 
     class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0;
+
+    std::shared_ptr<frame_converter> create_frame_converter() override = 0;
 };
 
 }} // namespace caspar::core
diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index 1491f67850..b1d66dc124 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -871,7 +871,7 @@ struct AVProducer::Impl
                 frame.duration   = av_rescale_q(frame.audio->nb_samples, {1, sr}, TIME_BASE_Q);
             }
 
-            frame.frame       = core::draw_frame(make_frame(this, *frame_factory_, frame.video, frame.audio));
+            frame.frame       = core::draw_frame(make_frame2(this, frame_factory_, frame.video, frame.audio));
             frame.frame_count = frame_count_++;
 
             graph_->set_value("decode-time", decode_timer.elapsed() * format_desc_.fps * 0.5);
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index 1721c64862..7c410b6258 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -40,19 +40,12 @@ std::shared_ptr<AVPacket> alloc_packet()
     return packet;
 }
 
-core::mutable_frame make_frame(void*                    tag,
-                               core::frame_factory&     frame_factory,
-                               std::shared_ptr<AVFrame> video,
-                               std::shared_ptr<AVFrame> audio)
+core::mutable_frame copy_frame_tmp(core::mutable_frame            frame,
+                                   const core::pixel_format_desc& pix_desc,
+                                   const std::vector<int>&        data_map,
+                                   std::shared_ptr<AVFrame>       video,
+                                   std::shared_ptr<AVFrame>       audio)
 {
-    std::vector<int> data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes
-
-    const auto pix_desc =
-        video ? pixel_format_desc(static_cast<AVPixelFormat>(video->format), video->width, video->height, data_map)
-              : core::pixel_format_desc(core::pixel_format::invalid);
-
-    auto frame = frame_factory.create_frame(tag, pix_desc);
-
     tbb::parallel_invoke(
         [&]() {
             if (video) {
@@ -84,6 +77,38 @@ core::mutable_frame make_frame(void*                    tag,
     return frame;
 }
 
+core::mutable_frame make_frame(void*                    tag,
+                               core::frame_factory&     frame_factory,
+                               std::shared_ptr<AVFrame> video,
+                               std::shared_ptr<AVFrame> audio)
+{
+    std::vector<int> data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes
+
+    const auto pix_desc =
+        video ? pixel_format_desc(static_cast<AVPixelFormat>(video->format), video->width, video->height, data_map)
+              : core::pixel_format_desc(core::pixel_format::invalid);
+
+    auto frame = frame_factory.create_frame(tag, pix_desc);
+
+    return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio));
+}
+
+core::mutable_frame make_frame2(void*                                         tag,
+                                const std::shared_ptr<core::frame_converter>& frame_factory,
+                                std::shared_ptr<AVFrame>                      video,
+                                std::shared_ptr<AVFrame>                      audio)
+{
+    std::vector<int> data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes
+
+    const auto pix_desc =
+        video ? pixel_format_desc(static_cast<AVPixelFormat>(video->format), video->width, video->height, data_map)
+              : core::pixel_format_desc(core::pixel_format::invalid);
+
+    auto frame = frame_factory->create_frame(tag, pix_desc);
+
+    return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio));
+}
+
 core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
 {
     switch (pix_fmt) {
diff --git a/src/modules/ffmpeg/util/av_util.h b/src/modules/ffmpeg/util/av_util.h
index 8a6ceed869..c1f1b97bb7 100644
--- a/src/modules/ffmpeg/util/av_util.h
+++ b/src/modules/ffmpeg/util/av_util.h
@@ -27,6 +27,11 @@ core::mutable_frame     make_frame(void*                    tag,
                                    std::shared_ptr<AVFrame> video,
                                    std::shared_ptr<AVFrame> audio);
 
+core::mutable_frame make_frame2(void*                                         tag,
+                                const std::shared_ptr<core::frame_converter>& frame_factory,
+                                std::shared_ptr<AVFrame>                      video,
+                                std::shared_ptr<AVFrame>                      audio);
+
 std::shared_ptr<AVFrame> make_av_video_frame(const core::const_frame& frame, const core::video_format_desc& format_des);
 std::shared_ptr<AVFrame> make_av_audio_frame(const core::const_frame& frame, const core::video_format_desc& format_des);
 

From 8a2a85bde28d25d40f04a09255d44b219d363864 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 17:35:26 +0000
Subject: [PATCH 04/50] wip: add ycbcr10/ycbcra10 pixel formats and image_mixer convert_frame stub

---
 src/accelerator/ogl/image/image_mixer.cpp | 43 +++++++++++++++++------
 src/core/frame/pixel_format.h             |  2 ++
 src/modules/ffmpeg/util/av_util.cpp       |  4 +--
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 01fb752681..b0db1b52b7 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -302,6 +302,21 @@ struct image_mixer::impl
         return renderer_(std::move(layers_), format_desc);
     }
 
+    std::vector<future_texture> convert_frame(const std::vector<array<const std::uint8_t>>& image_data,
+                                              const core::pixel_format_desc&                desc) const
+    {
+        const auto& plane0 =  desc.planes[0]; // TODO - this doesnt feel safe, or accurate
+
+        std::vector<future_texture> textures;
+        const auto texture = ogl_->create_texture(plane0.width, plane0.height,
+                                                                   4); // TODO - don't clear
+
+        // TODO - how to run and link shader?
+        textures.emplace_back(make_ready_future(texture));
+
+        return textures;
+    }
+
     core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override
     {
         std::vector<array<std::uint8_t>> image_data;
@@ -320,18 +335,29 @@ struct image_mixer::impl
                 if (!self) {
                     return boost::any{};
                 }
-                std::vector<future_texture> textures;
-                for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-                    textures.emplace_back(self->ogl_->copy_async(
-                        image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+
+                switch (desc.format) {
+                    case core::pixel_format::ycbcr10:
+                    case core::pixel_format::ycbcra10: {
+                        std::vector<future_texture> textures = self->convert_frame(image_data, desc);
+
+                        return std::make_shared<decltype(textures)>(std::move(textures));
+                    }
+                    default: {
+                        std::vector<future_texture> textures;
+                        for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+                            textures.emplace_back(self->ogl_->copy_async(
+                                image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+                        }
+                        return std::make_shared<decltype(textures)>(std::move(textures));
+                    }
                 }
-                return std::make_shared<decltype(textures)>(std::move(textures));
             });
     }
 
     std::shared_ptr<core::frame_converter> create_frame_converter() override
     {
-       return std::make_shared<ogl_frame_converter>(ogl_);
+        return std::make_shared<ogl_frame_converter>(ogl_);
     }
 };
 
@@ -351,9 +377,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel
 {
     return impl_->create_frame(tag, desc);
 }
-std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter()
-{
-    return impl_->create_frame_converter();
-}
+std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter() { return impl_->create_frame_converter(); }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index 007e1b5082..efe45e071d 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -38,6 +38,8 @@ enum class pixel_format
     bgr,
     rgb,
     uyvy,
+    ycbcr10,
+    ycbcra10,
     count,
     invalid,
 };
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index 7c410b6258..c2f2c2ae3a 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -141,11 +141,11 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_YUV444P10LE:
         case AV_PIX_FMT_YUV422P10LE:
         case AV_PIX_FMT_YUV420P10LE:
-            return core::pixel_format::ycbcr; // TODO 10bit
+            return core::pixel_format::ycbcr10;
         case AV_PIX_FMT_YUVA444P10LE:
         case AV_PIX_FMT_YUVA422P10LE:
         case AV_PIX_FMT_YUVA420P10LE:
-            return core::pixel_format::ycbcra; // TODO 10bit
+            return core::pixel_format::ycbcra10;
         default:
             return core::pixel_format::invalid;
     }

From 73fab1711e69fe7d165c98539130c76d226a8e78 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 17:41:52 +0000
Subject: [PATCH 05/50] wip: sketch device::convert_frame (commented out)

---
 src/accelerator/ogl/image/image_mixer.cpp | 7 ++++++-
 src/accelerator/ogl/util/device.cpp       | 3 +++
 src/accelerator/ogl/util/device.h         | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index b0db1b52b7..44e05a7f47 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -305,15 +305,20 @@ struct image_mixer::impl
     std::vector<future_texture> convert_frame(const std::vector<array<const std::uint8_t>>& image_data,
                                               const core::pixel_format_desc&                desc) const
     {
-        const auto& plane0 =  desc.planes[0]; // TODO - this doesnt feel safe, or accurate
 
         std::vector<future_texture> textures;
+        const auto& plane0 =  desc.planes[0]; // TODO - this doesnt feel safe, or accurate
         const auto texture = ogl_->create_texture(plane0.width, plane0.height,
                                                                    4); // TODO - don't clear
 
         // TODO - how to run and link shader?
         textures.emplace_back(make_ready_future(texture));
 
+                /*
+        const auto texture = ogl_->convert_frame(std::move(image_data), desc);
+        textures.emplace_back(texture);
+*/
+
         return textures;
     }
 
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 0576153f7f..27586388f2 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -437,6 +437,9 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source);
 }
+//std::future<std::shared_ptr<texture>> device::convert_frame() {
+    // TODO
+//}
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
 boost::property_tree::wptree device::info() const { return impl_->info(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index d7f1cef1a4..09492aaa27 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -51,6 +51,8 @@ class device final
     std::future<std::shared_ptr<class texture>>
                                       copy_async(const array<const uint8_t>& source, int width, int height, int stride);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
+
+//    std::future<std::shared_ptr<texture>> convert_frame(std::vector<array<const std::uint8_t>> image_data);
     template <typename Func>
     auto dispatch_async(Func&& func)
     {

From c58ad0e53fb9ad0bc3ec744518d65247a30dd361 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 1 Dec 2023 18:41:22 +0000
Subject: [PATCH 06/50] things are hooked up, but has no output

---
 src/accelerator/CMakeLists.txt                |   4 +
 src/accelerator/ogl/image/image_mixer.cpp     |  10 +-
 src/accelerator/ogl/image/shader_to_rgba.comp |  15 ++
 src/accelerator/ogl/util/compute_shader.cpp   | 142 ++++++++++++++++++
 src/accelerator/ogl/util/compute_shader.h     |  63 ++++++++
 src/accelerator/ogl/util/device.cpp           |  58 ++++++-
 src/accelerator/ogl/util/device.h             |   4 +-
 src/accelerator/ogl/util/texture.cpp          |  18 ++-
 src/modules/ffmpeg/producer/av_producer.cpp   |   6 +-
 src/modules/ffmpeg/util/av_util.cpp           |   6 +-
 src/shell/casparcg.config                     |   3 +
 11 files changed, 309 insertions(+), 20 deletions(-)
 create mode 100644 src/accelerator/ogl/image/shader_to_rgba.comp
 create mode 100644 src/accelerator/ogl/util/compute_shader.cpp
 create mode 100644 src/accelerator/ogl/util/compute_shader.h

diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt
index f6a1ca74a0..ee2eda3b2e 100644
--- a/src/accelerator/CMakeLists.txt
+++ b/src/accelerator/CMakeLists.txt
@@ -8,6 +8,7 @@ set(SOURCES
 	ogl/image/frame_converter.cpp
 
 	ogl/util/buffer.cpp
+	ogl/util/compute_shader.cpp
 	ogl/util/device.cpp
 	ogl/util/shader.cpp
 	ogl/util/texture.cpp
@@ -21,12 +22,14 @@ set(HEADERS
 	ogl/image/frame_converter.h
 
 	ogl/util/buffer.h
+	ogl/util/compute_shader.h
 	ogl/util/device.h
 	ogl/util/shader.h
 	ogl/util/texture.h
 
 	ogl_image_vertex.h
 	ogl_image_fragment.h
+	ogl_image_to_rgba.h
 
 	accelerator.h
 	StdAfx.h
@@ -34,6 +37,7 @@ set(HEADERS
 
 bin2c("ogl/image/shader.vert" "ogl_image_vertex.h" "caspar::accelerator::ogl" "vertex_shader")
 bin2c("ogl/image/shader.frag" "ogl_image_fragment.h" "caspar::accelerator::ogl" "fragment_shader")
+bin2c("ogl/image/shader_to_rgba.comp" "ogl_image_to_rgba.h" "caspar::accelerator::ogl" "compute_to_rgba_shader")
 
 casparcg_add_library(accelerator SOURCES ${SOURCES} ${HEADERS})
 target_include_directories(accelerator PRIVATE
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 44e05a7f47..36c1b7bd51 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -305,20 +305,20 @@ struct image_mixer::impl
     std::vector<future_texture> convert_frame(const std::vector<array<const std::uint8_t>>& image_data,
                                               const core::pixel_format_desc&                desc) const
     {
+        const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate
 
         std::vector<future_texture> textures;
-        const auto& plane0 =  desc.planes[0]; // TODO - this doesnt feel safe, or accurate
+        /*
         const auto texture = ogl_->create_texture(plane0.width, plane0.height,
                                                                    4); // TODO - don't clear
 
         // TODO - how to run and link shader?
         textures.emplace_back(make_ready_future(texture));
-
-                /*
-        const auto texture = ogl_->convert_frame(std::move(image_data), desc);
-        textures.emplace_back(texture);
 */
 
+        textures.emplace_back(ogl_->convert_frame(
+            image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter?
+
         return textures;
     }
 
diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp
new file mode 100644
index 0000000000..abc7adc03c
--- /dev/null
+++ b/src/accelerator/ogl/image/shader_to_rgba.comp
@@ -0,0 +1,15 @@
+#version 430
+
+layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
+
+void main() {
+    vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
+    ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
+
+    value.x = float(texelCoord.x)/(gl_NumWorkGroups.x);
+    value.y = float(texelCoord.y)/(gl_NumWorkGroups.y);
+
+    imageStore(imgOutput, texelCoord, value);
+}
\ No newline at end of file
diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp
new file mode 100644
index 0000000000..612bf8b922
--- /dev/null
+++ b/src/accelerator/ogl/util/compute_shader.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Robert Nagy, ronag89@gmail.com
+ */
+#include "compute_shader.h"
+
+#include <common/gl/gl_check.h>
+
+#include <GL/glew.h>
+
+#include <unordered_map>
+
+namespace caspar { namespace accelerator { namespace ogl {
+
+struct compute_shader::impl
+{
+    GLuint                                 program_;
+    std::unordered_map<std::string, GLint> uniform_locations_;
+    std::unordered_map<std::string, GLint> attrib_locations_;
+
+    impl(const impl&)            = delete;
+    impl& operator=(const impl&) = delete;
+
+  public:
+    impl(const std::string& compute_source_str)
+        : program_(0)
+    {
+        int work_grp_cnt[3];
+
+        glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &work_grp_cnt[0]);
+        glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]);
+        glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]);
+
+        printf("max global (total) work group counts x:%i y:%i z:%i\n",
+               work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]);
+
+        GLint success;
+
+        const char* compute_source = compute_source_str.c_str();
+
+        auto compute_shader = glCreateShaderObjectARB(GL_COMPUTE_SHADER);
+
+        GL(glShaderSourceARB(compute_shader, 1, &compute_source, NULL));
+        GL(glCompileShaderARB(compute_shader));
+
+        GL(glGetObjectParameterivARB(compute_shader, GL_OBJECT_COMPILE_STATUS_ARB, &success));
+        if (success == GL_FALSE) {
+            char info[2048];
+            GL(glGetInfoLogARB(compute_shader, sizeof(info), 0, info));
+            GL(glDeleteObjectARB(compute_shader));
+            std::stringstream str;
+            str << "Failed to compile compute shader:" << std::endl << info << std::endl;
+            CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str()));
+        }
+
+        program_ = glCreateProgramObjectARB();
+
+        GL(glAttachObjectARB(program_, compute_shader));
+        GL(glLinkProgramARB(program_));
+
+        GL(glDeleteObjectARB(compute_shader));
+
+        GL(glGetObjectParameterivARB(program_, GL_OBJECT_LINK_STATUS_ARB, &success));
+        if (success == GL_FALSE) {
+            char info[2048];
+            GL(glGetInfoLogARB(program_, sizeof(info), 0, info));
+            GL(glDeleteObjectARB(program_));
+            std::stringstream str;
+            str << "Failed to link shader program:" << std::endl << info << std::endl;
+            CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str()));
+        }
+        GL(glUseProgramObjectARB(program_));
+    }
+
+    ~impl() { glDeleteProgram(program_); }
+
+    GLint get_uniform_location(const char* name)
+    {
+        auto it = uniform_locations_.find(name);
+        if (it == uniform_locations_.end())
+            it = uniform_locations_.insert(std::make_pair(name, glGetUniformLocation(program_, name))).first;
+        return it->second;
+    }
+
+    GLint get_attrib_location(const char* name)
+    {
+        auto it = attrib_locations_.find(name);
+        if (it == attrib_locations_.end())
+            it = attrib_locations_.insert(std::make_pair(name, glGetAttribLocation(program_, name))).first;
+        return it->second;
+    }
+
+    void set(const std::string& name, bool value) { set(name, value ? 1 : 0); }
+
+    void set(const std::string& name, int value) { GL(glUniform1i(get_uniform_location(name.c_str()), value)); }
+
+    void set(const std::string& name, float value) { GL(glUniform1f(get_uniform_location(name.c_str()), value)); }
+
+    void set(const std::string& name, double value0, double value1)
+    {
+        GL(glUniform2f(get_uniform_location(name.c_str()), static_cast<float>(value0), static_cast<float>(value1)));
+    }
+
+    void set(const std::string& name, double value)
+    {
+        GL(glUniform1f(get_uniform_location(name.c_str()), static_cast<float>(value)));
+    }
+
+    void use() { GL(glUseProgramObjectARB(program_)); }
+};
+
+compute_shader::compute_shader(const std::string& compute_source_str)
+    : impl_(new impl(compute_source_str))
+{
+}
+compute_shader::~compute_shader() {}
+void  compute_shader::set(const std::string& name, bool value) { impl_->set(name, value); }
+void  compute_shader::set(const std::string& name, int value) { impl_->set(name, value); }
+void  compute_shader::set(const std::string& name, float value) { impl_->set(name, value); }
+void  compute_shader::set(const std::string& name, double value0, double value1) { impl_->set(name, value0, value1); }
+void  compute_shader::set(const std::string& name, double value) { impl_->set(name, value); }
+GLint compute_shader::get_attrib_location(const char* name) { return impl_->get_attrib_location(name); }
+int   compute_shader::id() const { return impl_->program_; }
+void  compute_shader::use() const { impl_->use(); }
+
+}}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/util/compute_shader.h b/src/accelerator/ogl/util/compute_shader.h
new file mode 100644
index 0000000000..71aa6bb290
--- /dev/null
+++ b/src/accelerator/ogl/util/compute_shader.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Robert Nagy, ronag89@gmail.com
+ */
+
+#pragma once
+
+#include <GL/glew.h>
+#include <memory>
+#include <string>
+#include <type_traits>
+
+namespace caspar { namespace accelerator { namespace ogl {
+
+class compute_shader final
+{
+    compute_shader(const compute_shader&);
+    compute_shader& operator=(const compute_shader&);
+
+  public:
+    compute_shader(const std::string& compute_source_str);
+    ~compute_shader();
+
+    void set(const std::string& name, bool value);
+    void set(const std::string& name, int value);
+    void set(const std::string& name, float value);
+    void set(const std::string& name, double value0, double value1);
+    void set(const std::string& name, double value);
+
+    GLint get_attrib_location(const char* name);
+
+    template <typename E>
+    typename std::enable_if<std::is_enum<E>::value, void>::type set(const std::string& name, E value)
+    {
+        set(name, static_cast<typename std::underlying_type<E>::type>(value));
+    }
+
+    void use() const;
+
+    int id() const;
+
+  private:
+    struct impl;
+    std::unique_ptr<impl> impl_;
+};
+
+}}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 27586388f2..2ccdb87753 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -21,9 +21,10 @@
 #include "device.h"
 
 #include "buffer.h"
-#include "shader.h"
+#include "compute_shader.h"
 #include "texture.h"
 
+
 #include <common/array.h>
 #include <common/assert.h>
 #include <common/env.h>
@@ -41,6 +42,7 @@
 
 #include <boost/asio/deadline_timer.hpp>
 #include <boost/asio/dispatch.hpp>
+#include <memory>
 #include <boost/asio/spawn.hpp>
 #include <boost/property_tree/ptree.hpp>
 
@@ -51,6 +53,8 @@
 #include <future>
 #include <thread>
 
+#include "ogl_image_to_rgba.h"
+
 namespace caspar { namespace accelerator { namespace ogl {
 
 using namespace boost::asio;
@@ -69,6 +73,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     sync_queue_t sync_queue_;
 
+    std::unique_ptr<compute_shader> compute_shader_;
+
     GLuint fbo_;
 
     std::wstring version_;
@@ -113,6 +119,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
         GL(glCreateFramebuffers(1, &fbo_));
         GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_));
 
+        compute_shader_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
+
         device_.setActive(false);
 
         thread_ = std::thread([&] {
@@ -175,7 +183,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     std::shared_ptr<texture> create_texture(int width, int height, int stride, bool clear)
     {
-        CASPAR_VERIFY(stride > 0 && stride < 5);
+        CASPAR_VERIFY(stride > 0 && stride < 6);
         CASPAR_VERIFY(width > 0 && height > 0);
 
         // TODO (perf) Shared pool.
@@ -221,6 +229,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
         return array<uint8_t>(ptr, buf->size(), buf);
     }
 
+
+
     std::future<std::shared_ptr<texture>>
     copy_async(const array<const uint8_t>& source, int width, int height, int stride)
     {
@@ -283,6 +293,42 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
+    std::future<std::shared_ptr<texture>>
+    convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format)
+    {
+        return dispatch_async([=] {
+
+            auto tex = create_texture(width, height, 5, false);
+
+            //tex->bind(0);
+            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
+            compute_shader_->use();
+
+            glDispatchCompute((unsigned int)width, (unsigned int)height, 1);
+
+            // make sure writing to image has finished before read
+//            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop
+            glMemoryBarrier(GL_ALL_BARRIER_BITS);
+
+
+
+            /*
+std::shared_ptr<buffer> buf;
+            auto tmp = source.storage<std::shared_ptr<buffer>>();
+            if (tmp) {
+                buf = *tmp;
+            } else {
+                buf = create_buffer(static_cast<int>(source.size()), true);
+                // TODO (perf) Copy inside a TBB worker.
+                std::memcpy(buf->data(), source.data(), source.size());
+            }
+             */
+
+            // tex->copy_from(*buf);
+            return tex;
+        });
+    }
+
 #ifdef WIN32
     std::future<std::shared_ptr<texture>> copy_async(GLuint source, int width, int height, int stride)
     {
@@ -437,9 +483,11 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source);
 }
-//std::future<std::shared_ptr<texture>> device::convert_frame() {
-    // TODO
-//}
+std::future<std::shared_ptr<texture>>
+device::convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format)
+{
+    return impl_->convert_frame(sources, width, height, format);
+}
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
 boost::property_tree::wptree device::info() const { return impl_->info(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 09492aaa27..49e1a03a90 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -52,7 +52,9 @@ class device final
                                       copy_async(const array<const uint8_t>& source, int width, int height, int stride);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
 
-//    std::future<std::shared_ptr<texture>> convert_frame(std::vector<array<const std::uint8_t>> image_data);
+    std::future<std::shared_ptr<texture>>
+    convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format);
+
     template <typename Func>
     auto dispatch_async(Func&& func)
     {
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 8682d060ef..26f23b2ce8 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -28,9 +28,9 @@
 
 namespace caspar { namespace accelerator { namespace ogl {
 
-static GLenum FORMAT[]          = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA};
-static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8};
-static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV};
+static GLenum FORMAT[]          = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA};
+static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F};
+static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE};
 
 struct texture::impl
 {
@@ -50,12 +50,22 @@ struct texture::impl
         , stride_(stride)
         , size_(width * height * stride)
     {
+        if (stride == 5) {
+            size_ = width * height * 16;
+        }
+
+
         GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_));
         GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
         GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_));
+
+
+        //bind();
+        //GL(glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, nullptr)); // HACK
+        //unbind();
     }
 
     ~impl() { glDeleteTextures(1, &id_); }
@@ -131,7 +141,7 @@ void texture::copy_to(buffer& dest) { impl_->copy_to(dest); }
 int  texture::width() const { return impl_->width_; }
 int  texture::height() const { return impl_->height_; }
 int  texture::stride() const { return impl_->stride_; }
-int  texture::size() const { return impl_->width_ * impl_->height_ * impl_->stride_; }
+int  texture::size() const { return impl_->size_; }
 int  texture::id() const { return impl_->id_; }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index b1d66dc124..749d1aa56b 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -602,7 +602,7 @@ struct AVProducer::Impl
 
     spl::shared_ptr<diagnostics::graph> graph_;
 
-    const std::shared_ptr<core::frame_converter> frame_factory_;
+    const std::shared_ptr<core::frame_factory> frame_factory_;
     const core::video_format_desc              format_desc_;
     const AVRational                           format_tb_;
     const std::string                          name_;
@@ -655,7 +655,7 @@ struct AVProducer::Impl
          boost::optional<int64_t>             duration,
          bool                                 loop,
          int                                  seekable)
-        : frame_factory_(frame_factory->create_frame_converter())
+        : frame_factory_(frame_factory)
         , format_desc_(format_desc)
         , format_tb_({format_desc.duration, format_desc.time_scale * format_desc.field_count})
         , name_(name)
@@ -871,7 +871,7 @@ struct AVProducer::Impl
                 frame.duration   = av_rescale_q(frame.audio->nb_samples, {1, sr}, TIME_BASE_Q);
             }
 
-            frame.frame       = core::draw_frame(make_frame2(this, frame_factory_, frame.video, frame.audio));
+            frame.frame       = core::draw_frame(make_frame(this, *frame_factory_, frame.video, frame.audio));
             frame.frame_count = frame_count_++;
 
             graph_->set_value("decode-time", decode_timer.elapsed() * format_desc_.fps * 0.5);
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index c2f2c2ae3a..b71e42ad86 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -178,7 +178,9 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             return desc;
         }
         case core::pixel_format::ycbcr:
-        case core::pixel_format::ycbcra: {
+        case core::pixel_format::ycbcra:
+        case core::pixel_format::ycbcr10:
+        case core::pixel_format::ycbcra10:{
             // Find chroma height
             // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
             // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
@@ -201,7 +203,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1));
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1));
 
-            if (desc.format == core::pixel_format::ycbcra)
+            if (desc.format == core::pixel_format::ycbcra||desc.format == core::pixel_format::ycbcra10)
                 desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1));
 
             return desc;
diff --git a/src/shell/casparcg.config b/src/shell/casparcg.config
index 3eec0e9d8e..1369e84791 100644
--- a/src/shell/casparcg.config
+++ b/src/shell/casparcg.config
@@ -15,6 +15,9 @@
                 <screen />
                 <system-audio />
             </consumers>
+            <producers>
+            <producer id="10">test</producer>
+            </producers>
         </channel>
     </channels>
     <controllers>

From 90a6ecd6b94a41dc03003729f5a84123ad9571c1 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Mon, 4 Dec 2023 22:36:50 +0000
Subject: [PATCH 07/50] nope

---
 src/accelerator/ogl/util/compute_shader.cpp | 18 +++++++++---------
 src/accelerator/ogl/util/device.cpp         | 12 +++++++++---
 src/accelerator/ogl/util/texture.cpp        | 11 +++++++++++
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp
index 612bf8b922..26aba484df 100644
--- a/src/accelerator/ogl/util/compute_shader.cpp
+++ b/src/accelerator/ogl/util/compute_shader.cpp
@@ -54,10 +54,10 @@ struct compute_shader::impl
 
         const char* compute_source = compute_source_str.c_str();
 
-        auto compute_shader = glCreateShaderObjectARB(GL_COMPUTE_SHADER);
+        auto compute_shader = glCreateShader(GL_COMPUTE_SHADER);
 
-        GL(glShaderSourceARB(compute_shader, 1, &compute_source, NULL));
-        GL(glCompileShaderARB(compute_shader));
+        GL(glShaderSource(compute_shader, 1, &compute_source, NULL));
+        GL(glCompileShader(compute_shader));
 
         GL(glGetObjectParameterivARB(compute_shader, GL_OBJECT_COMPILE_STATUS_ARB, &success));
         if (success == GL_FALSE) {
@@ -69,12 +69,12 @@ struct compute_shader::impl
             CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str()));
         }
 
-        program_ = glCreateProgramObjectARB();
+        program_ = glCreateProgram();
 
-        GL(glAttachObjectARB(program_, compute_shader));
-        GL(glLinkProgramARB(program_));
+        GL(glAttachShader(program_, compute_shader));
+        GL(glLinkProgram(program_));
 
-        GL(glDeleteObjectARB(compute_shader));
+        GL(glDeleteShader(compute_shader));
 
         GL(glGetObjectParameterivARB(program_, GL_OBJECT_LINK_STATUS_ARB, &success));
         if (success == GL_FALSE) {
@@ -85,7 +85,7 @@ struct compute_shader::impl
             str << "Failed to link shader program:" << std::endl << info << std::endl;
             CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str()));
         }
-        GL(glUseProgramObjectARB(program_));
+        GL(glUseProgram(program_));
     }
 
     ~impl() { glDeleteProgram(program_); }
@@ -122,7 +122,7 @@ struct compute_shader::impl
         GL(glUniform1f(get_uniform_location(name.c_str()), static_cast<float>(value)));
     }
 
-    void use() { GL(glUseProgramObjectARB(program_)); }
+    void use() { GL(glUseProgram(program_)); }
 };
 
 compute_shader::compute_shader(const std::string& compute_source_str)
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 2ccdb87753..34e66b85fe 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -119,7 +119,6 @@ struct device::impl : public std::enable_shared_from_this<impl>
         GL(glCreateFramebuffers(1, &fbo_));
         GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_));
 
-        compute_shader_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
 
         device_.setActive(false);
 
@@ -298,10 +297,14 @@ struct device::impl : public std::enable_shared_from_this<impl>
     {
         return dispatch_async([=] {
 
+            if (!compute_shader_)
+            compute_shader_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
+
             auto tex = create_texture(width, height, 5, false);
 
-            //tex->bind(0);
-            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
+            tex->bind(0);
+            //compute_shader_->use();
+            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
             compute_shader_->use();
 
             glDispatchCompute((unsigned int)width, (unsigned int)height, 1);
@@ -310,6 +313,9 @@ struct device::impl : public std::enable_shared_from_this<impl>
 //            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop
             glMemoryBarrier(GL_ALL_BARRIER_BITS);
 
+            glFlush();
+
+                        std::this_thread::sleep_for(std::chrono::milliseconds(5));
 
 
             /*
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 26f23b2ce8..ffb8eae779 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -54,6 +54,7 @@ struct texture::impl
             size_ = width * height * 16;
         }
 
+        /*
 
         GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_));
         GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR));
@@ -61,6 +62,16 @@ struct texture::impl
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
         GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_));
+        */
+
+        glGenTextures(1, &id_);
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, id_);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, NULL);
 
 
         //bind();

From efd99f6d266c53fff4264a2812cb2d955ff7a1ef Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 15:42:37 +0000
Subject: [PATCH 08/50] wip: something happens!

---
 src/accelerator/ogl/image/image_mixer.cpp     | 10 +++++++-
 src/accelerator/ogl/image/shader.frag         |  2 ++
 src/accelerator/ogl/image/shader_to_rgba.comp |  2 +-
 src/accelerator/ogl/util/device.cpp           | 23 +------------------
 src/accelerator/ogl/util/texture.cpp          | 17 --------------
 5 files changed, 13 insertions(+), 41 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 36c1b7bd51..3e35e0b781 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -317,7 +317,15 @@ struct image_mixer::impl
 */
 
         textures.emplace_back(ogl_->convert_frame(
-            image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter?
+                image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter?
+//        textures.emplace_back(make_ready_future(t.get()));
+
+/*
+        for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+            textures.emplace_back(ogl_->copy_async(
+                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+        }
+*/
 
         return textures;
     }
diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index 93d31d13f2..0e889aed8d 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -480,6 +480,8 @@ vec4 get_rgba_color()
     case 1:		//bgra,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).bgra;
     case 2:		//rgba,
+    case 11: //ycbcr10,
+    case 12: //ycbcra10,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba;
     case 3:		//argb,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).argb;
diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp
index abc7adc03c..5b0a89a569 100644
--- a/src/accelerator/ogl/image/shader_to_rgba.comp
+++ b/src/accelerator/ogl/image/shader_to_rgba.comp
@@ -5,7 +5,7 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
 
 void main() {
-    vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
+    vec4 value = vec4(0.0, 0.0, 1.0, 1.0);
     ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
 
     value.x = float(texelCoord.x)/(gl_NumWorkGroups.x);
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 34e66b85fe..7b2d120728 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -302,35 +302,14 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
             auto tex = create_texture(width, height, 5, false);
 
-            tex->bind(0);
-            //compute_shader_->use();
             glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
             compute_shader_->use();
 
             glDispatchCompute((unsigned int)width, (unsigned int)height, 1);
 
             // make sure writing to image has finished before read
-//            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop
-            glMemoryBarrier(GL_ALL_BARRIER_BITS);
+            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
 
-            glFlush();
-
-                        std::this_thread::sleep_for(std::chrono::milliseconds(5));
-
-
-            /*
-std::shared_ptr<buffer> buf;
-            auto tmp = source.storage<std::shared_ptr<buffer>>();
-            if (tmp) {
-                buf = *tmp;
-            } else {
-                buf = create_buffer(static_cast<int>(source.size()), true);
-                // TODO (perf) Copy inside a TBB worker.
-                std::memcpy(buf->data(), source.data(), source.size());
-            }
-             */
-
-            // tex->copy_from(*buf);
             return tex;
         });
     }
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index ffb8eae779..dedf4374f3 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -54,29 +54,12 @@ struct texture::impl
             size_ = width * height * 16;
         }
 
-        /*
-
         GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_));
         GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
         GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_));
-        */
-
-        glGenTextures(1, &id_);
-        glActiveTexture(GL_TEXTURE0);
-        glBindTexture(GL_TEXTURE_2D, id_);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-        glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, NULL);
-
-
-        //bind();
-        //GL(glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, nullptr)); // HACK
-        //unbind();
     }
 
     ~impl() { glDeleteTextures(1, &id_); }

From 467062e919824df0f0982f599242c621f46684e8 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 16:31:47 +0000
Subject: [PATCH 09/50] hack a mess

---
 src/accelerator/ogl/image/image_mixer.cpp     | 35 +++++---------
 src/accelerator/ogl/image/shader.frag         | 19 ++++++--
 src/accelerator/ogl/image/shader_to_rgba.comp |  6 +--
 src/accelerator/ogl/util/device.cpp           |  7 +--
 src/accelerator/ogl/util/texture.cpp          |  9 ++--
 src/core/frame/pixel_format.h                 |  8 +++-
 src/modules/ffmpeg/util/av_util.cpp           | 47 ++++++++++++++++---
 7 files changed, 89 insertions(+), 42 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 3e35e0b781..7f5683025a 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -307,25 +307,12 @@ struct image_mixer::impl
     {
         const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate
 
-        std::vector<future_texture> textures;
-        /*
-        const auto texture = ogl_->create_texture(plane0.width, plane0.height,
-                                                                   4); // TODO - don't clear
+        // TODO - desc is no longer 'correct' and should probably be changed to avoid the mixer shader being aware of these formats
 
-        // TODO - how to run and link shader?
-        textures.emplace_back(make_ready_future(texture));
-*/
+        std::vector<future_texture> textures;
 
         textures.emplace_back(ogl_->convert_frame(
-                image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter?
-//        textures.emplace_back(make_ready_future(t.get()));
-
-/*
-        for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-            textures.emplace_back(ogl_->copy_async(
-                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
-        }
-*/
+                image_data, plane0.width, plane0.height, plane0.width / 2)); // TODO - what is this 'format' parameter?
 
         return textures;
     }
@@ -350,12 +337,16 @@ struct image_mixer::impl
                 }
 
                 switch (desc.format) {
-                    case core::pixel_format::ycbcr10:
-                    case core::pixel_format::ycbcra10: {
-                        std::vector<future_texture> textures = self->convert_frame(image_data, desc);
-
-                        return std::make_shared<decltype(textures)>(std::move(textures));
-                    }
+//                    case core::pixel_format::ycbcr10_420:
+//                    case core::pixel_format::ycbcr10_422:
+//                    case core::pixel_format::ycbcr10_444:
+//                    case core::pixel_format::ycbcra10_420:
+//                    case core::pixel_format::ycbcra10_422:
+//                    case core::pixel_format::ycbcra10_444: {
+//                        std::vector<future_texture> textures = self->convert_frame(image_data, desc);
+//
+//                        return std::make_shared<decltype(textures)>(std::move(textures));
+//                    }
                     default: {
                         std::vector<future_texture> textures;
                         for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index 0e889aed8d..baeaaae34c 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -479,15 +479,16 @@ vec4 get_rgba_color()
         return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rrr, 1.0);
     case 1:		//bgra,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).bgra;
-    case 2:		//rgba,
-    case 11: //ycbcr10,
-    case 12: //ycbcra10,
+    case 2:		//rgba
         return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba;
     case 3:		//argb,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).argb;
     case 4:		//abgr,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar;
     case 5:		//ycbcr,
+        case 11:    //ycbcr10_420
+        case 12:    //ycbcr10_422
+        case 13:    //ycbcr10_444
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
@@ -495,6 +496,9 @@ vec4 get_rgba_color()
             return ycbcra_to_rgba(y, cb, cr, 1.0);
         }
     case 6:		//ycbcra
+        case 14:    //ycbcra10_420
+        case 15:    //ycbcra10_422
+        case 16:    //ycbcra10_444
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
@@ -518,6 +522,15 @@ vec4 get_rgba_color()
 			float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r;			
 			return ycbcra_to_rgba(y, cb, cr, 1.0);
 		}
+    // formats converted from packed formats
+//    case 11:    //ycbcr10_420
+//    case 12:    //ycbcr10_422
+//    case 13:    //ycbcr10_444
+//        return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rgb, 1.0);
+//    case 14:    //ycbcra10_420
+//    case 15:    //ycbcra10_422
+//    case 16:    //ycbcra10_444
+//        return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba;
     }
     return vec4(0.0, 0.0, 0.0, 0.0);
 }
diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp
index 5b0a89a569..1964edadbf 100644
--- a/src/accelerator/ogl/image/shader_to_rgba.comp
+++ b/src/accelerator/ogl/image/shader_to_rgba.comp
@@ -5,11 +5,11 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
 
 void main() {
-    vec4 value = vec4(0.0, 0.0, 1.0, 1.0);
+    vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
     ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
 
-    value.x = float(texelCoord.x)/(gl_NumWorkGroups.x);
-    value.y = float(texelCoord.y)/(gl_NumWorkGroups.y);
+    value.r = float(texelCoord.x)/(gl_NumWorkGroups.x);
+    value.g = float(texelCoord.y)/(gl_NumWorkGroups.y);
 
     imageStore(imgOutput, texelCoord, value);
 }
\ No newline at end of file
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 7b2d120728..9030edf635 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -182,7 +182,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     std::shared_ptr<texture> create_texture(int width, int height, int stride, bool clear)
     {
-        CASPAR_VERIFY(stride > 0 && stride < 6);
+        CASPAR_VERIFY(stride > 0 && stride < 7);
         CASPAR_VERIFY(width > 0 && height > 0);
 
         // TODO (perf) Shared pool.
@@ -293,7 +293,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
     }
 
     std::future<std::shared_ptr<texture>>
-    convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format)
+    convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int width_samples)
     {
         return dispatch_async([=] {
 
@@ -303,9 +303,10 @@ struct device::impl : public std::enable_shared_from_this<impl>
             auto tex = create_texture(width, height, 5, false);
 
             glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
+
             compute_shader_->use();
 
-            glDispatchCompute((unsigned int)width, (unsigned int)height, 1);
+            glDispatchCompute((unsigned int)width_samples, (unsigned int)height, 1);
 
             // make sure writing to image has finished before read
             glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index dedf4374f3..262b593396 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -28,9 +28,9 @@
 
 namespace caspar { namespace accelerator { namespace ogl {
 
-static GLenum FORMAT[]          = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA};
-static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F};
-static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE};
+static GLenum FORMAT[]          = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA, GL_RED};
+static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F, GL_R16F};
+static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE};
 
 struct texture::impl
 {
@@ -52,6 +52,9 @@ struct texture::impl
     {
         if (stride == 5) {
             size_ = width * height * 16;
+        }else
+        if (stride == 6) {
+            size_ = width * height * 2;
         }
 
         GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_));
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index efe45e071d..95ac4619c6 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -38,8 +38,12 @@ enum class pixel_format
     bgr,
     rgb,
     uyvy,
-    ycbcr10,
-    ycbcra10,
+    ycbcr10_420,
+    ycbcr10_422,
+    ycbcr10_444,
+    ycbcra10_420,
+    ycbcra10_422,
+    ycbcra10_444,
     count,
     invalid,
 };
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index b71e42ad86..4e0a996e03 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -139,13 +139,17 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_UYVY422:
             return core::pixel_format::uyvy;
         case AV_PIX_FMT_YUV444P10LE:
+            return core::pixel_format::ycbcr10_444;
         case AV_PIX_FMT_YUV422P10LE:
+            return core::pixel_format::ycbcr10_422;
         case AV_PIX_FMT_YUV420P10LE:
-            return core::pixel_format::ycbcr10;
+            return core::pixel_format::ycbcr10_420;
         case AV_PIX_FMT_YUVA444P10LE:
+            return core::pixel_format::ycbcra10_444;
         case AV_PIX_FMT_YUVA422P10LE:
+            return core::pixel_format::ycbcra10_422;
         case AV_PIX_FMT_YUVA420P10LE:
-            return core::pixel_format::ycbcra10;
+            return core::pixel_format::ycbcra10_420;
         default:
             return core::pixel_format::invalid;
     }
@@ -178,9 +182,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             return desc;
         }
         case core::pixel_format::ycbcr:
-        case core::pixel_format::ycbcra:
-        case core::pixel_format::ycbcr10:
-        case core::pixel_format::ycbcra10:{
+        case core::pixel_format::ycbcra:{
             // Find chroma height
             // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
             // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
@@ -203,11 +205,44 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1));
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1));
 
-            if (desc.format == core::pixel_format::ycbcra||desc.format == core::pixel_format::ycbcra10)
+            if (desc.format == core::pixel_format::ycbcra)
                 desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1));
 
             return desc;
         }
+        case core::pixel_format::ycbcr10_420:
+        case core::pixel_format::ycbcr10_422:
+        case core::pixel_format::ycbcr10_444:
+        case core::pixel_format::ycbcra10_420:
+        case core::pixel_format::ycbcra10_422:
+        case core::pixel_format::ycbcra10_444: {
+            // Find chroma height
+            // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
+            // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
+            // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0.
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100)
+            size_t    sizes[4];
+            ptrdiff_t linesizes1[4];
+            for (int i = 0; i < 4; i++)
+                linesizes1[i] = linesizes[i];
+            av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1);
+            auto size2 = static_cast<int>(sizes[1]);
+#else
+            uint8_t* dummy_pict_data[4];
+            av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes);
+            auto size2 = static_cast<int>(dummy_pict_data[2] - dummy_pict_data[1]);
+#endif
+            auto h2 = size2 / linesizes[1];
+
+            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 6));
+//            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1] / 2, h2, 6));
+//            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2] / 2, h2, 6));
+//
+//            if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444)
+//                desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3] / 2, height, 6));
+
+            return desc;
+        }
         case core::pixel_format::uyvy: {
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 2));
             desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4));

From 1f07c7f8e286f2d0958bb3415f096cdd59580179 Mon Sep 17 00:00:00 2001
From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com>
Date: Fri, 15 Dec 2023 09:07:11 +0000
Subject: [PATCH 10/50] Add 16bit support to ogl texture

---
 src/accelerator/ogl/util/texture.cpp | 38 ++++++++++++++++------------
 src/accelerator/ogl/util/texture.h   | 14 +++++-----
 src/common/bit_depth.h               | 13 ++++++++++
 3 files changed, 43 insertions(+), 22 deletions(-)
 create mode 100644 src/common/bit_depth.h

diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 262b593396..478fc9b4f3 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -22,33 +22,37 @@
 
 #include "buffer.h"
 
+#include <common/bit_depth.h>
 #include <common/gl/gl_check.h>
 
 #include <GL/glew.h>
 
 namespace caspar { namespace accelerator { namespace ogl {
 
-static GLenum FORMAT[]          = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA, GL_RED};
-static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F, GL_R16F};
-static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE};
+static GLenum FORMAT[]             = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA};
+static GLenum INTERNAL_FORMAT[][5] = {{0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8}, {0, GL_R16, GL_RG16, GL_RGB16, GL_RGBA16}};
+static GLenum TYPE[][5] = {{0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV},
+                           {0, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT}};
 
 struct texture::impl
 {
-    GLuint  id_     = 0;
-    GLsizei width_  = 0;
-    GLsizei height_ = 0;
-    GLsizei stride_ = 0;
-    GLsizei size_   = 0;
+    GLuint            id_     = 0;
+    GLsizei           width_  = 0;
+    GLsizei           height_ = 0;
+    GLsizei           stride_ = 0;
+    GLsizei           size_   = 0;
+    common::bit_depth depth_;
 
     impl(const impl&)            = delete;
     impl& operator=(const impl&) = delete;
 
   public:
-    impl(int width, int height, int stride)
+    impl(int width, int height, int stride, common::bit_depth depth)
         : width_(width)
         , height_(height)
         , stride_(stride)
-        , size_(width * height * stride)
+        , depth_(depth)
+        , size_(width * height * stride * (1 + static_cast<int>(depth)))
     {
         if (stride == 5) {
             size_ = width * height * 16;
@@ -62,7 +66,7 @@ struct texture::impl
         GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
         GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
-        GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_));
+        GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[static_cast<int>(depth)][stride_], width_, height_));
     }
 
     ~impl() { glDeleteTextures(1, &id_); }
@@ -79,7 +83,7 @@ struct texture::impl
 
     void attach() { GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + 0, GL_TEXTURE_2D, id_, 0)); }
 
-    void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[stride_], nullptr)); }
+    void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[static_cast<int>(depth_)][stride_], nullptr)); }
 
 #ifdef WIN32
     void copy_from(int texture_id)
@@ -99,7 +103,8 @@ struct texture::impl
             glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
         }
 
-        GL(glTextureSubImage2D(id_, 0, 0, 0, width_, height_, FORMAT[stride_], TYPE[stride_], nullptr));
+        GL(glTextureSubImage2D(
+            id_, 0, 0, 0, width_, height_, FORMAT[stride_], TYPE[static_cast<int>(depth_)][stride_], nullptr));
 
         src.unbind();
     }
@@ -107,13 +112,13 @@ struct texture::impl
     void copy_to(buffer& dst)
     {
         dst.bind();
-        GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[stride_], size_, nullptr));
+        GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast<int>(depth_)][stride_], size_, nullptr));
         dst.unbind();
     }
 };
 
-texture::texture(int width, int height, int stride)
-    : impl_(new impl(width, height, stride))
+texture::texture(int width, int height, int stride, common::bit_depth depth)
+    : impl_(new impl(width, height, stride, depth))
 {
 }
 texture::texture(texture&& other)
@@ -138,6 +143,7 @@ void texture::copy_to(buffer& dest) { impl_->copy_to(dest); }
 int  texture::width() const { return impl_->width_; }
 int  texture::height() const { return impl_->height_; }
 int  texture::stride() const { return impl_->stride_; }
+common::bit_depth texture::depth() const { return impl_->depth_; }
 int  texture::size() const { return impl_->size_; }
 int  texture::id() const { return impl_->id_; }
 
diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h
index ccdca84250..ff2c117f73 100644
--- a/src/accelerator/ogl/util/texture.h
+++ b/src/accelerator/ogl/util/texture.h
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <common/bit_depth.h>
 #include <memory>
 
 namespace caspar { namespace accelerator { namespace ogl {
@@ -28,7 +29,7 @@ namespace caspar { namespace accelerator { namespace ogl {
 class texture final
 {
   public:
-    texture(int width, int height, int stride);
+    texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8);
     texture(const texture&) = delete;
     texture(texture&& other);
     ~texture();
@@ -47,11 +48,12 @@ class texture final
     void bind(int index);
     void unbind();
 
-    int width() const;
-    int height() const;
-    int stride() const;
-    int size() const;
-    int id() const;
+    int               width() const;
+    int               height() const;
+    int               stride() const;
+    common::bit_depth depth() const;
+    int               size() const;
+    int               id() const;
 
   private:
     struct impl;
diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h
new file mode 100644
index 0000000000..9d1b633f84
--- /dev/null
+++ b/src/common/bit_depth.h
@@ -0,0 +1,13 @@
+#pragma once
+#include <cstdint> // std::uint8_t (previously only available through a transitive include)
+#include <memory>
+
+namespace caspar { namespace common {
+// NOTE: values are cast to int and used as array indices and as (bytes per component - 1); do not renumber.
+enum class bit_depth : std::uint8_t
+{
+    bit8  = 0,
+    bit16 = 1,
+};
+
+}} // namespace caspar::common
\ No newline at end of file

From a940412aa9bf70d999d31c52574a44fb39a0210b Mon Sep 17 00:00:00 2001
From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com>
Date: Fri, 15 Dec 2023 10:12:15 +0000
Subject: [PATCH 11/50] add 16bit support to ogl device

---
 src/accelerator/ogl/util/device.cpp | 91 ++++++++++++++++-------------
 src/accelerator/ogl/util/device.h   | 12 +++-
 2 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 9030edf635..d6ae89dd31 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -66,8 +66,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     sf::Context device_;
 
-    std::array<tbb::concurrent_unordered_map<size_t, texture_queue_t>, 4> device_pools_;
-    std::array<tbb::concurrent_unordered_map<size_t, buffer_queue_t>, 2>  host_pools_;
+    std::array<std::array<tbb::concurrent_unordered_map<size_t, texture_queue_t>, 4>, 2> device_pools_;
+    std::array<tbb::concurrent_unordered_map<size_t, buffer_queue_t>, 2>                 host_pools_;
 
     using sync_queue_t = tbb::concurrent_bounded_queue<std::shared_ptr<buffer>>;
 
@@ -140,8 +140,9 @@ struct device::impl : public std::enable_shared_from_this<impl>
         for (auto& pool : host_pools_)
             pool.clear();
 
-        for (auto& pool : device_pools_)
-            pool.clear();
+        for (auto& pools : device_pools_)
+            for (auto& pool : pools)
+                pool.clear();
 
         sync_queue_.clear();
 
@@ -180,17 +181,18 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     std::wstring version() { return version_; }
 
-    std::shared_ptr<texture> create_texture(int width, int height, int stride, bool clear)
+    std::shared_ptr<texture> create_texture(int width, int height, int stride, common::bit_depth depth, bool clear)
     {
         CASPAR_VERIFY(stride > 0 && stride < 7);
         CASPAR_VERIFY(width > 0 && height > 0);
 
         // TODO (perf) Shared pool.
-        auto pool = &device_pools_[stride - 1][(width << 16 & 0xFFFF0000) | (height & 0x0000FFFF)];
+        auto pool =
+            &device_pools_[static_cast<int>(depth)][stride - 1][(width << 16 & 0xFFFF0000) | (height & 0x0000FFFF)];
 
         std::shared_ptr<texture> tex;
         if (!pool->try_pop(tex)) {
-            tex = std::make_shared<texture>(width, height, stride);
+            tex = std::make_shared<texture>(width, height, stride, depth);
         }
 
         if (clear) {
@@ -231,7 +233,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
 
     std::future<std::shared_ptr<texture>>
-    copy_async(const array<const uint8_t>& source, int width, int height, int stride)
+    copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
     {
         return dispatch_async([=] {
             std::shared_ptr<buffer> buf;
@@ -245,7 +247,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
                 std::memcpy(buf->data(), source.data(), source.size());
             }
 
-            auto tex = create_texture(width, height, stride, false);
+            auto tex = create_texture(width, height, stride, depth, false);
             tex->copy_from(*buf);
             // TODO (perf) save tex on source
             return tex;
@@ -355,32 +357,35 @@ struct device::impl : public std::enable_shared_from_this<impl>
         size_t                       total_pooled_device_buffer_count = 0;
 
         for (size_t i = 0; i < device_pools_.size(); ++i) {
-            auto& pools      = device_pools_.at(i);
-            bool  mipmapping = i > 3;
-            auto  stride     = mipmapping ? i - 3 : i + 1;
-
-            for (auto& pool : pools) {
-                auto width  = pool.first >> 16;
-                auto height = pool.first & 0x0000FFFF;
-                auto size   = width * height * stride;
-                auto count  = pool.second.size();
-
-                if (count == 0)
-                    continue;
-
-                boost::property_tree::wptree pool_info;
-
-                pool_info.add(L"stride", stride);
-                pool_info.add(L"mipmapping", mipmapping);
-                pool_info.add(L"width", width);
-                pool_info.add(L"height", height);
-                pool_info.add(L"size", size);
-                pool_info.add(L"count", count);
-
-                total_pooled_device_buffer_size += size * count;
-                total_pooled_device_buffer_count += count;
-
-                pooled_device_buffers.add_child(L"device_buffer_pool", pool_info);
+            auto& depth_pools = device_pools_.at(i); // outer index i is the bit_depth value (0 = 8 bit, 1 = 16 bit)
+            for (size_t j = 0; j < depth_pools.size(); ++j) { // j, not i: must not shadow the depth index
+                auto& pools      = depth_pools.at(j);
+                bool  mipmapping = j > 3;
+                auto  stride     = mipmapping ? j - 3 : j + 1;
+
+                for (auto& pool : pools) {
+                    auto width  = pool.first >> 16;
+                    auto height = pool.first & 0x0000FFFF;
+                    auto size   = width * height * stride * (i + 1); // (i + 1) bytes per component, as in texture
+                    auto count  = pool.second.size();
+
+                    if (count == 0)
+                        continue;
+
+                    boost::property_tree::wptree pool_info;
+
+                    pool_info.add(L"stride", stride);
+                    pool_info.add(L"mipmapping", mipmapping);
+                    pool_info.add(L"width", width);
+                    pool_info.add(L"height", height);
+                    pool_info.add(L"size", size);
+                    pool_info.add(L"count", count);
+
+                    total_pooled_device_buffer_size += size * count;
+                    total_pooled_device_buffer_count += count;
+
+                    pooled_device_buffers.add_child(L"device_buffer_pool", pool_info);
+                }
             }
         }
 
@@ -435,9 +440,11 @@ struct device::impl : public std::enable_shared_from_this<impl>
             CASPAR_LOG(info) << " ogl: Running GC.";
 
             try {
-                for (auto& pools : device_pools_) {
-                    for (auto& pool : pools)
-                        pool.second.clear();
+                for (auto& depth_pools : device_pools_) {
+                    for (auto& pools : depth_pools) {
+                        for (auto& pool : pools)
+                            pool.second.clear();
+                    }
                 }
                 for (auto& pools : host_pools_) {
                     for (auto& pool : pools)
@@ -455,15 +462,15 @@ device::device()
 {
 }
 device::~device() {}
-std::shared_ptr<texture> device::create_texture(int width, int height, int stride)
+std::shared_ptr<texture> device::create_texture(int width, int height, int stride, common::bit_depth depth)
 {
-    return impl_->create_texture(width, height, stride, true);
+    return impl_->create_texture(width, height, stride, depth, true);
 }
 array<uint8_t> device::create_array(int size) { return impl_->create_array(size); }
 std::future<std::shared_ptr<texture>>
-device::copy_async(const array<const uint8_t>& source, int width, int height, int stride)
+device::copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
 {
-    return impl_->copy_async(source, width, height, stride);
+    return impl_->copy_async(source, width, height, stride, depth);
 }
 std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<texture>& source)
 {
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 49e1a03a90..c9a5bfe604 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -23,6 +23,7 @@
 
 #include <accelerator/accelerator.h>
 #include <common/array.h>
+#include <common/bit_depth.h>
 
 #include <functional>
 #include <future>
@@ -45,11 +46,16 @@ class device final
 
     device& operator=(const device&) = delete;
 
-    std::shared_ptr<class texture> create_texture(int width, int height, int stride);
-    array<uint8_t>                 create_array(int size);
+    std::shared_ptr<class texture>
+                   create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8);
+    array<uint8_t> create_array(int size);
 
     std::future<std::shared_ptr<class texture>>
-                                      copy_async(const array<const uint8_t>& source, int width, int height, int stride);
+                                      copy_async(const array<const uint8_t>& source,
+                                                 int                         width,
+                                                 int                         height,
+                                                 int                         stride,
+                                                 common::bit_depth           depth = common::bit_depth::bit8); // TODO: remove default value
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
 
     std::future<std::shared_ptr<texture>>

From 1332a46f1708b71536298a2e3e634c67ee1e28b9 Mon Sep 17 00:00:00 2001
From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com>
Date: Fri, 15 Dec 2023 12:35:13 +0000
Subject: [PATCH 12/50] Add create_frame override to specify bit_depth in
 frame_factory interface

---
 src/accelerator/ogl/image/image_mixer.cpp | 12 ++++++++++++
 src/accelerator/ogl/image/image_mixer.h   |  3 +++
 src/core/frame/frame_factory.h            |  5 +++++
 src/core/mixer/image/image_mixer.h        |  4 ++++
 4 files changed, 24 insertions(+)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 7f5683025a..8601f91aae 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -28,6 +28,7 @@
 #include "frame_converter.h"
 
 #include <common/array.h>
+#include <common/bit_depth.h>
 #include <common/future.h>
 #include <common/log.h>
 
@@ -318,6 +319,12 @@ struct image_mixer::impl
     }
 
     core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override
+    {
+        return create_frame(tag, desc, common::bit_depth::bit8); // TODO: replace with channel default
+    }
+
+    core::mutable_frame
+    create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) override
     {
         std::vector<array<std::uint8_t>> image_data;
         for (auto& plane : desc.planes) {
@@ -381,6 +388,11 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel
 {
     return impl_->create_frame(tag, desc);
 }
+core::mutable_frame
+image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth)
+{
+    return impl_->create_frame(tag, desc, depth);
+}
 std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter() { return impl_->create_frame_converter(); }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index 19be36225d..679c618599 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <common/array.h>
+#include <common/bit_depth.h>
 #include <common/memory.h>
 
 #include <core/frame/frame.h>
@@ -45,6 +46,8 @@ class image_mixer final : public core::image_mixer
 
     std::future<array<const std::uint8_t>> operator()(const core::video_format_desc& format_desc) override;
     core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
+    core::mutable_frame
+    create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc, common::bit_depth depth) override;
 
     std::shared_ptr<core::frame_converter> create_frame_converter() override;
 
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 57b660dfb8..ba5586a98e 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -21,6 +21,8 @@
 
 #pragma once
 
+#include <common/bit_depth.h>
+
 namespace caspar { namespace core {
 
 class frame_converter {
@@ -48,6 +50,9 @@ class frame_factory
 
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
+    virtual class mutable_frame
+    create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc, common::bit_depth depth) = 0;
+
     virtual std::shared_ptr<frame_converter> create_frame_converter() = 0;
 };
 
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index dcff0d1b79..621349382b 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -49,6 +49,10 @@ class image_mixer
 
     class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0;
 
+    class mutable_frame create_frame(const void*                     video_stream_tag,
+                                     const struct pixel_format_desc& desc,
+                                     common::bit_depth               depth) override                               = 0;
+
     std::shared_ptr<frame_converter> create_frame_converter() override = 0;
 };
 

From 8f871711c942b260eaf1d2824d28cf4bc2f2864d Mon Sep 17 00:00:00 2001
From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com>
Date: Fri, 15 Dec 2023 13:42:58 +0000
Subject: [PATCH 13/50] add native_depth property to caspar::array

---
 src/accelerator/ogl/image/image_mixer.cpp |  2 +-
 src/accelerator/ogl/util/device.cpp       | 18 +++++----
 src/accelerator/ogl/util/device.h         |  8 +---
 src/common/array.h                        | 45 +++++++++++++++--------
 4 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 8601f91aae..24cf67cbd6 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -328,7 +328,7 @@ struct image_mixer::impl
     {
         std::vector<array<std::uint8_t>> image_data;
         for (auto& plane : desc.planes) {
-            image_data.push_back(ogl_->create_array(plane.size));
+            image_data.push_back(ogl_->create_array(plane.size, depth));
         }
 
         std::weak_ptr<image_mixer::impl> weak_self = shared_from_this();
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index d6ae89dd31..711ed323e9 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -223,18 +223,20 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
-    array<uint8_t> create_array(int size)
+    array<uint8_t> create_array(int count, common::bit_depth depth)
     {
-        auto buf = create_buffer(size, true);
-        auto ptr = reinterpret_cast<uint8_t*>(buf->data());
-        return array<uint8_t>(ptr, buf->size(), buf);
+        auto bytes_per_pixel = static_cast<int>(depth) + 1;
+        auto buf             = create_buffer(count * bytes_per_pixel, true);
+        auto ptr             = reinterpret_cast<uint8_t*>(buf->data());
+        return array<uint8_t>(ptr, buf->size(), buf, depth);
     }
 
 
 
     std::future<std::shared_ptr<texture>>
-    copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
+    copy_async(const array<const uint8_t>& source, int width, int height, int stride)
     {
+        auto depth = source.native_depth();
         return dispatch_async([=] {
             std::shared_ptr<buffer> buf;
 
@@ -466,11 +468,11 @@ std::shared_ptr<texture> device::create_texture(int width, int height, int strid
 {
     return impl_->create_texture(width, height, stride, depth, true);
 }
-array<uint8_t> device::create_array(int size) { return impl_->create_array(size); }
+array<uint8_t> device::create_array(int size, common::bit_depth depth) { return impl_->create_array(size, depth); }
 std::future<std::shared_ptr<texture>>
-device::copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
+device::copy_async(const array<const uint8_t>& source, int width, int height, int stride)
 {
-    return impl_->copy_async(source, width, height, stride, depth);
+    return impl_->copy_async(source, width, height, stride);
 }
 std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<texture>& source)
 {
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index c9a5bfe604..1900c9f877 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -48,14 +48,10 @@ class device final
 
     std::shared_ptr<class texture>
                    create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8);
-    array<uint8_t> create_array(int size);
+    array<uint8_t> create_array(int size, common::bit_depth depth);
 
     std::future<std::shared_ptr<class texture>>
-                                      copy_async(const array<const uint8_t>& source,
-                                                 int                         width,
-                                                 int                         height,
-                                                 int                         stride,
-                                                 common::bit_depth           depth = common::bit_depth::bit8); // TODO: remove default value
+                                      copy_async(const array<const uint8_t>& source, int width, int height, int stride);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
 
     std::future<std::shared_ptr<texture>>
diff --git a/src/common/array.h b/src/common/array.h
index b7a6019b71..97b0d411c2 100644
--- a/src/common/array.h
+++ b/src/common/array.h
@@ -2,6 +2,8 @@
 
 #include <boost/any.hpp>
 
+#include "bit_depth.h"
+
 #include <cstddef>
 #include <cstdlib>
 #include <memory>
@@ -41,10 +43,11 @@ class array final
     }
 
     template <typename S>
-    explicit array(T* ptr, std::size_t size, S&& storage)
+    explicit array(T* ptr, std::size_t size, S&& storage, common::bit_depth native_depth = common::bit_depth::bit8)
         : ptr_(ptr)
         , size_(size)
         , storage_(std::make_shared<boost::any>(std::forward<S>(storage)))
+        , native_depth_(native_depth)
     {
     }
 
@@ -54,6 +57,7 @@ class array final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(std::move(other.storage_))
+        , native_depth_(other.native_depth_)
     {
         other.ptr_  = nullptr;
         other.size_ = 0;
@@ -63,17 +67,19 @@ class array final
 
     array& operator=(array&& other)
     {
-        ptr_     = std::move(other.ptr_);
-        size_    = std::move(other.size_);
-        storage_ = std::move(other.storage_);
+        ptr_          = std::move(other.ptr_);
+        size_         = std::move(other.size_);
+        storage_      = std::move(other.storage_);
+        native_depth_ = std::move(other.native_depth_);
 
         return *this;
     }
 
-    T*          begin() const { return ptr_; }
-    T*          data() const { return ptr_; }
-    T*          end() const { return ptr_ + size_; }
-    std::size_t size() const { return size_; }
+    T*                begin() const { return ptr_; }
+    T*                data() const { return ptr_; }
+    T*                end() const { return ptr_ + size_; }
+    std::size_t       size() const { return size_; }
+    common::bit_depth native_depth() const { return native_depth_; }
 
     explicit operator bool() const { return size_ > 0; };
 
@@ -84,8 +90,9 @@ class array final
     }
 
   private:
-    T*                          ptr_  = nullptr;
-    std::size_t                 size_ = 0;
+    T*                          ptr_          = nullptr;
+    std::size_t                 size_         = 0;
+    common::bit_depth           native_depth_ = common::bit_depth::bit8;
     std::shared_ptr<boost::any> storage_;
 };
 
@@ -118,10 +125,14 @@ class array<const T> final
     }
 
     template <typename S>
-    explicit array(const T* ptr, std::size_t size, S&& storage)
+    explicit array(const T*          ptr,
+                   std::size_t       size,
+                   S&&               storage,
+                   common::bit_depth native_depth = common::bit_depth::bit8)
         : ptr_(ptr)
         , size_(size)
         , storage_(std::make_shared<boost::any>(std::forward<S>(storage)))
+        , native_depth_(native_depth)
     {
     }
 
@@ -129,6 +140,7 @@ class array<const T> final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(other.storage_)
+        , native_depth_(other.native_depth_)
     {
     }
 
@@ -136,6 +148,7 @@ class array<const T> final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(other.storage_)
+        , native_depth_(other.native_depth_)
     {
         other.ptr_     = nullptr;
         other.size_    = 0;
@@ -150,10 +163,11 @@ class array<const T> final
         return *this;
     }
 
-    const T*    begin() const { return ptr_; }
-    const T*    data() const { return ptr_; }
-    const T*    end() const { return ptr_ + size_; }
-    std::size_t size() const { return size_; }
+    const T*          begin() const { return ptr_; }
+    const T*          data() const { return ptr_; }
+    const T*          end() const { return ptr_ + size_; }
+    std::size_t       size() const { return size_; }
+    common::bit_depth native_depth() const { return native_depth_; }
 
     explicit operator bool() const { return size_ > 0; }
 
@@ -167,6 +181,7 @@ class array<const T> final
     const T*                    ptr_  = nullptr;
     std::size_t                 size_ = 0;
     std::shared_ptr<boost::any> storage_;
+    common::bit_depth           native_depth_ = common::bit_depth::bit8;
 };
 
 } // namespace caspar

From 723419dae089943079d3809e81c4a7154b6ff988 Mon Sep 17 00:00:00 2001
From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com>
Date: Fri, 15 Dec 2023 13:54:41 +0000
Subject: [PATCH 14/50] add 16bits support to image_mixer

---
 src/accelerator/accelerator.cpp           | 11 +++----
 src/accelerator/accelerator.h             |  4 ++-
 src/accelerator/ogl/image/image_mixer.cpp | 36 +++++++++++++----------
 src/accelerator/ogl/image/image_mixer.h   |  2 +-
 src/accelerator/ogl/util/device.h         |  5 ++--
 src/shell/server.cpp                      |  3 +-
 6 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/src/accelerator/accelerator.cpp b/src/accelerator/accelerator.cpp
index 5668553ac6..fb1017091d 100644
--- a/src/accelerator/accelerator.cpp
+++ b/src/accelerator/accelerator.cpp
@@ -5,6 +5,8 @@
 
 #include <boost/property_tree/ptree.hpp>
 
+#include <common/bit_depth.h>
+
 #include <core/mixer/image/image_mixer.h>
 
 #include <memory>
@@ -23,10 +25,9 @@ struct accelerator::impl
     {
     }
 
-    std::unique_ptr<core::image_mixer> create_image_mixer(const int channel_id)
+    std::unique_ptr<core::image_mixer> create_image_mixer(int channel_id, common::bit_depth depth)
     {
-        return std::make_unique<ogl::image_mixer>(
-            spl::make_shared_ptr(get_device()), channel_id, format_repository_.get_max_video_format_size());
+        return std::make_unique<ogl::image_mixer>(spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size());
     }
 
     std::shared_ptr<ogl::device> get_device()
@@ -46,9 +47,9 @@ accelerator::accelerator(const core::video_format_repository format_repository)
 
 accelerator::~accelerator() {}
 
-std::unique_ptr<core::image_mixer> accelerator::create_image_mixer(const int channel_id)
+std::unique_ptr<core::image_mixer> accelerator::create_image_mixer(const int channel_id, common::bit_depth depth)
 {
-    return impl_->create_image_mixer(channel_id);
+    return impl_->create_image_mixer(channel_id, depth);
 }
 
 std::shared_ptr<accelerator_device> accelerator::get_device() const
diff --git a/src/accelerator/accelerator.h b/src/accelerator/accelerator.h
index 5bd67a5f55..f7419d7f99 100644
--- a/src/accelerator/accelerator.h
+++ b/src/accelerator/accelerator.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <common/bit_depth.h>
+
 #include <core/mixer/mixer.h>
 #include <core/video_format.h>
 
@@ -27,7 +29,7 @@ class accelerator
 
     accelerator& operator=(accelerator&) = delete;
 
-    std::unique_ptr<caspar::core::image_mixer> create_image_mixer(int channel_id);
+    std::unique_ptr<caspar::core::image_mixer> create_image_mixer(int channel_id, common::bit_depth depth);
 
     std::shared_ptr<accelerator_device> get_device() const;
 
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 24cf67cbd6..7cf1c2ad88 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -74,11 +74,13 @@ class image_renderer
     spl::shared_ptr<device> ogl_;
     image_kernel            kernel_;
     const size_t            max_frame_size_;
+    common::bit_depth       depth_;
 
   public:
-    explicit image_renderer(const spl::shared_ptr<device>& ogl, const size_t max_frame_size)
+    explicit image_renderer(const spl::shared_ptr<device>& ogl, common::bit_depth depth, const size_t max_frame_size)
         : ogl_(ogl)
         , kernel_(ogl_)
+        , depth_(depth)
         , max_frame_size_(max_frame_size)
     {
     }
@@ -87,12 +89,12 @@ class image_renderer
                                                       const core::video_format_desc& format_desc)
     {
         if (layers.empty()) { // Bypass GPU with empty frame.
-            static const std::vector<uint8_t> buffer(max_frame_size_, 0);
-            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
+            static const std::vector<uint8_t> buffer(max_frame_size * 2, 0); // TODO better
+            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true, depth_));
         }
 
         return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future<array<const std::uint8_t>> {
-            auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4);
+            auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_);
 
             draw(target_texture, std::move(layers), format_desc);
 
@@ -125,7 +127,7 @@ class image_renderer
         std::shared_ptr<texture> local_mix_texture;
 
         if (layer.blend_mode != core::blend_mode::normal) {
-            auto layer_texture = ogl_->create_texture(target_texture->width(), target_texture->height(), 4);
+            auto layer_texture = ogl_->create_texture(target_texture->width(), target_texture->height(), 4, depth_);
 
             for (auto& item : layer.items)
                 draw(layer_texture,
@@ -172,9 +174,9 @@ class image_renderer
         }
 
         if (item.transform.is_key) {
-            local_key_texture = local_key_texture
-                                    ? local_key_texture
-                                    : ogl_->create_texture(target_texture->width(), target_texture->height(), 1);
+            local_key_texture =
+                local_key_texture ? local_key_texture
+                                  : ogl_->create_texture(target_texture->width(), target_texture->height(), 1, depth_);
 
             draw_params.background = local_key_texture;
             draw_params.local_key  = nullptr;
@@ -182,9 +184,9 @@ class image_renderer
 
             kernel_.draw(std::move(draw_params));
         } else if (item.transform.is_mix) {
-            local_mix_texture = local_mix_texture
-                                    ? local_mix_texture
-                                    : ogl_->create_texture(target_texture->width(), target_texture->height(), 4);
+            local_mix_texture =
+                local_mix_texture ? local_mix_texture
+                                  : ogl_->create_texture(target_texture->width(), target_texture->height(), 4, depth_);
 
             draw_params.background = local_mix_texture;
             draw_params.local_key  = std::move(local_key_texture);
@@ -234,12 +236,14 @@ struct image_mixer::impl
     std::vector<core::image_transform> transform_stack_;
     std::vector<layer>                 layers_; // layer/stream/items
     std::vector<layer*>                layer_stack_;
+    common::bit_depth                  depth_;
 
   public:
-    impl(const spl::shared_ptr<device>& ogl, const int channel_id, const size_t max_frame_size)
+    impl(const spl::shared_ptr<device>& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size)
         : ogl_(ogl)
-        , renderer_(ogl, max_frame_size)
+        , renderer_(ogl, depth, max_frame_size)
         , transform_stack_(1)
+        , depth_(depth)
     {
         CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer for channel " << channel_id;
     }
@@ -320,7 +324,7 @@ struct image_mixer::impl
 
     core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override
     {
-        return create_frame(tag, desc, common::bit_depth::bit8); // TODO: replace with channel default
+        return create_frame(tag, desc, depth_);
     }
 
     core::mutable_frame
@@ -372,8 +376,8 @@ struct image_mixer::impl
     }
 };
 
-image_mixer::image_mixer(const spl::shared_ptr<device>& ogl, const int channel_id, const size_t max_frame_size)
-    : impl_(std::make_unique<impl>(ogl, channel_id, max_frame_size))
+image_mixer::image_mixer(const spl::shared_ptr<device>& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size)
+    : impl_(std::make_unique<impl>(ogl, channel_id, depth,max_frame_size))
 {
 }
 image_mixer::~image_mixer() {}
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index 679c618599..d159f50183 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -37,7 +37,7 @@ namespace caspar { namespace accelerator { namespace ogl {
 class image_mixer final : public core::image_mixer
 {
   public:
-    image_mixer(const spl::shared_ptr<class device>& ogl, int channel_id, const size_t max_frame_size);
+    image_mixer(const spl::shared_ptr<class device>& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size);
     image_mixer(const image_mixer&) = delete;
 
     ~image_mixer();
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 1900c9f877..91e470e370 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -46,9 +46,8 @@ class device final
 
     device& operator=(const device&) = delete;
 
-    std::shared_ptr<class texture>
-                   create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8);
-    array<uint8_t> create_array(int size, common::bit_depth depth);
+    std::shared_ptr<class texture> create_texture(int width, int height, int stride, common::bit_depth depth);
+    array<uint8_t>                 create_array(int size, common::bit_depth depth);
 
     std::future<std::shared_ptr<class texture>>
                                       copy_async(const array<const uint8_t>& source, int width, int height, int stride);
diff --git a/src/shell/server.cpp b/src/shell/server.cpp
index 09ea2d6f5e..81758999f8 100644
--- a/src/shell/server.cpp
+++ b/src/shell/server.cpp
@@ -24,6 +24,7 @@
 
 #include <accelerator/accelerator.h>
 
+#include <common/bit_depth.h>
 #include <common/env.h>
 #include <common/except.h>
 #include <common/memory.h>
@@ -263,7 +264,7 @@ struct server::impl
             auto channel =
                 spl::make_shared<video_channel>(channel_id,
                                                 format_desc,
-                                                accelerator_.create_image_mixer(channel_id),
+                                                accelerator_.create_image_mixer(channel_id, common::bit_depth::bit8),
                                                 [channel_id, weak_client](core::monitor::state channel_state) {
                                                     monitor::state state;
                                                     state[""]["channel"][channel_id] = channel_state;

From b96d98e533441c9e1cf284c6836d9a0ceec2bbf4 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 17:00:36 +0000
Subject: [PATCH 15/50] wip: correct colour

---
 src/accelerator/ogl/image/frame_converter.cpp |  2 +-
 src/accelerator/ogl/image/image_mixer.cpp     | 15 ++---------
 src/accelerator/ogl/image/image_mixer.h       |  2 --
 src/accelerator/ogl/image/shader.frag         | 22 +++++++++++++---
 src/accelerator/ogl/util/device.cpp           |  4 +--
 src/core/frame/frame_factory.h                |  3 ---
 src/core/frame/pixel_format.h                 | 18 +++++++++----
 src/core/mixer/image/image_mixer.h            |  4 ---
 src/modules/ffmpeg/util/av_util.cpp           | 26 ++++++-------------
 9 files changed, 45 insertions(+), 51 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 4abc7bb6ba..1baf619413 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -32,7 +32,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor
 
     std::vector<array<std::uint8_t>> image_data;
     for (auto& plane : desc.planes) {
-        image_data.push_back(ogl_->create_array(plane.size));
+        image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth
     }
 
     using future_texture = std::shared_future<std::shared_ptr<texture>>;
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 7cf1c2ad88..cd4ab31428 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -89,7 +89,7 @@ class image_renderer
                                                       const core::video_format_desc& format_desc)
     {
         if (layers.empty()) { // Bypass GPU with empty frame.
-            static const std::vector<uint8_t> buffer(max_frame_size * 2, 0); // TODO better
+            static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
             return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true, depth_));
         }
 
@@ -323,16 +323,10 @@ struct image_mixer::impl
     }
 
     core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override
-    {
-        return create_frame(tag, desc, depth_);
-    }
-
-    core::mutable_frame
-    create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) override
     {
         std::vector<array<std::uint8_t>> image_data;
         for (auto& plane : desc.planes) {
-            image_data.push_back(ogl_->create_array(plane.size, depth));
+            image_data.push_back(ogl_->create_array(plane.size, plane.depth));
         }
 
         std::weak_ptr<image_mixer::impl> weak_self = shared_from_this();
@@ -392,11 +386,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel
 {
     return impl_->create_frame(tag, desc);
 }
-core::mutable_frame
-image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth)
-{
-    return impl_->create_frame(tag, desc, depth);
-}
 std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter() { return impl_->create_frame_converter(); }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index d159f50183..12b954d713 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -46,8 +46,6 @@ class image_mixer final : public core::image_mixer
 
     std::future<array<const std::uint8_t>> operator()(const core::video_format_desc& format_desc) override;
     core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
-    core::mutable_frame
-    create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc, common::bit_depth depth) override;
 
     std::shared_ptr<core::frame_converter> create_frame_converter() override;
 
diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index baeaaae34c..2d8d4a6dc1 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -486,13 +486,21 @@ vec4 get_rgba_color()
     case 4:		//abgr,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar;
     case 5:		//ycbcr,
-        case 11:    //ycbcr10_420
-        case 12:    //ycbcr10_422
-        case 13:    //ycbcr10_444
+    case 11:    //ycbcr10_420
+    case 12:    //ycbcr10_422
+    case 13:    //ycbcr10_444
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
             float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r;
+
+            if (pixel_format >= 10){
+                // unpack 16bit to 10bit
+                y *= 64;
+                cb *= 64;
+                cr *= 64;
+            }
+
             return ycbcra_to_rgba(y, cb, cr, 1.0);
         }
     case 6:		//ycbcra
@@ -504,6 +512,14 @@ vec4 get_rgba_color()
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
             float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r;
             float a  = get_sample(plane[3], TexCoord.st / TexCoord.q).r;
+
+            if (pixel_format >= 10){
+                // unpack 16bit to 10bit
+                y *= 64;
+                cb *= 64;
+                cr *= 64;
+            }
+
             return ycbcra_to_rgba(y, cb, cr, a);
         }
     case 7:		//luma
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 711ed323e9..68f0bed0a1 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -183,7 +183,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     std::shared_ptr<texture> create_texture(int width, int height, int stride, common::bit_depth depth, bool clear)
     {
-        CASPAR_VERIFY(stride > 0 && stride < 7);
+        CASPAR_VERIFY(stride > 0 && stride < 5);
         CASPAR_VERIFY(width > 0 && height > 0);
 
         // TODO (perf) Shared pool.
@@ -304,7 +304,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
             if (!compute_shader_)
             compute_shader_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
 
-            auto tex = create_texture(width, height, 5, false);
+            auto tex = create_texture(width, height, 4, common::bit_depth::bit16, false);
 
             glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
 
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index ba5586a98e..ff4403a854 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -50,9 +50,6 @@ class frame_factory
 
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
-    virtual class mutable_frame
-    create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc, common::bit_depth depth) = 0;
-
     virtual std::shared_ptr<frame_converter> create_frame_converter() = 0;
 };
 
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index 95ac4619c6..32ffe30df9 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <common/bit_depth.h>
 #include <vector>
 
 namespace caspar { namespace core {
@@ -57,15 +58,22 @@ struct pixel_format_desc final
         int height   = 0;
         int size     = 0;
         int stride   = 0;
+        common::bit_depth depth = common::bit_depth::bit8;
 
         plane() = default;
 
         plane(int width, int height, int stride)
-            : linesize(width * stride)
-            , width(width)
-            , height(height)
-            , size(width * height * stride)
-            , stride(stride)
+            : plane(width, height, stride, common::bit_depth::bit8)
+        {
+        }
+
+        plane(int width, int height, int stride, common::bit_depth depth)
+                : linesize(width * stride * (static_cast<int>(depth) + 1))
+                , width(width)
+                , height(height)
+                , size(width * height * stride)
+                , stride(stride)
+                , depth(depth)
         {
         }
     };
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index 621349382b..dcff0d1b79 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -49,10 +49,6 @@ class image_mixer
 
     class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0;
 
-    class mutable_frame create_frame(const void*                     video_stream_tag,
-                                     const struct pixel_format_desc& desc,
-                                     common::bit_depth               depth) override                               = 0;
-
     std::shared_ptr<frame_converter> create_frame_converter() override = 0;
 };
 
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index 4e0a996e03..b97e131aed 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -216,30 +216,20 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
         case core::pixel_format::ycbcra10_420:
         case core::pixel_format::ycbcra10_422:
         case core::pixel_format::ycbcra10_444: {
-            // Find chroma height
-            // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
-            // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
-            // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0.
-#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100)
             size_t    sizes[4];
             ptrdiff_t linesizes1[4];
             for (int i = 0; i < 4; i++)
                 linesizes1[i] = linesizes[i];
             av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1);
-            auto size2 = static_cast<int>(sizes[1]);
-#else
-            uint8_t* dummy_pict_data[4];
-            av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes);
-            auto size2 = static_cast<int>(dummy_pict_data[2] - dummy_pict_data[1]);
-#endif
-            auto h2 = size2 / linesizes[1];
 
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 6));
-//            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1] / 2, h2, 6));
-//            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2] / 2, h2, 6));
-//
-//            if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444)
-//                desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3] / 2, height, 6));
+            auto h2 = static_cast<int>(sizes[1]) / linesizes[1];
+
+            desc.planes.emplace_back(linesizes[0] / 2, height, 1, common::bit_depth::bit16);
+            desc.planes.emplace_back(linesizes[1] / 2, h2, 1, common::bit_depth::bit16);
+            desc.planes.emplace_back(linesizes[2] / 2, h2, 1, common::bit_depth::bit16);
+
+            if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444)
+                desc.planes.emplace_back(linesizes[3] / 2, height, 1, common::bit_depth::bit16);
 
             return desc;
         }

From 79cde825eedc44f3990757c1d457d3729658f6a2 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 17:02:25 +0000
Subject: [PATCH 16/50] simplify: merge 10-bit pixel formats into ycbcr10/ycbcra10

---
 src/accelerator/ogl/image/shader.frag | 12 ++++--------
 src/core/frame/pixel_format.h         |  8 ++------
 src/modules/ffmpeg/util/av_util.cpp   | 18 +++++-------------
 3 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index 2d8d4a6dc1..89776e85ca 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -486,15 +486,13 @@ vec4 get_rgba_color()
     case 4:		//abgr,
         return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar;
     case 5:		//ycbcr,
-    case 11:    //ycbcr10_420
-    case 12:    //ycbcr10_422
-    case 13:    //ycbcr10_444
+    case 11:    //ycbcr10
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
             float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r;
 
-            if (pixel_format >= 10){
+            if (pixel_format == 11){ //ycbcr10
                 // unpack 16bit to 10bit
                 y *= 64;
                 cb *= 64;
@@ -504,16 +502,14 @@ vec4 get_rgba_color()
             return ycbcra_to_rgba(y, cb, cr, 1.0);
         }
     case 6:		//ycbcra
-        case 14:    //ycbcra10_420
-        case 15:    //ycbcra10_422
-        case 16:    //ycbcra10_444
+    case 12:    //ycbcra10
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
             float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r;
             float a  = get_sample(plane[3], TexCoord.st / TexCoord.q).r;
 
-            if (pixel_format >= 10){
+            if (pixel_format == 12){ //ycbcra10
                 // unpack 16bit to 10bit
                 y *= 64;
                 cb *= 64;
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index 32ffe30df9..b03569bedf 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -39,12 +39,8 @@ enum class pixel_format
     bgr,
     rgb,
     uyvy,
-    ycbcr10_420,
-    ycbcr10_422,
-    ycbcr10_444,
-    ycbcra10_420,
-    ycbcra10_422,
-    ycbcra10_444,
+    ycbcr10,
+    ycbcra10,
     count,
     invalid,
 };
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index b97e131aed..dd64f60551 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -139,17 +139,13 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_UYVY422:
             return core::pixel_format::uyvy;
         case AV_PIX_FMT_YUV444P10LE:
-            return core::pixel_format::ycbcr10_444;
         case AV_PIX_FMT_YUV422P10LE:
-            return core::pixel_format::ycbcr10_422;
         case AV_PIX_FMT_YUV420P10LE:
-            return core::pixel_format::ycbcr10_420;
+            return core::pixel_format::ycbcr10;
         case AV_PIX_FMT_YUVA444P10LE:
-            return core::pixel_format::ycbcra10_444;
         case AV_PIX_FMT_YUVA422P10LE:
-            return core::pixel_format::ycbcra10_422;
         case AV_PIX_FMT_YUVA420P10LE:
-            return core::pixel_format::ycbcra10_420;
+            return core::pixel_format::ycbcra10;
         default:
             return core::pixel_format::invalid;
     }
@@ -210,12 +206,8 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
 
             return desc;
         }
-        case core::pixel_format::ycbcr10_420:
-        case core::pixel_format::ycbcr10_422:
-        case core::pixel_format::ycbcr10_444:
-        case core::pixel_format::ycbcra10_420:
-        case core::pixel_format::ycbcra10_422:
-        case core::pixel_format::ycbcra10_444: {
+        case core::pixel_format::ycbcr10:
+        case core::pixel_format::ycbcra10: {
             size_t    sizes[4];
             ptrdiff_t linesizes1[4];
             for (int i = 0; i < 4; i++)
@@ -228,7 +220,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             desc.planes.emplace_back(linesizes[1] / 2, h2, 1, common::bit_depth::bit16);
             desc.planes.emplace_back(linesizes[2] / 2, h2, 1, common::bit_depth::bit16);
 
-            if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444)
+            if (desc.format == core::pixel_format::ycbcra10)
                 desc.planes.emplace_back(linesizes[3] / 2, height, 1, common::bit_depth::bit16);
 
             return desc;

From 8e934cd9ee3f7daf8bc5e4eaa2b024f14ea48bb5 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 17:22:44 +0000
Subject: [PATCH 17/50] add 16bit yuv, untested

---
 src/accelerator/ogl/image/shader.frag       | 11 ++---------
 src/core/frame/pixel_format.h               |  2 ++
 src/modules/ffmpeg/producer/av_producer.cpp |  6 ++++++
 src/modules/ffmpeg/util/av_util.cpp         | 12 +++++++++++-
 4 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index 89776e85ca..f8d3356c6c 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -487,6 +487,7 @@ vec4 get_rgba_color()
         return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar;
     case 5:		//ycbcr,
     case 11:    //ycbcr10
+    case 13:    //ycbcr16
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
@@ -503,6 +504,7 @@ vec4 get_rgba_color()
         }
     case 6:		//ycbcra
     case 12:    //ycbcra10
+    case 14:    //ycbcra16
         {
             float y  = get_sample(plane[0], TexCoord.st / TexCoord.q).r;
             float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r;
@@ -534,15 +536,6 @@ vec4 get_rgba_color()
 			float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r;			
 			return ycbcra_to_rgba(y, cb, cr, 1.0);
 		}
-    // formats converted from packed formats
-//    case 11:    //ycbcr10_420
-//    case 12:    //ycbcr10_422
-//    case 13:    //ycbcr10_444
-//        return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rgb, 1.0);
-//    case 14:    //ycbcra10_420
-//    case 15:    //ycbcra10_422
-//    case 16:    //ycbcra10_444
-//        return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba;
     }
     return vec4(0.0, 0.0, 0.0, 0.0);
 }
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index b03569bedf..d24fbdfeb4 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -41,6 +41,8 @@ enum class pixel_format
     uyvy,
     ycbcr10,
     ycbcra10,
+    ycbcr16,
+    ycbcra16,
     count,
     invalid,
 };
diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index 749d1aa56b..ce52ff8a11 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -512,10 +512,16 @@ struct Filter
                                               AV_PIX_FMT_YUV422P10LE,
                                               AV_PIX_FMT_YUV420P10LE,
                                               // AV_PIX_FMT_YUV410P10LE,
+                                              AV_PIX_FMT_YUV444P16LE,
+                                              AV_PIX_FMT_YUV422P16LE,
+                                              AV_PIX_FMT_YUV420P16LE,
                                               AV_PIX_FMT_YUVA444P10LE,
                                               AV_PIX_FMT_YUVA422P10LE,
                                               AV_PIX_FMT_YUVA420P10LE,
                                               // AV_PIX_FMT_UYVY42210LE,
+                                              AV_PIX_FMT_YUVA444P16LE,
+                                              AV_PIX_FMT_YUVA422P16LE,
+                                              AV_PIX_FMT_YUVA420P16LE,
                                               AV_PIX_FMT_NONE};
             FF(av_opt_set_int_list(sink, "pix_fmts", pix_fmts, -1, AV_OPT_SEARCH_CHILDREN));
 #ifdef _MSC_VER
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index dd64f60551..bdba15e853 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -142,10 +142,18 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_YUV422P10LE:
         case AV_PIX_FMT_YUV420P10LE:
             return core::pixel_format::ycbcr10;
+        case AV_PIX_FMT_YUV444P16LE:
+        case AV_PIX_FMT_YUV422P16LE:
+        case AV_PIX_FMT_YUV420P16LE:
+            return core::pixel_format::ycbcr16;
         case AV_PIX_FMT_YUVA444P10LE:
         case AV_PIX_FMT_YUVA422P10LE:
         case AV_PIX_FMT_YUVA420P10LE:
             return core::pixel_format::ycbcra10;
+        case AV_PIX_FMT_YUVA444P16LE:
+        case AV_PIX_FMT_YUVA422P16LE:
+        case AV_PIX_FMT_YUVA420P16LE:
+            return core::pixel_format::ycbcra16;
         default:
             return core::pixel_format::invalid;
     }
@@ -207,7 +215,9 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             return desc;
         }
         case core::pixel_format::ycbcr10:
-        case core::pixel_format::ycbcra10: {
+        case core::pixel_format::ycbcra10:
+        case core::pixel_format::ycbcr16:
+        case core::pixel_format::ycbcra16: {
             size_t    sizes[4];
             ptrdiff_t linesizes1[4];
             for (int i = 0; i < 4; i++)

From 0c69bb809955d2e39f1b07e0d1b65edd43373b83 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 17:42:12 +0000
Subject: [PATCH 18/50] wip: propagate frame_converter type to consumers

---
 src/accelerator/ogl/image/image_mixer.cpp           | 6 +++---
 src/accelerator/ogl/image/image_mixer.h             | 2 +-
 src/core/consumer/frame_consumer.cpp                | 6 ++++--
 src/core/consumer/frame_consumer.h                  | 4 ++++
 src/core/frame/frame_factory.h                      | 2 +-
 src/core/fwd.h                                      | 1 +
 src/core/mixer/image/image_mixer.h                  | 2 +-
 src/core/video_channel.cpp                          | 4 ++++
 src/core/video_channel.h                            | 1 +
 src/modules/artnet/consumer/artnet_consumer.cpp     | 1 +
 src/modules/artnet/consumer/artnet_consumer.h       | 1 +
 src/modules/bluefish/consumer/bluefish_consumer.cpp | 2 ++
 src/modules/bluefish/consumer/bluefish_consumer.h   | 2 ++
 src/modules/decklink/consumer/decklink_consumer.cpp | 2 ++
 src/modules/decklink/consumer/decklink_consumer.h   | 2 ++
 src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp     | 2 ++
 src/modules/ffmpeg/consumer/ffmpeg_consumer.h       | 2 ++
 src/modules/image/consumer/image_consumer.cpp       | 1 +
 src/modules/image/consumer/image_consumer.h         | 1 +
 src/modules/newtek/consumer/newtek_ndi_consumer.cpp | 2 ++
 src/modules/newtek/consumer/newtek_ndi_consumer.h   | 2 ++
 src/modules/oal/consumer/oal_consumer.cpp           | 2 ++
 src/modules/oal/consumer/oal_consumer.h             | 2 ++
 src/modules/screen/consumer/screen_consumer.cpp     | 2 ++
 src/modules/screen/consumer/screen_consumer.h       | 2 ++
 src/protocol/amcp/AMCPCommandsImpl.cpp              | 8 ++++----
 src/shell/server.cpp                                | 2 +-
 27 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index cd4ab31428..35324356c8 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -364,9 +364,9 @@ struct image_mixer::impl
             });
     }
 
-    std::shared_ptr<core::frame_converter> create_frame_converter() override
+    spl::shared_ptr<core::frame_converter> create_frame_converter() override
     {
-        return std::make_shared<ogl_frame_converter>(ogl_);
+        return spl::make_shared<ogl_frame_converter>(ogl_);
     }
 };
 
@@ -386,6 +386,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel
 {
     return impl_->create_frame(tag, desc);
 }
-std::shared_ptr<core::frame_converter> image_mixer::create_frame_converter() { return impl_->create_frame_converter(); }
+spl::shared_ptr<core::frame_converter> image_mixer::create_frame_converter() { return impl_->create_frame_converter(); }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index 12b954d713..a29873e352 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -47,7 +47,7 @@ class image_mixer final : public core::image_mixer
     std::future<array<const std::uint8_t>> operator()(const core::video_format_desc& format_desc) override;
     core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
 
-    std::shared_ptr<core::frame_converter> create_frame_converter() override;
+    spl::shared_ptr<core::frame_converter> create_frame_converter() override;
 
     // core::image_mixer
 
diff --git a/src/core/consumer/frame_consumer.cpp b/src/core/consumer/frame_consumer.cpp
index 1c410edc37..ae83c2fcd4 100644
--- a/src/core/consumer/frame_consumer.cpp
+++ b/src/core/consumer/frame_consumer.cpp
@@ -163,6 +163,7 @@ class print_consumer_proxy : public frame_consumer
 spl::shared_ptr<core::frame_consumer>
 frame_consumer_registry::create_consumer(const std::vector<std::wstring>&                         params,
                                          const core::video_format_repository&                     format_repository,
+                                         const spl::shared_ptr<core::frame_converter>& frame_converter,
                                          const std::vector<spl::shared_ptr<core::video_channel>>& channels) const
 {
     if (params.empty())
@@ -173,7 +174,7 @@ frame_consumer_registry::create_consumer(const std::vector<std::wstring>&
     if (!std::any_of(
             consumer_factories.begin(), consumer_factories.end(), [&](const consumer_factory_t& factory) -> bool {
                 try {
-                    consumer = factory(params, format_repository, channels);
+                    consumer = factory(params, format_repository,frame_converter, channels);
                 } catch (...) {
                     CASPAR_LOG_CURRENT_EXCEPTION();
                 }
@@ -189,6 +190,7 @@ spl::shared_ptr<frame_consumer>
 frame_consumer_registry::create_consumer(const std::wstring&                                      element_name,
                                          const boost::property_tree::wptree&                      element,
                                          const core::video_format_repository&                     format_repository,
+                                         const spl::shared_ptr<core::frame_converter>& frame_converter,
                                          const std::vector<spl::shared_ptr<core::video_channel>>& channels) const
 {
     auto& preconfigured_consumer_factories = impl_->preconfigured_consumer_factories;
@@ -199,7 +201,7 @@ frame_consumer_registry::create_consumer(const std::wstring&
                                << msg_info(L"No consumer factory registered for element name " + element_name));
 
     return spl::make_shared<destroy_consumer_proxy>(
-        spl::make_shared<print_consumer_proxy>(found->second(element, format_repository, channels)));
+        spl::make_shared<print_consumer_proxy>(found->second(element, format_repository,frame_converter, channels)));
 }
 
 const spl::shared_ptr<frame_consumer>& frame_consumer::empty()
diff --git a/src/core/consumer/frame_consumer.h b/src/core/consumer/frame_consumer.h
index 55f8dbd7e1..5bff60b789 100644
--- a/src/core/consumer/frame_consumer.h
+++ b/src/core/consumer/frame_consumer.h
@@ -62,10 +62,12 @@ class frame_consumer
 using consumer_factory_t =
     std::function<spl::shared_ptr<frame_consumer>(const std::vector<std::wstring>&     params,
                                                   const core::video_format_repository& format_repository,
+                                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)>;
 using preconfigured_consumer_factory_t =
     std::function<spl::shared_ptr<frame_consumer>(const boost::property_tree::wptree&  element,
                                                   const core::video_format_repository& format_repository,
+                                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)>;
 
 class frame_consumer_registry
@@ -78,11 +80,13 @@ class frame_consumer_registry
     spl::shared_ptr<frame_consumer>
     create_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
+                    const spl::shared_ptr<core::frame_converter>& frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels) const;
     spl::shared_ptr<frame_consumer>
     create_consumer(const std::wstring&                                      element_name,
                     const boost::property_tree::wptree&                      element,
                     const core::video_format_repository&                     format_repository,
+                    const spl::shared_ptr<core::frame_converter>& frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels) const;
 
   private:
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index ff4403a854..099be0a832 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -50,7 +50,7 @@ class frame_factory
 
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
-    virtual std::shared_ptr<frame_converter> create_frame_converter() = 0;
+    virtual spl::shared_ptr<frame_converter> create_frame_converter() = 0;
 };
 
 
diff --git a/src/core/fwd.h b/src/core/fwd.h
index d61d1f070e..6a8bdc8af9 100644
--- a/src/core/fwd.h
+++ b/src/core/fwd.h
@@ -31,6 +31,7 @@ FORWARD2(caspar, core, class output);
 FORWARD2(caspar, core, class image_mixer);
 FORWARD2(caspar, core, struct video_format_desc);
 FORWARD2(caspar, core, class frame_factory);
+FORWARD2(caspar, core, class frame_converter);
 FORWARD2(caspar, core, class frame_producer);
 FORWARD2(caspar, core, class frame_consumer);
 FORWARD2(caspar, core, class draw_frame);
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index dcff0d1b79..494bd8ad5f 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -49,7 +49,7 @@ class image_mixer
 
     class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0;
 
-    std::shared_ptr<frame_converter> create_frame_converter() override = 0;
+    spl::shared_ptr<frame_converter> create_frame_converter() override = 0;
 };
 
 }} // namespace caspar::core
diff --git a/src/core/video_channel.cpp b/src/core/video_channel.cpp
index 652411e2b0..f2ec344b79 100644
--- a/src/core/video_channel.cpp
+++ b/src/core/video_channel.cpp
@@ -245,6 +245,10 @@ mixer&                              video_channel::mixer() { return impl_->mixer
 const output&                       video_channel::output() const { return impl_->output_; }
 output&                             video_channel::output() { return impl_->output_; }
 spl::shared_ptr<frame_factory>      video_channel::frame_factory() { return impl_->image_mixer_; }
+spl::shared_ptr<frame_converter>      video_channel::frame_converter() {
+    // TODO - is this too expensive?
+    return impl_->image_mixer_->create_frame_converter();
+}
 int                                 video_channel::index() const { return impl_->index(); }
 core::monitor::state                video_channel::state() const { return impl_->state_; }
 
diff --git a/src/core/video_channel.h b/src/core/video_channel.h
index 1bdc98ff87..801fc6e3f8 100644
--- a/src/core/video_channel.h
+++ b/src/core/video_channel.h
@@ -85,6 +85,7 @@ class video_channel final
     core::output&                       output();
 
     spl::shared_ptr<core::frame_factory> frame_factory();
+    spl::shared_ptr<core::frame_converter> frame_converter();
 
     int index() const;
 
diff --git a/src/modules/artnet/consumer/artnet_consumer.cpp b/src/modules/artnet/consumer/artnet_consumer.cpp
index c38b1df6dc..c08a5ab8a9 100644
--- a/src/modules/artnet/consumer/artnet_consumer.cpp
+++ b/src/modules/artnet/consumer/artnet_consumer.cpp
@@ -311,6 +311,7 @@ std::vector<fixture> get_fixtures_ptree(const boost::property_tree::wptree& ptre
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/artnet/consumer/artnet_consumer.h b/src/modules/artnet/consumer/artnet_consumer.h
index 0fca71248f..ffb94ad84a 100644
--- a/src/modules/artnet/consumer/artnet_consumer.h
+++ b/src/modules/artnet/consumer/artnet_consumer.h
@@ -35,5 +35,6 @@ namespace caspar { namespace artnet {
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 }} // namespace caspar::artnet
diff --git a/src/modules/bluefish/consumer/bluefish_consumer.cpp b/src/modules/bluefish/consumer/bluefish_consumer.cpp
index b5c68d8280..25d3f01c02 100644
--- a/src/modules/bluefish/consumer/bluefish_consumer.cpp
+++ b/src/modules/bluefish/consumer/bluefish_consumer.cpp
@@ -884,6 +884,7 @@ struct bluefish_consumer_proxy : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.size() < 1 || !boost::iequals(params.at(0), L"BLUEFISH")) {
@@ -939,6 +940,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/bluefish/consumer/bluefish_consumer.h b/src/modules/bluefish/consumer/bluefish_consumer.h
index 0d97101bfb..9d942492b3 100644
--- a/src/modules/bluefish/consumer/bluefish_consumer.h
+++ b/src/modules/bluefish/consumer/bluefish_consumer.h
@@ -34,11 +34,13 @@ namespace caspar { namespace bluefish {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::bluefish
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 6c4391b823..8feca3e9aa 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -901,6 +901,7 @@ struct decklink_consumer_proxy : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.empty() || !boost::iequals(params.at(0), L"DECKLINK")) {
@@ -915,6 +916,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config = parse_xml_config(ptree, format_repository);
diff --git a/src/modules/decklink/consumer/decklink_consumer.h b/src/modules/decklink/consumer/decklink_consumer.h
index 2d6da8d0bc..94ffc08b90 100644
--- a/src/modules/decklink/consumer/decklink_consumer.h
+++ b/src/modules/decklink/consumer/decklink_consumer.h
@@ -35,10 +35,12 @@ namespace caspar { namespace decklink {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::decklink
diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
index 5f6fa3a14c..43c9a81bbb 100644
--- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
+++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
@@ -715,6 +715,7 @@ struct ffmpeg_consumer : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.size() < 2 || (!boost::iequals(params.at(0), L"STREAM") && !boost::iequals(params.at(0), L"FILE")))
@@ -731,6 +732,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     return spl::make_shared<ffmpeg_consumer>(u8(ptree.get<std::wstring>(L"path", L"")),
diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
index 3a4af28d6e..998eb42a69 100644
--- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
+++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
@@ -35,10 +35,12 @@ namespace caspar { namespace ffmpeg {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::ffmpeg
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index f9c020abff..d3bee919f7 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -118,6 +118,7 @@ struct image_consumer : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.empty() || !boost::iequals(params.at(0), L"IMAGE"))
diff --git a/src/modules/image/consumer/image_consumer.h b/src/modules/image/consumer/image_consumer.h
index e971f28e2b..b779a6e528 100644
--- a/src/modules/image/consumer/image_consumer.h
+++ b/src/modules/image/consumer/image_consumer.h
@@ -34,6 +34,7 @@ namespace caspar { namespace image {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::image
diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
index 1a93a2f73e..79c66c997c 100644
--- a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
+++ b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
@@ -257,6 +257,7 @@ std::atomic<int> newtek_ndi_consumer::instances_(0);
 spl::shared_ptr<core::frame_consumer>
 create_ndi_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
+                    const spl::shared_ptr<core::frame_converter>& frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.size() < 1 || !boost::iequals(params.at(0), L"NDI"))
@@ -269,6 +270,7 @@ create_ndi_consumer(const std::vector<std::wstring>&                         par
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_ndi_consumer(const boost::property_tree::wptree&                      ptree,
                                   const core::video_format_repository&                     format_repository,
+                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     auto name         = ptree.get(L"name", L"");
diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.h b/src/modules/newtek/consumer/newtek_ndi_consumer.h
index 2f3e788d53..8148ee5385 100644
--- a/src/modules/newtek/consumer/newtek_ndi_consumer.h
+++ b/src/modules/newtek/consumer/newtek_ndi_consumer.h
@@ -35,10 +35,12 @@ namespace caspar { namespace newtek {
 spl::shared_ptr<core::frame_consumer>
 create_ndi_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
+                    const spl::shared_ptr<core::frame_converter>& frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_ndi_consumer(const boost::property_tree::wptree&                      ptree,
                                   const core::video_format_repository&                     format_repository,
+                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::newtek
diff --git a/src/modules/oal/consumer/oal_consumer.cpp b/src/modules/oal/consumer/oal_consumer.cpp
index fa790fe9fb..6ce9a81c09 100644
--- a/src/modules/oal/consumer/oal_consumer.cpp
+++ b/src/modules/oal/consumer/oal_consumer.cpp
@@ -389,6 +389,7 @@ struct oal_consumer : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.empty() || !boost::iequals(params.at(0), L"AUDIO"))
@@ -400,6 +401,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     return spl::make_shared<oal_consumer>();
diff --git a/src/modules/oal/consumer/oal_consumer.h b/src/modules/oal/consumer/oal_consumer.h
index a868d505b6..d143412d5b 100644
--- a/src/modules/oal/consumer/oal_consumer.h
+++ b/src/modules/oal/consumer/oal_consumer.h
@@ -34,10 +34,12 @@ namespace caspar { namespace oal {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::oal
diff --git a/src/modules/screen/consumer/screen_consumer.cpp b/src/modules/screen/consumer/screen_consumer.cpp
index 23d4301d58..7f401a483a 100644
--- a/src/modules/screen/consumer/screen_consumer.cpp
+++ b/src/modules/screen/consumer/screen_consumer.cpp
@@ -608,6 +608,7 @@ struct screen_consumer_proxy : public core::frame_consumer
 
 spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
                                                       const core::video_format_repository& format_repository,
+                                                      const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.empty() || !boost::iequals(params.at(0), L"SCREEN")) {
@@ -644,6 +645,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/screen/consumer/screen_consumer.h b/src/modules/screen/consumer/screen_consumer.h
index c7129052b4..16493ab824 100644
--- a/src/modules/screen/consumer/screen_consumer.h
+++ b/src/modules/screen/consumer/screen_consumer.h
@@ -33,10 +33,12 @@ namespace caspar { namespace screen {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
+                const spl::shared_ptr<core::frame_converter>& frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
+                              const spl::shared_ptr<core::frame_converter>& frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::screen
diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp
index 403bbe4d15..cfa23511db 100644
--- a/src/protocol/amcp/AMCPCommandsImpl.cpp
+++ b/src/protocol/amcp/AMCPCommandsImpl.cpp
@@ -456,7 +456,7 @@ std::wstring add_command(command_context& ctx)
     core::diagnostics::call_context::for_thread().video_channel = ctx.channel_index + 1;
 
     auto consumer = ctx.static_context->consumer_registry->create_consumer(
-        ctx.parameters, ctx.static_context->format_repository, get_channels(ctx));
+        ctx.parameters, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx));
     ctx.channel.raw_channel->output().add(ctx.layer_index(consumer->index()), consumer);
 
     return L"202 ADD OK\r\n";
@@ -474,7 +474,7 @@ std::wstring remove_command(command_context& ctx)
         }
 
         index = ctx.static_context->consumer_registry
-                    ->create_consumer(ctx.parameters, ctx.static_context->format_repository, get_channels(ctx))
+                    ->create_consumer(ctx.parameters, ctx.static_context->format_repository,ctx.channel.raw_channel->frame_converter(), get_channels(ctx))
                     ->index();
     }
 
@@ -488,7 +488,7 @@ std::wstring remove_command(command_context& ctx)
 std::wstring print_command(command_context& ctx)
 {
     ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer(
-        {L"IMAGE"}, ctx.static_context->format_repository, get_channels(ctx)));
+        {L"IMAGE"}, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)));
 
     return L"202 PRINT OK\r\n";
 }
@@ -1354,7 +1354,7 @@ std::wstring channel_grid_command(command_context& ctx)
     params.emplace_back(L"NAME");
     params.emplace_back(L"Channel Grid Window");
     auto screen = ctx.static_context->consumer_registry->create_consumer(
-        params, ctx.static_context->format_repository, get_channels(ctx));
+        params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx));
 
     self.raw_channel->output().add(screen);
 
diff --git a/src/shell/server.cpp b/src/shell/server.cpp
index 81758999f8..f22e0cc9e2 100644
--- a/src/shell/server.cpp
+++ b/src/shell/server.cpp
@@ -342,7 +342,7 @@ struct server::impl
                     try {
                         if (name != L"<xmlcomment>")
                             channel.raw_channel->output().add(consumer_registry_->create_consumer(
-                                name, xml_consumer.second, video_format_repository_, channels_vec));
+                                name, xml_consumer.second, video_format_repository_, channel.raw_channel->frame_converter(), channels_vec));
                     } catch (...) {
                         CASPAR_LOG_CURRENT_EXCEPTION();
                     }

From 57a362452fcbfaa127d3d11be6d25174dd0fc16e Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 22 Dec 2023 17:48:35 +0000
Subject: [PATCH 19/50] wip

---
 .../decklink/consumer/decklink_consumer.cpp   | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 8feca3e9aa..6a200f94e7 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -403,6 +403,8 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback
 
 struct decklink_consumer final : public IDeckLinkVideoOutputCallback
 {
+    const spl::shared_ptr<core::frame_converter> frame_converter_;
+
     const int           channel_index_;
     const configuration config_;
 
@@ -444,8 +446,9 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
     std::atomic<bool> abort_request_{false};
 
   public:
-    decklink_consumer(const configuration& config, core::video_format_desc channel_format_desc, int channel_index)
-        : channel_index_(channel_index)
+    decklink_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter, const configuration& config, core::video_format_desc channel_format_desc, int channel_index)
+        : frame_converter_(frame_converter)
+        , channel_index_(channel_index)
         , config_(config)
         , channel_format_desc_(std::move(channel_format_desc))
         , decklink_format_desc_(get_decklink_format(config.primary, channel_format_desc_))
@@ -849,14 +852,17 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
 
 struct decklink_consumer_proxy : public core::frame_consumer
 {
+    const spl::shared_ptr<core::frame_converter> frame_converter_;
+
     const configuration                config_;
     std::unique_ptr<decklink_consumer> consumer_;
     core::video_format_desc            format_desc_;
     executor                           executor_;
 
   public:
-    explicit decklink_consumer_proxy(const configuration& config)
-        : config_(config)
+    explicit decklink_consumer_proxy(const spl::shared_ptr<core::frame_converter>& frame_converter, const configuration& config)
+        : frame_converter_(frame_converter)
+        , config_(config)
         , executor_(L"decklink_consumer[" + std::to_wstring(config.primary.device_index) + L"]")
     {
         executor_.begin_invoke([=] { com_initialize(); });
@@ -876,7 +882,7 @@ struct decklink_consumer_proxy : public core::frame_consumer
         format_desc_ = format_desc;
         executor_.invoke([=] {
             consumer_.reset();
-            consumer_ = std::make_unique<decklink_consumer>(config_, format_desc, channel_index);
+            consumer_ = std::make_unique<decklink_consumer>(frame_converter_, config_, format_desc, channel_index);
         });
     }
 
@@ -910,7 +916,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 
     configuration config = parse_amcp_config(params, format_repository);
 
-    return spl::make_shared<decklink_consumer_proxy>(config);
+    return spl::make_shared<decklink_consumer_proxy>(frame_converter, config);
 }
 
 spl::shared_ptr<core::frame_consumer>
@@ -921,7 +927,7 @@ create_preconfigured_consumer(const boost::property_tree::wptree&
 {
     configuration config = parse_xml_config(ptree, format_repository);
 
-    return spl::make_shared<decklink_consumer_proxy>(config);
+    return spl::make_shared<decklink_consumer_proxy>(frame_converter, config);
 }
 
 }} // namespace caspar::decklink

From f6a30be997c6eff547b2002ae4174c416a33b83c Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Wed, 27 Dec 2023 17:02:54 +0000
Subject: [PATCH 20/50] wip: boilerplate for frame conversion

---
 src/accelerator/CMakeLists.txt                |  2 +
 src/accelerator/ogl/image/frame_converter.cpp | 50 +++++++++++-
 src/accelerator/ogl/image/frame_converter.h   |  3 +
 .../ogl/image/shader_from_rgba.comp           | 15 ++++
 src/accelerator/ogl/util/device.cpp           | 77 ++++++++++++++++---
 src/accelerator/ogl/util/device.h             |  7 ++
 src/core/frame/frame_factory.h                | 17 +++-
 .../decklink/consumer/decklink_consumer.cpp   | 21 ++---
 8 files changed, 167 insertions(+), 25 deletions(-)
 create mode 100644 src/accelerator/ogl/image/shader_from_rgba.comp

diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt
index ee2eda3b2e..41aae2bf3f 100644
--- a/src/accelerator/CMakeLists.txt
+++ b/src/accelerator/CMakeLists.txt
@@ -30,6 +30,7 @@ set(HEADERS
 	ogl_image_vertex.h
 	ogl_image_fragment.h
 	ogl_image_to_rgba.h
+	ogl_image_from_rgba.h
 
 	accelerator.h
 	StdAfx.h
@@ -38,6 +39,7 @@ set(HEADERS
 bin2c("ogl/image/shader.vert" "ogl_image_vertex.h" "caspar::accelerator::ogl" "vertex_shader")
 bin2c("ogl/image/shader.frag" "ogl_image_fragment.h" "caspar::accelerator::ogl" "fragment_shader")
 bin2c("ogl/image/shader_to_rgba.comp" "ogl_image_to_rgba.h" "caspar::accelerator::ogl" "compute_to_rgba_shader")
+bin2c("ogl/image/shader_from_rgba.comp" "ogl_image_from_rgba.h" "caspar::accelerator::ogl" "compute_from_rgba_shader")
 
 casparcg_add_library(accelerator SOURCES ${SOURCES} ${HEADERS})
 target_include_directories(accelerator PRIVATE
diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 1baf619413..cbe614a641 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -19,6 +19,12 @@
  * Author: Julian Waller, julian@superfly.tv
  */
 #include "frame_converter.h"
+#include "../util/texture.h"
+
+#include <core/frame/pixel_format.h>
+
+#include <common/except.h>
+#include <common/future.h>
 
 namespace caspar::accelerator::ogl {
 
@@ -29,7 +35,6 @@ ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr<device>& ogl)
 
 core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc)
 {
-
     std::vector<array<std::uint8_t>> image_data;
     for (auto& plane : desc.planes) {
         image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth
@@ -64,4 +69,47 @@ core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& f
     return core::draw_frame{};
 }
 
+std::shared_future<std::vector<array<const std::uint8_t>>>
+ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format)
+{
+    std::vector<array<const std::uint8_t>> buffers;
+    int                                    x_count = 0;
+    int                                    y_count = 0;
+    switch (format) {
+        case core::encoded_frame_format::decklink_v210:
+            auto row_blocks = ((frame.width() + 47) / 48);
+            auto row_bytes  = row_blocks * 128;
+
+            // TODO - result must be 128byte aligned. can that be guaranteed here?
+            buffers.push_back(ogl_->create_array(row_bytes * frame.height(), common::bit_depth::bit8));
+            x_count = row_blocks;
+            y_count = frame.height();
+            break;
+    }
+
+    if (buffers.size() == 0 || x_count == 0 || y_count == 0) {
+        CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
+    }
+
+    std::vector<std::shared_ptr<texture>> textures;
+
+    // TODO - avoid this extra copy
+    auto plane_count = frame.pixel_format_desc().planes.size();
+    for (size_t i = 0; i < plane_count; i++) {
+        auto plane   = frame.pixel_format_desc().planes[i];
+        auto texture = ogl_->copy_async(frame.image_data(i), plane.width, plane.height, plane.size);
+        textures.push_back(texture.get());
+    }
+
+    auto future_conversion =
+        ogl_->convert_from_texture(textures, buffers, frame.width(), frame.height(), x_count, y_count);
+
+    return std::async(std::launch::deferred,
+                      [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable {
+                          future_conversion.get();
+
+                          return std::move(buffers);
+                      });
+}
+
 } // namespace caspar::accelerator::ogl
\ No newline at end of file
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index 1a9ec1f80b..41daedc772 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -45,6 +45,9 @@ class ogl_frame_converter
 
     core::draw_frame convert_frame(const core::mutable_frame& frame) override;
 
+    std::shared_future<std::vector<array<const std::uint8_t>>>
+    convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) override;
+
   private:
     const spl::shared_ptr<device> ogl_;
 };
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
new file mode 100644
index 0000000000..b8fe8752e2
--- /dev/null
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -0,0 +1,15 @@
+#version 430
+
+layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
+
+void main() {
+    vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
+    ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
+
+    value.r = float(texelCoord.x)/(gl_NumWorkGroups.x);
+    value.g = float(texelCoord.y)/(gl_NumWorkGroups.y);
+    
+    imageStore(imgOutput, texelCoord, value);
+}
\ No newline at end of file
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 68f0bed0a1..0a317706d7 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -24,7 +24,6 @@
 #include "compute_shader.h"
 #include "texture.h"
 
-
 #include <common/array.h>
 #include <common/assert.h>
 #include <common/env.h>
@@ -42,9 +41,9 @@
 
 #include <boost/asio/deadline_timer.hpp>
 #include <boost/asio/dispatch.hpp>
-#include <memory>
 #include <boost/asio/spawn.hpp>
 #include <boost/property_tree/ptree.hpp>
+#include <memory>
 
 #include <tbb/concurrent_queue.h>
 #include <tbb/concurrent_unordered_map.h>
@@ -53,6 +52,7 @@
 #include <future>
 #include <thread>
 
+#include "ogl_image_from_rgba.h"
 #include "ogl_image_to_rgba.h"
 
 namespace caspar { namespace accelerator { namespace ogl {
@@ -73,7 +73,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     sync_queue_t sync_queue_;
 
-    std::unique_ptr<compute_shader> compute_shader_;
+    std::unique_ptr<compute_shader> compute_to_rgba_;
+    std::unique_ptr<compute_shader> compute_from_rgba_;
 
     GLuint fbo_;
 
@@ -119,7 +120,6 @@ struct device::impl : public std::enable_shared_from_this<impl>
         GL(glCreateFramebuffers(1, &fbo_));
         GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_));
 
-
         device_.setActive(false);
 
         thread_ = std::thread([&] {
@@ -231,8 +231,6 @@ struct device::impl : public std::enable_shared_from_this<impl>
         return array<uint8_t>(ptr, buf->size(), buf, depth);
     }
 
-
-
     std::future<std::shared_ptr<texture>>
     copy_async(const array<const uint8_t>& source, int width, int height, int stride)
     {
@@ -300,15 +298,14 @@ struct device::impl : public std::enable_shared_from_this<impl>
     convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int width_samples)
     {
         return dispatch_async([=] {
-
-            if (!compute_shader_)
-            compute_shader_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
+            if (!compute_to_rgba_)
+                compute_to_rgba_ = std::make_unique<compute_shader>(std::string(compute_to_rgba_shader));
 
             auto tex = create_texture(width, height, 4, common::bit_depth::bit16, false);
 
             glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
 
-            compute_shader_->use();
+            compute_to_rgba_->use();
 
             glDispatchCompute((unsigned int)width_samples, (unsigned int)height, 1);
 
@@ -319,6 +316,57 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
+    std::future<void> convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+                                           const std::vector<array<const uint8_t>>&     buffers,
+                                           int                                          width,
+                                           int                                          height,
+                                           int                                          x_count,
+                                           int                                          y_count)
+    {
+        return spawn_async([=](yield_context yield) {
+            if (!compute_from_rgba_)
+                compute_from_rgba_ = std::make_unique<compute_shader>(std::string(compute_from_rgba_shader));
+
+            // TODO: This probably only needs to handle one texture
+            for (size_t i = 0; i < textures.size(); i++) {
+                auto& tex = textures[i];
+                glBindImageTexture(i, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
+            }
+
+            for (size_t i = 0; i < buffers.size(); i++) {
+                auto& source = buffers[i];
+                auto  tmp    = source.storage<std::shared_ptr<buffer>>();
+                if (!tmp) {
+                    CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
+                }
+
+                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, tmp->get()->id());
+            }
+
+            compute_to_rgba_->use();
+
+            glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1);
+
+            auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+
+            GL(glFlush());
+
+            deadline_timer timer(service_);
+            for (auto n = 0; true; ++n) {
+                // TODO (perf) Smarter non-polling solution?
+                timer.expires_from_now(boost::posix_time::milliseconds(2));
+                timer.async_wait(yield);
+
+                auto wait = glClientWaitSync(fence, 0, 1);
+                if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED) {
+                    break;
+                }
+            }
+
+            glDeleteSync(fence);
+        });
+    }
+
 #ifdef WIN32
     std::future<std::shared_ptr<texture>> copy_async(GLuint source, int width, int height, int stride)
     {
@@ -483,6 +531,15 @@ device::convert_frame(const std::vector<array<const uint8_t>>& sources, int widt
 {
     return impl_->convert_frame(sources, width, height, format);
 }
+std::future<void> device::convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+                                               const std::vector<array<const uint8_t>>&     buffers,
+                                               int                                          width,
+                                               int                                          height,
+                                               int                                          x_count,
+                                               int                                          y_count)
+{
+    return impl_->convert_from_texture(textures, buffers, width, height, x_count, y_count);
+}
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
 boost::property_tree::wptree device::info() const { return impl_->info(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 91e470e370..7e245f73b5 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -56,6 +56,13 @@ class device final
     std::future<std::shared_ptr<texture>>
     convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format);
 
+    std::future<void> convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+                                           const std::vector<array<const uint8_t>>&     buffers,
+                                           int                                          width,
+                                           int                                          height,
+                                           int                                          x_count,
+                                           int                                          y_count);
+
     template <typename Func>
     auto dispatch_async(Func&& func)
     {
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 099be0a832..48192135d1 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -23,13 +23,21 @@
 
 #include <common/bit_depth.h>
 
+#include <future>
+
 namespace caspar { namespace core {
 
-class frame_converter {
+enum encoded_frame_format
+{
+    decklink_v210 = 0,
+};
+
+class frame_converter
+{
   public:
-    frame_converter()                                = default;
+    frame_converter()                                  = default;
     frame_converter& operator=(const frame_converter&) = delete;
-    virtual ~frame_converter()                       = default;
+    virtual ~frame_converter()                         = default;
 
     frame_converter(const frame_converter&) = delete;
 
@@ -37,6 +45,8 @@ class frame_converter {
 
     virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0;
 
+    virtual std::shared_future<std::vector<array<const std::uint8_t>>>
+    convert_from_rgba(const core::const_frame& frame, const encoded_frame_format format) = 0;
 };
 
 class frame_factory
@@ -53,5 +63,4 @@ class frame_factory
     virtual spl::shared_ptr<frame_converter> create_frame_converter() = 0;
 };
 
-
 }} // namespace caspar::core
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 6a200f94e7..d10c98a390 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -749,16 +749,17 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
                         schedule_next_audio(std::move(audio_data), nb_samples);
                     }
                 } else {
-                    // Send frame to secondary ports
-                    auto& context = secondary_port_contexts_[i];
-                    context->schedule_frame(frame1, video_display_time);
-                    if (isInterlaced) {
-                        context->schedule_frame(frame2, video_display_time);
-                    }
-
-                    if (config_.embedded_audio) {
-                        // TODO - audio for secondaries?
-                    }
+                    // TODO - reimplement this
+                    // // Send frame to secondary ports
+                    // auto& context = secondary_port_contexts_[i];
+                    // context->schedule_frame(frame1, video_display_time);
+                    // if (isInterlaced) {
+                    //     context->schedule_frame(frame2, video_display_time);
+                    // }
+
+                    // if (config_.embedded_audio) {
+                    //     // TODO - audio for secondaries?
+                    // }
                 }
             });
 

From 111e8f7fa7dd413a07646c82191b95f527569979 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 14:45:28 +0000
Subject: [PATCH 21/50] wip: broke

---
 src/accelerator/ogl/image/frame_converter.cpp |   8 +-
 .../ogl/image/shader_from_rgba.comp           |   7 +-
 src/accelerator/ogl/image/shader_to_rgba.comp |   2 +-
 src/accelerator/ogl/util/device.cpp           |  12 +-
 src/common/memshfl.h                          |   8 +-
 .../decklink/consumer/decklink_consumer.cpp   | 127 +++++++++++++-----
 src/modules/decklink/consumer/frame.cpp       |   4 +-
 7 files changed, 114 insertions(+), 54 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index cbe614a641..6c58dc4af3 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -94,10 +94,10 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     std::vector<std::shared_ptr<texture>> textures;
 
     // TODO - avoid this extra copy
-    auto plane_count = frame.pixel_format_desc().planes.size();
-    for (size_t i = 0; i < plane_count; i++) {
-        auto plane   = frame.pixel_format_desc().planes[i];
-        auto texture = ogl_->copy_async(frame.image_data(i), plane.width, plane.height, plane.size);
+    size_t i = 0;
+    for (auto& plane : frame.pixel_format_desc().planes) {
+        // TODO - this is failing. is the buffer going the wrong direction causing it to fail?
+        auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride);
         textures.push_back(texture.get());
     }
 
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index b8fe8752e2..d17e3eff0a 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -2,7 +2,12 @@
 
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
+layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to
+
+layout(std430, binding = 1) buffer bufferOutput
+{
+    uint8 data[];
+};
 
 void main() {
     vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp
index 1964edadbf..4bc5fba7f3 100644
--- a/src/accelerator/ogl/image/shader_to_rgba.comp
+++ b/src/accelerator/ogl/image/shader_to_rgba.comp
@@ -2,7 +2,7 @@
 
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
+layout(rgba16f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to
 
 void main() {
     vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 0a317706d7..cd4e2a0f67 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -303,7 +303,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
             auto tex = create_texture(width, height, 4, common::bit_depth::bit16, false);
 
-            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
+            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA16F);
 
             compute_to_rgba_->use();
 
@@ -328,10 +328,10 @@ struct device::impl : public std::enable_shared_from_this<impl>
                 compute_from_rgba_ = std::make_unique<compute_shader>(std::string(compute_from_rgba_shader));
 
             // TODO: This probably only needs to handle one texture
-            for (size_t i = 0; i < textures.size(); i++) {
-                auto& tex = textures[i];
-                glBindImageTexture(i, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
-            }
+            // for (size_t i = 0; i < textures.size(); i++) {
+            auto& tex = textures[0];
+            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
+            // }
 
             for (size_t i = 0; i < buffers.size(); i++) {
                 auto& source = buffers[i];
@@ -340,7 +340,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
                     CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
                 }
 
-                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, tmp->get()->id());
+                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id());
             }
 
             compute_to_rgba_->use();
diff --git a/src/common/memshfl.h b/src/common/memshfl.h
index 3947e9867f..2bc3b4f10c 100644
--- a/src/common/memshfl.h
+++ b/src/common/memshfl.h
@@ -35,14 +35,14 @@
 namespace caspar {
 
 #ifdef _MSC_VER
-static std::shared_ptr<void> create_aligned_buffer(size_t size)
+static std::shared_ptr<void> create_aligned_buffer(size_t size, size_t alignment)
 {
-    return std::shared_ptr<void>(_aligned_malloc(size, 64), _aligned_free);
+    return std::shared_ptr<void>(_aligned_malloc(size, alignment), _aligned_free);
 }
 #else
-static std::shared_ptr<void> create_aligned_buffer(size_t size)
+static std::shared_ptr<void> create_aligned_buffer(size_t size, size_t alignment)
 {
-    return std::shared_ptr<void>(aligned_alloc(64, size), free);
+    return std::shared_ptr<void>(aligned_alloc(alignment, size), free);
 }
 #endif
 
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index d10c98a390..9a158cc0ed 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -34,6 +34,7 @@
 #include <core/consumer/frame_consumer.h>
 #include <core/diagnostics/call_context.h>
 #include <core/frame/frame.h>
+#include <core/frame/frame_factory.h>
 #include <core/mixer/audio/audio_mixer.h>
 #include <core/video_format.h>
 
@@ -53,6 +54,7 @@
 #include <future>
 #include <memory>
 #include <mutex>
+#include <optional>
 #include <queue>
 #include <thread>
 #include <utility>
@@ -193,13 +195,18 @@ core::video_format_desc get_decklink_format(const port_configuration&      confi
 class decklink_frame : public IDeckLinkVideoFrame
 {
     core::video_format_desc format_desc_;
+    BMDPixelFormat          pixel_format_;
     std::shared_ptr<void>   data_;
     std::atomic<int>        ref_count_{0};
     int                     nb_samples_;
 
   public:
-    decklink_frame(std::shared_ptr<void> data, core::video_format_desc format_desc, int nb_samples)
+    decklink_frame(std::shared_ptr<void>   data,
+                   BMDPixelFormat          pixel_format,
+                   core::video_format_desc format_desc,
+                   int                     nb_samples)
         : format_desc_(std::move(format_desc))
+        , pixel_format_(pixel_format)
         , data_(std::move(data))
         , nb_samples_(nb_samples)
     {
@@ -224,10 +231,21 @@ class decklink_frame : public IDeckLinkVideoFrame
 
     // IDecklinkVideoFrame
 
-    long STDMETHODCALLTYPE           GetWidth() override { return static_cast<long>(format_desc_.width); }
-    long STDMETHODCALLTYPE           GetHeight() override { return static_cast<long>(format_desc_.height); }
-    long STDMETHODCALLTYPE           GetRowBytes() override { return static_cast<long>(format_desc_.width) * 4; }
-    BMDPixelFormat STDMETHODCALLTYPE GetPixelFormat() override { return bmdFormat8BitBGRA; }
+    long STDMETHODCALLTYPE GetWidth() override { return static_cast<long>(format_desc_.width); }
+    long STDMETHODCALLTYPE GetHeight() override { return static_cast<long>(format_desc_.height); }
+    long STDMETHODCALLTYPE GetRowBytes() override
+    {
+        switch (pixel_format_) {
+            case bmdFormat8BitARGB:
+            case bmdFormat8BitBGRA:
+                return static_cast<long>(format_desc_.width) * 4;
+            case bmdFormat10BitYUV:
+                return ((static_cast<long>(format_desc_.width) + 47) / 48) * 128;
+            default:
+                return 0;
+        }
+    }
+    BMDPixelFormat STDMETHODCALLTYPE GetPixelFormat() override { return pixel_format_; }
     BMDFrameFlags STDMETHODCALLTYPE  GetFlags() override { return bmdFrameFlagDefault; }
 
     HRESULT STDMETHODCALLTYPE GetBytes(void** buffer) override
@@ -369,13 +387,16 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback
         auto image_data = convert_frame_for_port(
             channel_format_desc_, decklink_format_desc_, output_config_, frame1, frame2, mode_->GetFieldDominance());
 
-        schedule_next_video(image_data, 0, display_time);
+        schedule_next_video(image_data, bmdFormat8BitBGRA, 0, display_time);
     }
 
-    void schedule_next_video(std::shared_ptr<void> image_data, int nb_samples, BMDTimeValue display_time)
+    void schedule_next_video(std::shared_ptr<void> image_data,
+                             BMDPixelFormat        pixel_format,
+                             int                   nb_samples,
+                             BMDTimeValue          display_time)
     {
         auto packed_frame = wrap_raw<com_ptr, IDeckLinkVideoFrame>(
-            new decklink_frame(std::move(image_data), decklink_format_desc_, nb_samples));
+            new decklink_frame(std::move(image_data), pixel_format, decklink_format_desc_, nb_samples));
         if (FAILED(output_->ScheduleVideoFrame(get_raw(packed_frame),
                                                display_time,
                                                decklink_format_desc_.duration,
@@ -401,6 +422,18 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback
     }
 };
 
+struct converted_frame
+{
+    core::const_frame                                          raw_frame;
+    std::shared_future<std::vector<array<const std::uint8_t>>> frame;
+
+    converted_frame(core::const_frame raw_frame, std::shared_future<std::vector<array<const std::uint8_t>>> frame)
+        : raw_frame(raw_frame)
+        , frame(frame)
+    {
+    }
+};
+
 struct decklink_consumer final : public IDeckLinkVideoOutputCallback
 {
     const spl::shared_ptr<core::frame_converter> frame_converter_;
@@ -421,10 +454,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
     const core::video_format_desc channel_format_desc_;
     const core::video_format_desc decklink_format_desc_;
 
-    std::mutex                    buffer_mutex_;
-    std::condition_variable       buffer_cond_;
-    std::queue<core::const_frame> buffer_;
-    int                           buffer_capacity_ = channel_format_desc_.field_count;
+    std::mutex                  buffer_mutex_;
+    std::condition_variable     buffer_cond_;
+    std::queue<converted_frame> buffer_;
+    int                         buffer_capacity_ = channel_format_desc_.field_count;
 
     const int buffer_size_ = config_.buffer_depth(); // Minimum buffer-size 3.
 
@@ -446,7 +479,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
     std::atomic<bool> abort_request_{false};
 
   public:
-    decklink_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter, const configuration& config, core::video_format_desc channel_format_desc, int channel_index)
+    decklink_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter,
+                      const configuration&                          config,
+                      core::video_format_desc                       channel_format_desc,
+                      int                                           channel_index)
         : frame_converter_(frame_converter)
         , channel_index_(channel_index)
         , config_(config)
@@ -520,11 +556,11 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
                                     nb_samples);
             }
 
-            std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc_.size);
+            std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc_.size, 128);
 
-            schedule_next_video(image_data, nb_samples, video_scheduled_);
+            schedule_next_video(image_data, bmdFormat8BitBGRA, nb_samples, video_scheduled_);
             for (auto& context : secondary_port_contexts_) {
-                context->schedule_next_video(image_data, 0, video_scheduled_);
+                context->schedule_next_video(image_data, bmdFormat8BitBGRA, 0, video_scheduled_);
             }
 
             video_scheduled_ += decklink_format_desc_.duration;
@@ -707,8 +743,8 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
                 }
             }
 
-            core::const_frame frame1 = pop();
-            core::const_frame frame2;
+            std::optional<converted_frame> frame1 = pop();
+            std::optional<converted_frame> frame2;
 
             bool isInterlaced = mode_->GetFieldDominance() != bmdProgressiveFrame;
             if (mode_->GetFieldDominance() != bmdProgressiveFrame) {
@@ -719,14 +755,22 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
             if (abort_request_)
                 return E_FAIL;
 
+            // Skip if frames are missing
+            if (!frame1.has_value() || (isInterlaced && !frame2.has_value()))
+                return S_OK;
+
             BMDTimeValue video_display_time = video_scheduled_;
             video_scheduled_ += decklink_format_desc_.duration;
 
             std::vector<std::int32_t> audio_data;
             if (config_.embedded_audio) {
-                audio_data.insert(audio_data.end(), frame1.audio_data().begin(), frame1.audio_data().end());
+                audio_data.insert(audio_data.end(),
+                                  frame1.value().raw_frame.audio_data().begin(),
+                                  frame1.value().raw_frame.audio_data().end());
                 if (isInterlaced) {
-                    audio_data.insert(audio_data.end(), frame2.audio_data().begin(), frame2.audio_data().end());
+                    audio_data.insert(audio_data.end(),
+                                      frame2.value().raw_frame.audio_data().begin(),
+                                      frame2.value().raw_frame.audio_data().end());
                 }
             }
             // TODO: is this reliable?
@@ -736,14 +780,19 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
             tbb::parallel_for(-1, static_cast<int>(secondary_port_contexts_.size()), [&](int i) {
                 if (i == -1) {
                     // Primary port
-                    std::shared_ptr<void> image_data = convert_frame_for_port(channel_format_desc_,
-                                                                              decklink_format_desc_,
-                                                                              config_.primary,
-                                                                              frame1,
-                                                                              frame2,
-                                                                              mode_->GetFieldDominance());
+                    // std::shared_ptr<void> image_data = convert_frame_for_port(channel_format_desc_,
+                    //                                                           decklink_format_desc_,
+                    //                                                           config_.primary,
+                    //                                                           frame1,
+                    //                                                           frame2,
+                    //                                                           mode_->GetFieldDominance());
 
-                    schedule_next_video(image_data, nb_samples, video_display_time);
+                    auto buffers = frame1.value().frame.get();
+
+                    std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc_.size, 128);
+                    std::memcpy(image_data.get(), buffers.at(0).data(), buffers.at(0).size());
+
+                    schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time);
 
                     if (config_.embedded_audio) {
                         schedule_next_audio(std::move(audio_data), nb_samples);
@@ -772,9 +821,9 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
         return S_OK;
     }
 
-    core::const_frame pop()
+    std::optional<converted_frame> pop()
     {
-        core::const_frame frame;
+        std::optional<converted_frame> frame;
         {
             std::unique_lock<std::mutex> lock(buffer_mutex_);
             buffer_cond_.wait(lock, [&] { return !buffer_.empty() || abort_request_; });
@@ -804,10 +853,13 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
         audio_scheduled_ += nb_samples; // TODO - what if there are too many/few samples in this frame?
     }
 
-    void schedule_next_video(std::shared_ptr<void> image_data, int nb_samples, BMDTimeValue display_time)
+    void schedule_next_video(std::shared_ptr<void> image_data,
+                             BMDPixelFormat        pixel_format,
+                             int                   nb_samples,
+                             BMDTimeValue          display_time)
     {
         auto fill_frame = wrap_raw<com_ptr, IDeckLinkVideoFrame>(
-            new decklink_frame(std::move(image_data), decklink_format_desc_, nb_samples));
+            new decklink_frame(std::move(image_data), pixel_format, decklink_format_desc_, nb_samples));
         if (FAILED(output_->ScheduleVideoFrame(
                 get_raw(fill_frame), display_time, decklink_format_desc_.duration, decklink_format_desc_.time_scale))) {
             CASPAR_LOG(error) << print() << L" Failed to schedule primary video.";
@@ -824,12 +876,14 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
         }
 
         if (frame) {
+            auto frame_future = frame_converter_->convert_from_rgba(frame, core::encoded_frame_format::decklink_v210);
+
             std::unique_lock<std::mutex> lock(buffer_mutex_);
             if (field != core::video_field::b) {
                 // Always push a field2, as we have supplied field1
                 buffer_cond_.wait(lock, [&] { return buffer_.size() < buffer_capacity_ || abort_request_; });
             }
-            buffer_.push(std::move(frame));
+            buffer_.push(converted_frame(std::move(frame), frame_future));
         }
         buffer_cond_.notify_all();
 
@@ -861,7 +915,8 @@ struct decklink_consumer_proxy : public core::frame_consumer
     executor                           executor_;
 
   public:
-    explicit decklink_consumer_proxy(const spl::shared_ptr<core::frame_converter>& frame_converter, const configuration& config)
+    explicit decklink_consumer_proxy(const spl::shared_ptr<core::frame_converter>& frame_converter,
+                                     const configuration&                          config)
         : frame_converter_(frame_converter)
         , config_(config)
         , executor_(L"decklink_consumer[" + std::to_wstring(config.primary.device_index) + L"]")
@@ -906,8 +961,8 @@ struct decklink_consumer_proxy : public core::frame_consumer
     [[nodiscard]] core::monitor::state state() const override { return get_state_for_config(config_, format_desc_); }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
@@ -923,7 +978,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config = parse_xml_config(ptree, format_repository);
diff --git a/src/modules/decklink/consumer/frame.cpp b/src/modules/decklink/consumer/frame.cpp
index 57d55e6640..730885e705 100644
--- a/src/modules/decklink/consumer/frame.cpp
+++ b/src/modules/decklink/consumer/frame.cpp
@@ -31,7 +31,7 @@ namespace caspar { namespace decklink {
 
 std::shared_ptr<void> convert_to_key_only(const std::shared_ptr<void>& image_data, std::size_t byte_count)
 {
-    auto key_data = create_aligned_buffer(byte_count);
+    auto key_data = create_aligned_buffer(byte_count, 64);
 
     aligned_memshfl(key_data.get(), image_data.get(), byte_count, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303);
 
@@ -133,7 +133,7 @@ std::shared_ptr<void> convert_frame_for_port(const core::video_format_desc& chan
                                              const core::const_frame&       frame2,
                                              BMDFieldDominance              field_dominance)
 {
-    std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc.size);
+    std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc.size, 64);
 
     if (field_dominance != bmdProgressiveFrame) {
         convert_frame(channel_format_desc,

From 95e30ec843c4fd0ec0f3c082e0ca32bceae2f55a Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 15:00:24 +0000
Subject: [PATCH 22/50] fix: remove bit_depth property from `array`

---
 src/accelerator/ogl/image/frame_converter.cpp | 10 +++++-----
 src/accelerator/ogl/image/image_mixer.cpp     |  9 +++++----
 src/accelerator/ogl/util/device.cpp           | 16 +++++++---------
 src/accelerator/ogl/util/device.h             |  4 ++--
 src/common/array.h                            | 17 ++---------------
 src/common/bit_depth.h                        |  4 ++++
 src/core/frame/pixel_format.h                 |  4 ++--
 7 files changed, 27 insertions(+), 37 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 6c58dc4af3..330b947a27 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -37,7 +37,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor
 {
     std::vector<array<std::uint8_t>> image_data;
     for (auto& plane : desc.planes) {
-        image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth
+        image_data.push_back(ogl_->create_array(plane.size));
     }
 
     using future_texture = std::shared_future<std::shared_ptr<texture>>;
@@ -57,7 +57,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor
             std::vector<future_texture> textures;
             for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
                 textures.emplace_back(self->ogl_->copy_async(
-                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
             }
             return std::make_shared<decltype(textures)>(std::move(textures));
         });
@@ -81,13 +81,13 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
             auto row_bytes  = row_blocks * 128;
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
-            buffers.push_back(ogl_->create_array(row_bytes * frame.height(), common::bit_depth::bit8));
+            buffers.push_back(ogl_->create_array(row_bytes * frame.height()));
             x_count = row_blocks;
             y_count = frame.height();
             break;
     }
 
-    if (buffers.size() == 0 || x_count == 0 || y_count == 0) {
+    if (buffers.empty() || x_count == 0 || y_count == 0) {
         CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
     }
 
@@ -97,7 +97,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     size_t i = 0;
     for (auto& plane : frame.pixel_format_desc().planes) {
         // TODO - this is failing. is the buffer going the wrong direction causing it to fail?
-        auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride);
+        auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride, plane.depth);
         textures.push_back(texture.get());
     }
 
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 35324356c8..ae184ac818 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -90,7 +90,7 @@ class image_renderer
     {
         if (layers.empty()) { // Bypass GPU with empty frame.
             static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
-            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true, depth_));
+            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
         }
 
         return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future<array<const std::uint8_t>> {
@@ -289,7 +289,8 @@ struct image_mixer::impl
                 item.textures.emplace_back(ogl_->copy_async(frame.image_data(n),
                                                             item.pix_desc.planes[n].width,
                                                             item.pix_desc.planes[n].height,
-                                                            item.pix_desc.planes[n].stride));
+                                                            item.pix_desc.planes[n].stride,
+                                                            item.pix_desc.planes[n].depth));
             }
         }
 
@@ -326,7 +327,7 @@ struct image_mixer::impl
     {
         std::vector<array<std::uint8_t>> image_data;
         for (auto& plane : desc.planes) {
-            image_data.push_back(ogl_->create_array(plane.size, plane.depth));
+            image_data.push_back(ogl_->create_array(plane.size));
         }
 
         std::weak_ptr<image_mixer::impl> weak_self = shared_from_this();
@@ -356,7 +357,7 @@ struct image_mixer::impl
                         std::vector<future_texture> textures;
                         for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
                             textures.emplace_back(self->ogl_->copy_async(
-                                image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride));
+                                image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
                         }
                         return std::make_shared<decltype(textures)>(std::move(textures));
                     }
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index cd4e2a0f67..d0ff420655 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -223,18 +223,16 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
-    array<uint8_t> create_array(int count, common::bit_depth depth)
+    array<uint8_t> create_array(int count)
     {
-        auto bytes_per_pixel = static_cast<int>(depth) + 1;
-        auto buf             = create_buffer(count * bytes_per_pixel, true);
+        auto buf             = create_buffer(count, true);
         auto ptr             = reinterpret_cast<uint8_t*>(buf->data());
-        return array<uint8_t>(ptr, buf->size(), buf, depth);
+        return array<uint8_t>(ptr, buf->size(), buf);
     }
 
     std::future<std::shared_ptr<texture>>
-    copy_async(const array<const uint8_t>& source, int width, int height, int stride)
+    copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
     {
-        auto depth = source.native_depth();
         return dispatch_async([=] {
             std::shared_ptr<buffer> buf;
 
@@ -516,11 +514,11 @@ std::shared_ptr<texture> device::create_texture(int width, int height, int strid
 {
     return impl_->create_texture(width, height, stride, depth, true);
 }
-array<uint8_t> device::create_array(int size, common::bit_depth depth) { return impl_->create_array(size, depth); }
+array<uint8_t> device::create_array(int size) { return impl_->create_array(size); }
 std::future<std::shared_ptr<texture>>
-device::copy_async(const array<const uint8_t>& source, int width, int height, int stride)
+device::copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth)
 {
-    return impl_->copy_async(source, width, height, stride);
+    return impl_->copy_async(source, width, height, stride, depth);
 }
 std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<texture>& source)
 {
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 7e245f73b5..b001756468 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -47,10 +47,10 @@ class device final
     device& operator=(const device&) = delete;
 
     std::shared_ptr<class texture> create_texture(int width, int height, int stride, common::bit_depth depth);
-    array<uint8_t>                 create_array(int size, common::bit_depth depth);
+    array<uint8_t>                 create_array(int size);
 
     std::future<std::shared_ptr<class texture>>
-                                      copy_async(const array<const uint8_t>& source, int width, int height, int stride);
+                                      copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
 
     std::future<std::shared_ptr<texture>>
diff --git a/src/common/array.h b/src/common/array.h
index 97b0d411c2..10f27957c3 100644
--- a/src/common/array.h
+++ b/src/common/array.h
@@ -2,8 +2,6 @@
 
 #include <boost/any.hpp>
 
-#include "bit_depth.h"
-
 #include <cstddef>
 #include <cstdlib>
 #include <memory>
@@ -43,11 +41,10 @@ class array final
     }
 
     template <typename S>
-    explicit array(T* ptr, std::size_t size, S&& storage, common::bit_depth native_depth = common::bit_depth::bit8)
+    explicit array(T* ptr, std::size_t size, S&& storage)
         : ptr_(ptr)
         , size_(size)
         , storage_(std::make_shared<boost::any>(std::forward<S>(storage)))
-        , native_depth_(native_depth)
     {
     }
 
@@ -57,7 +54,6 @@ class array final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(std::move(other.storage_))
-        , native_depth_(other.native_depth_)
     {
         other.ptr_  = nullptr;
         other.size_ = 0;
@@ -70,7 +66,6 @@ class array final
         ptr_          = std::move(other.ptr_);
         size_         = std::move(other.size_);
         storage_      = std::move(other.storage_);
-        native_depth_ = std::move(other.native_depth_);
 
         return *this;
     }
@@ -79,7 +74,6 @@ class array final
     T*                data() const { return ptr_; }
     T*                end() const { return ptr_ + size_; }
     std::size_t       size() const { return size_; }
-    common::bit_depth native_depth() const { return native_depth_; }
 
     explicit operator bool() const { return size_ > 0; };
 
@@ -92,7 +86,6 @@ class array final
   private:
     T*                          ptr_          = nullptr;
     std::size_t                 size_         = 0;
-    common::bit_depth           native_depth_ = common::bit_depth::bit8;
     std::shared_ptr<boost::any> storage_;
 };
 
@@ -127,12 +120,10 @@ class array<const T> final
     template <typename S>
     explicit array(const T*          ptr,
                    std::size_t       size,
-                   S&&               storage,
-                   common::bit_depth native_depth = common::bit_depth::bit8)
+                   S&&               storage)
         : ptr_(ptr)
         , size_(size)
         , storage_(std::make_shared<boost::any>(std::forward<S>(storage)))
-        , native_depth_(native_depth)
     {
     }
 
@@ -140,7 +131,6 @@ class array<const T> final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(other.storage_)
-        , native_depth_(other.native_depth_)
     {
     }
 
@@ -148,7 +138,6 @@ class array<const T> final
         : ptr_(other.ptr_)
         , size_(other.size_)
         , storage_(other.storage_)
-        , native_depth_(other.native_depth_)
     {
         other.ptr_     = nullptr;
         other.size_    = 0;
@@ -167,7 +156,6 @@ class array<const T> final
     const T*          data() const { return ptr_; }
     const T*          end() const { return ptr_ + size_; }
     std::size_t       size() const { return size_; }
-    common::bit_depth native_depth() const { return native_depth_; }
 
     explicit operator bool() const { return size_ > 0; }
 
@@ -181,7 +169,6 @@ class array<const T> final
     const T*                    ptr_  = nullptr;
     std::size_t                 size_ = 0;
     std::shared_ptr<boost::any> storage_;
-    common::bit_depth           native_depth_ = common::bit_depth::bit8;
 };
 
 } // namespace caspar
diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h
index 9d1b633f84..45c563e060 100644
--- a/src/common/bit_depth.h
+++ b/src/common/bit_depth.h
@@ -10,4 +10,8 @@ enum class bit_depth : uint8_t
     bit16 = 1,
 };
 
+inline int bytes_per_pixel(bit_depth depth){ // Byte width for a bit depth, derived from the enumerator's numeric value (bit16 = 1 yields 2).
+    return static_cast<int>(depth) + 1; // NOTE(review): only correct while each enumerator equals (bytes - 1); bit8 is presumably 0 - confirm. pixel_format_desc::plane multiplies this by width * stride, so it looks like bytes per component rather than per whole pixel - confirm.
+}
+
 }} // namespace caspar::common
\ No newline at end of file
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index d24fbdfeb4..5cad369a4c 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -66,10 +66,10 @@ struct pixel_format_desc final
         }
 
         plane(int width, int height, int stride, common::bit_depth depth)
-                : linesize(width * stride * (static_cast<int>(depth) + 1))
+                : linesize(width * stride * common::bytes_per_pixel(depth))
                 , width(width)
                 , height(height)
-                , size(width * height * stride)
+                , size(width * height * stride * common::bytes_per_pixel(depth))
                 , stride(stride)
                 , depth(depth)
         {

From 6618c4b432c0d3eea14cf1f638faab7b91c3cdcb Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 15:22:38 +0000
Subject: [PATCH 23/50] wip: hackily expose composited texture inside
 const_frame

---
 src/accelerator/ogl/image/frame_converter.cpp | 12 +++++-----
 src/accelerator/ogl/image/image_mixer.cpp     | 23 +++++++++++--------
 src/accelerator/ogl/image/image_mixer.h       |  2 +-
 src/core/frame/frame.cpp                      | 13 +++++++++--
 src/core/frame/frame.h                        |  4 ++++
 src/core/mixer/image/image_mixer.h            |  9 +++++++-
 src/core/mixer/mixer.cpp                      |  7 +++---
 7 files changed, 48 insertions(+), 22 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 330b947a27..8d8407d4cd 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -93,12 +93,12 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
 
     std::vector<std::shared_ptr<texture>> textures;
 
-    // TODO - avoid this extra copy
-    size_t i = 0;
-    for (auto& plane : frame.pixel_format_desc().planes) {
-        // TODO - this is failing. is the buffer going the wrong direction causing it to fail?
-        auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride, plane.depth);
-        textures.push_back(texture.get());
+    {
+        auto texture_ptr = boost::any_cast<std::shared_ptr<texture>>(frame.opaque());
+        if (!texture_ptr) {
+            CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!"));
+        }
+        textures.push_back(std::move(texture_ptr));
     }
 
     auto future_conversion =
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index ae184ac818..d5def0687f 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -85,20 +85,25 @@ class image_renderer
     {
     }
 
-    std::future<array<const std::uint8_t>> operator()(std::vector<layer>             layers,
+    std::future<core::mixed_image> operator()(std::vector<layer>             layers,
                                                       const core::video_format_desc& format_desc)
     {
-        if (layers.empty()) { // Bypass GPU with empty frame.
-            static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
-            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
-        }
+        // TODO - re-enable
+//        if (layers.empty()) { // Bypass GPU with empty frame.
+//            static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
+//            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
+//        }
 
-        return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future<array<const std::uint8_t>> {
+        return flatten(ogl_->dispatch_async([=]() mutable {
             auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_);
 
             draw(target_texture, std::move(layers), format_desc);
 
-            return ogl_->copy_async(target_texture);
+            auto bytes = ogl_->copy_async(target_texture).share();
+
+            return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture=std::move(target_texture)]() {
+                return core::mixed_image(bytes.get(), target_texture);
+            });
         }));
     }
 
@@ -303,7 +308,7 @@ struct image_mixer::impl
         layer_stack_.resize(transform_stack_.back().layer_depth);
     }
 
-    std::future<array<const std::uint8_t>> render(const core::video_format_desc& format_desc)
+    std::future<core::mixed_image> render(const core::video_format_desc& format_desc)
     {
         return renderer_(std::move(layers_), format_desc);
     }
@@ -379,7 +384,7 @@ image_mixer::~image_mixer() {}
 void image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); }
 void image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); }
 void image_mixer::pop() { impl_->pop(); }
-std::future<array<const std::uint8_t>> image_mixer::operator()(const core::video_format_desc& format_desc)
+std::future<core::mixed_image> image_mixer::operator()(const core::video_format_desc& format_desc)
 {
     return impl_->render(format_desc);
 }
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index a29873e352..6fe916f775 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -44,7 +44,7 @@ class image_mixer final : public core::image_mixer
 
     image_mixer& operator=(const image_mixer&) = delete;
 
-    std::future<array<const std::uint8_t>> operator()(const core::video_format_desc& format_desc) override;
+    std::future<core::mixed_image> operator()(const core::video_format_desc& format_desc) override;
     core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
 
     spl::shared_ptr<core::frame_converter> create_frame_converter() override;
diff --git a/src/core/frame/frame.cpp b/src/core/frame/frame.cpp
index 4eb5193f18..ff96dce561 100644
--- a/src/core/frame/frame.cpp
+++ b/src/core/frame/frame.cpp
@@ -97,10 +97,12 @@ struct const_frame::impl
 
     impl(std::vector<array<const std::uint8_t>> image_data,
          array<const std::int32_t>              audio_data,
-         const core::pixel_format_desc&         desc)
+         const core::pixel_format_desc&         desc,
+         boost::any opaque)
         : image_data_(std::move(image_data))
         , audio_data_(std::move(audio_data))
         , desc_(desc)
+        , opaque_(opaque)
     {
         if (desc_.planes.size() != image_data_.size()) {
             CASPAR_THROW_EXCEPTION(invalid_argument());
@@ -147,7 +149,14 @@ const_frame::const_frame() {}
 const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
                          array<const std::int32_t>              audio_data,
                          const core::pixel_format_desc&         desc)
-    : impl_(new impl(std::move(image_data), std::move(audio_data), desc))
+    : impl_(new impl(std::move(image_data), std::move(audio_data), desc, nullptr))
+{
+}
+const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
+    array<const std::int32_t>              audio_data,
+    const struct pixel_format_desc&        desc,
+            boost::any opaque)
+        : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque))
 {
 }
 const_frame::const_frame(mutable_frame&& other)
diff --git a/src/core/frame/frame.h b/src/core/frame/frame.h
index e5fc4ef60c..0a5774d48f 100644
--- a/src/core/frame/frame.h
+++ b/src/core/frame/frame.h
@@ -61,6 +61,10 @@ class const_frame final
     explicit const_frame(std::vector<array<const std::uint8_t>> image_data,
                          array<const std::int32_t>              audio_data,
                          const struct pixel_format_desc&        desc);
+    explicit const_frame(std::vector<array<const std::uint8_t>> image_data,
+                         array<const std::int32_t>              audio_data,
+                         const struct pixel_format_desc&        desc,
+                                 boost::any opaque);
     const_frame(const const_frame& other);
     const_frame(mutable_frame&& other);
 
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index 494bd8ad5f..30919913ac 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -30,6 +30,13 @@
 
 namespace caspar { namespace core {
 
+struct mixed_image{ // Result of one image_mixer pass: the mixed image bytes plus an opaque, mixer-specific handle to the same image.
+    array<const uint8_t> rgba8; // CPU-side bytes of the mixed image (the ogl mixer fills this from copy_async of its target texture).
+    boost::any texture; // Type-erased handle; the ogl mixer stores its target texture here. NOTE(review): other mixers presumably leave it empty - confirm.
+
+    mixed_image(array<const uint8_t> rgba8, boost::any texture):rgba8(std::move(rgba8)),texture(std::move(texture)){} // Parameters are by-value sinks: hand them on with std::move so the boost::any holder is not cloned a second time.
+};
+
 class image_mixer
     : public frame_visitor
     , public frame_factory
@@ -45,7 +52,7 @@ class image_mixer
     void visit(const class const_frame& frame) override     = 0;
     void pop() override                                     = 0;
 
-    virtual std::future<array<const uint8_t>> operator()(const struct video_format_desc& format_desc) = 0;
+    virtual std::future<mixed_image> operator()(const struct video_format_desc& format_desc) = 0;
 
     class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0;
 
diff --git a/src/core/mixer/mixer.cpp b/src/core/mixer/mixer.cpp
index 1095c2108a..70a95bfc3d 100644
--- a/src/core/mixer/mixer.cpp
+++ b/src/core/mixer/mixer.cpp
@@ -75,11 +75,12 @@ struct mixer::impl
         buffer_.push(std::async(
             std::launch::deferred,
             [image = std::move(image), audio = std::move(audio), graph = graph_, format_desc, tag = this]() mutable {
+                auto image2 = image.get();
                 auto desc = pixel_format_desc(pixel_format::bgra);
-                desc.planes.push_back(pixel_format_desc::plane(format_desc.width, format_desc.height, 4));
+                desc.planes.emplace_back(format_desc.width, format_desc.height, 4);
                 std::vector<array<const uint8_t>> image_data;
-                image_data.emplace_back(std::move(image.get()));
-                return const_frame(std::move(image_data), std::move(audio), desc);
+                image_data.emplace_back(std::move(image2.rgba8));
+                return const_frame(std::move(image_data), std::move(audio), desc, image2.texture);
             }));
 
         if (buffer_.size() <= format_desc.field_count) {

From 1e3b38a6ca6088cc42dd046667c7aff17f313470 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 15:44:03 +0000
Subject: [PATCH 24/50] fix

---
 src/accelerator/ogl/util/device.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index d0ff420655..b443a18c8f 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -341,7 +341,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
                 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id());
             }
 
-            compute_to_rgba_->use();
+            compute_from_rgba_->use();
 
             glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1);
 

From 25fad4536a5d10ab13eaa77378a89ad5efb101cd Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 17:32:46 +0000
Subject: [PATCH 25/50] wip: incorrect conversion, but something semi
 identifiable

---
 src/accelerator/ogl/image/frame_converter.cpp | 25 ++---
 src/accelerator/ogl/image/image_mixer.cpp     |  4 +-
 .../ogl/image/shader_from_rgba.comp           | 97 +++++++++++++++++--
 src/accelerator/ogl/util/device.cpp           | 35 ++++---
 src/accelerator/ogl/util/device.h             | 12 ++-
 src/shell/server.cpp                          |  2 +-
 6 files changed, 135 insertions(+), 40 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 8d8407d4cd..6e5d9277b9 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -75,15 +75,18 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     std::vector<array<const std::uint8_t>> buffers;
     int                                    x_count = 0;
     int                                    y_count = 0;
+    int words_per_line = 0;
+
     switch (format) {
         case core::encoded_frame_format::decklink_v210:
             auto row_blocks = ((frame.width() + 47) / 48);
             auto row_bytes  = row_blocks * 128;
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
-            buffers.push_back(ogl_->create_array(row_bytes * frame.height()));
-            x_count = row_blocks;
+            buffers.emplace_back(ogl_->create_array(row_bytes * frame.height()));
+            x_count = row_blocks * 8;
             y_count = frame.height();
+            words_per_line = row_blocks * 32;
             break;
     }
 
@@ -91,18 +94,18 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
         CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
     }
 
-    std::vector<std::shared_ptr<texture>> textures;
-
-    {
-        auto texture_ptr = boost::any_cast<std::shared_ptr<texture>>(frame.opaque());
-        if (!texture_ptr) {
-            CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!"));
-        }
-        textures.push_back(std::move(texture_ptr));
+    auto texture_ptr = boost::any_cast<std::shared_ptr<texture>>(frame.opaque());
+    if (!texture_ptr) {
+        CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!"));
     }
 
+    convert_from_texture_description description{};
+    description.width = frame.width();
+    description.height = frame.height();
+    description.words_per_line = words_per_line;
+
     auto future_conversion =
-        ogl_->convert_from_texture(textures, buffers, frame.width(), frame.height(), x_count, y_count);
+        ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count);
 
     return std::async(std::launch::deferred,
                       [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable {
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index d5def0687f..bbd5a4f1d4 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -101,8 +101,8 @@ class image_renderer
 
             auto bytes = ogl_->copy_async(target_texture).share();
 
-            return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture=std::move(target_texture)]() {
-                return core::mixed_image(bytes.get(), target_texture);
+            return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() {
+                return core::mixed_image(bytes.get(), std::move(target_texture));
             });
         }));
     }
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index d17e3eff0a..749203e993 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -4,17 +4,100 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
 layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to
 
-layout(std430, binding = 1) buffer bufferOutput
+layout(std430, binding = 1) buffer buffer_layout
 {
-    uint8 data[];
+    uint bufferOutput[];
 };
 
+layout(std430, binding = 2) buffer description_layout
+{
+    // This must match convert_from_texture_description in device.h
+    uint frame_width;
+    uint frame_height;
+    uint words_per_line;
+};
+
+vec3 rgba_to_bt709(vec4 pixel) {
+    float KR = 0.2126;
+    float KB = 0.0722;
+    float KG = 1.0 - KR - KB;
+
+    float KRi = 1.0 - KR;
+    float KBi = 1.0 - KB;
+
+    float YRange = 219.0 / 256.0;
+    float CbCrRange = 224.0 / 256.0;
+    float HalfCbCrRange = CbCrRange / 2.0;
+
+    float YOffset = 16.0 / 256.0;
+    float CbCrOffset = 0.5;
+
+    float KRoKBi = KR / KBi;
+    float KGoKBi = KG / KBi;
+    float KBoKRi = KB / KRi;
+    float KGoKRi = KG / KRi;
+
+//    vec3 y_range = vec3(KR * YRange, KG * YRange, KB * YRange);
+//    vec3 y_offset = vec3(YOffset, YOffset, YOffset);
+
+    float y16 = KR * pixel.r + KG * pixel.g + KB * pixel.b;
+    float y = (YOffset + y16 * YRange);
+
+    float cb16 = -KRoKBi * pixel.r - KGoKBi * pixel.g + pixel.b;
+    float cb = (CbCrOffset + cb16 * HalfCbCrRange);
+    cb = 0.5;
+
+    float cr16 = pixel.r - KGoKRi * pixel.g - KBoKRi * pixel.b;
+    float cr = (CbCrOffset + cr16 * HalfCbCrRange);
+    cr = 0.5;
+
+    return vec3(y, cb, cr);
+}
+
+uint to_10bit(float val) { // Quantise a normalised value to an unsigned 10-bit code.
+    return uint(round(clamp(val, 0, 1) * 1023)); // Clamp to [0, 1] before scaling so the result always lies in 0..1023.
+}
+
+void decklink_v210() {
+    // basic coordinates
+    uint y_offset = gl_GlobalInvocationID.y * words_per_line;
+    uint x_offset = gl_GlobalInvocationID.x * 4; // 4 bytes per op
+    uint offset = y_offset + x_offset;
+
+    uint image_x = gl_GlobalInvocationID.x * 6;
+
+    // sample image
+    vec4 sample0 = imageLoad(imgInput, ivec2(image_x, gl_GlobalInvocationID.y));
+    vec4 sample1 = imageLoad(imgInput, ivec2(image_x+1, gl_GlobalInvocationID.y));
+    vec4 sample2 = imageLoad(imgInput, ivec2(image_x+2, gl_GlobalInvocationID.y));
+    vec4 sample3 = imageLoad(imgInput, ivec2(image_x+3, gl_GlobalInvocationID.y));
+    vec4 sample4 = imageLoad(imgInput, ivec2(image_x+4, gl_GlobalInvocationID.y));
+    vec4 sample5 = imageLoad(imgInput, ivec2(image_x+5, gl_GlobalInvocationID.y));
+    vec3 ycbcr0 = rgba_to_bt709(sample0);
+    vec3 ycbcr1 = rgba_to_bt709(sample1);
+    vec3 ycbcr2 = rgba_to_bt709(sample2);
+    vec3 ycbcr3 = rgba_to_bt709(sample3);
+    vec3 ycbcr4 = rgba_to_bt709(sample4);
+    vec3 ycbcr5 = rgba_to_bt709(sample5);
+
+    // compute words
+//    uint word0 = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20);
+//    uint word1 = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20);
+//    uint word2 = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20);
+//    uint word3 = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
+    uint word0 = 512 + (to_10bit(ycbcr0.s) << 10) + (512 << 20);
+    uint word1 = to_10bit(ycbcr1.s) + (512 << 10) + (to_10bit(ycbcr2.s) << 20);
+    uint word2 = 512 + (to_10bit(ycbcr3.s) << 10) + (512 << 20);
+    uint word3 = to_10bit(ycbcr4.s) + (512 << 10) + (to_10bit(ycbcr5.s) << 20);
+
+    bufferOutput[offset+0] = word0;
+    bufferOutput[offset+1] = word1;
+    bufferOutput[offset+2] = word2;
+    bufferOutput[offset+3] = word3;
+}
+
 void main() {
-    vec4 value = vec4(0.0, 0.0, 0.0, 1.0);
     ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
 
-    value.r = float(texelCoord.x)/(gl_NumWorkGroups.x);
-    value.g = float(texelCoord.y)/(gl_NumWorkGroups.y);
-    
-    imageStore(imgOutput, texelCoord, value);
+    decklink_v210();
 }
\ No newline at end of file
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index b443a18c8f..33794db80a 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -314,10 +314,11 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
-    std::future<void> convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+
+
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
                                            const std::vector<array<const uint8_t>>&     buffers,
-                                           int                                          width,
-                                           int                                          height,
+                                           const convert_from_texture_description& description,
                                            int                                          x_count,
                                            int                                          y_count)
     {
@@ -325,21 +326,24 @@ struct device::impl : public std::enable_shared_from_this<impl>
             if (!compute_from_rgba_)
                 compute_from_rgba_ = std::make_unique<compute_shader>(std::string(compute_from_rgba_shader));
 
-            // TODO: This probably only needs to handle one texture
-            // for (size_t i = 0; i < textures.size(); i++) {
-            auto& tex = textures[0];
-            glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
-            // }
+            // single input texture
+            glBindImageTexture(0, texture->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
 
-            for (size_t i = 0; i < buffers.size(); i++) {
-                auto& source = buffers[i];
+            // TODO: only a single buffer?
+//            for (size_t i = 0; i < buffers.size(); i++) {
+                auto& source = buffers[0];
                 auto  tmp    = source.storage<std::shared_ptr<buffer>>();
                 if (!tmp) {
                     CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
                 }
 
-                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id());
-            }
+                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, tmp->get()->id());
+//            }
+
+            // TODO - binding 2 description
+            auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false);
+            std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description));
+            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, description_buffer->id());
 
             compute_from_rgba_->use();
 
@@ -529,14 +533,13 @@ device::convert_frame(const std::vector<array<const uint8_t>>& sources, int widt
 {
     return impl_->convert_frame(sources, width, height, format);
 }
-std::future<void> device::convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+std::future<void> device::convert_from_texture(const std::shared_ptr<texture>& texture,
                                                const std::vector<array<const uint8_t>>&     buffers,
-                                               int                                          width,
-                                               int                                          height,
+                                               const convert_from_texture_description& description,
                                                int                                          x_count,
                                                int                                          y_count)
 {
-    return impl_->convert_from_texture(textures, buffers, width, height, x_count, y_count);
+    return impl_->convert_from_texture(texture, buffers, description, x_count, y_count);
 }
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index b001756468..73d8ddd14c 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -34,6 +34,13 @@
 
 namespace caspar { namespace accelerator { namespace ogl {
 
+// This must match description_layout in shader_from_rgba.comp
+struct convert_from_texture_description {
+    uint32_t width;
+    uint32_t height;
+    uint32_t words_per_line;
+};
+
 class device final
     : public std::enable_shared_from_this<device>
     , public accelerator_device
@@ -56,10 +63,9 @@ class device final
     std::future<std::shared_ptr<texture>>
     convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format);
 
-    std::future<void> convert_from_texture(const std::vector<std::shared_ptr<texture>>& textures,
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
                                            const std::vector<array<const uint8_t>>&     buffers,
-                                           int                                          width,
-                                           int                                          height,
+                                           const convert_from_texture_description& description,
                                            int                                          x_count,
                                            int                                          y_count);
 
diff --git a/src/shell/server.cpp b/src/shell/server.cpp
index f22e0cc9e2..149adbd793 100644
--- a/src/shell/server.cpp
+++ b/src/shell/server.cpp
@@ -264,7 +264,7 @@ struct server::impl
             auto channel =
                 spl::make_shared<video_channel>(channel_id,
                                                 format_desc,
-                                                accelerator_.create_image_mixer(channel_id, common::bit_depth::bit8),
+                                                accelerator_.create_image_mixer(channel_id, common::bit_depth::bit16),
                                                 [channel_id, weak_client](core::monitor::state channel_state) {
                                                     monitor::state state;
                                                     state[""]["channel"][channel_id] = channel_state;

From e7fc480ab77b0e5ddcd605f28ec15f25c4ae384f Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 17:50:16 +0000
Subject: [PATCH 26/50] fix colour and 8bit texture support

---
 src/accelerator/ogl/image/frame_converter.cpp |  1 +
 .../ogl/image/shader_from_rgba.comp           | 54 ++++++++-----------
 src/accelerator/ogl/util/device.cpp           | 21 ++++++--
 src/accelerator/ogl/util/device.h             |  1 +
 4 files changed, 42 insertions(+), 35 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 6e5d9277b9..f74d1f614f 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -100,6 +100,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     }
 
     convert_from_texture_description description{};
+    description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16;
     description.width = frame.width();
     description.height = frame.height();
     description.words_per_line = words_per_line;
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index 749203e993..06ac54c6d3 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -2,21 +2,31 @@
 
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 
-layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to
+layout(rgba16, binding = 0) uniform image2D imgInput16bit; // Note: this needs to match what it is writing to
+layout(rgba8, binding = 1) uniform image2D imgInput8bit; // Note: this needs to match what it is writing to
 
-layout(std430, binding = 1) buffer buffer_layout
+layout(std430, binding = 2) buffer buffer_layout
 {
     uint bufferOutput[];
 };
 
-layout(std430, binding = 2) buffer description_layout
+layout(std430, binding = 3) buffer description_layout
 {
     // This must match convert_from_texture_description in device.h
+    bool is_16_bit;
     uint frame_width;
     uint frame_height;
     uint words_per_line;
 };
 
+vec4 read_pixel(ivec2 coord) { // Load one texel from whichever input image matches the source texture's depth.
+    if (is_16_bit){ // Flag supplied by the host through the description buffer (binding 3).
+        return imageLoad(imgInput16bit, coord);
+    } else {
+        return imageLoad(imgInput8bit, coord);
+    }
+}
+
 vec3 rgba_to_bt709(vec4 pixel) {
     float KR = 0.2126;
     float KB = 0.0722;
@@ -45,11 +55,9 @@ vec3 rgba_to_bt709(vec4 pixel) {
 
     float cb16 = -KRoKBi * pixel.r - KGoKBi * pixel.g + pixel.b;
     float cb = (CbCrOffset + cb16 * HalfCbCrRange);
-    cb = 0.5;
 
     float cr16 = pixel.r - KGoKRi * pixel.g - KBoKRi * pixel.b;
     float cr = (CbCrOffset + cr16 * HalfCbCrRange);
-    cr = 0.5;
 
     return vec3(y, cb, cr);
 }
@@ -63,37 +71,21 @@ void decklink_v210() {
     uint y_offset = gl_GlobalInvocationID.y * words_per_line;
     uint x_offset = gl_GlobalInvocationID.x * 4; // 4 bytes per op
     uint offset = y_offset + x_offset;
-
     uint image_x = gl_GlobalInvocationID.x * 6;
 
     // sample image
-    vec4 sample0 = imageLoad(imgInput, ivec2(image_x, gl_GlobalInvocationID.y));
-    vec4 sample1 = imageLoad(imgInput, ivec2(image_x+1, gl_GlobalInvocationID.y));
-    vec4 sample2 = imageLoad(imgInput, ivec2(image_x+2, gl_GlobalInvocationID.y));
-    vec4 sample3 = imageLoad(imgInput, ivec2(image_x+3, gl_GlobalInvocationID.y));
-    vec4 sample4 = imageLoad(imgInput, ivec2(image_x+4, gl_GlobalInvocationID.y));
-    vec4 sample5 = imageLoad(imgInput, ivec2(image_x+5, gl_GlobalInvocationID.y));
-    vec3 ycbcr0 = rgba_to_bt709(sample0);
-    vec3 ycbcr1 = rgba_to_bt709(sample1);
-    vec3 ycbcr2 = rgba_to_bt709(sample2);
-    vec3 ycbcr3 = rgba_to_bt709(sample3);
-    vec3 ycbcr4 = rgba_to_bt709(sample4);
-    vec3 ycbcr5 = rgba_to_bt709(sample5);
+    vec3 ycbcr0 = rgba_to_bt709(read_pixel(ivec2(image_x + 0, gl_GlobalInvocationID.y)));
+    vec3 ycbcr1 = rgba_to_bt709(read_pixel(ivec2(image_x + 1, gl_GlobalInvocationID.y)));
+    vec3 ycbcr2 = rgba_to_bt709(read_pixel(ivec2(image_x + 2, gl_GlobalInvocationID.y)));
+    vec3 ycbcr3 = rgba_to_bt709(read_pixel(ivec2(image_x + 3, gl_GlobalInvocationID.y)));
+    vec3 ycbcr4 = rgba_to_bt709(read_pixel(ivec2(image_x + 4, gl_GlobalInvocationID.y)));
+    vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y)));
 
     // compute words
-//    uint word0 = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20);
-//    uint word1 = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20);
-//    uint word2 = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20);
-//    uint word3 = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
-    uint word0 = 512 + (to_10bit(ycbcr0.s) << 10) + (512 << 20);
-    uint word1 = to_10bit(ycbcr1.s) + (512 << 10) + (to_10bit(ycbcr2.s) << 20);
-    uint word2 = 512 + (to_10bit(ycbcr3.s) << 10) + (512 << 20);
-    uint word3 = to_10bit(ycbcr4.s) + (512 << 10) + (to_10bit(ycbcr5.s) << 20);
-
-    bufferOutput[offset+0] = word0;
-    bufferOutput[offset+1] = word1;
-    bufferOutput[offset+2] = word2;
-    bufferOutput[offset+3] = word3;
+    bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20);
+    bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20);
+    bufferOutput[offset + 2] = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20);
+    bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
 }
 
 void main() {
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 33794db80a..077400658b 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -327,7 +327,20 @@ struct device::impl : public std::enable_shared_from_this<impl>
                 compute_from_rgba_ = std::make_unique<compute_shader>(std::string(compute_from_rgba_shader));
 
             // single input texture
-            glBindImageTexture(0, texture->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F);
+            GLuint texid_8bit = 0;
+            GLuint texid_16bit = 0;
+
+            switch(texture->depth()) {
+                case common::bit_depth::bit8:
+                    texid_8bit = texture->id();
+                    break;
+                case common::bit_depth::bit16:
+                    texid_16bit = texture->id();
+                    break;
+            }
+
+            GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16));
+            GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8));
 
             // TODO: only a single buffer?
 //            for (size_t i = 0; i < buffers.size(); i++) {
@@ -337,17 +350,17 @@ struct device::impl : public std::enable_shared_from_this<impl>
                     CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
                 }
 
-                glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, tmp->get()->id());
+            GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id()));
 //            }
 
             // TODO - binding 2 description
             auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false);
             std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description));
-            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, description_buffer->id());
+            GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id()));
 
             compute_from_rgba_->use();
 
-            glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1);
+           GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1));
 
             auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 73d8ddd14c..f3fdcf79d9 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -36,6 +36,7 @@ namespace caspar { namespace accelerator { namespace ogl {
 
 // This must match description_layout in shader_from_rgba.comp
 struct convert_from_texture_description {
+    bool is_16_bit;
     uint32_t width;
     uint32_t height;
     uint32_t words_per_line;

From 030308ffae96fcdacded12b097f451c5516ef40b Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 17:56:03 +0000
Subject: [PATCH 27/50] fix: rgba8 download was incorrectly 16bit packed

---
 src/accelerator/ogl/image/image_mixer.cpp | 2 +-
 src/accelerator/ogl/util/device.cpp       | 8 ++++----
 src/accelerator/ogl/util/device.h         | 2 +-
 src/accelerator/ogl/util/texture.cpp      | 7 ++++---
 src/accelerator/ogl/util/texture.h        | 1 +
 5 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index bbd5a4f1d4..223b665730 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -99,7 +99,7 @@ class image_renderer
 
             draw(target_texture, std::move(layers), format_desc);
 
-            auto bytes = ogl_->copy_async(target_texture).share();
+            auto bytes = ogl_->copy_async(target_texture, true).share();
 
             return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() {
                 return core::mixed_image(bytes.get(), std::move(target_texture));
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 077400658b..e42e646d5c 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -252,11 +252,11 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
-    std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source)
+    std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
     {
         return spawn_async([=](yield_context yield) {
             auto buf = create_buffer(source->size(), false);
-            source->copy_to(*buf);
+            source->copy_to(*buf, as_rgba8? common::bit_depth::bit8 : source->depth());
 
             sync_queue_.push(nullptr);
 
@@ -537,9 +537,9 @@ device::copy_async(const array<const uint8_t>& source, int width, int height, in
 {
     return impl_->copy_async(source, width, height, stride, depth);
 }
-std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<texture>& source)
+std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
 {
-    return impl_->copy_async(source);
+    return impl_->copy_async(source, as_rgba8);
 }
 std::future<std::shared_ptr<texture>>
 device::convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format)
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index f3fdcf79d9..e457bdc626 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -59,7 +59,7 @@ class device final
 
     std::future<std::shared_ptr<class texture>>
                                       copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
-    std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source);
+    std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source, bool as_rgba8);
 
     std::future<std::shared_ptr<texture>>
     convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format);
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 478fc9b4f3..a430ac05fc 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -109,10 +109,10 @@ struct texture::impl
         src.unbind();
     }
 
-    void copy_to(buffer& dst)
+    void copy_to(buffer& dst, common::bit_depth depth)
     {
         dst.bind();
-        GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast<int>(depth_)][stride_], size_, nullptr));
+        GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast<int>(depth)][stride_], size_, nullptr));
         dst.unbind();
     }
 };
@@ -139,7 +139,8 @@ void texture::clear() { impl_->clear(); }
 void texture::copy_from(int source) { impl_->copy_from(source); }
 #endif
 void texture::copy_from(buffer& source) { impl_->copy_from(source); }
-void texture::copy_to(buffer& dest) { impl_->copy_to(dest); }
+void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); }
+void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); }
 int  texture::width() const { return impl_->width_; }
 int  texture::height() const { return impl_->height_; }
 int  texture::stride() const { return impl_->stride_; }
diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h
index ff2c117f73..b5e7d05237 100644
--- a/src/accelerator/ogl/util/texture.h
+++ b/src/accelerator/ogl/util/texture.h
@@ -41,6 +41,7 @@ class texture final
     void copy_from(int source);
 #endif
     void copy_from(class buffer& source);
+    void copy_to(class buffer& dest, common::bit_depth depth);
     void copy_to(class buffer& dest);
 
     void attach();

From d6704df224ff2632b3d924a70c3a89b223f5db9a Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 17:56:56 +0000
Subject: [PATCH 28/50] fix: remove unused windows only gl code

---
 src/accelerator/ogl/util/device.cpp  | 31 ----------------------------
 src/accelerator/ogl/util/texture.cpp | 11 ----------
 src/accelerator/ogl/util/texture.h   |  3 ---
 3 files changed, 45 deletions(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index e42e646d5c..bf2821a7f8 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -382,37 +382,6 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
-#ifdef WIN32
-    std::future<std::shared_ptr<texture>> copy_async(GLuint source, int width, int height, int stride)
-    {
-        return spawn_async([=](yield_context yield) {
-            auto tex = create_texture(width, height, stride, false);
-
-            tex->copy_from(source);
-
-            auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-
-            GL(glFlush());
-
-            deadline_timer timer(service_);
-            for (auto n = 0; true; ++n) {
-                // TODO (perf) Smarter non-polling solution?
-                timer.expires_from_now(boost::posix_time::milliseconds(2));
-                timer.async_wait(yield);
-
-                auto wait = glClientWaitSync(fence, 0, 1);
-                if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED) {
-                    break;
-                }
-            }
-
-            glDeleteSync(fence);
-
-            return tex;
-        });
-    }
-#endif
-
     boost::property_tree::wptree info() const
     {
         boost::property_tree::wptree info;
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index a430ac05fc..4c9e8f0342 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -85,14 +85,6 @@ struct texture::impl
 
     void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[static_cast<int>(depth_)][stride_], nullptr)); }
 
-#ifdef WIN32
-    void copy_from(int texture_id)
-    {
-        GL(glCopyImageSubData(
-            texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, id_, GL_TEXTURE_2D, 0, 0, 0, 0, width_, height_, 1));
-    }
-#endif
-
     void copy_from(buffer& src)
     {
         src.bind();
@@ -135,9 +127,6 @@ void texture::bind(int index) { impl_->bind(index); }
 void texture::unbind() { impl_->unbind(); }
 void texture::attach() { impl_->attach(); }
 void texture::clear() { impl_->clear(); }
-#ifdef WIN32
-void texture::copy_from(int source) { impl_->copy_from(source); }
-#endif
 void texture::copy_from(buffer& source) { impl_->copy_from(source); }
 void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); }
 void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); }
diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h
index b5e7d05237..da1122a8ee 100644
--- a/src/accelerator/ogl/util/texture.h
+++ b/src/accelerator/ogl/util/texture.h
@@ -37,9 +37,6 @@ class texture final
     texture& operator=(const texture&) = delete;
     texture& operator=(texture&& other);
 
-#ifdef WIN32
-    void copy_from(int source);
-#endif
     void copy_from(class buffer& source);
     void copy_to(class buffer& dest, common::bit_depth depth);
     void copy_to(class buffer& dest);

From 54e42bce13b6f3470b12d64c96539e93295a87f9 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Fri, 29 Dec 2023 17:57:52 +0000
Subject: [PATCH 29/50] wip: interleave shader and remove clamp

---
 src/accelerator/ogl/image/shader_from_rgba.comp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index 06ac54c6d3..3a629ab396 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -63,7 +63,7 @@ vec3 rgba_to_bt709(vec4 pixel) {
 }
 
 uint to_10bit(float val) {
-    return uint(round(clamp(val, 0, 1) * 1023));
+    return uint(round(val * 1023));
 }
 
 void decklink_v210() {
@@ -73,18 +73,19 @@ void decklink_v210() {
     uint offset = y_offset + x_offset;
     uint image_x = gl_GlobalInvocationID.x * 6;
 
-    // sample image
+    // sample image and compute words
     vec3 ycbcr0 = rgba_to_bt709(read_pixel(ivec2(image_x + 0, gl_GlobalInvocationID.y)));
+    bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20);
+
     vec3 ycbcr1 = rgba_to_bt709(read_pixel(ivec2(image_x + 1, gl_GlobalInvocationID.y)));
     vec3 ycbcr2 = rgba_to_bt709(read_pixel(ivec2(image_x + 2, gl_GlobalInvocationID.y)));
+    bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20);
+
     vec3 ycbcr3 = rgba_to_bt709(read_pixel(ivec2(image_x + 3, gl_GlobalInvocationID.y)));
     vec3 ycbcr4 = rgba_to_bt709(read_pixel(ivec2(image_x + 4, gl_GlobalInvocationID.y)));
-    vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y)));
-
-    // compute words
-    bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20);
-    bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20);
     bufferOutput[offset + 2] = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20);
+
+    vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y)));
     bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
 }
 

From 61559754f49cafd0803472e550f1c0834aee5bf7 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 15:22:35 +0000
Subject: [PATCH 30/50] chore: remove some dead code

---
 src/accelerator/ogl/image/image_mixer.cpp | 39 +++--------------------
 src/accelerator/ogl/util/device.cpp       | 10 ++----
 src/accelerator/ogl/util/device.h         |  3 --
 3 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 223b665730..5990e423cb 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -313,21 +313,6 @@ struct image_mixer::impl
         return renderer_(std::move(layers_), format_desc);
     }
 
-    std::vector<future_texture> convert_frame(const std::vector<array<const std::uint8_t>>& image_data,
-                                              const core::pixel_format_desc&                desc) const
-    {
-        const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate
-
-        // TODO - desc is no longer 'correct' and should probably be changed to avoid the mixer shader being aware of these formats
-
-        std::vector<future_texture> textures;
-
-        textures.emplace_back(ogl_->convert_frame(
-                image_data, plane0.width, plane0.height, plane0.width / 2)); // TODO - what is this 'format' parameter?
-
-        return textures;
-    }
-
     core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override
     {
         std::vector<array<std::uint8_t>> image_data;
@@ -347,26 +332,12 @@ struct image_mixer::impl
                     return boost::any{};
                 }
 
-                switch (desc.format) {
-//                    case core::pixel_format::ycbcr10_420:
-//                    case core::pixel_format::ycbcr10_422:
-//                    case core::pixel_format::ycbcr10_444:
-//                    case core::pixel_format::ycbcra10_420:
-//                    case core::pixel_format::ycbcra10_422:
-//                    case core::pixel_format::ycbcra10_444: {
-//                        std::vector<future_texture> textures = self->convert_frame(image_data, desc);
-//
-//                        return std::make_shared<decltype(textures)>(std::move(textures));
-//                    }
-                    default: {
-                        std::vector<future_texture> textures;
-                        for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-                            textures.emplace_back(self->ogl_->copy_async(
-                                image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
-                        }
-                        return std::make_shared<decltype(textures)>(std::move(textures));
-                    }
+                std::vector<future_texture> textures;
+                for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+                    textures.emplace_back(self->ogl_->copy_async(
+                        image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
                 }
+                return std::make_shared<decltype(textures)>(std::move(textures));
             });
     }
 
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index bf2821a7f8..fbddce7a5e 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -292,6 +292,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
         });
     }
 
+    /*
+     * In its current form, this is not useful/complete. But it will be needed in some form for a producer 'soon'
     std::future<std::shared_ptr<texture>>
     convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int width_samples)
     {
@@ -313,8 +315,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
             return tex;
         });
     }
-
-
+    */
 
     std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
                                            const std::vector<array<const uint8_t>>&     buffers,
@@ -510,11 +511,6 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source, as_rgba8);
 }
-std::future<std::shared_ptr<texture>>
-device::convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format)
-{
-    return impl_->convert_frame(sources, width, height, format);
-}
 std::future<void> device::convert_from_texture(const std::shared_ptr<texture>& texture,
                                                const std::vector<array<const uint8_t>>&     buffers,
                                                const convert_from_texture_description& description,
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index e457bdc626..060c8d3c15 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -61,9 +61,6 @@ class device final
                                       copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source, bool as_rgba8);
 
-    std::future<std::shared_ptr<texture>>
-    convert_frame(const std::vector<array<const uint8_t>>& sources, int width, int height, int format);
-
     std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
                                            const std::vector<array<const uint8_t>>&     buffers,
                                            const convert_from_texture_description& description,

From a60827e0e6e297cfa53baea79ca7b9447508aaca Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 15:23:00 +0000
Subject: [PATCH 31/50] chore: format

---
 src/accelerator/accelerator.cpp               |  3 +-
 src/accelerator/ogl/image/frame_converter.cpp | 57 +++++++--------
 src/accelerator/ogl/image/image_mixer.cpp     | 69 ++++++++++---------
 src/accelerator/ogl/image/image_mixer.h       |  7 +-
 src/accelerator/ogl/util/compute_shader.cpp   |  4 +-
 src/accelerator/ogl/util/device.cpp           | 48 ++++++-------
 src/accelerator/ogl/util/device.h             | 17 ++---
 src/accelerator/ogl/util/texture.cpp          | 27 ++++----
 src/common/array.h                            | 30 ++++----
 src/common/bit_depth.h                        |  4 +-
 src/core/consumer/frame_consumer.cpp          |  8 +--
 src/core/consumer/frame_consumer.h            | 12 ++--
 src/core/frame/frame.cpp                      | 10 +--
 src/core/frame/frame.h                        |  2 +-
 src/core/frame/pixel_format.h                 | 24 +++----
 src/core/mixer/image/image_mixer.h            | 11 ++-
 src/core/mixer/mixer.cpp                      |  2 +-
 src/core/video_channel.cpp                    |  7 +-
 src/core/video_channel.h                      |  2 +-
 .../artnet/consumer/artnet_consumer.cpp       |  2 +-
 src/modules/artnet/consumer/artnet_consumer.h |  2 +-
 .../bluefish/consumer/bluefish_consumer.cpp   |  6 +-
 .../bluefish/consumer/bluefish_consumer.h     |  4 +-
 .../decklink/consumer/decklink_consumer.h     |  4 +-
 .../ffmpeg/consumer/ffmpeg_consumer.cpp       |  6 +-
 src/modules/ffmpeg/consumer/ffmpeg_consumer.h |  4 +-
 src/modules/ffmpeg/util/av_util.cpp           |  2 +-
 src/modules/image/consumer/image_consumer.cpp |  4 +-
 src/modules/image/consumer/image_consumer.h   |  2 +-
 .../newtek/consumer/newtek_ndi_consumer.cpp   |  4 +-
 .../newtek/consumer/newtek_ndi_consumer.h     |  4 +-
 src/modules/oal/consumer/oal_consumer.cpp     |  6 +-
 src/modules/oal/consumer/oal_consumer.h       |  4 +-
 .../screen/consumer/screen_consumer.cpp       |  6 +-
 src/modules/screen/consumer/screen_consumer.h |  4 +-
 src/protocol/amcp/AMCPCommandsImpl.cpp        | 18 +++--
 src/shell/server.cpp                          |  8 ++-
 37 files changed, 229 insertions(+), 205 deletions(-)

diff --git a/src/accelerator/accelerator.cpp b/src/accelerator/accelerator.cpp
index fb1017091d..c679a7a932 100644
--- a/src/accelerator/accelerator.cpp
+++ b/src/accelerator/accelerator.cpp
@@ -27,7 +27,8 @@ struct accelerator::impl
 
     std::unique_ptr<core::image_mixer> create_image_mixer(int channel_id, common::bit_depth depth)
     {
-        return std::make_unique<ogl::image_mixer>(spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size());
+        return std::make_unique<ogl::image_mixer>(
+            spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size());
     }
 
     std::shared_ptr<ogl::device> get_device()
diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index f74d1f614f..9a56fcd983 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -43,24 +43,26 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor
     using future_texture = std::shared_future<std::shared_ptr<texture>>;
 
     std::weak_ptr<ogl_frame_converter> weak_self = shared_from_this();
-    return core::mutable_frame(
-        tag,
-        std::move(image_data),
-        array<int32_t>{},
-        desc,
-        [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
-            // TODO - replace this
-            auto self = weak_self.lock();
-            if (!self) {
-                return boost::any{};
-            }
-            std::vector<future_texture> textures;
-            for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-                textures.emplace_back(self->ogl_->copy_async(
-                    image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
-            }
-            return std::make_shared<decltype(textures)>(std::move(textures));
-        });
+    return core::mutable_frame(tag,
+                               std::move(image_data),
+                               array<int32_t>{},
+                               desc,
+                               [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
+                                   // TODO - replace this
+                                   auto self = weak_self.lock();
+                                   if (!self) {
+                                       return boost::any{};
+                                   }
+                                   std::vector<future_texture> textures;
+                                   for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+                                       textures.emplace_back(self->ogl_->copy_async(image_data[n],
+                                                                                    desc.planes[n].width,
+                                                                                    desc.planes[n].height,
+                                                                                    desc.planes[n].stride,
+                                                                                    desc.planes[n].depth));
+                                   }
+                                   return std::make_shared<decltype(textures)>(std::move(textures));
+                               });
 }
 
 core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame)
@@ -73,9 +75,9 @@ std::shared_future<std::vector<array<const std::uint8_t>>>
 ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format)
 {
     std::vector<array<const std::uint8_t>> buffers;
-    int                                    x_count = 0;
-    int                                    y_count = 0;
-    int words_per_line = 0;
+    int                                    x_count        = 0;
+    int                                    y_count        = 0;
+    int                                    words_per_line = 0;
 
     switch (format) {
         case core::encoded_frame_format::decklink_v210:
@@ -84,8 +86,8 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
             buffers.emplace_back(ogl_->create_array(row_bytes * frame.height()));
-            x_count = row_blocks * 8;
-            y_count = frame.height();
+            x_count        = row_blocks * 8;
+            y_count        = frame.height();
             words_per_line = row_blocks * 32;
             break;
     }
@@ -100,13 +102,12 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     }
 
     convert_from_texture_description description{};
-    description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16;
-    description.width = frame.width();
-    description.height = frame.height();
+    description.is_16_bit      = texture_ptr->depth() == common::bit_depth::bit16;
+    description.width          = frame.width();
+    description.height         = frame.height();
     description.words_per_line = words_per_line;
 
-    auto future_conversion =
-        ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count);
+    auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count);
 
     return std::async(std::launch::deferred,
                       [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable {
diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp
index 5990e423cb..87025fe18c 100644
--- a/src/accelerator/ogl/image/image_mixer.cpp
+++ b/src/accelerator/ogl/image/image_mixer.cpp
@@ -85,14 +85,13 @@ class image_renderer
     {
     }
 
-    std::future<core::mixed_image> operator()(std::vector<layer>             layers,
-                                                      const core::video_format_desc& format_desc)
+    std::future<core::mixed_image> operator()(std::vector<layer> layers, const core::video_format_desc& format_desc)
     {
         // TODO - re-enable
-//        if (layers.empty()) { // Bypass GPU with empty frame.
-//            static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
-//            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
-//        }
+        //        if (layers.empty()) { // Bypass GPU with empty frame.
+        //            static const std::vector<uint8_t> buffer(max_frame_size_ * 2, 0); // TODO better
+        //            return make_ready_future(array<const std::uint8_t>(buffer.data(), format_desc.size, true));
+        //        }
 
         return flatten(ogl_->dispatch_async([=]() mutable {
             auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_);
@@ -101,9 +100,10 @@ class image_renderer
 
             auto bytes = ogl_->copy_async(target_texture, true).share();
 
-            return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() {
-                return core::mixed_image(bytes.get(), std::move(target_texture));
-            });
+            return std::async(std::launch::deferred,
+                              [bytes = std::move(bytes), target_texture = std::move(target_texture)]() {
+                                  return core::mixed_image(bytes.get(), std::move(target_texture));
+                              });
         }));
     }
 
@@ -321,24 +321,26 @@ struct image_mixer::impl
         }
 
         std::weak_ptr<image_mixer::impl> weak_self = shared_from_this();
-        return core::mutable_frame(
-            tag,
-            std::move(image_data),
-            array<int32_t>{},
-            desc,
-            [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
-                auto self = weak_self.lock();
-                if (!self) {
-                    return boost::any{};
-                }
-
-                std::vector<future_texture> textures;
-                for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-                    textures.emplace_back(self->ogl_->copy_async(
-                        image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth));
-                }
-                return std::make_shared<decltype(textures)>(std::move(textures));
-            });
+        return core::mutable_frame(tag,
+                                   std::move(image_data),
+                                   array<int32_t>{},
+                                   desc,
+                                   [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
+                                       auto self = weak_self.lock();
+                                       if (!self) {
+                                           return boost::any{};
+                                       }
+
+                                       std::vector<future_texture> textures;
+                                       for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+                                           textures.emplace_back(self->ogl_->copy_async(image_data[n],
+                                                                                        desc.planes[n].width,
+                                                                                        desc.planes[n].height,
+                                                                                        desc.planes[n].stride,
+                                                                                        desc.planes[n].depth));
+                                       }
+                                       return std::make_shared<decltype(textures)>(std::move(textures));
+                                   });
     }
 
     spl::shared_ptr<core::frame_converter> create_frame_converter() override
@@ -347,14 +349,17 @@ struct image_mixer::impl
     }
 };
 
-image_mixer::image_mixer(const spl::shared_ptr<device>& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size)
-    : impl_(std::make_unique<impl>(ogl, channel_id, depth,max_frame_size))
+image_mixer::image_mixer(const spl::shared_ptr<device>& ogl,
+                         int                            channel_id,
+                         common::bit_depth              depth,
+                         const size_t                   max_frame_size)
+    : impl_(std::make_unique<impl>(ogl, channel_id, depth, max_frame_size))
 {
 }
 image_mixer::~image_mixer() {}
-void image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); }
-void image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); }
-void image_mixer::pop() { impl_->pop(); }
+void                           image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); }
+void                           image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); }
+void                           image_mixer::pop() { impl_->pop(); }
 std::future<core::mixed_image> image_mixer::operator()(const core::video_format_desc& format_desc)
 {
     return impl_->render(format_desc);
diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h
index 6fe916f775..de2b6ac792 100644
--- a/src/accelerator/ogl/image/image_mixer.h
+++ b/src/accelerator/ogl/image/image_mixer.h
@@ -37,7 +37,10 @@ namespace caspar { namespace accelerator { namespace ogl {
 class image_mixer final : public core::image_mixer
 {
   public:
-    image_mixer(const spl::shared_ptr<class device>& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size);
+    image_mixer(const spl::shared_ptr<class device>& ogl,
+                int                                  channel_id,
+                common::bit_depth                    depth,
+                const size_t                         max_frame_size);
     image_mixer(const image_mixer&) = delete;
 
     ~image_mixer();
@@ -45,7 +48,7 @@ class image_mixer final : public core::image_mixer
     image_mixer& operator=(const image_mixer&) = delete;
 
     std::future<core::mixed_image> operator()(const core::video_format_desc& format_desc) override;
-    core::mutable_frame                    create_frame(const void* tag, const core::pixel_format_desc& desc) override;
+    core::mutable_frame            create_frame(const void* tag, const core::pixel_format_desc& desc) override;
 
     spl::shared_ptr<core::frame_converter> create_frame_converter() override;
 
diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp
index 26aba484df..d737a94d3d 100644
--- a/src/accelerator/ogl/util/compute_shader.cpp
+++ b/src/accelerator/ogl/util/compute_shader.cpp
@@ -47,8 +47,8 @@ struct compute_shader::impl
         glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]);
         glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]);
 
-        printf("max global (total) work group counts x:%i y:%i z:%i\n",
-               work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]);
+        printf(
+            "max global (total) work group counts x:%i y:%i z:%i\n", work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]);
 
         GLint success;
 
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index fbddce7a5e..32b4374cf4 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -225,8 +225,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
     array<uint8_t> create_array(int count)
     {
-        auto buf             = create_buffer(count, true);
-        auto ptr             = reinterpret_cast<uint8_t*>(buf->data());
+        auto buf = create_buffer(count, true);
+        auto ptr = reinterpret_cast<uint8_t*>(buf->data());
         return array<uint8_t>(ptr, buf->size(), buf);
     }
 
@@ -256,7 +256,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
     {
         return spawn_async([=](yield_context yield) {
             auto buf = create_buffer(source->size(), false);
-            source->copy_to(*buf, as_rgba8? common::bit_depth::bit8 : source->depth());
+            source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth());
 
             sync_queue_.push(nullptr);
 
@@ -317,21 +317,21 @@ struct device::impl : public std::enable_shared_from_this<impl>
     }
     */
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
-                                           const std::vector<array<const uint8_t>>&     buffers,
-                                           const convert_from_texture_description& description,
-                                           int                                          x_count,
-                                           int                                          y_count)
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>&          texture,
+                                           const std::vector<array<const uint8_t>>& buffers,
+                                           const convert_from_texture_description&  description,
+                                           int                                      x_count,
+                                           int                                      y_count)
     {
         return spawn_async([=](yield_context yield) {
             if (!compute_from_rgba_)
                 compute_from_rgba_ = std::make_unique<compute_shader>(std::string(compute_from_rgba_shader));
 
             // single input texture
-            GLuint texid_8bit = 0;
+            GLuint texid_8bit  = 0;
             GLuint texid_16bit = 0;
 
-            switch(texture->depth()) {
+            switch (texture->depth()) {
                 case common::bit_depth::bit8:
                     texid_8bit = texture->id();
                     break;
@@ -344,24 +344,24 @@ struct device::impl : public std::enable_shared_from_this<impl>
             GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8));
 
             // TODO: only a single buffer?
-//            for (size_t i = 0; i < buffers.size(); i++) {
-                auto& source = buffers[0];
-                auto  tmp    = source.storage<std::shared_ptr<buffer>>();
-                if (!tmp) {
-                    CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
-                }
+            //            for (size_t i = 0; i < buffers.size(); i++) {
+            auto& source = buffers[0];
+            auto  tmp    = source.storage<std::shared_ptr<buffer>>();
+            if (!tmp) {
+                CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
+            }
 
             GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id()));
-//            }
+            //            }
 
             // TODO - binding 2 description
             auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false);
-            std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description));
+            std::memcpy(description_buffer->data(), &description, sizeof(convert_from_texture_description));
             GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id()));
 
             compute_from_rgba_->use();
 
-           GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1));
+            GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1));
 
             auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 
@@ -511,11 +511,11 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source, as_rgba8);
 }
-std::future<void> device::convert_from_texture(const std::shared_ptr<texture>& texture,
-                                               const std::vector<array<const uint8_t>>&     buffers,
-                                               const convert_from_texture_description& description,
-                                               int                                          x_count,
-                                               int                                          y_count)
+std::future<void> device::convert_from_texture(const std::shared_ptr<texture>&          texture,
+                                               const std::vector<array<const uint8_t>>& buffers,
+                                               const convert_from_texture_description&  description,
+                                               int                                      x_count,
+                                               int                                      y_count)
 {
     return impl_->convert_from_texture(texture, buffers, description, x_count, y_count);
 }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 060c8d3c15..e5dce060cb 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -35,8 +35,9 @@
 namespace caspar { namespace accelerator { namespace ogl {
 
 // This must match description_layout in shader_from_rgba.comp
-struct convert_from_texture_description {
-    bool is_16_bit;
+struct convert_from_texture_description
+{
+    bool     is_16_bit;
     uint32_t width;
     uint32_t height;
     uint32_t words_per_line;
@@ -58,14 +59,14 @@ class device final
     array<uint8_t>                 create_array(int size);
 
     std::future<std::shared_ptr<class texture>>
-                                      copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
+    copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source, bool as_rgba8);
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>& texture,
-                                           const std::vector<array<const uint8_t>>&     buffers,
-                                           const convert_from_texture_description& description,
-                                           int                                          x_count,
-                                           int                                          y_count);
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>&          texture,
+                                           const std::vector<array<const uint8_t>>& buffers,
+                                           const convert_from_texture_description&  description,
+                                           int                                      x_count,
+                                           int                                      y_count);
 
     template <typename Func>
     auto dispatch_async(Func&& func)
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 4c9e8f0342..4e47e8768d 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -56,8 +56,7 @@ struct texture::impl
     {
         if (stride == 5) {
             size_ = width * height * 16;
-        }else
-        if (stride == 6) {
+        } else if (stride == 6) {
             size_ = width * height * 2;
         }
 
@@ -123,18 +122,18 @@ texture& texture::operator=(texture&& other)
     impl_ = std::move(other.impl_);
     return *this;
 }
-void texture::bind(int index) { impl_->bind(index); }
-void texture::unbind() { impl_->unbind(); }
-void texture::attach() { impl_->attach(); }
-void texture::clear() { impl_->clear(); }
-void texture::copy_from(buffer& source) { impl_->copy_from(source); }
-void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); }
-void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); }
-int  texture::width() const { return impl_->width_; }
-int  texture::height() const { return impl_->height_; }
-int  texture::stride() const { return impl_->stride_; }
+void              texture::bind(int index) { impl_->bind(index); }
+void              texture::unbind() { impl_->unbind(); }
+void              texture::attach() { impl_->attach(); }
+void              texture::clear() { impl_->clear(); }
+void              texture::copy_from(buffer& source) { impl_->copy_from(source); }
+void              texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); }
+void              texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); }
+int               texture::width() const { return impl_->width_; }
+int               texture::height() const { return impl_->height_; }
+int               texture::stride() const { return impl_->stride_; }
 common::bit_depth texture::depth() const { return impl_->depth_; }
-int  texture::size() const { return impl_->size_; }
-int  texture::id() const { return impl_->id_; }
+int               texture::size() const { return impl_->size_; }
+int               texture::id() const { return impl_->id_; }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/common/array.h b/src/common/array.h
index 10f27957c3..b7a6019b71 100644
--- a/src/common/array.h
+++ b/src/common/array.h
@@ -63,17 +63,17 @@ class array final
 
     array& operator=(array&& other)
     {
-        ptr_          = std::move(other.ptr_);
-        size_         = std::move(other.size_);
-        storage_      = std::move(other.storage_);
+        ptr_     = std::move(other.ptr_);
+        size_    = std::move(other.size_);
+        storage_ = std::move(other.storage_);
 
         return *this;
     }
 
-    T*                begin() const { return ptr_; }
-    T*                data() const { return ptr_; }
-    T*                end() const { return ptr_ + size_; }
-    std::size_t       size() const { return size_; }
+    T*          begin() const { return ptr_; }
+    T*          data() const { return ptr_; }
+    T*          end() const { return ptr_ + size_; }
+    std::size_t size() const { return size_; }
 
     explicit operator bool() const { return size_ > 0; };
 
@@ -84,8 +84,8 @@ class array final
     }
 
   private:
-    T*                          ptr_          = nullptr;
-    std::size_t                 size_         = 0;
+    T*                          ptr_  = nullptr;
+    std::size_t                 size_ = 0;
     std::shared_ptr<boost::any> storage_;
 };
 
@@ -118,9 +118,7 @@ class array<const T> final
     }
 
     template <typename S>
-    explicit array(const T*          ptr,
-                   std::size_t       size,
-                   S&&               storage)
+    explicit array(const T* ptr, std::size_t size, S&& storage)
         : ptr_(ptr)
         , size_(size)
         , storage_(std::make_shared<boost::any>(std::forward<S>(storage)))
@@ -152,10 +150,10 @@ class array<const T> final
         return *this;
     }
 
-    const T*          begin() const { return ptr_; }
-    const T*          data() const { return ptr_; }
-    const T*          end() const { return ptr_ + size_; }
-    std::size_t       size() const { return size_; }
+    const T*    begin() const { return ptr_; }
+    const T*    data() const { return ptr_; }
+    const T*    end() const { return ptr_ + size_; }
+    std::size_t size() const { return size_; }
 
     explicit operator bool() const { return size_ > 0; }
 
diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h
index 45c563e060..ba6bb0d2b2 100644
--- a/src/common/bit_depth.h
+++ b/src/common/bit_depth.h
@@ -10,8 +10,6 @@ enum class bit_depth : uint8_t
     bit16 = 1,
 };
 
-inline int bytes_per_pixel(bit_depth depth){
-    return static_cast<int>(depth) + 1;
-}
+inline int bytes_per_pixel(bit_depth depth) { return static_cast<int>(depth) + 1; }
 
 }} // namespace caspar::common
\ No newline at end of file
diff --git a/src/core/consumer/frame_consumer.cpp b/src/core/consumer/frame_consumer.cpp
index ae83c2fcd4..512c937f18 100644
--- a/src/core/consumer/frame_consumer.cpp
+++ b/src/core/consumer/frame_consumer.cpp
@@ -163,7 +163,7 @@ class print_consumer_proxy : public frame_consumer
 spl::shared_ptr<core::frame_consumer>
 frame_consumer_registry::create_consumer(const std::vector<std::wstring>&                         params,
                                          const core::video_format_repository&                     format_repository,
-                                         const spl::shared_ptr<core::frame_converter>& frame_converter,
+                                         const spl::shared_ptr<core::frame_converter>&            frame_converter,
                                          const std::vector<spl::shared_ptr<core::video_channel>>& channels) const
 {
     if (params.empty())
@@ -174,7 +174,7 @@ frame_consumer_registry::create_consumer(const std::vector<std::wstring>&
     if (!std::any_of(
             consumer_factories.begin(), consumer_factories.end(), [&](const consumer_factory_t& factory) -> bool {
                 try {
-                    consumer = factory(params, format_repository,frame_converter, channels);
+                    consumer = factory(params, format_repository, frame_converter, channels);
                 } catch (...) {
                     CASPAR_LOG_CURRENT_EXCEPTION();
                 }
@@ -190,7 +190,7 @@ spl::shared_ptr<frame_consumer>
 frame_consumer_registry::create_consumer(const std::wstring&                                      element_name,
                                          const boost::property_tree::wptree&                      element,
                                          const core::video_format_repository&                     format_repository,
-                                         const spl::shared_ptr<core::frame_converter>& frame_converter,
+                                         const spl::shared_ptr<core::frame_converter>&            frame_converter,
                                          const std::vector<spl::shared_ptr<core::video_channel>>& channels) const
 {
     auto& preconfigured_consumer_factories = impl_->preconfigured_consumer_factories;
@@ -201,7 +201,7 @@ frame_consumer_registry::create_consumer(const std::wstring&
                                << msg_info(L"No consumer factory registered for element name " + element_name));
 
     return spl::make_shared<destroy_consumer_proxy>(
-        spl::make_shared<print_consumer_proxy>(found->second(element, format_repository,frame_converter, channels)));
+        spl::make_shared<print_consumer_proxy>(found->second(element, format_repository, frame_converter, channels)));
 }
 
 const spl::shared_ptr<frame_consumer>& frame_consumer::empty()
diff --git a/src/core/consumer/frame_consumer.h b/src/core/consumer/frame_consumer.h
index 5bff60b789..ba8e810147 100644
--- a/src/core/consumer/frame_consumer.h
+++ b/src/core/consumer/frame_consumer.h
@@ -60,13 +60,13 @@ class frame_consumer
 };
 
 using consumer_factory_t =
-    std::function<spl::shared_ptr<frame_consumer>(const std::vector<std::wstring>&     params,
-                                                  const core::video_format_repository& format_repository,
+    std::function<spl::shared_ptr<frame_consumer>(const std::vector<std::wstring>&              params,
+                                                  const core::video_format_repository&          format_repository,
                                                   const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)>;
 using preconfigured_consumer_factory_t =
-    std::function<spl::shared_ptr<frame_consumer>(const boost::property_tree::wptree&  element,
-                                                  const core::video_format_repository& format_repository,
+    std::function<spl::shared_ptr<frame_consumer>(const boost::property_tree::wptree&           element,
+                                                  const core::video_format_repository&          format_repository,
                                                   const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)>;
 
@@ -80,13 +80,13 @@ class frame_consumer_registry
     spl::shared_ptr<frame_consumer>
     create_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
-                    const spl::shared_ptr<core::frame_converter>& frame_converter,
+                    const spl::shared_ptr<core::frame_converter>&            frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels) const;
     spl::shared_ptr<frame_consumer>
     create_consumer(const std::wstring&                                      element_name,
                     const boost::property_tree::wptree&                      element,
                     const core::video_format_repository&                     format_repository,
-                    const spl::shared_ptr<core::frame_converter>& frame_converter,
+                    const spl::shared_ptr<core::frame_converter>&            frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels) const;
 
   private:
diff --git a/src/core/frame/frame.cpp b/src/core/frame/frame.cpp
index ff96dce561..afc5579118 100644
--- a/src/core/frame/frame.cpp
+++ b/src/core/frame/frame.cpp
@@ -98,7 +98,7 @@ struct const_frame::impl
     impl(std::vector<array<const std::uint8_t>> image_data,
          array<const std::int32_t>              audio_data,
          const core::pixel_format_desc&         desc,
-         boost::any opaque)
+         boost::any                             opaque)
         : image_data_(std::move(image_data))
         , audio_data_(std::move(audio_data))
         , desc_(desc)
@@ -153,10 +153,10 @@ const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
 {
 }
 const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
-    array<const std::int32_t>              audio_data,
-    const struct pixel_format_desc&        desc,
-            boost::any opaque)
-        : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque))
+                         array<const std::int32_t>              audio_data,
+                         const struct pixel_format_desc&        desc,
+                         boost::any                             opaque)
+    : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque))
 {
 }
 const_frame::const_frame(mutable_frame&& other)
diff --git a/src/core/frame/frame.h b/src/core/frame/frame.h
index 0a5774d48f..cec65a5fad 100644
--- a/src/core/frame/frame.h
+++ b/src/core/frame/frame.h
@@ -64,7 +64,7 @@ class const_frame final
     explicit const_frame(std::vector<array<const std::uint8_t>> image_data,
                          array<const std::int32_t>              audio_data,
                          const struct pixel_format_desc&        desc,
-                                 boost::any opaque);
+                         boost::any                             opaque);
     const_frame(const const_frame& other);
     const_frame(mutable_frame&& other);
 
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index 5cad369a4c..ba7faad41b 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -51,12 +51,12 @@ struct pixel_format_desc final
 {
     struct plane
     {
-        int linesize = 0;
-        int width    = 0;
-        int height   = 0;
-        int size     = 0;
-        int stride   = 0;
-        common::bit_depth depth = common::bit_depth::bit8;
+        int               linesize = 0;
+        int               width    = 0;
+        int               height   = 0;
+        int               size     = 0;
+        int               stride   = 0;
+        common::bit_depth depth    = common::bit_depth::bit8;
 
         plane() = default;
 
@@ -66,12 +66,12 @@ struct pixel_format_desc final
         }
 
         plane(int width, int height, int stride, common::bit_depth depth)
-                : linesize(width * stride * common::bytes_per_pixel(depth))
-                , width(width)
-                , height(height)
-                , size(width * height * stride * common::bytes_per_pixel(depth))
-                , stride(stride)
-                , depth(depth)
+            : linesize(width * stride * common::bytes_per_pixel(depth))
+            , width(width)
+            , height(height)
+            , size(width * height * stride * common::bytes_per_pixel(depth))
+            , stride(stride)
+            , depth(depth)
         {
         }
     };
diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h
index 30919913ac..a993cf7b26 100644
--- a/src/core/mixer/image/image_mixer.h
+++ b/src/core/mixer/image/image_mixer.h
@@ -30,11 +30,16 @@
 
 namespace caspar { namespace core {
 
-struct mixed_image{
+struct mixed_image
+{
     array<const uint8_t> rgba8;
-    boost::any texture;
+    boost::any           texture;
 
-    mixed_image(array<const uint8_t> rgba8, boost::any texture):rgba8(rgba8),texture(texture){}
+    mixed_image(array<const uint8_t> rgba8, boost::any texture)
+        : rgba8(rgba8)
+        , texture(texture)
+    {
+    }
 };
 
 class image_mixer
diff --git a/src/core/mixer/mixer.cpp b/src/core/mixer/mixer.cpp
index 70a95bfc3d..98be54ee33 100644
--- a/src/core/mixer/mixer.cpp
+++ b/src/core/mixer/mixer.cpp
@@ -76,7 +76,7 @@ struct mixer::impl
             std::launch::deferred,
             [image = std::move(image), audio = std::move(audio), graph = graph_, format_desc, tag = this]() mutable {
                 auto image2 = image.get();
-                auto desc = pixel_format_desc(pixel_format::bgra);
+                auto desc   = pixel_format_desc(pixel_format::bgra);
                 desc.planes.emplace_back(format_desc.width, format_desc.height, 4);
                 std::vector<array<const uint8_t>> image_data;
                 image_data.emplace_back(std::move(image2.rgba8));
diff --git a/src/core/video_channel.cpp b/src/core/video_channel.cpp
index f2ec344b79..30761ea9b4 100644
--- a/src/core/video_channel.cpp
+++ b/src/core/video_channel.cpp
@@ -245,12 +245,13 @@ mixer&                              video_channel::mixer() { return impl_->mixer
 const output&                       video_channel::output() const { return impl_->output_; }
 output&                             video_channel::output() { return impl_->output_; }
 spl::shared_ptr<frame_factory>      video_channel::frame_factory() { return impl_->image_mixer_; }
-spl::shared_ptr<frame_converter>      video_channel::frame_converter() {
+spl::shared_ptr<frame_converter>    video_channel::frame_converter()
+{
     // TODO - is this too expensive?
     return impl_->image_mixer_->create_frame_converter();
 }
-int                                 video_channel::index() const { return impl_->index(); }
-core::monitor::state                video_channel::state() const { return impl_->state_; }
+int                  video_channel::index() const { return impl_->index(); }
+core::monitor::state video_channel::state() const { return impl_->state_; }
 
 std::shared_ptr<route> video_channel::route(int index, route_mode mode) { return impl_->route(index, mode); }
 
diff --git a/src/core/video_channel.h b/src/core/video_channel.h
index 801fc6e3f8..879ee0f564 100644
--- a/src/core/video_channel.h
+++ b/src/core/video_channel.h
@@ -84,7 +84,7 @@ class video_channel final
     const core::output&                 output() const;
     core::output&                       output();
 
-    spl::shared_ptr<core::frame_factory> frame_factory();
+    spl::shared_ptr<core::frame_factory>   frame_factory();
     spl::shared_ptr<core::frame_converter> frame_converter();
 
     int index() const;
diff --git a/src/modules/artnet/consumer/artnet_consumer.cpp b/src/modules/artnet/consumer/artnet_consumer.cpp
index c08a5ab8a9..05554b7349 100644
--- a/src/modules/artnet/consumer/artnet_consumer.cpp
+++ b/src/modules/artnet/consumer/artnet_consumer.cpp
@@ -311,7 +311,7 @@ std::vector<fixture> get_fixtures_ptree(const boost::property_tree::wptree& ptre
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/artnet/consumer/artnet_consumer.h b/src/modules/artnet/consumer/artnet_consumer.h
index ffb94ad84a..6aef2da0e6 100644
--- a/src/modules/artnet/consumer/artnet_consumer.h
+++ b/src/modules/artnet/consumer/artnet_consumer.h
@@ -35,6 +35,6 @@ namespace caspar { namespace artnet {
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 }} // namespace caspar::artnet
diff --git a/src/modules/bluefish/consumer/bluefish_consumer.cpp b/src/modules/bluefish/consumer/bluefish_consumer.cpp
index 25d3f01c02..d36bad13a8 100644
--- a/src/modules/bluefish/consumer/bluefish_consumer.cpp
+++ b/src/modules/bluefish/consumer/bluefish_consumer.cpp
@@ -882,8 +882,8 @@ struct bluefish_consumer_proxy : public core::frame_consumer
     }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
@@ -940,7 +940,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/bluefish/consumer/bluefish_consumer.h b/src/modules/bluefish/consumer/bluefish_consumer.h
index 9d942492b3..eb952d4a43 100644
--- a/src/modules/bluefish/consumer/bluefish_consumer.h
+++ b/src/modules/bluefish/consumer/bluefish_consumer.h
@@ -34,13 +34,13 @@ namespace caspar { namespace bluefish {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::bluefish
diff --git a/src/modules/decklink/consumer/decklink_consumer.h b/src/modules/decklink/consumer/decklink_consumer.h
index 94ffc08b90..3d91c590b7 100644
--- a/src/modules/decklink/consumer/decklink_consumer.h
+++ b/src/modules/decklink/consumer/decklink_consumer.h
@@ -35,12 +35,12 @@ namespace caspar { namespace decklink {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::decklink
diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
index 43c9a81bbb..1a176ddd7f 100644
--- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
+++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp
@@ -713,8 +713,8 @@ struct ffmpeg_consumer : public core::frame_consumer
     }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
@@ -732,7 +732,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     return spl::make_shared<ffmpeg_consumer>(u8(ptree.get<std::wstring>(L"path", L"")),
diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
index 998eb42a69..ecd01e794f 100644
--- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
+++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h
@@ -35,12 +35,12 @@ namespace caspar { namespace ffmpeg {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::ffmpeg
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index bdba15e853..a58975666e 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -186,7 +186,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             return desc;
         }
         case core::pixel_format::ycbcr:
-        case core::pixel_format::ycbcra:{
+        case core::pixel_format::ycbcra: {
             // Find chroma height
             // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
             // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index d3bee919f7..786080d42c 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -116,8 +116,8 @@ struct image_consumer : public core::frame_consumer
     }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
diff --git a/src/modules/image/consumer/image_consumer.h b/src/modules/image/consumer/image_consumer.h
index b779a6e528..1dd38f52ba 100644
--- a/src/modules/image/consumer/image_consumer.h
+++ b/src/modules/image/consumer/image_consumer.h
@@ -34,7 +34,7 @@ namespace caspar { namespace image {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::image
diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
index 79c66c997c..916b0d783f 100644
--- a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
+++ b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp
@@ -257,7 +257,7 @@ std::atomic<int> newtek_ndi_consumer::instances_(0);
 spl::shared_ptr<core::frame_consumer>
 create_ndi_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
-                    const spl::shared_ptr<core::frame_converter>& frame_converter,
+                    const spl::shared_ptr<core::frame_converter>&            frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     if (params.size() < 1 || !boost::iequals(params.at(0), L"NDI"))
@@ -270,7 +270,7 @@ create_ndi_consumer(const std::vector<std::wstring>&                         par
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_ndi_consumer(const boost::property_tree::wptree&                      ptree,
                                   const core::video_format_repository&                     format_repository,
-                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
+                                  const spl::shared_ptr<core::frame_converter>&            frame_converter,
                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     auto name         = ptree.get(L"name", L"");
diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.h b/src/modules/newtek/consumer/newtek_ndi_consumer.h
index 8148ee5385..0cd5bf6a38 100644
--- a/src/modules/newtek/consumer/newtek_ndi_consumer.h
+++ b/src/modules/newtek/consumer/newtek_ndi_consumer.h
@@ -35,12 +35,12 @@ namespace caspar { namespace newtek {
 spl::shared_ptr<core::frame_consumer>
 create_ndi_consumer(const std::vector<std::wstring>&                         params,
                     const core::video_format_repository&                     format_repository,
-                    const spl::shared_ptr<core::frame_converter>& frame_converter,
+                    const spl::shared_ptr<core::frame_converter>&            frame_converter,
                     const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_ndi_consumer(const boost::property_tree::wptree&                      ptree,
                                   const core::video_format_repository&                     format_repository,
-                                  const spl::shared_ptr<core::frame_converter>& frame_converter,
+                                  const spl::shared_ptr<core::frame_converter>&            frame_converter,
                                   const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::newtek
diff --git a/src/modules/oal/consumer/oal_consumer.cpp b/src/modules/oal/consumer/oal_consumer.cpp
index 6ce9a81c09..c41b395ad6 100644
--- a/src/modules/oal/consumer/oal_consumer.cpp
+++ b/src/modules/oal/consumer/oal_consumer.cpp
@@ -387,8 +387,8 @@ struct oal_consumer : public core::frame_consumer
     }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
@@ -401,7 +401,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     return spl::make_shared<oal_consumer>();
diff --git a/src/modules/oal/consumer/oal_consumer.h b/src/modules/oal/consumer/oal_consumer.h
index d143412d5b..d5d9428674 100644
--- a/src/modules/oal/consumer/oal_consumer.h
+++ b/src/modules/oal/consumer/oal_consumer.h
@@ -34,12 +34,12 @@ namespace caspar { namespace oal {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::oal
diff --git a/src/modules/screen/consumer/screen_consumer.cpp b/src/modules/screen/consumer/screen_consumer.cpp
index 7f401a483a..b69aa8b840 100644
--- a/src/modules/screen/consumer/screen_consumer.cpp
+++ b/src/modules/screen/consumer/screen_consumer.cpp
@@ -606,8 +606,8 @@ struct screen_consumer_proxy : public core::frame_consumer
     }
 };
 
-spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&     params,
-                                                      const core::video_format_repository& format_repository,
+spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wstring>&              params,
+                                                      const core::video_format_repository&          format_repository,
                                                       const spl::shared_ptr<core::frame_converter>& frame_converter,
                                                       const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
@@ -645,7 +645,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels)
 {
     configuration config;
diff --git a/src/modules/screen/consumer/screen_consumer.h b/src/modules/screen/consumer/screen_consumer.h
index 16493ab824..0cab8953eb 100644
--- a/src/modules/screen/consumer/screen_consumer.h
+++ b/src/modules/screen/consumer/screen_consumer.h
@@ -33,12 +33,12 @@ namespace caspar { namespace screen {
 spl::shared_ptr<core::frame_consumer>
 create_consumer(const std::vector<std::wstring>&                         params,
                 const core::video_format_repository&                     format_repository,
-                const spl::shared_ptr<core::frame_converter>& frame_converter,
+                const spl::shared_ptr<core::frame_converter>&            frame_converter,
                 const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 spl::shared_ptr<core::frame_consumer>
 create_preconfigured_consumer(const boost::property_tree::wptree&                      ptree,
                               const core::video_format_repository&                     format_repository,
-                              const spl::shared_ptr<core::frame_converter>& frame_converter,
+                              const spl::shared_ptr<core::frame_converter>&            frame_converter,
                               const std::vector<spl::shared_ptr<core::video_channel>>& channels);
 
 }} // namespace caspar::screen
diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp
index cfa23511db..1e92b7558e 100644
--- a/src/protocol/amcp/AMCPCommandsImpl.cpp
+++ b/src/protocol/amcp/AMCPCommandsImpl.cpp
@@ -455,8 +455,10 @@ std::wstring add_command(command_context& ctx)
     core::diagnostics::scoped_call_context save;
     core::diagnostics::call_context::for_thread().video_channel = ctx.channel_index + 1;
 
-    auto consumer = ctx.static_context->consumer_registry->create_consumer(
-        ctx.parameters, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx));
+    auto consumer = ctx.static_context->consumer_registry->create_consumer(ctx.parameters,
+                                                                           ctx.static_context->format_repository,
+                                                                           ctx.channel.raw_channel->frame_converter(),
+                                                                           get_channels(ctx));
     ctx.channel.raw_channel->output().add(ctx.layer_index(consumer->index()), consumer);
 
     return L"202 ADD OK\r\n";
@@ -474,7 +476,10 @@ std::wstring remove_command(command_context& ctx)
         }
 
         index = ctx.static_context->consumer_registry
-                    ->create_consumer(ctx.parameters, ctx.static_context->format_repository,ctx.channel.raw_channel->frame_converter(), get_channels(ctx))
+                    ->create_consumer(ctx.parameters,
+                                      ctx.static_context->format_repository,
+                                      ctx.channel.raw_channel->frame_converter(),
+                                      get_channels(ctx))
                     ->index();
     }
 
@@ -487,8 +492,11 @@ std::wstring remove_command(command_context& ctx)
 
 std::wstring print_command(command_context& ctx)
 {
-    ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer(
-        {L"IMAGE"}, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)));
+    ctx.channel.raw_channel->output().add(
+        ctx.static_context->consumer_registry->create_consumer({L"IMAGE"},
+                                                               ctx.static_context->format_repository,
+                                                               ctx.channel.raw_channel->frame_converter(),
+                                                               get_channels(ctx)));
 
     return L"202 PRINT OK\r\n";
 }
diff --git a/src/shell/server.cpp b/src/shell/server.cpp
index 149adbd793..ae9db3b5a2 100644
--- a/src/shell/server.cpp
+++ b/src/shell/server.cpp
@@ -341,8 +341,12 @@ struct server::impl
 
                     try {
                         if (name != L"<xmlcomment>")
-                            channel.raw_channel->output().add(consumer_registry_->create_consumer(
-                                name, xml_consumer.second, video_format_repository_, channel.raw_channel->frame_converter(), channels_vec));
+                            channel.raw_channel->output().add(
+                                consumer_registry_->create_consumer(name,
+                                                                    xml_consumer.second,
+                                                                    video_format_repository_,
+                                                                    channel.raw_channel->frame_converter(),
+                                                                    channels_vec));
                     } catch (...) {
                         CASPAR_LOG_CURRENT_EXCEPTION();
                     }

From 9cb0282c3015a0a5226e178a0b544f41c689d74d Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 15:32:57 +0000
Subject: [PATCH 32/50] chore: add todos to ndi producer

---
 src/modules/newtek/producer/newtek_ndi_producer.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/modules/newtek/producer/newtek_ndi_producer.cpp b/src/modules/newtek/producer/newtek_ndi_producer.cpp
index 0596f57999..52758681c2 100644
--- a/src/modules/newtek/producer/newtek_ndi_producer.cpp
+++ b/src/modules/newtek/producer/newtek_ndi_producer.cpp
@@ -180,6 +180,15 @@ struct newtek_ndi_producer : public core::frame_producer
                 av_frame->data[0]     = video_frame.p_data;
                 av_frame->linesize[0] = video_frame.line_stride_in_bytes;
                 switch (video_frame.FourCC) {
+                    case NDIlib_FourCC_type_UYVY:
+                        av_frame->format = AV_PIX_FMT_UYVY422;
+                        break;
+                    // case NDIlib_FourCC_type_UYVA:
+                    // case NDIlib_FourCC_type_P216:
+                    // case NDIlib_FourCC_type_PA16:
+                    // case NDIlib_FourCC_type_YV12:
+                    // case NDIlib_FourCC_type_I420:
+                    // case NDIlib_FourCC_type_NV12:
                     case NDIlib_FourCC_type_BGRA:
                         av_frame->format = AV_PIX_FMT_BGRA;
                         break;
@@ -192,9 +201,6 @@ struct newtek_ndi_producer : public core::frame_producer
                     case NDIlib_FourCC_type_RGBX:
                         av_frame->format = AV_PIX_FMT_RGBA;
                         break;
-                    case NDIlib_FourCC_type_UYVY:
-                        av_frame->format = AV_PIX_FMT_UYVY422;
-                        break;
                     default: // should never happen because library handles the conversion for us
                         av_frame->format = AV_PIX_FMT_BGRA;
                         break;

From 98419f6801e43877b44b903d3c42c857580d01eb Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 15:45:04 +0000
Subject: [PATCH 33/50] wip: tidy

---
 src/accelerator/ogl/image/frame_converter.cpp | 22 ++++++-------
 src/accelerator/ogl/image/frame_converter.h   |  6 ++--
 src/accelerator/ogl/util/device.cpp           | 31 ++++++++-----------
 src/accelerator/ogl/util/device.h             | 10 +++---
 src/core/frame/frame_factory.h                |  6 ++--
 .../decklink/consumer/decklink_consumer.cpp   | 15 ++++-----
 6 files changed, 43 insertions(+), 47 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 9a56fcd983..ff75f96871 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -65,19 +65,19 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor
                                });
 }
 
-core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame)
+core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame)
 {
     // TODO
     return core::draw_frame{};
 }
 
-std::shared_future<std::vector<array<const std::uint8_t>>>
+std::shared_future<array<const std::uint8_t>>
 ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format)
 {
-    std::vector<array<const std::uint8_t>> buffers;
-    int                                    x_count        = 0;
-    int                                    y_count        = 0;
-    int                                    words_per_line = 0;
+    array<const std::uint8_t> source;
+    int                       x_count        = 0;
+    int                       y_count        = 0;
+    int                       words_per_line = 0;
 
     switch (format) {
         case core::encoded_frame_format::decklink_v210:
@@ -85,14 +85,14 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
             auto row_bytes  = row_blocks * 128;
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
-            buffers.emplace_back(ogl_->create_array(row_bytes * frame.height()));
+            source         = ogl_->create_array(row_bytes * frame.height());
             x_count        = row_blocks * 8;
             y_count        = frame.height();
             words_per_line = row_blocks * 32;
             break;
     }
 
-    if (buffers.empty() || x_count == 0 || y_count == 0) {
+    if (source.size() == 0 || x_count == 0 || y_count == 0) {
         CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
     }
 
@@ -107,13 +107,13 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     description.height         = frame.height();
     description.words_per_line = words_per_line;
 
-    auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count);
+    auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count);
 
     return std::async(std::launch::deferred,
-                      [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable {
+                      [source = std::move(source), future_conversion = std::move(future_conversion)]() mutable {
                           future_conversion.get();
 
-                          return std::move(buffers);
+                          return std::move(source);
                       });
 }
 
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index 41daedc772..a8065e3a97 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -43,10 +43,10 @@ class ogl_frame_converter
 
     core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override;
 
-    core::draw_frame convert_frame(const core::mutable_frame& frame) override;
+    core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override;
 
-    std::shared_future<std::vector<array<const std::uint8_t>>>
-    convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) override;
+    std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame&   frame,
+                                                                    core::encoded_frame_format format) override;
 
   private:
     const spl::shared_ptr<device> ogl_;
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 32b4374cf4..86ae8781cb 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -317,11 +317,11 @@ struct device::impl : public std::enable_shared_from_this<impl>
     }
     */
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>&          texture,
-                                           const std::vector<array<const uint8_t>>& buffers,
-                                           const convert_from_texture_description&  description,
-                                           int                                      x_count,
-                                           int                                      y_count)
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                           const array<const uint8_t>&             source,
+                                           const convert_from_texture_description& description,
+                                           int                                     x_count,
+                                           int                                     y_count)
     {
         return spawn_async([=](yield_context yield) {
             if (!compute_from_rgba_)
@@ -343,16 +343,12 @@ struct device::impl : public std::enable_shared_from_this<impl>
             GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16));
             GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8));
 
-            // TODO: only a single buffer?
-            //            for (size_t i = 0; i < buffers.size(); i++) {
-            auto& source = buffers[0];
-            auto  tmp    = source.storage<std::shared_ptr<buffer>>();
+            auto tmp = source.storage<std::shared_ptr<buffer>>();
             if (!tmp) {
                 CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
             }
 
             GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id()));
-            //            }
 
             // TODO - binding 2 description
             auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false);
@@ -391,8 +387,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
         size_t                       total_pooled_device_buffer_size  = 0;
         size_t                       total_pooled_device_buffer_count = 0;
 
-        for (size_t i = 0; i < device_pools_.size(); ++i) {
-            auto& depth_pools = device_pools_.at(i);
+        for (const auto& depth_pools : device_pools_) {
             for (size_t i = 0; i < depth_pools.size(); ++i) {
                 auto& pools      = depth_pools.at(i);
                 bool  mipmapping = i > 3;
@@ -511,13 +506,13 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source, as_rgba8);
 }
-std::future<void> device::convert_from_texture(const std::shared_ptr<texture>&          texture,
-                                               const std::vector<array<const uint8_t>>& buffers,
-                                               const convert_from_texture_description&  description,
-                                               int                                      x_count,
-                                               int                                      y_count)
+std::future<void> device::convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                               const array<const uint8_t>&             source,
+                                               const convert_from_texture_description& description,
+                                               int                                     x_count,
+                                               int                                     y_count)
 {
-    return impl_->convert_from_texture(texture, buffers, description, x_count, y_count);
+    return impl_->convert_from_texture(texture, source, description, x_count, y_count);
 }
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index e5dce060cb..46872ea1bb 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -62,11 +62,11 @@ class device final
     copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source, bool as_rgba8);
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>&          texture,
-                                           const std::vector<array<const uint8_t>>& buffers,
-                                           const convert_from_texture_description&  description,
-                                           int                                      x_count,
-                                           int                                      y_count);
+    std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                           const array<const uint8_t>&             source,
+                                           const convert_from_texture_description& description,
+                                           int                                     x_count,
+                                           int                                     y_count);
 
     template <typename Func>
     auto dispatch_async(Func&& func)
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 48192135d1..a0849ae171 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -43,10 +43,10 @@ class frame_converter
 
     virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
-    virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0;
+    virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0;
 
-    virtual std::shared_future<std::vector<array<const std::uint8_t>>>
-    convert_from_rgba(const core::const_frame& frame, const encoded_frame_format format) = 0;
+    virtual std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame& frame,
+                                                                            encoded_frame_format     format) = 0;
 };
 
 class frame_factory
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 9a158cc0ed..918798fee0 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -424,12 +424,12 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback
 
 struct converted_frame
 {
-    core::const_frame                                          raw_frame;
-    std::shared_future<std::vector<array<const std::uint8_t>>> frame;
+    core::const_frame                             raw_frame;
+    std::shared_future<array<const std::uint8_t>> frame;
 
-    converted_frame(core::const_frame raw_frame, std::shared_future<std::vector<array<const std::uint8_t>>> frame)
+    converted_frame(const core::const_frame& raw_frame, std::shared_future<array<const std::uint8_t>> frame)
         : raw_frame(raw_frame)
-        , frame(frame)
+        , frame(std::move(frame))
     {
     }
 };
@@ -780,6 +780,7 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
             tbb::parallel_for(-1, static_cast<int>(secondary_port_contexts_.size()), [&](int i) {
                 if (i == -1) {
                     // Primary port
+                    // TODO - reimplement this
                     // std::shared_ptr<void> image_data = convert_frame_for_port(channel_format_desc_,
                     //                                                           decklink_format_desc_,
                     //                                                           config_.primary,
@@ -787,10 +788,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
                     //                                                           frame2,
                     //                                                           mode_->GetFieldDominance());
 
-                    auto buffers = frame1.value().frame.get();
+                    auto buffer = frame1.value().frame.get();
 
                     std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc_.size, 128);
-                    std::memcpy(image_data.get(), buffers.at(0).data(), buffers.at(0).size());
+                    std::memcpy(image_data.get(), buffer.data(), buffer.size());
 
                     schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time);
 
@@ -883,7 +884,7 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
                 // Always push a field2, as we have supplied field1
                 buffer_cond_.wait(lock, [&] { return buffer_.size() < buffer_capacity_ || abort_request_; });
             }
-            buffer_.push(converted_frame(std::move(frame), frame_future));
+            buffer_.push(converted_frame(frame, frame_future));
         }
         buffer_cond_.notify_all();
 

From 4cf8a2a277ab62ad7b448529ca8edc26ef693fe2 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 15:52:06 +0000
Subject: [PATCH 34/50] wip: reimplement decklink key-only flag

---
 src/accelerator/ogl/image/frame_converter.cpp       | 5 ++++-
 src/accelerator/ogl/image/frame_converter.h         | 4 ++--
 src/accelerator/ogl/image/shader_from_rgba.comp     | 6 ++++++
 src/accelerator/ogl/util/device.h                   | 1 +
 src/core/frame/frame_factory.h                      | 4 ++--
 src/modules/decklink/consumer/decklink_consumer.cpp | 3 ++-
 6 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index ff75f96871..8326f7edf4 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -72,7 +72,9 @@ core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame&
 }
 
 std::shared_future<array<const std::uint8_t>>
-ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format)
+ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
+                                       const core::encoded_frame_format format,
+                                       bool                             key_only)
 {
     array<const std::uint8_t> source;
     int                       x_count        = 0;
@@ -106,6 +108,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor
     description.width          = frame.width();
     description.height         = frame.height();
     description.words_per_line = words_per_line;
+    description.key_only       = key_only;
 
     auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count);
 
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index a8065e3a97..b3767ecfad 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -45,8 +45,8 @@ class ogl_frame_converter
 
     core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override;
 
-    std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame&   frame,
-                                                                    core::encoded_frame_format format) override;
+    std::shared_future<array<const std::uint8_t>>
+    convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override;
 
   private:
     const spl::shared_ptr<device> ogl_;
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index 3a629ab396..f1bdbe93ce 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -17,6 +17,7 @@ layout(std430, binding = 3) buffer description_layout
     uint frame_width;
     uint frame_height;
     uint words_per_line;
+    bool key_only;
 };
 
 vec4 read_pixel(ivec2 coord) {
@@ -28,6 +29,11 @@ vec4 read_pixel(ivec2 coord) {
 }
 
 vec3 rgba_to_bt709(vec4 pixel) {
+    if (key_only) {
+        // TODO - verify everything about this
+        return vec3(pixel.a, 0.5, 0.5);
+    }
+
     float KR = 0.2126;
     float KB = 0.0722;
     float KG = 1.0 - KR - KB;
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 46872ea1bb..e6548dd39f 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -41,6 +41,7 @@ struct convert_from_texture_description
     uint32_t width;
     uint32_t height;
     uint32_t words_per_line;
+    bool     key_only;
 };
 
 class device final
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index a0849ae171..f73bdefd35 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -45,8 +45,8 @@ class frame_converter
 
     virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0;
 
-    virtual std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame& frame,
-                                                                            encoded_frame_format     format) = 0;
+    virtual std::shared_future<array<const std::uint8_t>>
+    convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0;
 };
 
 class frame_factory
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 918798fee0..90e7d8c082 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -877,7 +877,8 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
         }
 
         if (frame) {
-            auto frame_future = frame_converter_->convert_from_rgba(frame, core::encoded_frame_format::decklink_v210);
+            auto frame_future = frame_converter_->convert_from_rgba(
+                frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only);
 
             std::unique_lock<std::mutex> lock(buffer_mutex_);
             if (field != core::video_field::b) {

From 4cc8a05d1d4c153de194bb11eb29fec341ed0731 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Tue, 28 Nov 2023 23:52:53 +0000
Subject: [PATCH 35/50] feat: minimise cpu image conversions for image producer

---
 src/modules/image/producer/image_producer.cpp | 18 +++--
 .../image/producer/image_scroll_producer.cpp  | 12 +--
 src/modules/image/util/image_loader.cpp       | 77 ++++++++++++-------
 src/modules/image/util/image_loader.h         | 12 ++-
 4 files changed, 75 insertions(+), 44 deletions(-)

diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp
index f7595375d3..2e469d1374 100644
--- a/src/modules/image/producer/image_producer.cpp
+++ b/src/modules/image/producer/image_producer.cpp
@@ -35,6 +35,7 @@
 #include <core/frame/draw_frame.h>
 #include <core/frame/frame.h>
 #include <core/frame/frame_factory.h>
+#include <core/frame/geometry.h>
 #include <core/frame/pixel_format.h>
 #include <core/monitor/monitor.h>
 #include <core/producer/frame_producer.h>
@@ -67,7 +68,7 @@ struct image_producer : public core::frame_producer
         , frame_factory_(frame_factory)
         , length_(length)
     {
-        load(load_image(description_));
+        load(load_image(description_, true));
 
         CASPAR_LOG(info) << print() << L" Initialized";
     }
@@ -80,19 +81,20 @@ struct image_producer : public core::frame_producer
         , frame_factory_(frame_factory)
         , length_(length)
     {
-        load(load_png_from_memory(png_data, size));
+        load(load_png_from_memory(png_data, size, true));
 
         CASPAR_LOG(info) << print() << L" Initialized";
     }
 
-    void load(const std::shared_ptr<FIBITMAP>& bitmap)
+    void load(const loaded_image& image)
     {
-        FreeImage_FlipVertical(bitmap.get());
-        core::pixel_format_desc desc(core::pixel_format::bgra);
-        desc.planes.emplace_back(FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()), 4);
-        auto frame = frame_factory_->create_frame(this, desc);
+        core::pixel_format_desc desc(image.format);
+        desc.planes.emplace_back(
+            FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride);
+        auto frame       = frame_factory_->create_frame(this, desc);
+        frame.geometry() = core::frame_geometry::get_default_vflip();
 
-        std::copy_n(FreeImage_GetBits(bitmap.get()), frame.image_data(0).size(), frame.image_data(0).begin());
+        std::copy_n(FreeImage_GetBits(image.bitmap.get()), frame.image_data(0).size(), frame.image_data(0).begin());
         frame_ = core::draw_frame(std::move(frame));
     }
 
diff --git a/src/modules/image/producer/image_scroll_producer.cpp b/src/modules/image/producer/image_scroll_producer.cpp
index 0482ae08cb..2df6825573 100644
--- a/src/modules/image/producer/image_scroll_producer.cpp
+++ b/src/modules/image/producer/image_scroll_producer.cpp
@@ -140,11 +140,11 @@ struct image_scroll_producer : public core::frame_producer
         if (end_time_)
             speed = -1.0;
 
-        auto bitmap = load_image(filename_);
-        FreeImage_FlipVertical(bitmap.get());
+        auto bitmap = load_image(filename_, false);
+        FreeImage_FlipVertical(bitmap.bitmap.get());
 
-        width_  = FreeImage_GetWidth(bitmap.get());
-        height_ = FreeImage_GetHeight(bitmap.get());
+        width_  = FreeImage_GetWidth(bitmap.bitmap.get());
+        height_ = FreeImage_GetHeight(bitmap.bitmap.get());
 
         bool vertical   = width_ == format_desc_.width;
         bool horizontal = height_ == format_desc_.height;
@@ -169,7 +169,7 @@ struct image_scroll_producer : public core::frame_producer
 
         speed_ = speed_tweener(speed, speed, 0, tweener(L"linear"));
 
-        auto                   bytes = FreeImage_GetBits(bitmap.get());
+        auto                   bytes = FreeImage_GetBits(bitmap.bitmap.get());
         auto                   count = width_ * height_ * 4;
         image_view<bgra_pixel> original_view(bytes, width_, height_);
 
@@ -193,7 +193,7 @@ struct image_scroll_producer : public core::frame_producer
             caspar::tweener        blur_tweener(L"easeInQuad");
             blur(original_view, blurred_view, angle, motion_blur_px, blur_tweener);
             bytes = blurred_copy.get();
-            bitmap.reset();
+            bitmap.bitmap.reset();
         }
 
         if (vertical) {
diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp
index eedc289e04..eefc486592 100644
--- a/src/modules/image/util/image_loader.cpp
+++ b/src/modules/image/util/image_loader.cpp
@@ -41,9 +41,54 @@
 #include "image_algorithms.h"
 #include "image_view.h"
 
+#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR
+#define IMAGE_BGRA_FORMAT core::pixel_format::bgra
+#define IMAGE_BGR_FORMAT core::pixel_format::bgr
+#else
+#define IMAGE_BGRA_FORMAT core::pixel_format::rgba
+#define IMAGE_BGR_FORMAT core::pixel_format::rgb
+#endif
+
 namespace caspar { namespace image {
 
-std::shared_ptr<FIBITMAP> load_image(const std::wstring& filename)
+loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr<FIBITMAP> bitmap, bool allow_all_formats)
+{
+    core::pixel_format format;
+    int                stride; // bytes per pixel of the chosen layout (4 or 3)
+    // Settle on a 4-channel or (only when allow_all_formats) 3-channel layout, converting only when needed.
+    unsigned int bpp = FreeImage_GetBPP(bitmap.get());
+    if (bpp == 32) {
+        format = IMAGE_BGRA_FORMAT;
+        stride = 4;
+    } else if (allow_all_formats && bpp == 24) {
+        format = IMAGE_BGR_FORMAT;
+        stride = 3; // NOTE(review): FreeImage pads scanlines to 4 bytes; a tight width*3 copy may skew - confirm
+    } else if (allow_all_formats && !FreeImage_IsTransparent(bitmap.get())) {
+        format = IMAGE_BGR_FORMAT; // not reported as transparent, so three channels are enough
+        stride = 3;
+
+        bitmap = std::shared_ptr<FIBITMAP>(FreeImage_ConvertTo24Bits(bitmap.get()), FreeImage_Unload);
+    } else {
+        format = IMAGE_BGRA_FORMAT; // fallback; always taken for non-32 bpp input when allow_all_formats is false
+        stride = 4;
+
+        bitmap = std::shared_ptr<FIBITMAP>(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload);
+    }
+
+    if (!bitmap) // e.g. a conversion above did not produce a bitmap
+        CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format."));
+
+    // PNG images need to be premultiplied with their alpha; this is only done for the 4-channel layout
+    if (fif == FIF_PNG && format == IMAGE_BGRA_FORMAT) {
+        image_view<bgra_pixel> original_view(
+            FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()));
+        premultiply(original_view);
+    }
+
+    return {std::move(bitmap), format, stride};
+}
+
+loaded_image load_image(const std::wstring& filename, bool allow_all_formats)
 {
     if (!boost::filesystem::exists(filename))
         CASPAR_THROW_EXCEPTION(file_not_found() << boost::errinfo_file_name(u8(filename)));
@@ -70,23 +115,10 @@ std::shared_ptr<FIBITMAP> load_image(const std::wstring& filename)
     auto bitmap = std::shared_ptr<FIBITMAP>(FreeImage_Load(fif, u8(filename).c_str(), 0), FreeImage_Unload);
 #endif
 
-    if (FreeImage_GetBPP(bitmap.get()) != 32) {
-        bitmap = std::shared_ptr<FIBITMAP>(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload);
-        if (!bitmap)
-            CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format."));
-    }
-
-    // PNG-images need to be premultiplied with their alpha
-    if (fif == FIF_PNG) {
-        image_view<bgra_pixel> original_view(
-            FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()));
-        premultiply(original_view);
-    }
-
-    return bitmap;
+    return prepare_loaded_image(fif, std::move(bitmap), allow_all_formats);
 }
 
-std::shared_ptr<FIBITMAP> load_png_from_memory(const void* memory_location, size_t size)
+loaded_image load_png_from_memory(const void* memory_location, size_t size, bool allow_all_formats)
 {
     FREE_IMAGE_FORMAT fif = FIF_PNG;
 
@@ -95,18 +127,7 @@ std::shared_ptr<FIBITMAP> load_png_from_memory(const void* memory_location, size
         FreeImage_CloseMemory);
     auto bitmap = std::shared_ptr<FIBITMAP>(FreeImage_LoadFromMemory(fif, memory.get(), 0), FreeImage_Unload);
 
-    if (FreeImage_GetBPP(bitmap.get()) != 32) {
-        bitmap = std::shared_ptr<FIBITMAP>(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload);
-
-        if (!bitmap)
-            CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format."));
-    }
-
-    // PNG-images need to be premultiplied with their alpha
-    image_view<bgra_pixel> original_view(
-        FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()));
-    premultiply(original_view);
-    return bitmap;
+    return prepare_loaded_image(fif, std::move(bitmap), allow_all_formats);
 }
 
 const std::set<std::wstring>& supported_extensions()
diff --git a/src/modules/image/util/image_loader.h b/src/modules/image/util/image_loader.h
index 81a68bfea8..9362be31bc 100644
--- a/src/modules/image/util/image_loader.h
+++ b/src/modules/image/util/image_loader.h
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <core/frame/pixel_format.h>
 #include <memory>
 #include <set>
 #include <string>
@@ -29,8 +30,15 @@ struct FIBITMAP;
 
 namespace caspar { namespace image {
 
-std::shared_ptr<FIBITMAP>     load_image(const std::wstring& filename);
-std::shared_ptr<FIBITMAP>     load_png_from_memory(const void* memory_location, size_t size);
+struct loaded_image // result of load_image / load_png_from_memory
+{
+    std::shared_ptr<FIBITMAP> bitmap; // FreeImage bitmap holding the pixel data
+    core::pixel_format        format; // channel layout of the data in `bitmap`
+    int                       stride; // bytes per pixel (3 or 4), not bytes per scanline
+};
+
+loaded_image                  load_image(const std::wstring& filename, bool allow_all_formats);
+loaded_image                  load_png_from_memory(const void* memory_location, size_t size, bool allow_all_formats);
 const std::set<std::wstring>& supported_extensions();
 
 }} // namespace caspar::image

From 219ada3b9335dabe70b03a8ae13a85c9e89ad1b5 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:10:18 +0000
Subject: [PATCH 36/50] wip: start of 16bit png writing

---
 src/accelerator/ogl/image/frame_converter.cpp | 40 +++++++++----
 src/accelerator/ogl/image/frame_converter.h   |  2 +
 .../ogl/image/shader_from_rgba.comp           | 46 ++++++++++++++-
 src/accelerator/ogl/util/device.cpp           | 10 ++--
 src/accelerator/ogl/util/device.h             |  5 +-
 src/core/frame/frame_factory.h                |  6 +-
 src/modules/image/consumer/image_consumer.cpp | 59 +++++++++++++++----
 src/modules/image/util/image_algorithms.h     | 10 ++--
 src/modules/image/util/image_view.h           | 34 +++++++++++
 9 files changed, 174 insertions(+), 38 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 8326f7edf4..5e642e49d5 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -76,25 +76,33 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
                                        const core::encoded_frame_format format,
                                        bool                             key_only)
 {
-    array<const std::uint8_t> source;
-    int                       x_count        = 0;
-    int                       y_count        = 0;
+    array<const std::uint8_t> buffer;
+    unsigned int              x_count        = 0;
+    unsigned int              y_count        = 0;
     int                       words_per_line = 0;
 
     switch (format) {
+        case core::encoded_frame_format::rgba16:
+        case core::encoded_frame_format::bgra16:
+            x_count        = frame.width();
+            y_count        = frame.height();
+            buffer         = ogl_->create_array(frame.width() * frame.height() * 8);
+            words_per_line = frame.width() * 2;
+
+            break;
         case core::encoded_frame_format::decklink_v210:
             auto row_blocks = ((frame.width() + 47) / 48);
             auto row_bytes  = row_blocks * 128;
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
-            source         = ogl_->create_array(row_bytes * frame.height());
+            buffer         = ogl_->create_array(row_bytes * frame.height());
             x_count        = row_blocks * 8;
             y_count        = frame.height();
             words_per_line = row_blocks * 32;
             break;
     }
 
-    if (source.size() == 0 || x_count == 0 || y_count == 0) {
+    if (buffer.size() == 0 || x_count == 0 || y_count == 0) {
         CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
     }
 
@@ -104,20 +112,30 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
     }
 
     convert_from_texture_description description{};
+    description.target_format  = format;
     description.is_16_bit      = texture_ptr->depth() == common::bit_depth::bit16;
     description.width          = frame.width();
     description.height         = frame.height();
     description.words_per_line = words_per_line;
     description.key_only       = key_only;
 
-    auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count);
+    auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count);
+
+    return std::async(std::launch::deferred, [buffer, future_conversion = std::move(future_conversion)]() mutable {
+        future_conversion.get();
+
+        return buffer;
+    });
+}
 
-    return std::async(std::launch::deferred,
-                      [source = std::move(source), future_conversion = std::move(future_conversion)]() mutable {
-                          future_conversion.get();
+common::bit_depth ogl_frame_converter::get_frame_bitdepth(const core::const_frame& frame)
+{
+    auto texture_ptr = boost::any_cast<std::shared_ptr<texture>>(frame.opaque());
+    if (!texture_ptr) {
+        CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!"));
+    }
 
-                          return std::move(source);
-                      });
+    return texture_ptr->depth();
 }
 
 } // namespace caspar::accelerator::ogl
\ No newline at end of file
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index b3767ecfad..3d85635d91 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -48,6 +48,8 @@ class ogl_frame_converter
     std::shared_future<array<const std::uint8_t>>
     convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override;
 
+    common::bit_depth get_frame_bitdepth(const core::const_frame& frame) override;
+
   private:
     const spl::shared_ptr<device> ogl_;
 };
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index f1bdbe93ce..a783c9e6f2 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -13,6 +13,7 @@ layout(std430, binding = 2) buffer buffer_layout
 layout(std430, binding = 3) buffer description_layout
 {
     // This must match convert_from_texture_description in device.h
+    uint target_format;
     bool is_16_bit;
     uint frame_width;
     uint frame_height;
@@ -75,7 +76,7 @@ uint to_10bit(float val) {
 void decklink_v210() {
     // basic coordinates
     uint y_offset = gl_GlobalInvocationID.y * words_per_line;
-    uint x_offset = gl_GlobalInvocationID.x * 4; // 4 bytes per op
+    uint x_offset = gl_GlobalInvocationID.x * 4; // 4 words per op
     uint offset = y_offset + x_offset;
     uint image_x = gl_GlobalInvocationID.x * 6;
 
@@ -95,8 +96,49 @@ void decklink_v210() {
     bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
 }
 
+void rgba16() {
+    // TODO: is this excessive to do in a shader?
+    // basic coordinates: one invocation packs one pixel into two consecutive 32-bit output words
+    uint y_offset = gl_GlobalInvocationID.y * words_per_line;
+    uint x_offset = gl_GlobalInvocationID.x * 2; // 2 words per op
+    uint offset = y_offset + x_offset;
+
+    // TODO - can this be done solely as integer?
+    vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y));
+
+    // Note: the texture is stored in reverse order. Round to nearest (+0.5) instead of truncating.
+    bufferOutput[offset + 0] = uint(pixel.b * 65535.0 + 0.5) + (uint(pixel.g * 65535.0 + 0.5) << 16);
+    bufferOutput[offset + 1] = uint(pixel.r * 65535.0 + 0.5) + (uint(pixel.a * 65535.0 + 0.5) << 16);
+}
+
+void bgra16() {
+    // TODO: is this excessive to do in a shader?
+    // basic coordinates: one invocation packs one pixel into two consecutive 32-bit output words
+    uint y_offset = gl_GlobalInvocationID.y * words_per_line;
+    uint x_offset = gl_GlobalInvocationID.x * 2; // 2 words per op
+    uint offset = y_offset + x_offset;
+
+    // TODO - can this be done solely as integer?
+    vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y));
+
+    // Note: the texture is stored in reverse order. Round to nearest (+0.5) instead of truncating.
+    bufferOutput[offset + 0] = uint(pixel.r * 65535.0 + 0.5) + (uint(pixel.g * 65535.0 + 0.5) << 16);
+    bufferOutput[offset + 1] = uint(pixel.b * 65535.0 + 0.5) + (uint(pixel.a * 65535.0 + 0.5) << 16);
+}
+
 void main() {
     ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy);
 
-    decklink_v210();
+    switch(target_format){
+        case 0: // rgba16
+            rgba16();
+            break;
+        case 1: // bgra16
+            bgra16();
+            break;
+
+        case 2:
+            decklink_v210();
+            break;
+    }
 }
\ No newline at end of file
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 86ae8781cb..9062a322db 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -320,8 +320,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
     std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
                                            const array<const uint8_t>&             source,
                                            const convert_from_texture_description& description,
-                                           int                                     x_count,
-                                           int                                     y_count)
+                                           unsigned int                            x_count,
+                                           unsigned int                            y_count)
     {
         return spawn_async([=](yield_context yield) {
             if (!compute_from_rgba_)
@@ -357,7 +357,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
 
             compute_from_rgba_->use();
 
-            GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1));
+            GL(glDispatchCompute(x_count, y_count, 1));
 
             auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 
@@ -509,8 +509,8 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 std::future<void> device::convert_from_texture(const std::shared_ptr<texture>&         texture,
                                                const array<const uint8_t>&             source,
                                                const convert_from_texture_description& description,
-                                               int                                     x_count,
-                                               int                                     y_count)
+                                               unsigned int                            x_count,
+                                               unsigned int                            y_count)
 {
     return impl_->convert_from_texture(texture, source, description, x_count, y_count);
 }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index e6548dd39f..f6c3b5974f 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -37,6 +37,7 @@ namespace caspar { namespace accelerator { namespace ogl {
 // This must match description_layout in shader_from_rgba.comp
 struct convert_from_texture_description
 {
+    uint32_t target_format; // core::encoded_frame_format value; the shader reads it as `uint target_format`
     bool     is_16_bit;
     uint32_t width;
     uint32_t height;
@@ -66,8 +67,8 @@ class device final
     std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
                                            const array<const uint8_t>&             source,
                                            const convert_from_texture_description& description,
-                                           int                                     x_count,
-                                           int                                     y_count);
+                                           unsigned int                            x_count,
+                                           unsigned int                            y_count);
 
     template <typename Func>
     auto dispatch_async(Func&& func)
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index f73bdefd35..82416fdc48 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -29,7 +29,9 @@ namespace caspar { namespace core {
 
 enum encoded_frame_format
 {
-    decklink_v210 = 0,
+    rgba16        = 0,
+    bgra16        = 1,
+    decklink_v210 = 2,
 };
 
 class frame_converter
@@ -47,6 +49,8 @@ class frame_converter
 
     virtual std::shared_future<array<const std::uint8_t>>
     convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0;
+
+    virtual common::bit_depth get_frame_bitdepth(const core::const_frame& frame) = 0;
 };
 
 class frame_factory
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index 786080d42c..41117ce1ac 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -36,6 +36,7 @@
 
 #include <core/consumer/frame_consumer.h>
 #include <core/frame/frame.h>
+#include <core/frame/frame_factory.h>
 #include <core/video_format.h>
 
 #include <boost/algorithm/string.hpp>
@@ -49,15 +50,23 @@
 
 namespace caspar { namespace image {
 
+#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR
+#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::bgra16
+#else
+#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::rgba16
+#endif
+
 struct image_consumer : public core::frame_consumer
 {
-    const std::wstring filename_;
+    const spl::shared_ptr<core::frame_converter> frame_converter_;
+    const std::wstring                           filename_;
 
   public:
     // frame_consumer
 
-    explicit image_consumer(std::wstring filename)
-        : filename_(std::move(filename))
+    explicit image_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter, std::wstring filename)
+        : frame_converter_(frame_converter)
+        , filename_(std::move(filename))
     {
     }
 
@@ -67,7 +76,7 @@ struct image_consumer : public core::frame_consumer
     {
         auto filename = filename_;
 
-        std::thread async([frame, filename] {
+        std::thread async([frame_converter = frame_converter_, frame, filename] {
             try {
                 auto filename2 = filename;
 
@@ -78,14 +87,40 @@ struct image_consumer : public core::frame_consumer
                 else
                     filename2 = env::media_folder() + filename2 + L".png";
 
-                auto bitmap = std::shared_ptr<FIBITMAP>(
-                    FreeImage_Allocate(static_cast<int>(frame.width()), static_cast<int>(frame.height()), 32),
-                    FreeImage_Unload);
-                std::memcpy(FreeImage_GetBits(bitmap.get()), frame.image_data(0).begin(), frame.image_data(0).size());
+                common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame);
+
+                std::shared_ptr<FIBITMAP> bitmap;
+
+                if (frame_depth != common::bit_depth::bit8) {
+                    bitmap = std::shared_ptr<FIBITMAP>(FreeImage_AllocateT(FIT_RGBA16,
+                                                                           static_cast<int>(frame.width()),
+                                                                           static_cast<int>(frame.height())),
+                                                       FreeImage_Unload);
+
+                    array<const std::uint8_t> rgba16_bytes =
+                        frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false).get();
+
+                    std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size());
+
+                    // TODO - this doesnt work
+                    image_view<bgra16_pixel> original_view(FreeImage_GetBits(bitmap.get()),
+                                                         static_cast<int>(frame.width()),
+                                                         static_cast<int>(frame.height()));
+                    unmultiply(original_view, 65535);
+                } else {
+                    bitmap = std::shared_ptr<FIBITMAP>(
+                        FreeImage_AllocateT(
+                            FIT_BITMAP, static_cast<int>(frame.width()), static_cast<int>(frame.height()), 32),
+                        FreeImage_Unload);
+
+                    std::memcpy(
+                        FreeImage_GetBits(bitmap.get()), frame.image_data(0).begin(), frame.image_data(0).size());
 
-                image_view<bgra_pixel> original_view(
-                    FreeImage_GetBits(bitmap.get()), static_cast<int>(frame.width()), static_cast<int>(frame.height()));
-                unmultiply(original_view);
+                    image_view<bgra_pixel> original_view(FreeImage_GetBits(bitmap.get()),
+                                                         static_cast<int>(frame.width()),
+                                                         static_cast<int>(frame.height()));
+                    unmultiply(original_view, 255);
+                }
 
                 FreeImage_FlipVertical(bitmap.get());
 #ifdef WIN32
@@ -129,7 +164,7 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
     if (params.size() > 1)
         filename = params.at(1);
 
-    return spl::make_shared<image_consumer>(filename);
+    return spl::make_shared<image_consumer>(frame_converter, filename);
 }
 
 }} // namespace caspar::image
diff --git a/src/modules/image/util/image_algorithms.h b/src/modules/image/util/image_algorithms.h
index 268f5efad3..f6a7c6662c 100644
--- a/src/modules/image/util/image_algorithms.h
+++ b/src/modules/image/util/image_algorithms.h
@@ -206,21 +206,21 @@ void premultiply(SrcDstView& view_to_modify)
  *                       models RGBAPixel.
  */
 template <class SrcDstView>
-void unmultiply(SrcDstView& view_to_modify)
+void unmultiply(SrcDstView& view_to_modify, int max)
 {
     std::for_each(view_to_modify.begin(), view_to_modify.end(), [&](typename SrcDstView::pixel_type& pixel) {
         int alpha = static_cast<int>(pixel.a());
 
-        if (alpha != 0 && alpha != 255) {
+        if (alpha != 0 && alpha != max) {
             // We don't event try to premultiply 0 since it will be unaffected.
             if (pixel.r())
-                pixel.r() = static_cast<uint8_t>(static_cast<int>(pixel.r()) * 255 / alpha);
+                pixel.r() = static_cast<uint8_t>(static_cast<int>(pixel.r()) * max / alpha);
 
             if (pixel.g())
-                pixel.g() = static_cast<uint8_t>(static_cast<int>(pixel.g()) * 255 / alpha);
+                pixel.g() = static_cast<uint8_t>(static_cast<int>(pixel.g()) * max / alpha);
 
             if (pixel.b())
-                pixel.b() = static_cast<uint8_t>(static_cast<int>(pixel.b()) * 255 / alpha);
+                pixel.b() = static_cast<uint8_t>(static_cast<int>(pixel.b()) * max / alpha);
         }
     });
 }
diff --git a/src/modules/image/util/image_view.h b/src/modules/image/util/image_view.h
index b9029a094d..cb0a4300a7 100644
--- a/src/modules/image/util/image_view.h
+++ b/src/modules/image/util/image_view.h
@@ -59,6 +59,40 @@ class bgra_pixel
     uint8_t&       a() { return a_; }
 };
 
+/**
+ * A POD pixel with a compatible memory layout as a 16bit BGRA pixel (32bits in
+ * total).
+ * <p>
+ * Models the PackedPixel concept used by for example image_view. Also models
+ * the RGBAPixel concept which does not care about the order between RGBA but
+ * only requires that all 4 channel has accessors.
+ */
+class bgra16_pixel
+{
+    uint16_t b_;
+    uint16_t g_;
+    uint16_t r_;
+    uint16_t a_;
+
+  public:
+    bgra16_pixel(uint16_t b = 0, uint16_t g = 0, uint16_t r = 0, uint16_t a = 0)
+        : b_(b)
+        , g_(g)
+        , r_(r)
+        , a_(a)
+    {
+    }
+
+    const uint16_t& b() const { return b_; }
+    uint16_t&       b() { return b_; }
+    const uint16_t& g() const { return g_; }
+    uint16_t&       g() { return g_; }
+    const uint16_t& r() const { return r_; }
+    uint16_t&       r() { return r_; }
+    const uint16_t& a() const { return a_; }
+    uint16_t&       a() { return a_; }
+};
+
 template <class PackedPixel>
 class image_sub_view;
 

From 8cfc286c78e68327cbb6a823dd4b08afdc5d7805 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:20:16 +0000
Subject: [PATCH 37/50] fix: image consumer 16bit generation

---
 src/accelerator/ogl/image/frame_converter.cpp |  4 ++-
 src/accelerator/ogl/image/frame_converter.h   |  6 ++--
 .../ogl/image/shader_from_rgba.comp           | 18 +++++++---
 src/accelerator/ogl/util/device.h             |  1 +
 src/core/frame/frame_factory.h                |  2 +-
 .../decklink/consumer/decklink_consumer.cpp   |  2 +-
 src/modules/image/consumer/image_consumer.cpp | 10 ++----
 src/modules/image/util/image_algorithms.h     | 10 +++---
 src/modules/image/util/image_view.h           | 34 -------------------
 9 files changed, 31 insertions(+), 56 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 5e642e49d5..5618be349a 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -74,7 +74,8 @@ core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame&
 std::shared_future<array<const std::uint8_t>>
 ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
                                        const core::encoded_frame_format format,
-                                       bool                             key_only)
+                                       bool                             key_only,
+                                       bool                             straighten)
 {
     array<const std::uint8_t> buffer;
     unsigned int              x_count        = 0;
@@ -118,6 +119,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
     description.height         = frame.height();
     description.words_per_line = words_per_line;
     description.key_only       = key_only;
+    description.straighten     = straighten;
 
     auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count);
 
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index 3d85635d91..31f0e1a684 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -45,8 +45,10 @@ class ogl_frame_converter
 
     core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override;
 
-    std::shared_future<array<const std::uint8_t>>
-    convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override;
+    std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame&   frame,
+                                                                    core::encoded_frame_format format,
+                                                                    bool                       key_only,
+                                                                    bool                       straighten) override;
 
     common::bit_depth get_frame_bitdepth(const core::const_frame& frame) override;
 
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index a783c9e6f2..76a4e3436e 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -19,14 +19,21 @@ layout(std430, binding = 3) buffer description_layout
     uint frame_height;
     uint words_per_line;
     bool key_only;
+    bool straigthen;
 };
 
+vec4 straighten_pixel(vec4 pixel) {
+    return vec4(
+        pixel.r / pixel.a,
+        pixel.g / pixel.a,
+        pixel.b / pixel.a,
+        pixel.a
+    );
+}
+
 vec4 read_pixel(ivec2 coord) {
-    if (is_16_bit){
-        return imageLoad(imgInput16bit, coord);
-    } else {
-        return imageLoad(imgInput8bit, coord);
-    }
+    vec4 pixel = is_16_bit ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord);
+    return straighten_pixel(pixel);
 }
 
 vec3 rgba_to_bt709(vec4 pixel) {
@@ -96,6 +103,7 @@ void decklink_v210() {
     bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20);
 }
 
+
 void rgba16() {
     // TODO: is this excessive to do in a shader?
     // basic coordinates
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index f6c3b5974f..6930a5d886 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -43,6 +43,7 @@ struct convert_from_texture_description
     uint32_t height;
     uint32_t words_per_line;
     bool     key_only;
+    bool     straighten;
 };
 
 class device final
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index 82416fdc48..bff013c952 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -48,7 +48,7 @@ class frame_converter
     virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0;
 
     virtual std::shared_future<array<const std::uint8_t>>
-    convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0;
+    convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only, bool straighten) = 0;
 
     virtual common::bit_depth get_frame_bitdepth(const core::const_frame& frame) = 0;
 };
diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index 90e7d8c082..ed58593bec 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -878,7 +878,7 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
 
         if (frame) {
             auto frame_future = frame_converter_->convert_from_rgba(
-                frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only);
+                frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only, false);
 
             std::unique_lock<std::mutex> lock(buffer_mutex_);
             if (field != core::video_field::b) {
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index 41117ce1ac..9735d55507 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -98,15 +98,11 @@ struct image_consumer : public core::frame_consumer
                                                        FreeImage_Unload);
 
                     array<const std::uint8_t> rgba16_bytes =
-                        frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false).get();
+                        frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false, true).get();
 
                     std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size());
 
-                    // TODO - this doesnt work
-                    image_view<bgra16_pixel> original_view(FreeImage_GetBits(bitmap.get()),
-                                                         static_cast<int>(frame.width()),
-                                                         static_cast<int>(frame.height()));
-                    unmultiply(original_view, 65535);
+                    // Note: premultiplication is done on the gpu
                 } else {
                     bitmap = std::shared_ptr<FIBITMAP>(
                         FreeImage_AllocateT(
@@ -119,7 +115,7 @@ struct image_consumer : public core::frame_consumer
                     image_view<bgra_pixel> original_view(FreeImage_GetBits(bitmap.get()),
                                                          static_cast<int>(frame.width()),
                                                          static_cast<int>(frame.height()));
-                    unmultiply(original_view, 255);
+                    unmultiply(original_view);
                 }
 
                 FreeImage_FlipVertical(bitmap.get());
diff --git a/src/modules/image/util/image_algorithms.h b/src/modules/image/util/image_algorithms.h
index f6a7c6662c..268f5efad3 100644
--- a/src/modules/image/util/image_algorithms.h
+++ b/src/modules/image/util/image_algorithms.h
@@ -206,21 +206,21 @@ void premultiply(SrcDstView& view_to_modify)
  *                       models RGBAPixel.
  */
 template <class SrcDstView>
-void unmultiply(SrcDstView& view_to_modify, int max)
+void unmultiply(SrcDstView& view_to_modify)
 {
     std::for_each(view_to_modify.begin(), view_to_modify.end(), [&](typename SrcDstView::pixel_type& pixel) {
         int alpha = static_cast<int>(pixel.a());
 
-        if (alpha != 0 && alpha != max) {
+        if (alpha != 0 && alpha != 255) {
             // We don't event try to premultiply 0 since it will be unaffected.
             if (pixel.r())
-                pixel.r() = static_cast<uint8_t>(static_cast<int>(pixel.r()) * max / alpha);
+                pixel.r() = static_cast<uint8_t>(static_cast<int>(pixel.r()) * 255 / alpha);
 
             if (pixel.g())
-                pixel.g() = static_cast<uint8_t>(static_cast<int>(pixel.g()) * max / alpha);
+                pixel.g() = static_cast<uint8_t>(static_cast<int>(pixel.g()) * 255 / alpha);
 
             if (pixel.b())
-                pixel.b() = static_cast<uint8_t>(static_cast<int>(pixel.b()) * max / alpha);
+                pixel.b() = static_cast<uint8_t>(static_cast<int>(pixel.b()) * 255 / alpha);
         }
     });
 }
diff --git a/src/modules/image/util/image_view.h b/src/modules/image/util/image_view.h
index cb0a4300a7..b9029a094d 100644
--- a/src/modules/image/util/image_view.h
+++ b/src/modules/image/util/image_view.h
@@ -59,40 +59,6 @@ class bgra_pixel
     uint8_t&       a() { return a_; }
 };
 
-/**
- * A POD pixel with a compatible memory layout as a 16bit BGRA pixel (32bits in
- * total).
- * <p>
- * Models the PackedPixel concept used by for example image_view. Also models
- * the RGBAPixel concept which does not care about the order between RGBA but
- * only requires that all 4 channel has accessors.
- */
-class bgra16_pixel
-{
-    uint16_t b_;
-    uint16_t g_;
-    uint16_t r_;
-    uint16_t a_;
-
-  public:
-    bgra16_pixel(uint16_t b = 0, uint16_t g = 0, uint16_t r = 0, uint16_t a = 0)
-        : b_(b)
-        , g_(g)
-        , r_(r)
-        , a_(a)
-    {
-    }
-
-    const uint16_t& b() const { return b_; }
-    uint16_t&       b() { return b_; }
-    const uint16_t& g() const { return g_; }
-    uint16_t&       g() { return g_; }
-    const uint16_t& r() const { return r_; }
-    uint16_t&       r() { return r_; }
-    const uint16_t& a() const { return a_; }
-    uint16_t&       a() { return a_; }
-};
-
 template <class PackedPixel>
 class image_sub_view;
 

From f22f6a2dc1aff1494f88148350171b01a5bc2067 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:32:33 +0000
Subject: [PATCH 38/50] fix: image consumer 16bit defined by amcp

---
 src/accelerator/ogl/util/device.cpp           |  2 +-
 src/modules/image/consumer/image_consumer.cpp | 23 ++++++++++++-------
 src/protocol/amcp/AMCPCommandsImpl.cpp        | 11 +++++----
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 9062a322db..f2e6d8ca12 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
     {
         return spawn_async([=](yield_context yield) {
-            auto buf = create_buffer(source->size(), false);
+            auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false);
             source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth());
 
             sync_queue_.push(nullptr);
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index 9735d55507..c035edfcfc 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -32,6 +32,7 @@
 #include <common/env.h>
 #include <common/future.h>
 #include <common/log.h>
+#include <common/param.h>
 #include <common/utf.h>
 
 #include <core/consumer/frame_consumer.h>
@@ -60,13 +61,17 @@ struct image_consumer : public core::frame_consumer
 {
     const spl::shared_ptr<core::frame_converter> frame_converter_;
     const std::wstring                           filename_;
+    const bool                                   depth16_;
 
   public:
     // frame_consumer
 
-    explicit image_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter, std::wstring filename)
+    explicit image_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter,
+                            std::wstring                                  filename,
+                            bool                                          depth16)
         : frame_converter_(frame_converter)
         , filename_(std::move(filename))
+        , depth16_(depth16)
     {
     }
 
@@ -74,9 +79,7 @@ struct image_consumer : public core::frame_consumer
 
     std::future<bool> send(core::video_field field, core::const_frame frame) override
     {
-        auto filename = filename_;
-
-        std::thread async([frame_converter = frame_converter_, frame, filename] {
+        std::thread async([frame_converter = frame_converter_, depth16 = depth16_, frame, filename = filename_] {
             try {
                 auto filename2 = filename;
 
@@ -87,11 +90,10 @@ struct image_consumer : public core::frame_consumer
                 else
                     filename2 = env::media_folder() + filename2 + L".png";
 
-                common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame);
-
                 std::shared_ptr<FIBITMAP> bitmap;
+                common::bit_depth         frame_depth = frame_converter->get_frame_bitdepth(frame);
 
-                if (frame_depth != common::bit_depth::bit8) {
+                if (depth16 && frame_depth != common::bit_depth::bit8) {
                     bitmap = std::shared_ptr<FIBITMAP>(FreeImage_AllocateT(FIT_RGBA16,
                                                                            static_cast<int>(frame.width()),
                                                                            static_cast<int>(frame.height())),
@@ -156,11 +158,16 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
         return core::frame_consumer::empty();
 
     std::wstring filename;
+    bool         depth16 = false;
 
     if (params.size() > 1)
         filename = params.at(1);
+    if (params.size() > 2) {
+        depth16 =
+            contains_param(L"16BIT", params) || contains_param(L"16-BIT", params) || contains_param(L"16_BIT", params);
+    }
 
-    return spl::make_shared<image_consumer>(frame_converter, filename);
+    return spl::make_shared<image_consumer>(frame_converter, filename, depth16);
 }
 
 }} // namespace caspar::image
diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp
index 1e92b7558e..08d636c277 100644
--- a/src/protocol/amcp/AMCPCommandsImpl.cpp
+++ b/src/protocol/amcp/AMCPCommandsImpl.cpp
@@ -492,11 +492,12 @@ std::wstring remove_command(command_context& ctx)
 
 std::wstring print_command(command_context& ctx)
 {
-    ctx.channel.raw_channel->output().add(
-        ctx.static_context->consumer_registry->create_consumer({L"IMAGE"},
-                                                               ctx.static_context->format_repository,
-                                                               ctx.channel.raw_channel->frame_converter(),
-                                                               get_channels(ctx)));
+    std::vector<std::wstring> params = {L"IMAGE"};
+    params.resize(ctx.parameters.size() + 1);
+    std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1);
+
+    ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer(
+        params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)));
 
     return L"202 PRINT OK\r\n";
 }

From 7476448a6f7980196e8f071f4dc2aa02c08ae1c1 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:33:12 +0000
Subject: [PATCH 39/50] Revert "fix: image consumer 16bit defined by amcp"

This reverts commit f22f6a2dc1aff1494f88148350171b01a5bc2067.
---
 src/accelerator/ogl/util/device.cpp           |  2 +-
 src/modules/image/consumer/image_consumer.cpp | 23 +++++++------------
 src/protocol/amcp/AMCPCommandsImpl.cpp        | 11 ++++-----
 3 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index f2e6d8ca12..9062a322db 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
     {
         return spawn_async([=](yield_context yield) {
-            auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false);
+            auto buf = create_buffer(source->size(), false);
             source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth());
 
             sync_queue_.push(nullptr);
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index c035edfcfc..9735d55507 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -32,7 +32,6 @@
 #include <common/env.h>
 #include <common/future.h>
 #include <common/log.h>
-#include <common/param.h>
 #include <common/utf.h>
 
 #include <core/consumer/frame_consumer.h>
@@ -61,17 +60,13 @@ struct image_consumer : public core::frame_consumer
 {
     const spl::shared_ptr<core::frame_converter> frame_converter_;
     const std::wstring                           filename_;
-    const bool                                   depth16_;
 
   public:
     // frame_consumer
 
-    explicit image_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter,
-                            std::wstring                                  filename,
-                            bool                                          depth16)
+    explicit image_consumer(const spl::shared_ptr<core::frame_converter>& frame_converter, std::wstring filename)
         : frame_converter_(frame_converter)
         , filename_(std::move(filename))
-        , depth16_(depth16)
     {
     }
 
@@ -79,7 +74,9 @@ struct image_consumer : public core::frame_consumer
 
     std::future<bool> send(core::video_field field, core::const_frame frame) override
     {
-        std::thread async([frame_converter = frame_converter_, depth16 = depth16_, frame, filename = filename_] {
+        auto filename = filename_;
+
+        std::thread async([frame_converter = frame_converter_, frame, filename] {
             try {
                 auto filename2 = filename;
 
@@ -90,10 +87,11 @@ struct image_consumer : public core::frame_consumer
                 else
                     filename2 = env::media_folder() + filename2 + L".png";
 
+                common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame);
+
                 std::shared_ptr<FIBITMAP> bitmap;
-                common::bit_depth         frame_depth = frame_converter->get_frame_bitdepth(frame);
 
-                if (depth16 && frame_depth != common::bit_depth::bit8) {
+                if (frame_depth != common::bit_depth::bit8) {
                     bitmap = std::shared_ptr<FIBITMAP>(FreeImage_AllocateT(FIT_RGBA16,
                                                                            static_cast<int>(frame.width()),
                                                                            static_cast<int>(frame.height())),
@@ -158,16 +156,11 @@ spl::shared_ptr<core::frame_consumer> create_consumer(const std::vector<std::wst
         return core::frame_consumer::empty();
 
     std::wstring filename;
-    bool         depth16 = false;
 
     if (params.size() > 1)
         filename = params.at(1);
-    if (params.size() > 2) {
-        depth16 =
-            contains_param(L"16BIT", params) || contains_param(L"16-BIT", params) || contains_param(L"16_BIT", params);
-    }
 
-    return spl::make_shared<image_consumer>(frame_converter, filename, depth16);
+    return spl::make_shared<image_consumer>(frame_converter, filename);
 }
 
 }} // namespace caspar::image
diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp
index 08d636c277..1e92b7558e 100644
--- a/src/protocol/amcp/AMCPCommandsImpl.cpp
+++ b/src/protocol/amcp/AMCPCommandsImpl.cpp
@@ -492,12 +492,11 @@ std::wstring remove_command(command_context& ctx)
 
 std::wstring print_command(command_context& ctx)
 {
-    std::vector<std::wstring> params = {L"IMAGE"};
-    params.resize(ctx.parameters.size() + 1);
-    std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1);
-
-    ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer(
-        params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)));
+    ctx.channel.raw_channel->output().add(
+        ctx.static_context->consumer_registry->create_consumer({L"IMAGE"},
+                                                               ctx.static_context->format_repository,
+                                                               ctx.channel.raw_channel->frame_converter(),
+                                                               get_channels(ctx)));
 
     return L"202 PRINT OK\r\n";
 }

From a99522e20c30594e39bb0ab1a54a763b7345795c Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:33:43 +0000
Subject: [PATCH 40/50] fix: typo

---
 src/accelerator/ogl/util/device.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 9062a322db..f2e6d8ca12 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this<impl>
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
     {
         return spawn_async([=](yield_context yield) {
-            auto buf = create_buffer(source->size(), false);
+            auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false);
             source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth());
 
             sync_queue_.push(nullptr);

From c7984a41408d66bb2c1ae75c36b47694059a5aad Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:48:02 +0000
Subject: [PATCH 41/50] feat: image producer can work in 64bit

---
 src/accelerator/ogl/image/image_kernel.cpp    |  2 ++
 src/accelerator/ogl/image/shader.frag         |  4 ++++
 src/core/frame/pixel_format.h                 |  3 ++-
 src/modules/image/producer/image_producer.cpp |  3 ++-
 src/modules/image/util/image_loader.cpp       | 16 ++++++++++++++--
 src/modules/image/util/image_loader.h         |  5 +++++
 6 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/accelerator/ogl/image/image_kernel.cpp b/src/accelerator/ogl/image/image_kernel.cpp
index 155d574e8d..f4b3e57929 100644
--- a/src/accelerator/ogl/image/image_kernel.cpp
+++ b/src/accelerator/ogl/image/image_kernel.cpp
@@ -239,6 +239,8 @@ struct image_kernel::impl
 
         shader_->use();
 
+        shader_->set("is_straight_alpha", params.pix_desc.is_straight);
+
         shader_->set("plane[0]", texture_id::plane0);
         shader_->set("plane[1]", texture_id::plane1);
         shader_->set("plane[2]", texture_id::plane2);
diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag
index f8d3356c6c..b515bd3589 100644
--- a/src/accelerator/ogl/image/shader.frag
+++ b/src/accelerator/ogl/image/shader.frag
@@ -8,6 +8,8 @@ uniform sampler2D	plane[4];
 uniform sampler2D	local_key;
 uniform sampler2D	layer_key;
 
+uniform bool        is_straight_alpha;
+
 uniform bool		is_hd;
 uniform bool		has_local_key;
 uniform bool		has_layer_key;
@@ -543,6 +545,8 @@ vec4 get_rgba_color()
 void main()
 {
     vec4 color = get_rgba_color();
+    if (is_straight_alpha)
+        color.rgb *= color.a;
     if (chroma)
         color = chroma_key(color);
     if(levels)
diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h
index ba7faad41b..90823360d9 100644
--- a/src/core/frame/pixel_format.h
+++ b/src/core/frame/pixel_format.h
@@ -83,7 +83,8 @@ struct pixel_format_desc final
     {
     }
 
-    pixel_format       format = pixel_format::invalid;
+    pixel_format       format      = pixel_format::invalid;
+    bool               is_straight = false;
     std::vector<plane> planes;
 };
 
diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp
index 2e469d1374..18a0f4328b 100644
--- a/src/modules/image/producer/image_producer.cpp
+++ b/src/modules/image/producer/image_producer.cpp
@@ -89,8 +89,9 @@ struct image_producer : public core::frame_producer
     void load(const loaded_image& image)
     {
         core::pixel_format_desc desc(image.format);
+        desc.is_straight = image.is_straight;
         desc.planes.emplace_back(
-            FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride);
+            FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride, image.depth);
         auto frame       = frame_factory_->create_frame(this, desc);
         frame.geometry() = core::frame_geometry::get_default_vflip();
 
diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp
index eefc486592..a76236d8f9 100644
--- a/src/modules/image/util/image_loader.cpp
+++ b/src/modules/image/util/image_loader.cpp
@@ -55,19 +55,30 @@ loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr<FIBITMA
 {
     core::pixel_format format;
     int                stride;
+    common::bit_depth  depth = common::bit_depth::bit8;
 
     unsigned int bpp = FreeImage_GetBPP(bitmap.get());
+
     if (bpp == 32) {
         format = IMAGE_BGRA_FORMAT;
         stride = 4;
     } else if (allow_all_formats && bpp == 24) {
         format = IMAGE_BGR_FORMAT;
         stride = 3;
+    } else if (allow_all_formats && bpp == 64) {
+        format = core::pixel_format::rgba;
+        stride = 4;
+        depth  = common::bit_depth::bit16;
+    } else if (allow_all_formats && bpp == 48) {
+        format = core::pixel_format::rgb;
+        stride = 3;
+        depth  = common::bit_depth::bit16;
     } else if (allow_all_formats && !FreeImage_IsTransparent(bitmap.get())) {
         format = IMAGE_BGR_FORMAT;
         stride = 3;
 
         bitmap = std::shared_ptr<FIBITMAP>(FreeImage_ConvertTo24Bits(bitmap.get()), FreeImage_Unload);
+
     } else {
         format = IMAGE_BGRA_FORMAT;
         stride = 4;
@@ -79,13 +90,14 @@ loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr<FIBITMA
         CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format."));
 
     // PNG-images need to be premultiplied with their alpha
-    if (fif == FIF_PNG && format == IMAGE_BGRA_FORMAT) {
+    bool is_straight = fif == FIF_PNG && (format == core::pixel_format::bgra || format == core::pixel_format::rgba);
+    if (!allow_all_formats && is_straight) {
         image_view<bgra_pixel> original_view(
             FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()));
         premultiply(original_view);
     }
 
-    return {std::move(bitmap), format, stride};
+    return {std::move(bitmap), format, stride, depth, is_straight};
 }
 
 loaded_image load_image(const std::wstring& filename, bool allow_all_formats)
diff --git a/src/modules/image/util/image_loader.h b/src/modules/image/util/image_loader.h
index 9362be31bc..725e3b18ed 100644
--- a/src/modules/image/util/image_loader.h
+++ b/src/modules/image/util/image_loader.h
@@ -22,6 +22,9 @@
 #pragma once
 
 #include <core/frame/pixel_format.h>
+
+#include <common/bit_depth.h>
+
 #include <memory>
 #include <set>
 #include <string>
@@ -35,6 +38,8 @@ struct loaded_image
     std::shared_ptr<FIBITMAP> bitmap;
     core::pixel_format        format;
     int                       stride;
+    common::bit_depth         depth;
+    bool                      is_straight;
 };
 
 loaded_image                  load_image(const std::wstring& filename, bool allow_all_formats);

From 544b51627c1a4063defd0666269ec2dc28c142b9 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:49:30 +0000
Subject: [PATCH 42/50] fix: 64bit freeimage endianness

---
 src/accelerator/ogl/image/shader_from_rgba.comp | 10 ++++------
 src/modules/image/consumer/image_consumer.cpp   | 10 +++-------
 src/modules/image/producer/image_producer.cpp   |  1 +
 src/modules/image/util/image_loader.cpp         |  2 ++
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index 76a4e3436e..da53c58e5e 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -114,9 +114,8 @@ void rgba16() {
     // TODO - can this be done solely as integer?
     vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y));
 
-    // Note: the texture is stored in reverse order
-    bufferOutput[offset + 0] = uint(pixel.b * 65535) + (uint(pixel.g * 65535) << 16);
-    bufferOutput[offset + 1] = uint(pixel.r * 65535) + (uint(pixel.a * 65535) << 16);
+    bufferOutput[offset + 0] = uint(pixel.r * 65535) + (uint(pixel.g * 65535) << 16);
+    bufferOutput[offset + 1] = uint(pixel.b * 65535) + (uint(pixel.a * 65535) << 16);
 }
 
 void bgra16() {
@@ -129,9 +128,8 @@ void bgra16() {
     // TODO - can this be done solely as integer?
     vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y));
 
-    // Note: the texture is stored in reverse order
-    bufferOutput[offset + 0] = uint(pixel.r * 65535) + (uint(pixel.g * 65535) << 16);
-    bufferOutput[offset + 1] = uint(pixel.b * 65535) + (uint(pixel.a * 65535) << 16);
+    bufferOutput[offset + 0] = uint(pixel.b * 65535) + (uint(pixel.g * 65535) << 16);
+    bufferOutput[offset + 1] = uint(pixel.r * 65535) + (uint(pixel.a * 65535) << 16);
 }
 
 void main() {
diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp
index 9735d55507..98ab30e3b5 100644
--- a/src/modules/image/consumer/image_consumer.cpp
+++ b/src/modules/image/consumer/image_consumer.cpp
@@ -50,12 +50,6 @@
 
 namespace caspar { namespace image {
 
-#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR
-#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::bgra16
-#else
-#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::rgba16
-#endif
-
 struct image_consumer : public core::frame_consumer
 {
     const spl::shared_ptr<core::frame_converter> frame_converter_;
@@ -97,8 +91,10 @@ struct image_consumer : public core::frame_consumer
                                                                            static_cast<int>(frame.height())),
                                                        FreeImage_Unload);
 
+                    // freeimage appears to ignore endianness
                     array<const std::uint8_t> rgba16_bytes =
-                        frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false, true).get();
+                        frame_converter->convert_from_rgba(frame, core::encoded_frame_format::rgba16, false, true)
+                            .get();
 
                     std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size());
 
diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp
index 18a0f4328b..fd4666b20d 100644
--- a/src/modules/image/producer/image_producer.cpp
+++ b/src/modules/image/producer/image_producer.cpp
@@ -92,6 +92,7 @@ struct image_producer : public core::frame_producer
         desc.is_straight = image.is_straight;
         desc.planes.emplace_back(
             FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride, image.depth);
+
         auto frame       = frame_factory_->create_frame(this, desc);
         frame.geometry() = core::frame_geometry::get_default_vflip();
 
diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp
index a76236d8f9..46bdef21c1 100644
--- a/src/modules/image/util/image_loader.cpp
+++ b/src/modules/image/util/image_loader.cpp
@@ -66,10 +66,12 @@ loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr<FIBITMA
         format = IMAGE_BGR_FORMAT;
         stride = 3;
     } else if (allow_all_formats && bpp == 64) {
+        // freeimage appears to ignore endianness
         format = core::pixel_format::rgba;
         stride = 4;
         depth  = common::bit_depth::bit16;
     } else if (allow_all_formats && bpp == 48) {
+        // freeimage appears to ignore endianness
         format = core::pixel_format::rgb;
         stride = 3;
         depth  = common::bit_depth::bit16;

From b9baeca47bfd90af137c34099c7b52eb930128ac Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 17:51:49 +0000
Subject: [PATCH 43/50] fix: propagate parameters from print command to image
 consumer

---
 src/protocol/amcp/AMCPCommandsImpl.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp
index 1e92b7558e..a45543667d 100644
--- a/src/protocol/amcp/AMCPCommandsImpl.cpp
+++ b/src/protocol/amcp/AMCPCommandsImpl.cpp
@@ -492,11 +492,14 @@ std::wstring remove_command(command_context& ctx)
 
 std::wstring print_command(command_context& ctx)
 {
-    ctx.channel.raw_channel->output().add(
-        ctx.static_context->consumer_registry->create_consumer({L"IMAGE"},
-                                                               ctx.static_context->format_repository,
-                                                               ctx.channel.raw_channel->frame_converter(),
-                                                               get_channels(ctx)));
+    std::vector<std::wstring> params = {L"IMAGE"};
+    if (!ctx.parameters.empty()) {
+        params.resize(ctx.parameters.size() + 1);
+        std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1);
+    }
+
+    ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer(
+        params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)));
 
     return L"202 PRINT OK\r\n";
 }

From 60fca061ebdfc9c2bae257e92ac6bcdd00dfd16b Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 18:02:11 +0000
Subject: [PATCH 44/50] wip: tidy

---
 src/accelerator/ogl/util/device.cpp  | 6 ++++--
 src/accelerator/ogl/util/texture.cpp | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index f2e6d8ca12..62b1edae64 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -255,8 +255,10 @@ struct device::impl : public std::enable_shared_from_this<impl>
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<texture>& source, bool as_rgba8)
     {
         return spawn_async([=](yield_context yield) {
-            auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false);
-            source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth());
+            auto bit_depth = as_rgba8 ? common::bit_depth::bit8 : source->depth();
+            auto buf       = create_buffer(
+                source->width() * source->height() * source->stride() * common::bytes_per_pixel(bit_depth), false);
+            source->copy_to(*buf, bit_depth);
 
             sync_queue_.push(nullptr);
 
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index 4e47e8768d..c489b43f3f 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -52,7 +52,7 @@ struct texture::impl
         , height_(height)
         , stride_(stride)
         , depth_(depth)
-        , size_(width * height * stride * (1 + static_cast<int>(depth)))
+        , size_(width * height * stride * common::bytes_per_pixel(depth))
     {
         if (stride == 5) {
             size_ = width * height * 16;

From 69c29f57e6b64d66140e831de761734c8e3b4796 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 18:38:53 +0000
Subject: [PATCH 45/50] wip: tidying

---
 src/accelerator/ogl/image/frame_converter.cpp | 22 +++++--------
 src/accelerator/ogl/util/device.cpp           | 33 ++++++++++---------
 src/accelerator/ogl/util/device.h             | 10 +++---
 src/accelerator/ogl/util/texture.cpp          |  6 ----
 src/modules/ffmpeg/util/av_util.cpp           | 31 +++++++----------
 5 files changed, 41 insertions(+), 61 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 5618be349a..154d1af910 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -77,17 +77,17 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
                                        bool                             key_only,
                                        bool                             straighten)
 {
-    array<const std::uint8_t> buffer;
-    unsigned int              x_count        = 0;
-    unsigned int              y_count        = 0;
-    int                       words_per_line = 0;
+    int          buffer_size    = 0;
+    unsigned int x_count        = 0;
+    unsigned int y_count        = 0;
+    int          words_per_line = 0;
 
     switch (format) {
         case core::encoded_frame_format::rgba16:
         case core::encoded_frame_format::bgra16:
             x_count        = frame.width();
             y_count        = frame.height();
-            buffer         = ogl_->create_array(frame.width() * frame.height() * 8);
+            buffer_size    = frame.width() * frame.height() * 8;
             words_per_line = frame.width() * 2;
 
             break;
@@ -96,14 +96,14 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
             auto row_bytes  = row_blocks * 128;
 
             // TODO - result must be 128byte aligned. can that be guaranteed here?
-            buffer         = ogl_->create_array(row_bytes * frame.height());
+            buffer_size    = row_bytes * frame.height();
             x_count        = row_blocks * 8;
             y_count        = frame.height();
             words_per_line = row_blocks * 32;
             break;
     }
 
-    if (buffer.size() == 0 || x_count == 0 || y_count == 0) {
+    if (buffer_size == 0 || x_count == 0 || y_count == 0) {
         CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format"));
     }
 
@@ -121,13 +121,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
     description.key_only       = key_only;
     description.straighten     = straighten;
 
-    auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count);
-
-    return std::async(std::launch::deferred, [buffer, future_conversion = std::move(future_conversion)]() mutable {
-        future_conversion.get();
-
-        return buffer;
-    });
+    return ogl_->convert_from_texture(texture_ptr, buffer_size, description, x_count, y_count);
 }
 
 common::bit_depth ogl_frame_converter::get_frame_bitdepth(const core::const_frame& frame)
diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp
index 62b1edae64..d81eecc36c 100644
--- a/src/accelerator/ogl/util/device.cpp
+++ b/src/accelerator/ogl/util/device.cpp
@@ -319,11 +319,11 @@ struct device::impl : public std::enable_shared_from_this<impl>
     }
     */
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
-                                           const array<const uint8_t>&             source,
-                                           const convert_from_texture_description& description,
-                                           unsigned int                            x_count,
-                                           unsigned int                            y_count)
+    std::future<array<const uint8_t>> convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                                           int                                     buffer_size,
+                                                           const convert_from_texture_description& description,
+                                                           unsigned int                            x_count,
+                                                           unsigned int                            y_count)
     {
         return spawn_async([=](yield_context yield) {
             if (!compute_from_rgba_)
@@ -345,14 +345,13 @@ struct device::impl : public std::enable_shared_from_this<impl>
             GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16));
             GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8));
 
-            auto tmp = source.storage<std::shared_ptr<buffer>>();
-            if (!tmp) {
+            array<const uint8_t> output_buffer = create_array(buffer_size); // TODO - tidy this?
+            auto                 buffer_ptr    = output_buffer.storage<std::shared_ptr<buffer>>();
+            if (!buffer_ptr) {
                 CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed"));
             }
+            GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffer_ptr->get()->id()));
 
-            GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id()));
-
-            // TODO - binding 2 description
             auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false);
             std::memcpy(description_buffer->data(), &description, sizeof(convert_from_texture_description));
             GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id()));
@@ -378,6 +377,8 @@ struct device::impl : public std::enable_shared_from_this<impl>
             }
 
             glDeleteSync(fence);
+
+            return output_buffer;
         });
     }
 
@@ -508,13 +509,13 @@ std::future<array<const uint8_t>> device::copy_async(const std::shared_ptr<textu
 {
     return impl_->copy_async(source, as_rgba8);
 }
-std::future<void> device::convert_from_texture(const std::shared_ptr<texture>&         texture,
-                                               const array<const uint8_t>&             source,
-                                               const convert_from_texture_description& description,
-                                               unsigned int                            x_count,
-                                               unsigned int                            y_count)
+std::future<array<const uint8_t>> device::convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                                               int                                     buffer_size,
+                                                               const convert_from_texture_description& description,
+                                                               unsigned int                            x_count,
+                                                               unsigned int                            y_count)
 {
-    return impl_->convert_from_texture(texture, source, description, x_count, y_count);
+    return impl_->convert_from_texture(texture, buffer_size, description, x_count, y_count);
 }
 void         device::dispatch(std::function<void()> func) { boost::asio::dispatch(impl_->service_, std::move(func)); }
 std::wstring device::version() const { return impl_->version(); }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 6930a5d886..07858b0fde 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -65,11 +65,11 @@ class device final
     copy_async(const array<const uint8_t>& source, int width, int height, int stride, common::bit_depth depth);
     std::future<array<const uint8_t>> copy_async(const std::shared_ptr<class texture>& source, bool as_rgba8);
 
-    std::future<void> convert_from_texture(const std::shared_ptr<texture>&         texture,
-                                           const array<const uint8_t>&             source,
-                                           const convert_from_texture_description& description,
-                                           unsigned int                            x_count,
-                                           unsigned int                            y_count);
+    std::future<array<const uint8_t>> convert_from_texture(const std::shared_ptr<texture>&         texture,
+                                                           int                                     buffer_size,
+                                                           const convert_from_texture_description& description,
+                                                           unsigned int                            x_count,
+                                                           unsigned int                            y_count);
 
     template <typename Func>
     auto dispatch_async(Func&& func)
diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp
index c489b43f3f..26d11e0693 100644
--- a/src/accelerator/ogl/util/texture.cpp
+++ b/src/accelerator/ogl/util/texture.cpp
@@ -54,12 +54,6 @@ struct texture::impl
         , depth_(depth)
         , size_(width * height * stride * common::bytes_per_pixel(depth))
     {
-        if (stride == 5) {
-            size_ = width * height * 16;
-        } else if (stride == 6) {
-            size_ = width * height * 2;
-        }
-
         GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_));
         GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR));
         GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR));
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index a58975666e..d89f2b384d 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -170,47 +170,38 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
     switch (desc.format) {
         case core::pixel_format::gray:
         case core::pixel_format::luma: {
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0], height, 1));
+            desc.planes.emplace_back(linesizes[0], height, 1);
             return desc;
         }
         case core::pixel_format::bgr:
         case core::pixel_format::rgb: {
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 3, height, 3));
+            desc.planes.emplace_back(linesizes[0] / 3, height, 3);
             return desc;
         }
         case core::pixel_format::bgra:
         case core::pixel_format::argb:
         case core::pixel_format::rgba:
         case core::pixel_format::abgr: {
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4));
+            desc.planes.emplace_back(linesizes[0] / 4, height, 4);
             return desc;
         }
         case core::pixel_format::ycbcr:
         case core::pixel_format::ycbcra: {
             // Find chroma height
-            // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so
-            // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use
-            // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0.
-#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100)
             size_t    sizes[4];
             ptrdiff_t linesizes1[4];
             for (int i = 0; i < 4; i++)
                 linesizes1[i] = linesizes[i];
             av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1);
-            auto size2 = static_cast<int>(sizes[1]);
-#else
-            uint8_t* dummy_pict_data[4];
-            av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes);
-            auto size2 = static_cast<int>(dummy_pict_data[2] - dummy_pict_data[1]);
-#endif
-            auto h2 = size2 / linesizes[1];
 
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0], height, 1));
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1));
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1));
+            auto h2 = static_cast<int>(sizes[1]) / linesizes[1];
+
+            desc.planes.emplace_back(linesizes[0], height, 1);
+            desc.planes.emplace_back(linesizes[1], h2, 1);
+            desc.planes.emplace_back(linesizes[2], h2, 1);
 
             if (desc.format == core::pixel_format::ycbcra)
-                desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1));
+                desc.planes.emplace_back(linesizes[3], height, 1);
 
             return desc;
         }
@@ -236,8 +227,8 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
             return desc;
         }
         case core::pixel_format::uyvy: {
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 2));
-            desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4));
+            desc.planes.emplace_back(linesizes[0] / 2, height, 2);
+            desc.planes.emplace_back(linesizes[0] / 4, height, 4);
 
             data_map.clear();
             data_map.push_back(0);

From 4f58d1aaaf8533056351228b800da05ca6bb1eeb Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 22:41:40 +0000
Subject: [PATCH 46/50] wip: tidying

---
 .../ogl/image/shader_from_rgba.comp           | 20 +++++++
 src/accelerator/ogl/image/shader_to_rgba.comp | 20 +++++++
 src/accelerator/ogl/util/compute_shader.cpp   | 53 ++-----------------
 src/accelerator/ogl/util/compute_shader.h     | 21 ++------
 4 files changed, 47 insertions(+), 67 deletions(-)

diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index da53c58e5e..df051149e0 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -1,3 +1,23 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Julian Waller, julian@superfly.tv
+ */
 #version 430
 
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp
index 4bc5fba7f3..6dbe5d34c6 100644
--- a/src/accelerator/ogl/image/shader_to_rgba.comp
+++ b/src/accelerator/ogl/image/shader_to_rgba.comp
@@ -1,3 +1,23 @@
+/*
+ * Copyright (c) 2011 Sveriges Television AB <info@casparcg.com>
+ *
+ * This file is part of CasparCG (www.casparcg.com).
+ *
+ * CasparCG is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CasparCG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Julian Waller, julian@superfly.tv
+ */
 #version 430
 
 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp
index d737a94d3d..8c3e7663ac 100644
--- a/src/accelerator/ogl/util/compute_shader.cpp
+++ b/src/accelerator/ogl/util/compute_shader.cpp
@@ -16,7 +16,7 @@
  * You should have received a copy of the GNU General Public License
  * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Robert Nagy, ronag89@gmail.com
+ * Author: Julian Waller, julian@superfly.tv
  */
 #include "compute_shader.h"
 
@@ -24,15 +24,11 @@
 
 #include <GL/glew.h>
 
-#include <unordered_map>
-
 namespace caspar { namespace accelerator { namespace ogl {
 
 struct compute_shader::impl
 {
-    GLuint                                 program_;
-    std::unordered_map<std::string, GLint> uniform_locations_;
-    std::unordered_map<std::string, GLint> attrib_locations_;
+    GLuint program_;
 
     impl(const impl&)            = delete;
     impl& operator=(const impl&) = delete;
@@ -47,9 +43,6 @@ struct compute_shader::impl
         glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]);
         glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]);
 
-        printf(
-            "max global (total) work group counts x:%i y:%i z:%i\n", work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]);
-
         GLint success;
 
         const char* compute_source = compute_source_str.c_str();
@@ -90,38 +83,6 @@ struct compute_shader::impl
 
     ~impl() { glDeleteProgram(program_); }
 
-    GLint get_uniform_location(const char* name)
-    {
-        auto it = uniform_locations_.find(name);
-        if (it == uniform_locations_.end())
-            it = uniform_locations_.insert(std::make_pair(name, glGetUniformLocation(program_, name))).first;
-        return it->second;
-    }
-
-    GLint get_attrib_location(const char* name)
-    {
-        auto it = attrib_locations_.find(name);
-        if (it == attrib_locations_.end())
-            it = attrib_locations_.insert(std::make_pair(name, glGetAttribLocation(program_, name))).first;
-        return it->second;
-    }
-
-    void set(const std::string& name, bool value) { set(name, value ? 1 : 0); }
-
-    void set(const std::string& name, int value) { GL(glUniform1i(get_uniform_location(name.c_str()), value)); }
-
-    void set(const std::string& name, float value) { GL(glUniform1f(get_uniform_location(name.c_str()), value)); }
-
-    void set(const std::string& name, double value0, double value1)
-    {
-        GL(glUniform2f(get_uniform_location(name.c_str()), static_cast<float>(value0), static_cast<float>(value1)));
-    }
-
-    void set(const std::string& name, double value)
-    {
-        GL(glUniform1f(get_uniform_location(name.c_str()), static_cast<float>(value)));
-    }
-
     void use() { GL(glUseProgram(program_)); }
 };
 
@@ -130,13 +91,7 @@ compute_shader::compute_shader(const std::string& compute_source_str)
 {
 }
 compute_shader::~compute_shader() {}
-void  compute_shader::set(const std::string& name, bool value) { impl_->set(name, value); }
-void  compute_shader::set(const std::string& name, int value) { impl_->set(name, value); }
-void  compute_shader::set(const std::string& name, float value) { impl_->set(name, value); }
-void  compute_shader::set(const std::string& name, double value0, double value1) { impl_->set(name, value0, value1); }
-void  compute_shader::set(const std::string& name, double value) { impl_->set(name, value); }
-GLint compute_shader::get_attrib_location(const char* name) { return impl_->get_attrib_location(name); }
-int   compute_shader::id() const { return impl_->program_; }
-void  compute_shader::use() const { impl_->use(); }
+GLuint compute_shader::id() const { return impl_->program_; }
+void   compute_shader::use() const { impl_->use(); }
 
 }}} // namespace caspar::accelerator::ogl
diff --git a/src/accelerator/ogl/util/compute_shader.h b/src/accelerator/ogl/util/compute_shader.h
index 71aa6bb290..af86498e99 100644
--- a/src/accelerator/ogl/util/compute_shader.h
+++ b/src/accelerator/ogl/util/compute_shader.h
@@ -16,7 +16,7 @@
  * You should have received a copy of the GNU General Public License
  * along with CasparCG. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Robert Nagy, ronag89@gmail.com
+ * Author: Julian Waller, julian@superfly.tv
  */
 
 #pragma once
@@ -24,7 +24,6 @@
 #include <GL/glew.h>
 #include <memory>
 #include <string>
-#include <type_traits>
 
 namespace caspar { namespace accelerator { namespace ogl {
 
@@ -34,26 +33,12 @@ class compute_shader final
     compute_shader& operator=(const compute_shader&);
 
   public:
-    compute_shader(const std::string& compute_source_str);
+    explicit compute_shader(const std::string& compute_source_str);
     ~compute_shader();
 
-    void set(const std::string& name, bool value);
-    void set(const std::string& name, int value);
-    void set(const std::string& name, float value);
-    void set(const std::string& name, double value0, double value1);
-    void set(const std::string& name, double value);
-
-    GLint get_attrib_location(const char* name);
-
-    template <typename E>
-    typename std::enable_if<std::is_enum<E>::value, void>::type set(const std::string& name, E value)
-    {
-        set(name, static_cast<typename std::underlying_type<E>::type>(value));
-    }
-
     void use() const;
 
-    int id() const;
+    [[nodiscard]] GLuint id() const;
 
   private:
     struct impl;

From e0047af6e024a57b836657b858c50f394a0a7c67 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Sat, 30 Dec 2023 22:52:18 +0000
Subject: [PATCH 47/50] wip: generic key-only implementation

---
 src/accelerator/ogl/image/shader_from_rgba.comp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index df051149e0..8bb65fdccd 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -53,6 +53,7 @@ vec4 straighten_pixel(vec4 pixel) {
 
 vec4 read_pixel(ivec2 coord) {
     vec4 pixel = is_16_bit ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord);
+    if (key_only) return pixel.aaaa;
     return straighten_pixel(pixel);
 }
 

From f07b2818898a339486a3bc02bbefbf0838b38a95 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Thu, 4 Jan 2024 14:59:07 +0000
Subject: [PATCH 48/50] wip: fixes

---
 src/accelerator/ogl/image/frame_converter.cpp | 74 +++++++++----------
 src/accelerator/ogl/image/frame_converter.h   |  4 +-
 .../ogl/image/shader_from_rgba.comp           | 15 ++--
 src/accelerator/ogl/util/device.h             |  8 +-
 src/core/frame/frame_factory.h                |  4 +-
 src/modules/ffmpeg/util/av_util.cpp           | 30 ++++----
 6 files changed, 68 insertions(+), 67 deletions(-)

diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp
index 154d1af910..ff7a070577 100644
--- a/src/accelerator/ogl/image/frame_converter.cpp
+++ b/src/accelerator/ogl/image/frame_converter.cpp
@@ -33,43 +33,43 @@ ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr<device>& ogl)
 {
 }
 
-core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc)
-{
-    std::vector<array<std::uint8_t>> image_data;
-    for (auto& plane : desc.planes) {
-        image_data.push_back(ogl_->create_array(plane.size));
-    }
-
-    using future_texture = std::shared_future<std::shared_ptr<texture>>;
-
-    std::weak_ptr<ogl_frame_converter> weak_self = shared_from_this();
-    return core::mutable_frame(tag,
-                               std::move(image_data),
-                               array<int32_t>{},
-                               desc,
-                               [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
-                                   // TODO - replace this
-                                   auto self = weak_self.lock();
-                                   if (!self) {
-                                       return boost::any{};
-                                   }
-                                   std::vector<future_texture> textures;
-                                   for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
-                                       textures.emplace_back(self->ogl_->copy_async(image_data[n],
-                                                                                    desc.planes[n].width,
-                                                                                    desc.planes[n].height,
-                                                                                    desc.planes[n].stride,
-                                                                                    desc.planes[n].depth));
-                                   }
-                                   return std::make_shared<decltype(textures)>(std::move(textures));
-                               });
-}
-
-core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame)
-{
-    // TODO
-    return core::draw_frame{};
-}
+// core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc)
+// {
+//     std::vector<array<std::uint8_t>> image_data;
+//     for (auto& plane : desc.planes) {
+//         image_data.push_back(ogl_->create_array(plane.size));
+//     }
+
+//     using future_texture = std::shared_future<std::shared_ptr<texture>>;
+
+//     std::weak_ptr<ogl_frame_converter> weak_self = shared_from_this();
+//     return core::mutable_frame(tag,
+//                                std::move(image_data),
+//                                array<int32_t>{},
+//                                desc,
+//                                [weak_self, desc](std::vector<array<const std::uint8_t>> image_data) -> boost::any {
+//                                    // TODO - replace this
+//                                    auto self = weak_self.lock();
+//                                    if (!self) {
+//                                        return boost::any{};
+//                                    }
+//                                    std::vector<future_texture> textures;
+//                                    for (int n = 0; n < static_cast<int>(desc.planes.size()); ++n) {
+//                                        textures.emplace_back(self->ogl_->copy_async(image_data[n],
+//                                                                                     desc.planes[n].width,
+//                                                                                     desc.planes[n].height,
+//                                                                                     desc.planes[n].stride,
+//                                                                                     desc.planes[n].depth));
+//                                    }
+//                                    return std::make_shared<decltype(textures)>(std::move(textures));
+//                                });
+// }
+
+// core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame)
+// {
+//     // TODO
+//     return core::draw_frame{};
+// }
 
 std::shared_future<array<const std::uint8_t>>
 ogl_frame_converter::convert_from_rgba(const core::const_frame&         frame,
diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h
index 31f0e1a684..3de9f1b8cd 100644
--- a/src/accelerator/ogl/image/frame_converter.h
+++ b/src/accelerator/ogl/image/frame_converter.h
@@ -41,9 +41,9 @@ class ogl_frame_converter
 
     ogl_frame_converter& operator=(const ogl_frame_converter&) = delete;
 
-    core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override;
+    // core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override;
 
-    core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override;
+    // core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override;
 
     std::shared_future<array<const std::uint8_t>> convert_from_rgba(const core::const_frame&   frame,
                                                                     core::encoded_frame_format format,
diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp
index 8bb65fdccd..c17497a99c 100644
--- a/src/accelerator/ogl/image/shader_from_rgba.comp
+++ b/src/accelerator/ogl/image/shader_from_rgba.comp
@@ -34,12 +34,12 @@ layout(std430, binding = 3) buffer description_layout
 {
     // This must match convert_from_texture_description in device.h
     uint target_format;
-    bool is_16_bit;
+    uint is_16_bit;
     uint frame_width;
     uint frame_height;
     uint words_per_line;
-    bool key_only;
-    bool straigthen;
+    uint key_only;
+    uint straigthen;
 };
 
 vec4 straighten_pixel(vec4 pixel) {
@@ -52,13 +52,14 @@ vec4 straighten_pixel(vec4 pixel) {
 }
 
 vec4 read_pixel(ivec2 coord) {
-    vec4 pixel = is_16_bit ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord);
-    if (key_only) return pixel.aaaa;
-    return straighten_pixel(pixel);
+    vec4 pixel = is_16_bit > 0 ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord);
+    if (key_only > 0) return pixel.aaaa; // key-only: alpha replicated into all four channels
+    if (straigthen > 0) return straighten_pixel(pixel); // only when the description buffer requests it
+    return pixel; // otherwise the loaded pixel is passed through unchanged
 }
 
 vec3 rgba_to_bt709(vec4 pixel) {
-    if (key_only) {
+    if (key_only > 0) {
         // TODO - verify everything about this
         return vec3(pixel.a, 0.5, 0.5);
     }
diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h
index 07858b0fde..8a165a546b 100644
--- a/src/accelerator/ogl/util/device.h
+++ b/src/accelerator/ogl/util/device.h
@@ -37,13 +37,13 @@ namespace caspar { namespace accelerator { namespace ogl {
 // This must match description_layout in shader_from_rgba.comp
 struct convert_from_texture_description
 {
-    uint     target_format;
-    bool     is_16_bit;
+    uint32_t     target_format;
+    uint32_t     is_16_bit; // flag stored as a 32-bit word; the shader declares it `uint`
     uint32_t width;
     uint32_t height;
     uint32_t words_per_line;
-    bool     key_only;
-    bool     straighten;
+    uint32_t     key_only; // flag stored as a 32-bit word; the shader declares it `uint`
+    uint32_t     straighten; // flag; the shader-side member is spelled `straigthen`
 };
 
 class device final
diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h
index bff013c952..8175eba20e 100644
--- a/src/core/frame/frame_factory.h
+++ b/src/core/frame/frame_factory.h
@@ -43,9 +43,9 @@ class frame_converter
 
     frame_converter(const frame_converter&) = delete;
 
-    virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
+    // virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0;
 
-    virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0;
+    // virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0;
 
     virtual std::shared_future<array<const std::uint8_t>>
     convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only, bool straighten) = 0;
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index d89f2b384d..633841eedd 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -93,21 +93,21 @@ core::mutable_frame make_frame(void*                    tag,
     return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio));
 }
 
-core::mutable_frame make_frame2(void*                                         tag,
-                                const std::shared_ptr<core::frame_converter>& frame_factory,
-                                std::shared_ptr<AVFrame>                      video,
-                                std::shared_ptr<AVFrame>                      audio)
-{
-    std::vector<int> data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes
-
-    const auto pix_desc =
-        video ? pixel_format_desc(static_cast<AVPixelFormat>(video->format), video->width, video->height, data_map)
-              : core::pixel_format_desc(core::pixel_format::invalid);
-
-    auto frame = frame_factory->create_frame(tag, pix_desc);
-
-    return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio));
-}
+//core::mutable_frame make_frame2(void*                                         tag,
+//                                const std::shared_ptr<core::frame_converter>& frame_factory,
+//                                std::shared_ptr<AVFrame>                      video,
+//                                std::shared_ptr<AVFrame>                      audio)
+//{
+//    std::vector<int> data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes
+//
+//    const auto pix_desc =
+//        video ? pixel_format_desc(static_cast<AVPixelFormat>(video->format), video->width, video->height, data_map)
+//              : core::pixel_format_desc(core::pixel_format::invalid);
+//
+//    auto frame = frame_factory->create_frame(tag, pix_desc);
+//
+//    return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio));
+//}
 
 core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
 {

From bfcedf43f9f182b6c325be1175e66ede0589122b Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Thu, 4 Jan 2024 17:00:27 +0000
Subject: [PATCH 49/50] fix: allow 16bit from ffmpeg

---
 src/modules/ffmpeg/producer/av_producer.cpp |  7 +++++--
 src/modules/ffmpeg/util/av_util.cpp         | 12 ++++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp
index ce52ff8a11..9b2078a073 100644
--- a/src/modules/ffmpeg/producer/av_producer.cpp
+++ b/src/modules/ffmpeg/producer/av_producer.cpp
@@ -500,6 +500,11 @@ struct Filter
                                               AV_PIX_FMT_ARGB,
                                               AV_PIX_FMT_RGBA,
                                               AV_PIX_FMT_ABGR,
+                                              AV_PIX_FMT_BGR48LE,
+                                              AV_PIX_FMT_RGB48LE,
+                                              AV_PIX_FMT_BGRA64LE,
+                                              AV_PIX_FMT_RGBA64LE,
+                                              // TODO: AV_PIX_FMT_X2RGB10LE is not handled yet
                                               AV_PIX_FMT_YUV444P,
                                               AV_PIX_FMT_YUV422P,
                                               AV_PIX_FMT_YUV420P,
@@ -511,14 +516,12 @@ struct Filter
                                               AV_PIX_FMT_YUV444P10LE,
                                               AV_PIX_FMT_YUV422P10LE,
                                               AV_PIX_FMT_YUV420P10LE,
-                                              // AV_PIX_FMT_YUV410P10LE,
                                               AV_PIX_FMT_YUV444P16LE,
                                               AV_PIX_FMT_YUV422P16LE,
                                               AV_PIX_FMT_YUV420P16LE,
                                               AV_PIX_FMT_YUVA444P10LE,
                                               AV_PIX_FMT_YUVA422P10LE,
                                               AV_PIX_FMT_YUVA420P10LE,
-                                              // AV_PIX_FMT_UYVY42210LE,
                                               AV_PIX_FMT_YUVA444P16LE,
                                               AV_PIX_FMT_YUVA422P16LE,
                                               AV_PIX_FMT_YUVA420P16LE,
diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp
index 633841eedd..7214be49c5 100644
--- a/src/modules/ffmpeg/util/av_util.cpp
+++ b/src/modules/ffmpeg/util/av_util.cpp
@@ -115,14 +115,18 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt)
         case AV_PIX_FMT_GRAY8:
             return core::pixel_format::gray;
         case AV_PIX_FMT_RGB24:
+        case AV_PIX_FMT_RGB48LE:
             return core::pixel_format::rgb;
         case AV_PIX_FMT_BGR24:
+        case AV_PIX_FMT_BGR48LE:
             return core::pixel_format::bgr;
         case AV_PIX_FMT_BGRA:
+        case AV_PIX_FMT_BGRA64LE:
             return core::pixel_format::bgra;
         case AV_PIX_FMT_ARGB:
             return core::pixel_format::argb;
         case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_RGBA64LE:
             return core::pixel_format::rgba;
         case AV_PIX_FMT_ABGR:
             return core::pixel_format::abgr;
@@ -175,14 +179,18 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int
         }
         case core::pixel_format::bgr:
         case core::pixel_format::rgb: {
-            desc.planes.emplace_back(linesizes[0] / 3, height, 3);
+            auto depth =  (pix_fmt == AV_PIX_FMT_BGR48LE || pix_fmt==AV_PIX_FMT_RGB48LE) ? common::bit_depth::bit16:common::bit_depth::bit8;
+            auto scale = depth == common::bit_depth::bit16?6:3;
+            desc.planes.emplace_back(linesizes[0] / scale, height, 3, depth);
             return desc;
         }
         case core::pixel_format::bgra:
         case core::pixel_format::argb:
         case core::pixel_format::rgba:
         case core::pixel_format::abgr: {
-            desc.planes.emplace_back(linesizes[0] / 4, height, 4);
+            auto depth =  (pix_fmt == AV_PIX_FMT_BGRA64LE || pix_fmt==AV_PIX_FMT_RGBA64LE) ? common::bit_depth::bit16:common::bit_depth::bit8;
+            auto scale = depth == common::bit_depth::bit16?8:4;
+            desc.planes.emplace_back(linesizes[0] / scale, height, 4, depth);
             return desc;
         }
         case core::pixel_format::ycbcr:

From 7adf098cb816943eea9d45cb4b38aa1adc1884a2 Mon Sep 17 00:00:00 2001
From: Julian Waller <git@julusian.co.uk>
Date: Thu, 4 Jan 2024 17:31:42 +0000
Subject: [PATCH 50/50] wip: boilerplate for decklink 12bit, but nothing
 happens

---
 .../decklink/consumer/decklink_consumer.cpp     | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp
index ed58593bec..80d9946a3f 100644
--- a/src/modules/decklink/consumer/decklink_consumer.cpp
+++ b/src/modules/decklink/consumer/decklink_consumer.cpp
@@ -241,6 +241,9 @@ class decklink_frame : public IDeckLinkVideoFrame
                 return static_cast<long>(format_desc_.width) * 4;
             case bmdFormat10BitYUV:
                 return ((static_cast<long>(format_desc_.width) + 47) / 48) * 128;
+            case bmdFormat12BitRGBLE:
+            case bmdFormat12BitRGB:
+                return (static_cast<long>(format_desc_.width) * 36) / 8;
             default:
                 return 0;
         }
@@ -790,10 +793,18 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback
 
                     auto buffer = frame1.value().frame.get();
 
-                    std::shared_ptr<void> image_data = create_aligned_buffer(decklink_format_desc_.size, 128);
-                    std::memcpy(image_data.get(), buffer.data(), buffer.size());
 
-                    schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time);
+                    // rgb12 (disabled: boilerplate only, not wired up yet):
+//                    std::shared_ptr<void> image_data = create_aligned_buffer(((decklink_format_desc_.width * 36) / 8)*decklink_format_desc_.height, 128);
+//                    std::memcpy(image_data.get(), buffer.data(), buffer.size());
+//
+//                    schedule_next_video(image_data, bmdFormat12BitRGBLE, nb_samples, video_display_time);
+
+                    // yuv10:
+                     std::shared_ptr<void> image_data = create_aligned_buffer(buffer.size(), 128);
+                     std::memcpy(image_data.get(), buffer.data(), buffer.size());
+
+                     schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time);
 
                     if (config_.embedded_audio) {
                         schedule_next_audio(std::move(audio_data), nb_samples);