From a85012ce979045c30e02d8599bfdb939b19ab4fe Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 16:46:50 +0000 Subject: [PATCH 01/50] wip: basic format mapping --- src/core/frame/frame_factory.h | 14 ++++++++++++++ src/modules/ffmpeg/producer/av_producer.cpp | 8 ++++++++ src/modules/ffmpeg/util/av_util.cpp | 14 ++++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 965b7e9b40..4c609f02a4 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -35,4 +35,18 @@ class frame_factory virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; }; +class frame_converter { + public: + frame_converter() = default; + frame_converter& operator=(const frame_converter&) = delete; + virtual ~frame_converter() = default; + + frame_converter(const frame_converter&) = delete; + + virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; + + virtual class draw_frame convert_frame(const class mutable_frame) = 0; + +}; + }} // namespace caspar::core diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index b28ed4f307..749d1aa56b 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -508,6 +508,14 @@ struct Filter AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_UYVY422, + AV_PIX_FMT_YUV444P10LE, + AV_PIX_FMT_YUV422P10LE, + AV_PIX_FMT_YUV420P10LE, + // AV_PIX_FMT_YUV410P10LE, + AV_PIX_FMT_YUVA444P10LE, + AV_PIX_FMT_YUVA422P10LE, + AV_PIX_FMT_YUVA420P10LE, + // AV_PIX_FMT_UYVY42210LE, AV_PIX_FMT_NONE}; FF(av_opt_set_int_list(sink, "pix_fmts", pix_fmts, -1, AV_OPT_SEARCH_CHILDREN)); #ifdef _MSC_VER diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index 6fd3743b98..1721c64862 100644 --- 
a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -102,23 +102,25 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_ABGR: return core::pixel_format::abgr; case AV_PIX_FMT_YUV444P: - return core::pixel_format::ycbcr; case AV_PIX_FMT_YUV422P: - return core::pixel_format::ycbcr; case AV_PIX_FMT_YUV420P: - return core::pixel_format::ycbcr; case AV_PIX_FMT_YUV411P: - return core::pixel_format::ycbcr; case AV_PIX_FMT_YUV410P: return core::pixel_format::ycbcr; case AV_PIX_FMT_YUVA420P: - return core::pixel_format::ycbcra; case AV_PIX_FMT_YUVA422P: - return core::pixel_format::ycbcra; case AV_PIX_FMT_YUVA444P: return core::pixel_format::ycbcra; case AV_PIX_FMT_UYVY422: return core::pixel_format::uyvy; + case AV_PIX_FMT_YUV444P10LE: + case AV_PIX_FMT_YUV422P10LE: + case AV_PIX_FMT_YUV420P10LE: + return core::pixel_format::ycbcr; // TODO 10bit + case AV_PIX_FMT_YUVA444P10LE: + case AV_PIX_FMT_YUVA422P10LE: + case AV_PIX_FMT_YUVA420P10LE: + return core::pixel_format::ycbcra; // TODO 10bit default: return core::pixel_format::invalid; } From 8cd4cfc47ef66a0baa7f58a00f5a6e7a45230c70 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 16:54:46 +0000 Subject: [PATCH 02/50] wip --- src/core/frame/frame_factory.h | 29 ++++++++++++--------- src/modules/ffmpeg/producer/av_producer.cpp | 4 +-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 4c609f02a4..57b660dfb8 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -23,18 +23,6 @@ namespace caspar { namespace core { -class frame_factory -{ - public: - frame_factory() = default; - frame_factory& operator=(const frame_factory&) = delete; - virtual ~frame_factory() = default; - - frame_factory(const frame_factory&) = delete; - - virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; -}; - 
class frame_converter { public: frame_converter() = default; @@ -45,8 +33,23 @@ class frame_converter { virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; - virtual class draw_frame convert_frame(const class mutable_frame) = 0; + virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0; + +}; +class frame_factory +{ + public: + frame_factory() = default; + frame_factory& operator=(const frame_factory&) = delete; + virtual ~frame_factory() = default; + + frame_factory(const frame_factory&) = delete; + + virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; + + virtual std::shared_ptr create_frame_converter() = 0; }; + }} // namespace caspar::core diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index 749d1aa56b..1491f67850 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -602,7 +602,7 @@ struct AVProducer::Impl spl::shared_ptr graph_; - const std::shared_ptr frame_factory_; + const std::shared_ptr frame_factory_; const core::video_format_desc format_desc_; const AVRational format_tb_; const std::string name_; @@ -655,7 +655,7 @@ struct AVProducer::Impl boost::optional duration, bool loop, int seekable) - : frame_factory_(frame_factory) + : frame_factory_(frame_factory->create_frame_converter()) , format_desc_(format_desc) , format_tb_({format_desc.duration, format_desc.time_scale * format_desc.field_count}) , name_(name) From bf121208b785201dbc3c924edd8e7a4f7f9cde91 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 17:22:51 +0000 Subject: [PATCH 03/50] wip --- src/accelerator/CMakeLists.txt | 2 + src/accelerator/ogl/image/frame_converter.cpp | 67 +++++++++++++++++++ src/accelerator/ogl/image/frame_converter.h | 52 ++++++++++++++ src/accelerator/ogl/image/image_mixer.cpp | 10 +++ 
src/accelerator/ogl/image/image_mixer.h | 2 + src/core/mixer/image/image_mixer.h | 2 + src/modules/ffmpeg/producer/av_producer.cpp | 2 +- src/modules/ffmpeg/util/av_util.cpp | 49 ++++++++++---- src/modules/ffmpeg/util/av_util.h | 5 ++ 9 files changed, 178 insertions(+), 13 deletions(-) create mode 100644 src/accelerator/ogl/image/frame_converter.cpp create mode 100644 src/accelerator/ogl/image/frame_converter.h diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt index 3a1b576af4..f6a1ca74a0 100644 --- a/src/accelerator/CMakeLists.txt +++ b/src/accelerator/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES ogl/image/image_kernel.cpp ogl/image/image_mixer.cpp ogl/image/image_shader.cpp + ogl/image/frame_converter.cpp ogl/util/buffer.cpp ogl/util/device.cpp @@ -17,6 +18,7 @@ set(HEADERS ogl/image/image_kernel.h ogl/image/image_mixer.h ogl/image/image_shader.h + ogl/image/frame_converter.h ogl/util/buffer.h ogl/util/device.h diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp new file mode 100644 index 0000000000..4abc7bb6ba --- /dev/null +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . 
+ * + * Author: Julian Waller, julian@superfly.tv + */ +#include "frame_converter.h" + +namespace caspar::accelerator::ogl { + +ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr& ogl) + : ogl_(ogl) +{ +} + +core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc) +{ + + std::vector> image_data; + for (auto& plane : desc.planes) { + image_data.push_back(ogl_->create_array(plane.size)); + } + + using future_texture = std::shared_future>; + + std::weak_ptr weak_self = shared_from_this(); + return core::mutable_frame( + tag, + std::move(image_data), + array{}, + desc, + [weak_self, desc](std::vector> image_data) -> boost::any { + // TODO - replace this + auto self = weak_self.lock(); + if (!self) { + return boost::any{}; + } + std::vector textures; + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(self->ogl_->copy_async( + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + } + return std::make_shared(std::move(textures)); + }); +} + +core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame) +{ + // TODO + return core::draw_frame{}; +} + +} // namespace caspar::accelerator::ogl \ No newline at end of file diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h new file mode 100644 index 0000000000..1a9ec1f80b --- /dev/null +++ b/src/accelerator/ogl/image/frame_converter.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . + * + * Author: Julian Waller, julian@superfly.tv + */ + +#pragma once + +#include +#include +#include + +#include "../util/device.h" + +namespace caspar::accelerator::ogl { + +class ogl_frame_converter + : public core::frame_converter + , public std::enable_shared_from_this +{ + public: + ogl_frame_converter(const spl::shared_ptr& ogl); + ogl_frame_converter(const ogl_frame_converter&) = delete; + + ~ogl_frame_converter() override = default; + + ogl_frame_converter& operator=(const ogl_frame_converter&) = delete; + + core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override; + + core::draw_frame convert_frame(const core::mutable_frame& frame) override; + + private: + const spl::shared_ptr ogl_; +}; + +} // namespace caspar::accelerator::ogl \ No newline at end of file diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 56212e239f..01fb752681 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -25,6 +25,7 @@ #include "../util/buffer.h" #include "../util/device.h" #include "../util/texture.h" +#include "frame_converter.h" #include #include @@ -327,6 +328,11 @@ struct image_mixer::impl return std::make_shared(std::move(textures)); }); } + + std::shared_ptr create_frame_converter() override + { + return std::make_shared(ogl_); + } }; image_mixer::image_mixer(const spl::shared_ptr& ogl, const int channel_id, const size_t max_frame_size) @@ -345,5 +351,9 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel { return 
impl_->create_frame(tag, desc); } +std::shared_ptr image_mixer::create_frame_converter() +{ + return impl_->create_frame_converter(); +} }}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index c9034238d8..19be36225d 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -46,6 +46,8 @@ class image_mixer final : public core::image_mixer std::future> operator()(const core::video_format_desc& format_desc) override; core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; + std::shared_ptr create_frame_converter() override; + // core::image_mixer void push(const core::frame_transform& frame) override; diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index 0922e38261..dcff0d1b79 100644 --- a/src/core/mixer/image/image_mixer.h +++ b/src/core/mixer/image/image_mixer.h @@ -48,6 +48,8 @@ class image_mixer virtual std::future> operator()(const struct video_format_desc& format_desc) = 0; class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0; + + std::shared_ptr create_frame_converter() override = 0; }; }} // namespace caspar::core diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index 1491f67850..b1d66dc124 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -871,7 +871,7 @@ struct AVProducer::Impl frame.duration = av_rescale_q(frame.audio->nb_samples, {1, sr}, TIME_BASE_Q); } - frame.frame = core::draw_frame(make_frame(this, *frame_factory_, frame.video, frame.audio)); + frame.frame = core::draw_frame(make_frame2(this, frame_factory_, frame.video, frame.audio)); frame.frame_count = frame_count_++; graph_->set_value("decode-time", decode_timer.elapsed() * format_desc_.fps * 0.5); diff --git 
a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index 1721c64862..7c410b6258 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -40,19 +40,12 @@ std::shared_ptr alloc_packet() return packet; } -core::mutable_frame make_frame(void* tag, - core::frame_factory& frame_factory, - std::shared_ptr video, - std::shared_ptr audio) +core::mutable_frame copy_frame_tmp(core::mutable_frame frame, + const core::pixel_format_desc& pix_desc, + const std::vector& data_map, + std::shared_ptr video, + std::shared_ptr audio) { - std::vector data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes - - const auto pix_desc = - video ? pixel_format_desc(static_cast(video->format), video->width, video->height, data_map) - : core::pixel_format_desc(core::pixel_format::invalid); - - auto frame = frame_factory.create_frame(tag, pix_desc); - tbb::parallel_invoke( [&]() { if (video) { @@ -84,6 +77,38 @@ core::mutable_frame make_frame(void* tag, return frame; } +core::mutable_frame make_frame(void* tag, + core::frame_factory& frame_factory, + std::shared_ptr video, + std::shared_ptr audio) +{ + std::vector data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes + + const auto pix_desc = + video ? pixel_format_desc(static_cast(video->format), video->width, video->height, data_map) + : core::pixel_format_desc(core::pixel_format::invalid); + + auto frame = frame_factory.create_frame(tag, pix_desc); + + return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio)); +} + +core::mutable_frame make_frame2(void* tag, + const std::shared_ptr& frame_factory, + std::shared_ptr video, + std::shared_ptr audio) +{ + std::vector data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes + + const auto pix_desc = + video ? 
pixel_format_desc(static_cast(video->format), video->width, video->height, data_map) + : core::pixel_format_desc(core::pixel_format::invalid); + + auto frame = frame_factory->create_frame(tag, pix_desc); + + return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio)); +} + core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) { switch (pix_fmt) { diff --git a/src/modules/ffmpeg/util/av_util.h b/src/modules/ffmpeg/util/av_util.h index 8a6ceed869..c1f1b97bb7 100644 --- a/src/modules/ffmpeg/util/av_util.h +++ b/src/modules/ffmpeg/util/av_util.h @@ -27,6 +27,11 @@ core::mutable_frame make_frame(void* tag, std::shared_ptr video, std::shared_ptr audio); +core::mutable_frame make_frame2(void* tag, + const std::shared_ptr& frame_factory, + std::shared_ptr video, + std::shared_ptr audio); + std::shared_ptr make_av_video_frame(const core::const_frame& frame, const core::video_format_desc& format_des); std::shared_ptr make_av_audio_frame(const core::const_frame& frame, const core::video_format_desc& format_des); From 8a2a85bde28d25d40f04a09255d44b219d363864 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 17:35:26 +0000 Subject: [PATCH 04/50] wip --- src/accelerator/ogl/image/image_mixer.cpp | 43 +++++++++++++++++------ src/core/frame/pixel_format.h | 2 ++ src/modules/ffmpeg/util/av_util.cpp | 4 +-- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 01fb752681..b0db1b52b7 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -302,6 +302,21 @@ struct image_mixer::impl return renderer_(std::move(layers_), format_desc); } + std::vector convert_frame(const std::vector>& image_data, + const core::pixel_format_desc& desc) const + { + const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate + + std::vector textures; + const auto texture = 
ogl_->create_texture(plane0.width, plane0.height, + 4); // TODO - don't clear + + // TODO - how to run and link shader? + textures.emplace_back(make_ready_future(texture)); + + return textures; + } + core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override { std::vector> image_data; @@ -320,18 +335,29 @@ struct image_mixer::impl if (!self) { return boost::any{}; } - std::vector textures; - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + + switch (desc.format) { + case core::pixel_format::ycbcr10: + case core::pixel_format::ycbcra10: { + std::vector textures = self->convert_frame(image_data, desc); + + return std::make_shared(std::move(textures)); + } + default: { + std::vector textures; + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(self->ogl_->copy_async( + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + } + return std::make_shared(std::move(textures)); + } } - return std::make_shared(std::move(textures)); }); } std::shared_ptr create_frame_converter() override { - return std::make_shared(ogl_); + return std::make_shared(ogl_); } }; @@ -351,9 +377,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel { return impl_->create_frame(tag, desc); } -std::shared_ptr image_mixer::create_frame_converter() -{ - return impl_->create_frame_converter(); -} +std::shared_ptr image_mixer::create_frame_converter() { return impl_->create_frame_converter(); } }}} // namespace caspar::accelerator::ogl diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index 007e1b5082..efe45e071d 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -38,6 +38,8 @@ enum class pixel_format bgr, rgb, uyvy, + ycbcr10, + ycbcra10, count, invalid, }; diff --git 
a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index 7c410b6258..c2f2c2ae3a 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -141,11 +141,11 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_YUV444P10LE: case AV_PIX_FMT_YUV422P10LE: case AV_PIX_FMT_YUV420P10LE: - return core::pixel_format::ycbcr; // TODO 10bit + return core::pixel_format::ycbcr10; case AV_PIX_FMT_YUVA444P10LE: case AV_PIX_FMT_YUVA422P10LE: case AV_PIX_FMT_YUVA420P10LE: - return core::pixel_format::ycbcra; // TODO 10bit + return core::pixel_format::ycbcra10; default: return core::pixel_format::invalid; } From 73fab1711e69fe7d165c98539130c76d226a8e78 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 17:41:52 +0000 Subject: [PATCH 05/50] wip --- src/accelerator/ogl/image/image_mixer.cpp | 7 ++++++- src/accelerator/ogl/util/device.cpp | 3 +++ src/accelerator/ogl/util/device.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index b0db1b52b7..44e05a7f47 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -305,15 +305,20 @@ struct image_mixer::impl std::vector convert_frame(const std::vector>& image_data, const core::pixel_format_desc& desc) const { - const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate std::vector textures; + const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate const auto texture = ogl_->create_texture(plane0.width, plane0.height, 4); // TODO - don't clear // TODO - how to run and link shader? 
textures.emplace_back(make_ready_future(texture)); + /* + const auto texture = ogl_->convert_frame(std::move(image_data), desc); + textures.emplace_back(texture); +*/ + return textures; } diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 0576153f7f..27586388f2 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -437,6 +437,9 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source); } +//std::future> device::convert_frame() { + // TODO +//} void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring device::version() const { return impl_->version(); } boost::property_tree::wptree device::info() const { return impl_->info(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index d7f1cef1a4..09492aaa27 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -51,6 +51,8 @@ class device final std::future> copy_async(const array& source, int width, int height, int stride); std::future> copy_async(const std::shared_ptr& source); + +// std::future> convert_frame(std::vector> image_data); template auto dispatch_async(Func&& func) { From c58ad0e53fb9ad0bc3ec744518d65247a30dd361 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 1 Dec 2023 18:41:22 +0000 Subject: [PATCH 06/50] things are hooked up, but has no output --- src/accelerator/CMakeLists.txt | 4 + src/accelerator/ogl/image/image_mixer.cpp | 10 +- src/accelerator/ogl/image/shader_to_rgba.comp | 15 ++ src/accelerator/ogl/util/compute_shader.cpp | 142 ++++++++++++++++++ src/accelerator/ogl/util/compute_shader.h | 63 ++++++++ src/accelerator/ogl/util/device.cpp | 58 ++++++- src/accelerator/ogl/util/device.h | 4 +- src/accelerator/ogl/util/texture.cpp | 18 ++- src/modules/ffmpeg/producer/av_producer.cpp | 6 +- src/modules/ffmpeg/util/av_util.cpp | 6 +- src/shell/casparcg.config | 3 + 11 files 
changed, 309 insertions(+), 20 deletions(-) create mode 100644 src/accelerator/ogl/image/shader_to_rgba.comp create mode 100644 src/accelerator/ogl/util/compute_shader.cpp create mode 100644 src/accelerator/ogl/util/compute_shader.h diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt index f6a1ca74a0..ee2eda3b2e 100644 --- a/src/accelerator/CMakeLists.txt +++ b/src/accelerator/CMakeLists.txt @@ -8,6 +8,7 @@ set(SOURCES ogl/image/frame_converter.cpp ogl/util/buffer.cpp + ogl/util/compute_shader.cpp ogl/util/device.cpp ogl/util/shader.cpp ogl/util/texture.cpp @@ -21,12 +22,14 @@ set(HEADERS ogl/image/frame_converter.h ogl/util/buffer.h + ogl/util/compute_shader.h ogl/util/device.h ogl/util/shader.h ogl/util/texture.h ogl_image_vertex.h ogl_image_fragment.h + ogl_image_to_rgba.h accelerator.h StdAfx.h @@ -34,6 +37,7 @@ set(HEADERS bin2c("ogl/image/shader.vert" "ogl_image_vertex.h" "caspar::accelerator::ogl" "vertex_shader") bin2c("ogl/image/shader.frag" "ogl_image_fragment.h" "caspar::accelerator::ogl" "fragment_shader") +bin2c("ogl/image/shader_to_rgba.comp" "ogl_image_to_rgba.h" "caspar::accelerator::ogl" "compute_to_rgba_shader") casparcg_add_library(accelerator SOURCES ${SOURCES} ${HEADERS}) target_include_directories(accelerator PRIVATE diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 44e05a7f47..36c1b7bd51 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -305,20 +305,20 @@ struct image_mixer::impl std::vector convert_frame(const std::vector>& image_data, const core::pixel_format_desc& desc) const { + const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate std::vector textures; - const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate + /* const auto texture = ogl_->create_texture(plane0.width, plane0.height, 4); // TODO - don't clear // TODO - how to run and link shader? 
textures.emplace_back(make_ready_future(texture)); - - /* - const auto texture = ogl_->convert_frame(std::move(image_data), desc); - textures.emplace_back(texture); */ + textures.emplace_back(ogl_->convert_frame( + image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter? + return textures; } diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp new file mode 100644 index 0000000000..abc7adc03c --- /dev/null +++ b/src/accelerator/ogl/image/shader_to_rgba.comp @@ -0,0 +1,15 @@ +#version 430 + +layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to + +void main() { + vec4 value = vec4(0.0, 0.0, 0.0, 1.0); + ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); + + value.x = float(texelCoord.x)/(gl_NumWorkGroups.x); + value.y = float(texelCoord.y)/(gl_NumWorkGroups.y); + + imageStore(imgOutput, texelCoord, value); +} \ No newline at end of file diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp new file mode 100644 index 0000000000..612bf8b922 --- /dev/null +++ b/src/accelerator/ogl/util/compute_shader.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . + * + * Author: Robert Nagy, ronag89@gmail.com + */ +#include "compute_shader.h" + +#include + +#include + +#include + +namespace caspar { namespace accelerator { namespace ogl { + +struct compute_shader::impl +{ + GLuint program_; + std::unordered_map uniform_locations_; + std::unordered_map attrib_locations_; + + impl(const impl&) = delete; + impl& operator=(const impl&) = delete; + + public: + impl(const std::string& compute_source_str) + : program_(0) + { + int work_grp_cnt[3]; + + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &work_grp_cnt[0]); + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]); + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]); + + printf("max global (total) work group counts x:%i y:%i z:%i\n", + work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]); + + GLint success; + + const char* compute_source = compute_source_str.c_str(); + + auto compute_shader = glCreateShaderObjectARB(GL_COMPUTE_SHADER); + + GL(glShaderSourceARB(compute_shader, 1, &compute_source, NULL)); + GL(glCompileShaderARB(compute_shader)); + + GL(glGetObjectParameterivARB(compute_shader, GL_OBJECT_COMPILE_STATUS_ARB, &success)); + if (success == GL_FALSE) { + char info[2048]; + GL(glGetInfoLogARB(compute_shader, sizeof(info), 0, info)); + GL(glDeleteObjectARB(compute_shader)); + std::stringstream str; + str << "Failed to compile compute shader:" << std::endl << info << std::endl; + CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str())); + } + + program_ = glCreateProgramObjectARB(); + + GL(glAttachObjectARB(program_, compute_shader)); + GL(glLinkProgramARB(program_)); + + GL(glDeleteObjectARB(compute_shader)); + + GL(glGetObjectParameterivARB(program_, GL_OBJECT_LINK_STATUS_ARB, &success)); + if (success == GL_FALSE) { + char info[2048]; + GL(glGetInfoLogARB(program_, sizeof(info), 0, info)); + 
GL(glDeleteObjectARB(program_)); + std::stringstream str; + str << "Failed to link shader program:" << std::endl << info << std::endl; + CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str())); + } + GL(glUseProgramObjectARB(program_)); + } + + ~impl() { glDeleteProgram(program_); } + + GLint get_uniform_location(const char* name) + { + auto it = uniform_locations_.find(name); + if (it == uniform_locations_.end()) + it = uniform_locations_.insert(std::make_pair(name, glGetUniformLocation(program_, name))).first; + return it->second; + } + + GLint get_attrib_location(const char* name) + { + auto it = attrib_locations_.find(name); + if (it == attrib_locations_.end()) + it = attrib_locations_.insert(std::make_pair(name, glGetAttribLocation(program_, name))).first; + return it->second; + } + + void set(const std::string& name, bool value) { set(name, value ? 1 : 0); } + + void set(const std::string& name, int value) { GL(glUniform1i(get_uniform_location(name.c_str()), value)); } + + void set(const std::string& name, float value) { GL(glUniform1f(get_uniform_location(name.c_str()), value)); } + + void set(const std::string& name, double value0, double value1) + { + GL(glUniform2f(get_uniform_location(name.c_str()), static_cast(value0), static_cast(value1))); + } + + void set(const std::string& name, double value) + { + GL(glUniform1f(get_uniform_location(name.c_str()), static_cast(value))); + } + + void use() { GL(glUseProgramObjectARB(program_)); } +}; + +compute_shader::compute_shader(const std::string& compute_source_str) + : impl_(new impl(compute_source_str)) +{ +} +compute_shader::~compute_shader() {} +void compute_shader::set(const std::string& name, bool value) { impl_->set(name, value); } +void compute_shader::set(const std::string& name, int value) { impl_->set(name, value); } +void compute_shader::set(const std::string& name, float value) { impl_->set(name, value); } +void compute_shader::set(const std::string& name, double value0, double value1) { 
impl_->set(name, value0, value1); } +void compute_shader::set(const std::string& name, double value) { impl_->set(name, value); } +GLint compute_shader::get_attrib_location(const char* name) { return impl_->get_attrib_location(name); } +int compute_shader::id() const { return impl_->program_; } +void compute_shader::use() const { impl_->use(); } + +}}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/util/compute_shader.h b/src/accelerator/ogl/util/compute_shader.h new file mode 100644 index 0000000000..71aa6bb290 --- /dev/null +++ b/src/accelerator/ogl/util/compute_shader.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . 
+ * + * Author: Robert Nagy, ronag89@gmail.com + */ + +#pragma once + +#include +#include +#include +#include + +namespace caspar { namespace accelerator { namespace ogl { + +class compute_shader final +{ + compute_shader(const compute_shader&); + compute_shader& operator=(const compute_shader&); + + public: + compute_shader(const std::string& compute_source_str); + ~compute_shader(); + + void set(const std::string& name, bool value); + void set(const std::string& name, int value); + void set(const std::string& name, float value); + void set(const std::string& name, double value0, double value1); + void set(const std::string& name, double value); + + GLint get_attrib_location(const char* name); + + template + typename std::enable_if::value, void>::type set(const std::string& name, E value) + { + set(name, static_cast::type>(value)); + } + + void use() const; + + int id() const; + + private: + struct impl; + std::unique_ptr impl_; +}; + +}}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 27586388f2..2ccdb87753 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -21,9 +21,10 @@ #include "device.h" #include "buffer.h" -#include "shader.h" +#include "compute_shader.h" #include "texture.h" + #include #include #include @@ -41,6 +42,7 @@ #include #include +#include #include #include @@ -51,6 +53,8 @@ #include #include +#include "ogl_image_to_rgba.h" + namespace caspar { namespace accelerator { namespace ogl { using namespace boost::asio; @@ -69,6 +73,8 @@ struct device::impl : public std::enable_shared_from_this sync_queue_t sync_queue_; + std::unique_ptr compute_shader_; + GLuint fbo_; std::wstring version_; @@ -113,6 +119,8 @@ struct device::impl : public std::enable_shared_from_this GL(glCreateFramebuffers(1, &fbo_)); GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_)); + compute_shader_ = std::make_unique(std::string(compute_to_rgba_shader)); + 
device_.setActive(false); thread_ = std::thread([&] { @@ -175,7 +183,7 @@ struct device::impl : public std::enable_shared_from_this std::shared_ptr create_texture(int width, int height, int stride, bool clear) { - CASPAR_VERIFY(stride > 0 && stride < 5); + CASPAR_VERIFY(stride > 0 && stride < 6); CASPAR_VERIFY(width > 0 && height > 0); // TODO (perf) Shared pool. @@ -221,6 +229,8 @@ struct device::impl : public std::enable_shared_from_this return array(ptr, buf->size(), buf); } + + std::future> copy_async(const array& source, int width, int height, int stride) { @@ -283,6 +293,42 @@ struct device::impl : public std::enable_shared_from_this }); } + std::future> + convert_frame(const std::vector>& sources, int width, int height, int format) + { + return dispatch_async([=] { + + auto tex = create_texture(width, height, 5, false); + + //tex->bind(0); + glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); + compute_shader_->use(); + + glDispatchCompute((unsigned int)width, (unsigned int)height, 1); + + // make sure writing to image has finished before read +// glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop + glMemoryBarrier(GL_ALL_BARRIER_BITS); + + + + /* +std::shared_ptr buf; + auto tmp = source.storage>(); + if (tmp) { + buf = *tmp; + } else { + buf = create_buffer(static_cast(source.size()), true); + // TODO (perf) Copy inside a TBB worker. 
+ std::memcpy(buf->data(), source.data(), source.size()); + } + */ + + // tex->copy_from(*buf); + return tex; + }); + } + #ifdef WIN32 std::future> copy_async(GLuint source, int width, int height, int stride) { @@ -437,9 +483,11 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source); } -//std::future> device::convert_frame() { - // TODO -//} +std::future> +device::convert_frame(const std::vector>& sources, int width, int height, int format) +{ + return impl_->convert_frame(sources, width, height, format); +} void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring device::version() const { return impl_->version(); } boost::property_tree::wptree device::info() const { return impl_->info(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 09492aaa27..49e1a03a90 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -52,7 +52,9 @@ class device final copy_async(const array& source, int width, int height, int stride); std::future> copy_async(const std::shared_ptr& source); -// std::future> convert_frame(std::vector> image_data); + std::future> + convert_frame(const std::vector>& sources, int width, int height, int format); + template auto dispatch_async(Func&& func) { diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 8682d060ef..26f23b2ce8 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -28,9 +28,9 @@ namespace caspar { namespace accelerator { namespace ogl { -static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA}; -static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8}; -static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV}; +static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA}; +static GLenum INTERNAL_FORMAT[] = {0, GL_R8, 
GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F}; +static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE}; struct texture::impl { @@ -50,12 +50,22 @@ struct texture::impl , stride_(stride) , size_(width * height * stride) { + if (stride == 5) { + size_ = width * height * 16; + } + + GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_)); GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_)); + + + //bind(); + //GL(glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, nullptr)); // HACK + //unbind(); } ~impl() { glDeleteTextures(1, &id_); } @@ -131,7 +141,7 @@ void texture::copy_to(buffer& dest) { impl_->copy_to(dest); } int texture::width() const { return impl_->width_; } int texture::height() const { return impl_->height_; } int texture::stride() const { return impl_->stride_; } -int texture::size() const { return impl_->width_ * impl_->height_ * impl_->stride_; } +int texture::size() const { return impl_->size_; } int texture::id() const { return impl_->id_; } }}} // namespace caspar::accelerator::ogl diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index b1d66dc124..749d1aa56b 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -602,7 +602,7 @@ struct AVProducer::Impl spl::shared_ptr graph_; - const std::shared_ptr frame_factory_; + const std::shared_ptr frame_factory_; const core::video_format_desc format_desc_; const AVRational format_tb_; const std::string name_; @@ -655,7 +655,7 @@ struct AVProducer::Impl boost::optional duration, bool loop, int seekable) - : 
frame_factory_(frame_factory->create_frame_converter()) + : frame_factory_(frame_factory) , format_desc_(format_desc) , format_tb_({format_desc.duration, format_desc.time_scale * format_desc.field_count}) , name_(name) @@ -871,7 +871,7 @@ struct AVProducer::Impl frame.duration = av_rescale_q(frame.audio->nb_samples, {1, sr}, TIME_BASE_Q); } - frame.frame = core::draw_frame(make_frame2(this, frame_factory_, frame.video, frame.audio)); + frame.frame = core::draw_frame(make_frame(this, *frame_factory_, frame.video, frame.audio)); frame.frame_count = frame_count_++; graph_->set_value("decode-time", decode_timer.elapsed() * format_desc_.fps * 0.5); diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index c2f2c2ae3a..b71e42ad86 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -178,7 +178,9 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } case core::pixel_format::ycbcr: - case core::pixel_format::ycbcra: { + case core::pixel_format::ycbcra: + case core::pixel_format::ycbcr10: + case core::pixel_format::ycbcra10:{ // Find chroma height // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so // we fall back to calling av_image_fill_pointers with a NULL image buffer. 
We can't unconditionally use @@ -201,7 +203,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1)); desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1)); - if (desc.format == core::pixel_format::ycbcra) + if (desc.format == core::pixel_format::ycbcra||desc.format == core::pixel_format::ycbcra10) desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1)); return desc; diff --git a/src/shell/casparcg.config b/src/shell/casparcg.config index 3eec0e9d8e..1369e84791 100644 --- a/src/shell/casparcg.config +++ b/src/shell/casparcg.config @@ -15,6 +15,9 @@ + + test + From 90a6ecd6b94a41dc03003729f5a84123ad9571c1 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Mon, 4 Dec 2023 22:36:50 +0000 Subject: [PATCH 07/50] nope --- src/accelerator/ogl/util/compute_shader.cpp | 18 +++++++++--------- src/accelerator/ogl/util/device.cpp | 12 +++++++++--- src/accelerator/ogl/util/texture.cpp | 11 +++++++++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp index 612bf8b922..26aba484df 100644 --- a/src/accelerator/ogl/util/compute_shader.cpp +++ b/src/accelerator/ogl/util/compute_shader.cpp @@ -54,10 +54,10 @@ struct compute_shader::impl const char* compute_source = compute_source_str.c_str(); - auto compute_shader = glCreateShaderObjectARB(GL_COMPUTE_SHADER); + auto compute_shader = glCreateShader(GL_COMPUTE_SHADER); - GL(glShaderSourceARB(compute_shader, 1, &compute_source, NULL)); - GL(glCompileShaderARB(compute_shader)); + GL(glShaderSource(compute_shader, 1, &compute_source, NULL)); + GL(glCompileShader(compute_shader)); GL(glGetObjectParameterivARB(compute_shader, GL_OBJECT_COMPILE_STATUS_ARB, &success)); if (success == GL_FALSE) { @@ -69,12 +69,12 @@ struct compute_shader::impl CASPAR_THROW_EXCEPTION(caspar_exception() << 
msg_info(str.str())); } - program_ = glCreateProgramObjectARB(); + program_ = glCreateProgram(); - GL(glAttachObjectARB(program_, compute_shader)); - GL(glLinkProgramARB(program_)); + GL(glAttachShader(program_, compute_shader)); + GL(glLinkProgram(program_)); - GL(glDeleteObjectARB(compute_shader)); + GL(glDeleteShader(compute_shader)); GL(glGetObjectParameterivARB(program_, GL_OBJECT_LINK_STATUS_ARB, &success)); if (success == GL_FALSE) { @@ -85,7 +85,7 @@ struct compute_shader::impl str << "Failed to link shader program:" << std::endl << info << std::endl; CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info(str.str())); } - GL(glUseProgramObjectARB(program_)); + GL(glUseProgram(program_)); } ~impl() { glDeleteProgram(program_); } @@ -122,7 +122,7 @@ struct compute_shader::impl GL(glUniform1f(get_uniform_location(name.c_str()), static_cast(value))); } - void use() { GL(glUseProgramObjectARB(program_)); } + void use() { GL(glUseProgram(program_)); } }; compute_shader::compute_shader(const std::string& compute_source_str) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 2ccdb87753..34e66b85fe 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -119,7 +119,6 @@ struct device::impl : public std::enable_shared_from_this GL(glCreateFramebuffers(1, &fbo_)); GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_)); - compute_shader_ = std::make_unique(std::string(compute_to_rgba_shader)); device_.setActive(false); @@ -298,10 +297,14 @@ struct device::impl : public std::enable_shared_from_this { return dispatch_async([=] { + if (!compute_shader_) + compute_shader_ = std::make_unique(std::string(compute_to_rgba_shader)); + auto tex = create_texture(width, height, 5, false); - //tex->bind(0); - glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); + tex->bind(0); + //compute_shader_->use(); + glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); 
compute_shader_->use(); glDispatchCompute((unsigned int)width, (unsigned int)height, 1); @@ -310,6 +313,9 @@ struct device::impl : public std::enable_shared_from_this // glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop glMemoryBarrier(GL_ALL_BARRIER_BITS); + glFlush(); + + std::this_thread::sleep_for(std::chrono::milliseconds(5)); /* diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 26f23b2ce8..ffb8eae779 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -54,6 +54,7 @@ struct texture::impl size_ = width * height * 16; } + /* GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_)); GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); @@ -61,6 +62,16 @@ struct texture::impl GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_)); + */ + + glGenTextures(1, &id_); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, id_); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, NULL); //bind(); From efd99f6d266c53fff4264a2812cb2d955ff7a1ef Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 15:42:37 +0000 Subject: [PATCH 08/50] wip: something happens! 
--- src/accelerator/ogl/image/image_mixer.cpp | 10 +++++++- src/accelerator/ogl/image/shader.frag | 2 ++ src/accelerator/ogl/image/shader_to_rgba.comp | 2 +- src/accelerator/ogl/util/device.cpp | 23 +------------------ src/accelerator/ogl/util/texture.cpp | 17 -------------- 5 files changed, 13 insertions(+), 41 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 36c1b7bd51..3e35e0b781 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -317,7 +317,15 @@ struct image_mixer::impl */ textures.emplace_back(ogl_->convert_frame( - image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter? + image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter? +// textures.emplace_back(make_ready_future(t.get())); + +/* + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(ogl_->copy_async( + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + } +*/ return textures; } diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag index 93d31d13f2..0e889aed8d 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -480,6 +480,8 @@ vec4 get_rgba_color() case 1: //bgra, return get_sample(plane[0], TexCoord.st / TexCoord.q).bgra; case 2: //rgba, + case 11: //ycbcr10, + case 12: //ycbcra10, return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba; case 3: //argb, return get_sample(plane[0], TexCoord.st / TexCoord.q).argb; diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp index abc7adc03c..5b0a89a569 100644 --- a/src/accelerator/ogl/image/shader_to_rgba.comp +++ b/src/accelerator/ogl/image/shader_to_rgba.comp @@ -5,7 +5,7 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(rgba32f, binding = 0) 
uniform image2D imgOutput; // Note: this needs to match what it is writing to void main() { - vec4 value = vec4(0.0, 0.0, 0.0, 1.0); + vec4 value = vec4(0.0, 0.0, 1.0, 1.0); ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); value.x = float(texelCoord.x)/(gl_NumWorkGroups.x); diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 34e66b85fe..7b2d120728 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -302,35 +302,14 @@ struct device::impl : public std::enable_shared_from_this auto tex = create_texture(width, height, 5, false); - tex->bind(0); - //compute_shader_->use(); glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); compute_shader_->use(); glDispatchCompute((unsigned int)width, (unsigned int)height, 1); // make sure writing to image has finished before read -// glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); // TODO - this will probably block the main rendering loop - glMemoryBarrier(GL_ALL_BARRIER_BITS); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); - glFlush(); - - std::this_thread::sleep_for(std::chrono::milliseconds(5)); - - - /* -std::shared_ptr buf; - auto tmp = source.storage>(); - if (tmp) { - buf = *tmp; - } else { - buf = create_buffer(static_cast(source.size()), true); - // TODO (perf) Copy inside a TBB worker. 
- std::memcpy(buf->data(), source.data(), source.size()); - } - */ - - // tex->copy_from(*buf); return tex; }); } diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index ffb8eae779..dedf4374f3 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -54,29 +54,12 @@ struct texture::impl size_ = width * height * 16; } - /* - GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_)); GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_)); - */ - - glGenTextures(1, &id_); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, id_); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, NULL); - - - //bind(); - //GL(glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT[stride_], width, height, 0, FORMAT[stride_], GL_FLOAT, nullptr)); // HACK - //unbind(); } ~impl() { glDeleteTextures(1, &id_); } From 467062e919824df0f0982f599242c621f46684e8 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 16:31:47 +0000 Subject: [PATCH 09/50] hack a mess --- src/accelerator/ogl/image/image_mixer.cpp | 35 +++++--------- src/accelerator/ogl/image/shader.frag | 19 ++++++-- src/accelerator/ogl/image/shader_to_rgba.comp | 6 +-- src/accelerator/ogl/util/device.cpp | 7 +-- src/accelerator/ogl/util/texture.cpp | 9 ++-- src/core/frame/pixel_format.h | 8 +++- src/modules/ffmpeg/util/av_util.cpp | 47 
++++++++++++++++--- 7 files changed, 89 insertions(+), 42 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 3e35e0b781..7f5683025a 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -307,25 +307,12 @@ struct image_mixer::impl { const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate - std::vector textures; - /* - const auto texture = ogl_->create_texture(plane0.width, plane0.height, - 4); // TODO - don't clear + // TODO - desc is no longer 'correct' and should probably be changed to avoid the mixer shader being aware of these formats - // TODO - how to run and link shader? - textures.emplace_back(make_ready_future(texture)); -*/ + std::vector textures; textures.emplace_back(ogl_->convert_frame( - image_data, plane0.width, plane0.height, 1)); // TODO - what is this 'format' parameter? -// textures.emplace_back(make_ready_future(t.get())); - -/* - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); - } -*/ + image_data, plane0.width, plane0.height, plane0.width / 2)); // TODO - what is this 'format' parameter? 
return textures; } @@ -350,12 +337,16 @@ struct image_mixer::impl } switch (desc.format) { - case core::pixel_format::ycbcr10: - case core::pixel_format::ycbcra10: { - std::vector textures = self->convert_frame(image_data, desc); - - return std::make_shared(std::move(textures)); - } +// case core::pixel_format::ycbcr10_420: +// case core::pixel_format::ycbcr10_422: +// case core::pixel_format::ycbcr10_444: +// case core::pixel_format::ycbcra10_420: +// case core::pixel_format::ycbcra10_422: +// case core::pixel_format::ycbcra10_444: { +// std::vector textures = self->convert_frame(image_data, desc); +// +// return std::make_shared(std::move(textures)); +// } default: { std::vector textures; for (int n = 0; n < static_cast(desc.planes.size()); ++n) { diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag index 0e889aed8d..baeaaae34c 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -479,15 +479,16 @@ vec4 get_rgba_color() return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rrr, 1.0); case 1: //bgra, return get_sample(plane[0], TexCoord.st / TexCoord.q).bgra; - case 2: //rgba, - case 11: //ycbcr10, - case 12: //ycbcra10, + case 2: //rgba return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba; case 3: //argb, return get_sample(plane[0], TexCoord.st / TexCoord.q).argb; case 4: //abgr, return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar; case 5: //ycbcr, + case 11: //ycbcr10_420 + case 12: //ycbcr10_422 + case 13: //ycbcr10_444 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; @@ -495,6 +496,9 @@ vec4 get_rgba_color() return ycbcra_to_rgba(y, cb, cr, 1.0); } case 6: //ycbcra + case 14: //ycbcra10_420 + case 15: //ycbcra10_422 + case 16: //ycbcra10_444 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; @@ -518,6 +522,15 @@ vec4 
get_rgba_color() float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r; return ycbcra_to_rgba(y, cb, cr, 1.0); } + // formats converted from packed formats +// case 11: //ycbcr10_420 +// case 12: //ycbcr10_422 +// case 13: //ycbcr10_444 +// return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rgb, 1.0); +// case 14: //ycbcra10_420 +// case 15: //ycbcra10_422 +// case 16: //ycbcra10_444 +// return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba; } return vec4(0.0, 0.0, 0.0, 0.0); } diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp index 5b0a89a569..1964edadbf 100644 --- a/src/accelerator/ogl/image/shader_to_rgba.comp +++ b/src/accelerator/ogl/image/shader_to_rgba.comp @@ -5,11 +5,11 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to void main() { - vec4 value = vec4(0.0, 0.0, 1.0, 1.0); + vec4 value = vec4(0.0, 0.0, 0.0, 1.0); ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); - value.x = float(texelCoord.x)/(gl_NumWorkGroups.x); - value.y = float(texelCoord.y)/(gl_NumWorkGroups.y); + value.r = float(texelCoord.x)/(gl_NumWorkGroups.x); + value.g = float(texelCoord.y)/(gl_NumWorkGroups.y); imageStore(imgOutput, texelCoord, value); } \ No newline at end of file diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 7b2d120728..9030edf635 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -182,7 +182,7 @@ struct device::impl : public std::enable_shared_from_this std::shared_ptr create_texture(int width, int height, int stride, bool clear) { - CASPAR_VERIFY(stride > 0 && stride < 6); + CASPAR_VERIFY(stride > 0 && stride < 7); CASPAR_VERIFY(width > 0 && height > 0); // TODO (perf) Shared pool. 
@@ -293,7 +293,7 @@ struct device::impl : public std::enable_shared_from_this } std::future> - convert_frame(const std::vector>& sources, int width, int height, int format) + convert_frame(const std::vector>& sources, int width, int height, int width_samples) { return dispatch_async([=] { @@ -303,9 +303,10 @@ struct device::impl : public std::enable_shared_from_this auto tex = create_texture(width, height, 5, false); glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); + compute_shader_->use(); - glDispatchCompute((unsigned int)width, (unsigned int)height, 1); + glDispatchCompute((unsigned int)width_samples, (unsigned int)height, 1); // make sure writing to image has finished before read glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index dedf4374f3..262b593396 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -28,9 +28,9 @@ namespace caspar { namespace accelerator { namespace ogl { -static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA}; -static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F}; -static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE}; +static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA, GL_RED}; +static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F, GL_R16F}; +static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE}; struct texture::impl { @@ -52,6 +52,9 @@ struct texture::impl { if (stride == 5) { size_ = width * height * 16; + }else + if (stride == 6) { + size_ = width * height * 2; } GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_)); diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index efe45e071d..95ac4619c6 
100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -38,8 +38,12 @@ enum class pixel_format bgr, rgb, uyvy, - ycbcr10, - ycbcra10, + ycbcr10_420, + ycbcr10_422, + ycbcr10_444, + ycbcra10_420, + ycbcra10_422, + ycbcra10_444, count, invalid, }; diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index b71e42ad86..4e0a996e03 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -139,13 +139,17 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_UYVY422: return core::pixel_format::uyvy; case AV_PIX_FMT_YUV444P10LE: + return core::pixel_format::ycbcr10_444; case AV_PIX_FMT_YUV422P10LE: + return core::pixel_format::ycbcr10_422; case AV_PIX_FMT_YUV420P10LE: - return core::pixel_format::ycbcr10; + return core::pixel_format::ycbcr10_420; case AV_PIX_FMT_YUVA444P10LE: + return core::pixel_format::ycbcra10_444; case AV_PIX_FMT_YUVA422P10LE: + return core::pixel_format::ycbcra10_422; case AV_PIX_FMT_YUVA420P10LE: - return core::pixel_format::ycbcra10; + return core::pixel_format::ycbcra10_420; default: return core::pixel_format::invalid; } @@ -178,9 +182,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } case core::pixel_format::ycbcr: - case core::pixel_format::ycbcra: - case core::pixel_format::ycbcr10: - case core::pixel_format::ycbcra10:{ + case core::pixel_format::ycbcra:{ // Find chroma height // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so // we fall back to calling av_image_fill_pointers with a NULL image buffer. 
We can't unconditionally use @@ -203,11 +205,44 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1)); desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1)); - if (desc.format == core::pixel_format::ycbcra||desc.format == core::pixel_format::ycbcra10) + if (desc.format == core::pixel_format::ycbcra) desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1)); return desc; } + case core::pixel_format::ycbcr10_420: + case core::pixel_format::ycbcr10_422: + case core::pixel_format::ycbcr10_444: + case core::pixel_format::ycbcra10_420: + case core::pixel_format::ycbcra10_422: + case core::pixel_format::ycbcra10_444: { + // Find chroma height + // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so + // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use + // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0. 
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) + size_t sizes[4]; + ptrdiff_t linesizes1[4]; + for (int i = 0; i < 4; i++) + linesizes1[i] = linesizes[i]; + av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1); + auto size2 = static_cast(sizes[1]); +#else + uint8_t* dummy_pict_data[4]; + av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes); + auto size2 = static_cast(dummy_pict_data[2] - dummy_pict_data[1]); +#endif + auto h2 = size2 / linesizes[1]; + + desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 6)); +// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1] / 2, h2, 6)); +// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2] / 2, h2, 6)); +// +// if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444) +// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3] / 2, height, 6)); + + return desc; + } case core::pixel_format::uyvy: { desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 2)); desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4)); From 1f07c7f8e286f2d0958bb3415f096cdd59580179 Mon Sep 17 00:00:00 2001 From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com> Date: Fri, 15 Dec 2023 09:07:11 +0000 Subject: [PATCH 10/50] Add 16bit support to ogl texture --- src/accelerator/ogl/util/texture.cpp | 38 ++++++++++++++++------------ src/accelerator/ogl/util/texture.h | 14 +++++----- src/common/bit_depth.h | 13 ++++++++++ 3 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 src/common/bit_depth.h diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 262b593396..478fc9b4f3 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -22,33 +22,37 @@ #include "buffer.h" +#include #include #include 
namespace caspar { namespace accelerator { namespace ogl { -static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA, GL_RGBA, GL_RED}; -static GLenum INTERNAL_FORMAT[] = {0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8, GL_RGBA32F, GL_R16F}; -static GLenum TYPE[] = {0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE}; +static GLenum FORMAT[] = {0, GL_RED, GL_RG, GL_BGR, GL_BGRA}; +static GLenum INTERNAL_FORMAT[][5] = {{0, GL_R8, GL_RG8, GL_RGB8, GL_RGBA8}, {0, GL_R16, GL_RG16, GL_RGB16, GL_RGBA16}}; +static GLenum TYPE[][5] = {{0, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, GL_UNSIGNED_INT_8_8_8_8_REV}, + {0, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT}}; struct texture::impl { - GLuint id_ = 0; - GLsizei width_ = 0; - GLsizei height_ = 0; - GLsizei stride_ = 0; - GLsizei size_ = 0; + GLuint id_ = 0; + GLsizei width_ = 0; + GLsizei height_ = 0; + GLsizei stride_ = 0; + GLsizei size_ = 0; + common::bit_depth depth_; impl(const impl&) = delete; impl& operator=(const impl&) = delete; public: - impl(int width, int height, int stride) + impl(int width, int height, int stride, common::bit_depth depth) : width_(width) , height_(height) , stride_(stride) - , size_(width * height * stride) + , depth_(depth) + , size_(width * height * stride * (1 + static_cast(depth))) { if (stride == 5) { size_ = width * height * 16; @@ -62,7 +66,7 @@ struct texture::impl GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); GL(glTextureParameteri(id_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); - GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[stride_], width_, height_)); + GL(glTextureStorage2D(id_, 1, INTERNAL_FORMAT[static_cast(depth)][stride_], width_, height_)); } ~impl() { glDeleteTextures(1, &id_); } @@ -79,7 +83,7 @@ struct texture::impl void attach() { GL(glFramebufferTexture2D(GL_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0 + 0, GL_TEXTURE_2D, id_, 0)); } - void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[stride_], nullptr)); } + void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[static_cast(depth_)][stride_], nullptr)); } #ifdef WIN32 void copy_from(int texture_id) @@ -99,7 +103,8 @@ struct texture::impl glPixelStorei(GL_UNPACK_ALIGNMENT, 4); } - GL(glTextureSubImage2D(id_, 0, 0, 0, width_, height_, FORMAT[stride_], TYPE[stride_], nullptr)); + GL(glTextureSubImage2D( + id_, 0, 0, 0, width_, height_, FORMAT[stride_], TYPE[static_cast(depth_)][stride_], nullptr)); src.unbind(); } @@ -107,13 +112,13 @@ struct texture::impl void copy_to(buffer& dst) { dst.bind(); - GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[stride_], size_, nullptr)); + GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast(depth_)][stride_], size_, nullptr)); dst.unbind(); } }; -texture::texture(int width, int height, int stride) - : impl_(new impl(width, height, stride)) +texture::texture(int width, int height, int stride, common::bit_depth depth) + : impl_(new impl(width, height, stride, depth)) { } texture::texture(texture&& other) @@ -138,6 +143,7 @@ void texture::copy_to(buffer& dest) { impl_->copy_to(dest); } int texture::width() const { return impl_->width_; } int texture::height() const { return impl_->height_; } int texture::stride() const { return impl_->stride_; } +common::bit_depth texture::depth() const { return impl_->depth_; } int texture::size() const { return impl_->size_; } int texture::id() const { return impl_->id_; } diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h index ccdca84250..ff2c117f73 100644 --- a/src/accelerator/ogl/util/texture.h +++ b/src/accelerator/ogl/util/texture.h @@ -21,6 +21,7 @@ #pragma once +#include #include namespace caspar { namespace accelerator { namespace ogl { @@ -28,7 +29,7 @@ namespace caspar { namespace accelerator { namespace ogl { class texture final { public: - 
texture(int width, int height, int stride); + texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8); texture(const texture&) = delete; texture(texture&& other); ~texture(); @@ -47,11 +48,12 @@ class texture final void bind(int index); void unbind(); - int width() const; - int height() const; - int stride() const; - int size() const; - int id() const; + int width() const; + int height() const; + int stride() const; + common::bit_depth depth() const; + int size() const; + int id() const; private: struct impl; diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h new file mode 100644 index 0000000000..9d1b633f84 --- /dev/null +++ b/src/common/bit_depth.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace caspar { namespace common { + +enum class bit_depth : uint8_t +{ + bit8 = 0, + bit16 = 1, +}; + +}} // namespace caspar::common \ No newline at end of file From a940412aa9bf70d999d31c52574a44fb39a0210b Mon Sep 17 00:00:00 2001 From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:12:15 +0000 Subject: [PATCH 11/50] add 16bit support to ogl device --- src/accelerator/ogl/util/device.cpp | 91 ++++++++++++++++------------- src/accelerator/ogl/util/device.h | 12 +++- 2 files changed, 58 insertions(+), 45 deletions(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 9030edf635..d6ae89dd31 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -66,8 +66,8 @@ struct device::impl : public std::enable_shared_from_this sf::Context device_; - std::array, 4> device_pools_; - std::array, 2> host_pools_; + std::array, 4>, 2> device_pools_; + std::array, 2> host_pools_; using sync_queue_t = tbb::concurrent_bounded_queue>; @@ -140,8 +140,9 @@ struct device::impl : public std::enable_shared_from_this for (auto& pool : host_pools_) pool.clear(); - for (auto& pool : device_pools_) - pool.clear(); + for (auto& 
pools : device_pools_) + for (auto& pool : pools) + pool.clear(); sync_queue_.clear(); @@ -180,17 +181,18 @@ struct device::impl : public std::enable_shared_from_this std::wstring version() { return version_; } - std::shared_ptr create_texture(int width, int height, int stride, bool clear) + std::shared_ptr create_texture(int width, int height, int stride, common::bit_depth depth, bool clear) { CASPAR_VERIFY(stride > 0 && stride < 7); CASPAR_VERIFY(width > 0 && height > 0); // TODO (perf) Shared pool. - auto pool = &device_pools_[stride - 1][(width << 16 & 0xFFFF0000) | (height & 0x0000FFFF)]; + auto pool = + &device_pools_[static_cast(depth)][stride - 1][(width << 16 & 0xFFFF0000) | (height & 0x0000FFFF)]; std::shared_ptr tex; if (!pool->try_pop(tex)) { - tex = std::make_shared(width, height, stride); + tex = std::make_shared(width, height, stride, depth); } if (clear) { @@ -231,7 +233,7 @@ struct device::impl : public std::enable_shared_from_this std::future> - copy_async(const array& source, int width, int height, int stride) + copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) { return dispatch_async([=] { std::shared_ptr buf; @@ -245,7 +247,7 @@ struct device::impl : public std::enable_shared_from_this std::memcpy(buf->data(), source.data(), source.size()); } - auto tex = create_texture(width, height, stride, false); + auto tex = create_texture(width, height, stride, depth, false); tex->copy_from(*buf); // TODO (perf) save tex on source return tex; @@ -355,32 +357,35 @@ struct device::impl : public std::enable_shared_from_this size_t total_pooled_device_buffer_count = 0; for (size_t i = 0; i < device_pools_.size(); ++i) { - auto& pools = device_pools_.at(i); - bool mipmapping = i > 3; - auto stride = mipmapping ? 
i - 3 : i + 1; - - for (auto& pool : pools) { - auto width = pool.first >> 16; - auto height = pool.first & 0x0000FFFF; - auto size = width * height * stride; - auto count = pool.second.size(); - - if (count == 0) - continue; - - boost::property_tree::wptree pool_info; - - pool_info.add(L"stride", stride); - pool_info.add(L"mipmapping", mipmapping); - pool_info.add(L"width", width); - pool_info.add(L"height", height); - pool_info.add(L"size", size); - pool_info.add(L"count", count); - - total_pooled_device_buffer_size += size * count; - total_pooled_device_buffer_count += count; - - pooled_device_buffers.add_child(L"device_buffer_pool", pool_info); + auto& depth_pools = device_pools_.at(i); + for (size_t i = 0; i < depth_pools.size(); ++i) { + auto& pools = depth_pools.at(i); + bool mipmapping = i > 3; + auto stride = mipmapping ? i - 3 : i + 1; + + for (auto& pool : pools) { + auto width = pool.first >> 16; + auto height = pool.first & 0x0000FFFF; + auto size = width * height * stride; + auto count = pool.second.size(); + + if (count == 0) + continue; + + boost::property_tree::wptree pool_info; + + pool_info.add(L"stride", stride); + pool_info.add(L"mipmapping", mipmapping); + pool_info.add(L"width", width); + pool_info.add(L"height", height); + pool_info.add(L"size", size); + pool_info.add(L"count", count); + + total_pooled_device_buffer_size += size * count; + total_pooled_device_buffer_count += count; + + pooled_device_buffers.add_child(L"device_buffer_pool", pool_info); + } } } @@ -435,9 +440,11 @@ struct device::impl : public std::enable_shared_from_this CASPAR_LOG(info) << " ogl: Running GC."; try { - for (auto& pools : device_pools_) { - for (auto& pool : pools) - pool.second.clear(); + for (auto& depth_pools : device_pools_) { + for (auto& pools : depth_pools) { + for (auto& pool : pools) + pool.second.clear(); + } } for (auto& pools : host_pools_) { for (auto& pool : pools) @@ -455,15 +462,15 @@ device::device() { } device::~device() {} -std::shared_ptr 
device::create_texture(int width, int height, int stride) +std::shared_ptr device::create_texture(int width, int height, int stride, common::bit_depth depth) { - return impl_->create_texture(width, height, stride, true); + return impl_->create_texture(width, height, stride, depth, true); } array device::create_array(int size) { return impl_->create_array(size); } std::future> -device::copy_async(const array& source, int width, int height, int stride) +device::copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) { - return impl_->copy_async(source, width, height, stride); + return impl_->copy_async(source, width, height, stride, depth); } std::future> device::copy_async(const std::shared_ptr& source) { diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 49e1a03a90..c9a5bfe604 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -45,11 +46,16 @@ class device final device& operator=(const device&) = delete; - std::shared_ptr create_texture(int width, int height, int stride); - array create_array(int size); + std::shared_ptr + create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8); + array create_array(int size); std::future> - copy_async(const array& source, int width, int height, int stride); + copy_async(const array& source, + int width, + int height, + int stride, + common::bit_depth depth = common::bit_depth::bit8); // TODO: remove default value std::future> copy_async(const std::shared_ptr& source); std::future> From 1332a46f1708b71536298a2e3e634c67ee1e28b9 Mon Sep 17 00:00:00 2001 From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com> Date: Fri, 15 Dec 2023 12:35:13 +0000 Subject: [PATCH 12/50] Add create_frame override to specify bit_depth in frame_factory interface --- src/accelerator/ogl/image/image_mixer.cpp | 12 
++++++++++++ src/accelerator/ogl/image/image_mixer.h | 3 +++ src/core/frame/frame_factory.h | 5 +++++ src/core/mixer/image/image_mixer.h | 4 ++++ 4 files changed, 24 insertions(+) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 7f5683025a..8601f91aae 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -28,6 +28,7 @@ #include "frame_converter.h" #include +#include #include #include @@ -318,6 +319,12 @@ struct image_mixer::impl } core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override + { + return create_frame(tag, desc, common::bit_depth::bit8); // TODO: replace with channel default + } + + core::mutable_frame + create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) override { std::vector> image_data; for (auto& plane : desc.planes) { @@ -381,6 +388,11 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel { return impl_->create_frame(tag, desc); } +core::mutable_frame +image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) +{ + return impl_->create_frame(tag, desc, depth); +} std::shared_ptr image_mixer::create_frame_converter() { return impl_->create_frame_converter(); } }}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index 19be36225d..679c618599 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include @@ -45,6 +46,8 @@ class image_mixer final : public core::image_mixer std::future> operator()(const core::video_format_desc& format_desc) override; core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; + core::mutable_frame + create_frame(const void* video_stream_tag, 
const core::pixel_format_desc& desc, common::bit_depth depth) override; std::shared_ptr create_frame_converter() override; diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 57b660dfb8..ba5586a98e 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -21,6 +21,8 @@ #pragma once +#include + namespace caspar { namespace core { class frame_converter { @@ -48,6 +50,9 @@ class frame_factory virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; + virtual class mutable_frame + create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc, common::bit_depth depth) = 0; + virtual std::shared_ptr create_frame_converter() = 0; }; diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index dcff0d1b79..621349382b 100644 --- a/src/core/mixer/image/image_mixer.h +++ b/src/core/mixer/image/image_mixer.h @@ -49,6 +49,10 @@ class image_mixer class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0; + class mutable_frame create_frame(const void* video_stream_tag, + const struct pixel_format_desc& desc, + common::bit_depth depth) override = 0; + std::shared_ptr create_frame_converter() override = 0; }; From 8f871711c942b260eaf1d2824d28cf4bc2f2864d Mon Sep 17 00:00:00 2001 From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com> Date: Fri, 15 Dec 2023 13:42:58 +0000 Subject: [PATCH 13/50] add native_depth property to caspar::array --- src/accelerator/ogl/image/image_mixer.cpp | 2 +- src/accelerator/ogl/util/device.cpp | 18 +++++---- src/accelerator/ogl/util/device.h | 8 +--- src/common/array.h | 45 +++++++++++++++-------- 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 8601f91aae..24cf67cbd6 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ 
b/src/accelerator/ogl/image/image_mixer.cpp @@ -328,7 +328,7 @@ struct image_mixer::impl { std::vector> image_data; for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size)); + image_data.push_back(ogl_->create_array(plane.size, depth)); } std::weak_ptr weak_self = shared_from_this(); diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index d6ae89dd31..711ed323e9 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -223,18 +223,20 @@ struct device::impl : public std::enable_shared_from_this }); } - array create_array(int size) + array create_array(int count, common::bit_depth depth) { - auto buf = create_buffer(size, true); - auto ptr = reinterpret_cast(buf->data()); - return array(ptr, buf->size(), buf); + auto bytes_per_pixel = static_cast(depth) + 1; + auto buf = create_buffer(count * bytes_per_pixel, true); + auto ptr = reinterpret_cast(buf->data()); + return array(ptr, buf->size(), buf, depth); } std::future> - copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) + copy_async(const array& source, int width, int height, int stride) { + auto depth = source.native_depth(); return dispatch_async([=] { std::shared_ptr buf; @@ -466,11 +468,11 @@ std::shared_ptr device::create_texture(int width, int height, int strid { return impl_->create_texture(width, height, stride, depth, true); } -array device::create_array(int size) { return impl_->create_array(size); } +array device::create_array(int size, common::bit_depth depth) { return impl_->create_array(size, depth); } std::future> -device::copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) +device::copy_async(const array& source, int width, int height, int stride) { - return impl_->copy_async(source, width, height, stride, depth); + return impl_->copy_async(source, width, height, stride); } std::future> device::copy_async(const 
std::shared_ptr& source) { diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index c9a5bfe604..1900c9f877 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -48,14 +48,10 @@ class device final std::shared_ptr create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8); - array create_array(int size); + array create_array(int size, common::bit_depth depth); std::future> - copy_async(const array& source, - int width, - int height, - int stride, - common::bit_depth depth = common::bit_depth::bit8); // TODO: remove default value + copy_async(const array& source, int width, int height, int stride); std::future> copy_async(const std::shared_ptr& source); std::future> diff --git a/src/common/array.h b/src/common/array.h index b7a6019b71..97b0d411c2 100644 --- a/src/common/array.h +++ b/src/common/array.h @@ -2,6 +2,8 @@ #include +#include "bit_depth.h" + #include #include #include @@ -41,10 +43,11 @@ class array final } template - explicit array(T* ptr, std::size_t size, S&& storage) + explicit array(T* ptr, std::size_t size, S&& storage, common::bit_depth native_depth = common::bit_depth::bit8) : ptr_(ptr) , size_(size) , storage_(std::make_shared(std::forward(storage))) + , native_depth_(native_depth) { } @@ -54,6 +57,7 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(std::move(other.storage_)) + , native_depth_(other.native_depth_) { other.ptr_ = nullptr; other.size_ = 0; @@ -63,17 +67,19 @@ class array final array& operator=(array&& other) { - ptr_ = std::move(other.ptr_); - size_ = std::move(other.size_); - storage_ = std::move(other.storage_); + ptr_ = std::move(other.ptr_); + size_ = std::move(other.size_); + storage_ = std::move(other.storage_); + native_depth_ = std::move(other.native_depth_); return *this; } - T* begin() const { return ptr_; } - T* data() const { return ptr_; } - T* end() const { return ptr_ + size_; } - 
std::size_t size() const { return size_; } + T* begin() const { return ptr_; } + T* data() const { return ptr_; } + T* end() const { return ptr_ + size_; } + std::size_t size() const { return size_; } + common::bit_depth native_depth() const { return native_depth_; } explicit operator bool() const { return size_ > 0; }; @@ -84,8 +90,9 @@ class array final } private: - T* ptr_ = nullptr; - std::size_t size_ = 0; + T* ptr_ = nullptr; + std::size_t size_ = 0; + common::bit_depth native_depth_ = common::bit_depth::bit8; std::shared_ptr storage_; }; @@ -118,10 +125,14 @@ class array final } template - explicit array(const T* ptr, std::size_t size, S&& storage) + explicit array(const T* ptr, + std::size_t size, + S&& storage, + common::bit_depth native_depth = common::bit_depth::bit8) : ptr_(ptr) , size_(size) , storage_(std::make_shared(std::forward(storage))) + , native_depth_(native_depth) { } @@ -129,6 +140,7 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(other.storage_) + , native_depth_(other.native_depth_) { } @@ -136,6 +148,7 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(other.storage_) + , native_depth_(other.native_depth_) { other.ptr_ = nullptr; other.size_ = 0; @@ -150,10 +163,11 @@ class array final return *this; } - const T* begin() const { return ptr_; } - const T* data() const { return ptr_; } - const T* end() const { return ptr_ + size_; } - std::size_t size() const { return size_; } + const T* begin() const { return ptr_; } + const T* data() const { return ptr_; } + const T* end() const { return ptr_ + size_; } + std::size_t size() const { return size_; } + common::bit_depth native_depth() const { return native_depth_; } explicit operator bool() const { return size_ > 0; } @@ -167,6 +181,7 @@ class array final const T* ptr_ = nullptr; std::size_t size_ = 0; std::shared_ptr storage_; + common::bit_depth native_depth_ = common::bit_depth::bit8; }; } // namespace caspar From 
723419dae089943079d3809e81c4a7154b6ff988 Mon Sep 17 00:00:00 2001 From: Niklas Andersson <3985238+niklaspandersson@users.noreply.github.com> Date: Fri, 15 Dec 2023 13:54:41 +0000 Subject: [PATCH 14/50] add 16bits support to image_mixer --- src/accelerator/accelerator.cpp | 11 +++---- src/accelerator/accelerator.h | 4 ++- src/accelerator/ogl/image/image_mixer.cpp | 36 +++++++++++++---------- src/accelerator/ogl/image/image_mixer.h | 2 +- src/accelerator/ogl/util/device.h | 5 ++-- src/shell/server.cpp | 3 +- 6 files changed, 34 insertions(+), 27 deletions(-) diff --git a/src/accelerator/accelerator.cpp b/src/accelerator/accelerator.cpp index 5668553ac6..fb1017091d 100644 --- a/src/accelerator/accelerator.cpp +++ b/src/accelerator/accelerator.cpp @@ -5,6 +5,8 @@ #include +#include + #include #include @@ -23,10 +25,9 @@ struct accelerator::impl { } - std::unique_ptr create_image_mixer(const int channel_id) + std::unique_ptr create_image_mixer(int channel_id, common::bit_depth depth) { - return std::make_unique( - spl::make_shared_ptr(get_device()), channel_id, format_repository_.get_max_video_format_size()); + return std::make_unique(spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size()); } std::shared_ptr get_device() @@ -46,9 +47,9 @@ accelerator::accelerator(const core::video_format_repository format_repository) accelerator::~accelerator() {} -std::unique_ptr accelerator::create_image_mixer(const int channel_id) +std::unique_ptr accelerator::create_image_mixer(const int channel_id, common::bit_depth depth) { - return impl_->create_image_mixer(channel_id); + return impl_->create_image_mixer(channel_id, depth); } std::shared_ptr accelerator::get_device() const diff --git a/src/accelerator/accelerator.h b/src/accelerator/accelerator.h index 5bd67a5f55..f7419d7f99 100644 --- a/src/accelerator/accelerator.h +++ b/src/accelerator/accelerator.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -27,7 +29,7 @@ class 
accelerator accelerator& operator=(accelerator&) = delete; - std::unique_ptr create_image_mixer(int channel_id); + std::unique_ptr create_image_mixer(int channel_id, common::bit_depth depth); std::shared_ptr get_device() const; diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 24cf67cbd6..7cf1c2ad88 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -74,11 +74,13 @@ class image_renderer spl::shared_ptr ogl_; image_kernel kernel_; const size_t max_frame_size_; + common::bit_depth depth_; public: - explicit image_renderer(const spl::shared_ptr& ogl, const size_t max_frame_size) + explicit image_renderer(const spl::shared_ptr& ogl, common::bit_depth depth, const size_t max_frame_size) : ogl_(ogl) , kernel_(ogl_) + , depth_(depth) , max_frame_size_(max_frame_size) { } @@ -87,12 +89,12 @@ class image_renderer const core::video_format_desc& format_desc) { if (layers.empty()) { // Bypass GPU with empty frame. 
- static const std::vector buffer(max_frame_size_, 0); - return make_ready_future(array(buffer.data(), format_desc.size, true)); + static const std::vector buffer(max_frame_size * 2, 0); // TODO better + return make_ready_future(array(buffer.data(), format_desc.size, true, depth_)); } return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future> { - auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4); + auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_); draw(target_texture, std::move(layers), format_desc); @@ -125,7 +127,7 @@ class image_renderer std::shared_ptr local_mix_texture; if (layer.blend_mode != core::blend_mode::normal) { - auto layer_texture = ogl_->create_texture(target_texture->width(), target_texture->height(), 4); + auto layer_texture = ogl_->create_texture(target_texture->width(), target_texture->height(), 4, depth_); for (auto& item : layer.items) draw(layer_texture, @@ -172,9 +174,9 @@ class image_renderer } if (item.transform.is_key) { - local_key_texture = local_key_texture - ? local_key_texture - : ogl_->create_texture(target_texture->width(), target_texture->height(), 1); + local_key_texture = + local_key_texture ? local_key_texture + : ogl_->create_texture(target_texture->width(), target_texture->height(), 1, depth_); draw_params.background = local_key_texture; draw_params.local_key = nullptr; @@ -182,9 +184,9 @@ class image_renderer kernel_.draw(std::move(draw_params)); } else if (item.transform.is_mix) { - local_mix_texture = local_mix_texture - ? local_mix_texture - : ogl_->create_texture(target_texture->width(), target_texture->height(), 4); + local_mix_texture = + local_mix_texture ? 
local_mix_texture + : ogl_->create_texture(target_texture->width(), target_texture->height(), 4, depth_); draw_params.background = local_mix_texture; draw_params.local_key = std::move(local_key_texture); @@ -234,12 +236,14 @@ struct image_mixer::impl std::vector transform_stack_; std::vector layers_; // layer/stream/items std::vector layer_stack_; + common::bit_depth depth_; public: - impl(const spl::shared_ptr& ogl, const int channel_id, const size_t max_frame_size) + impl(const spl::shared_ptr& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size) : ogl_(ogl) - , renderer_(ogl, max_frame_size) + , renderer_(ogl, depth, max_frame_size) , transform_stack_(1) + , depth_(depth) { CASPAR_LOG(info) << L"Initialized OpenGL Accelerated GPU Image Mixer for channel " << channel_id; } @@ -320,7 +324,7 @@ struct image_mixer::impl core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override { - return create_frame(tag, desc, common::bit_depth::bit8); // TODO: replace with channel default + return create_frame(tag, desc, depth_); } core::mutable_frame @@ -372,8 +376,8 @@ struct image_mixer::impl } }; -image_mixer::image_mixer(const spl::shared_ptr& ogl, const int channel_id, const size_t max_frame_size) - : impl_(std::make_unique(ogl, channel_id, max_frame_size)) +image_mixer::image_mixer(const spl::shared_ptr& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size) + : impl_(std::make_unique(ogl, channel_id, depth,max_frame_size)) { } image_mixer::~image_mixer() {} diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index 679c618599..d159f50183 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -37,7 +37,7 @@ namespace caspar { namespace accelerator { namespace ogl { class image_mixer final : public core::image_mixer { public: - image_mixer(const spl::shared_ptr& ogl, int channel_id, const size_t 
max_frame_size); + image_mixer(const spl::shared_ptr& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size); image_mixer(const image_mixer&) = delete; ~image_mixer(); diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 1900c9f877..91e470e370 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -46,9 +46,8 @@ class device final device& operator=(const device&) = delete; - std::shared_ptr - create_texture(int width, int height, int stride, common::bit_depth depth = common::bit_depth::bit8); - array create_array(int size, common::bit_depth depth); + std::shared_ptr create_texture(int width, int height, int stride, common::bit_depth depth); + array create_array(int size, common::bit_depth depth); std::future> copy_async(const array& source, int width, int height, int stride); diff --git a/src/shell/server.cpp b/src/shell/server.cpp index 09ea2d6f5e..81758999f8 100644 --- a/src/shell/server.cpp +++ b/src/shell/server.cpp @@ -24,6 +24,7 @@ #include +#include #include #include #include @@ -263,7 +264,7 @@ struct server::impl auto channel = spl::make_shared(channel_id, format_desc, - accelerator_.create_image_mixer(channel_id), + accelerator_.create_image_mixer(channel_id, common::bit_depth::bit8), [channel_id, weak_client](core::monitor::state channel_state) { monitor::state state; state[""]["channel"][channel_id] = channel_state; From b96d98e533441c9e1cf284c6836d9a0ceec2bbf4 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 17:00:36 +0000 Subject: [PATCH 15/50] wip: correct colour --- src/accelerator/ogl/image/frame_converter.cpp | 2 +- src/accelerator/ogl/image/image_mixer.cpp | 15 ++--------- src/accelerator/ogl/image/image_mixer.h | 2 -- src/accelerator/ogl/image/shader.frag | 22 +++++++++++++--- src/accelerator/ogl/util/device.cpp | 4 +-- src/core/frame/frame_factory.h | 3 --- src/core/frame/pixel_format.h | 18 +++++++++---- 
src/core/mixer/image/image_mixer.h | 4 --- src/modules/ffmpeg/util/av_util.cpp | 26 ++++++------------- 9 files changed, 45 insertions(+), 51 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 4abc7bb6ba..1baf619413 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -32,7 +32,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor std::vector> image_data; for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size)); + image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth } using future_texture = std::shared_future>; diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 7cf1c2ad88..cd4ab31428 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -89,7 +89,7 @@ class image_renderer const core::video_format_desc& format_desc) { if (layers.empty()) { // Bypass GPU with empty frame. 
- static const std::vector buffer(max_frame_size * 2, 0); // TODO better + static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better return make_ready_future(array(buffer.data(), format_desc.size, true, depth_)); } @@ -323,16 +323,10 @@ struct image_mixer::impl } core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override - { - return create_frame(tag, desc, depth_); - } - - core::mutable_frame - create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) override { std::vector> image_data; for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size, depth)); + image_data.push_back(ogl_->create_array(plane.size, plane.depth)); } std::weak_ptr weak_self = shared_from_this(); @@ -392,11 +386,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel { return impl_->create_frame(tag, desc); } -core::mutable_frame -image_mixer::create_frame(const void* tag, const core::pixel_format_desc& desc, common::bit_depth depth) -{ - return impl_->create_frame(tag, desc, depth); -} std::shared_ptr image_mixer::create_frame_converter() { return impl_->create_frame_converter(); } }}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index d159f50183..12b954d713 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -46,8 +46,6 @@ class image_mixer final : public core::image_mixer std::future> operator()(const core::video_format_desc& format_desc) override; core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; - core::mutable_frame - create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc, common::bit_depth depth) override; std::shared_ptr create_frame_converter() override; diff --git a/src/accelerator/ogl/image/shader.frag 
b/src/accelerator/ogl/image/shader.frag index baeaaae34c..2d8d4a6dc1 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -486,13 +486,21 @@ vec4 get_rgba_color() case 4: //abgr, return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar; case 5: //ycbcr, - case 11: //ycbcr10_420 - case 12: //ycbcr10_422 - case 13: //ycbcr10_444 + case 11: //ycbcr10_420 + case 12: //ycbcr10_422 + case 13: //ycbcr10_444 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r; + + if (pixel_format >= 10){ + // unpack 16bit to 10bit + y *= 64; + cb *= 64; + cr *= 64; + } + return ycbcra_to_rgba(y, cb, cr, 1.0); } case 6: //ycbcra @@ -504,6 +512,14 @@ vec4 get_rgba_color() float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r; float a = get_sample(plane[3], TexCoord.st / TexCoord.q).r; + + if (pixel_format >= 10){ + // unpack 16bit to 10bit + y *= 64; + cb *= 64; + cr *= 64; + } + return ycbcra_to_rgba(y, cb, cr, a); } case 7: //luma diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 711ed323e9..68f0bed0a1 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -183,7 +183,7 @@ struct device::impl : public std::enable_shared_from_this std::shared_ptr create_texture(int width, int height, int stride, common::bit_depth depth, bool clear) { - CASPAR_VERIFY(stride > 0 && stride < 7); + CASPAR_VERIFY(stride > 0 && stride < 5); CASPAR_VERIFY(width > 0 && height > 0); // TODO (perf) Shared pool. 
@@ -304,7 +304,7 @@ struct device::impl : public std::enable_shared_from_this if (!compute_shader_) compute_shader_ = std::make_unique(std::string(compute_to_rgba_shader)); - auto tex = create_texture(width, height, 5, false); + auto tex = create_texture(width, height, 4, common::bit_depth::bit16, false); glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index ba5586a98e..ff4403a854 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -50,9 +50,6 @@ class frame_factory virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; - virtual class mutable_frame - create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc, common::bit_depth depth) = 0; - virtual std::shared_ptr create_frame_converter() = 0; }; diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index 95ac4619c6..32ffe30df9 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -21,6 +21,7 @@ #pragma once +#include #include namespace caspar { namespace core { @@ -57,15 +58,22 @@ struct pixel_format_desc final int height = 0; int size = 0; int stride = 0; + common::bit_depth depth = common::bit_depth::bit8; plane() = default; plane(int width, int height, int stride) - : linesize(width * stride) - , width(width) - , height(height) - , size(width * height * stride) - , stride(stride) + : plane(width, height, stride, common::bit_depth::bit8) + { + } + + plane(int width, int height, int stride, common::bit_depth depth) + : linesize(width * stride * (static_cast(depth) + 1)) + , width(width) + , height(height) + , size(width * height * stride) + , stride(stride) + , depth(depth) { } }; diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index 621349382b..dcff0d1b79 100644 --- a/src/core/mixer/image/image_mixer.h +++ 
b/src/core/mixer/image/image_mixer.h @@ -49,10 +49,6 @@ class image_mixer class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0; - class mutable_frame create_frame(const void* video_stream_tag, - const struct pixel_format_desc& desc, - common::bit_depth depth) override = 0; - std::shared_ptr create_frame_converter() override = 0; }; diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index 4e0a996e03..b97e131aed 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -216,30 +216,20 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int case core::pixel_format::ycbcra10_420: case core::pixel_format::ycbcra10_422: case core::pixel_format::ycbcra10_444: { - // Find chroma height - // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so - // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use - // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0. 
-#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) size_t sizes[4]; ptrdiff_t linesizes1[4]; for (int i = 0; i < 4; i++) linesizes1[i] = linesizes[i]; av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1); - auto size2 = static_cast(sizes[1]); -#else - uint8_t* dummy_pict_data[4]; - av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes); - auto size2 = static_cast(dummy_pict_data[2] - dummy_pict_data[1]); -#endif - auto h2 = size2 / linesizes[1]; - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 6)); -// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1] / 2, h2, 6)); -// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2] / 2, h2, 6)); -// -// if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444) -// desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3] / 2, height, 6)); + auto h2 = static_cast(sizes[1]) / linesizes[1]; + + desc.planes.emplace_back(linesizes[0] / 2, height, 1, common::bit_depth::bit16); + desc.planes.emplace_back(linesizes[1] / 2, h2, 1, common::bit_depth::bit16); + desc.planes.emplace_back(linesizes[2] / 2, h2, 1, common::bit_depth::bit16); + + if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444) + desc.planes.emplace_back(linesizes[3] / 2, height, 1, common::bit_depth::bit16); return desc; } From 79cde825eedc44f3990757c1d457d3729658f6a2 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 17:02:25 +0000 Subject: [PATCH 16/50] simplify --- src/accelerator/ogl/image/shader.frag | 12 ++++-------- src/core/frame/pixel_format.h | 8 ++------ src/modules/ffmpeg/util/av_util.cpp | 18 +++++------------- 3 files changed, 11 insertions(+), 27 deletions(-) diff --git a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag 
index 2d8d4a6dc1..89776e85ca 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -486,15 +486,13 @@ vec4 get_rgba_color() case 4: //abgr, return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar; case 5: //ycbcr, - case 11: //ycbcr10_420 - case 12: //ycbcr10_422 - case 13: //ycbcr10_444 + case 11: //ycbcr10 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r; - if (pixel_format >= 10){ + if (pixel_format == 11){ //ycbcr10 // unpack 16bit to 10bit y *= 64; cb *= 64; @@ -504,16 +502,14 @@ vec4 get_rgba_color() return ycbcra_to_rgba(y, cb, cr, 1.0); } case 6: //ycbcra - case 14: //ycbcra10_420 - case 15: //ycbcra10_422 - case 16: //ycbcra10_444 + case 12: //ycbcra10 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; float cr = get_sample(plane[2], TexCoord.st / TexCoord.q).r; float a = get_sample(plane[3], TexCoord.st / TexCoord.q).r; - if (pixel_format >= 10){ + if (pixel_format == 12){ //ycbcra10 // unpack 16bit to 10bit y *= 64; cb *= 64; diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index 32ffe30df9..b03569bedf 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -39,12 +39,8 @@ enum class pixel_format bgr, rgb, uyvy, - ycbcr10_420, - ycbcr10_422, - ycbcr10_444, - ycbcra10_420, - ycbcra10_422, - ycbcra10_444, + ycbcr10, + ycbcra10, count, invalid, }; diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index b97e131aed..dd64f60551 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -139,17 +139,13 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_UYVY422: return core::pixel_format::uyvy; case AV_PIX_FMT_YUV444P10LE: - return core::pixel_format::ycbcr10_444; 
case AV_PIX_FMT_YUV422P10LE: - return core::pixel_format::ycbcr10_422; case AV_PIX_FMT_YUV420P10LE: - return core::pixel_format::ycbcr10_420; + return core::pixel_format::ycbcr10; case AV_PIX_FMT_YUVA444P10LE: - return core::pixel_format::ycbcra10_444; case AV_PIX_FMT_YUVA422P10LE: - return core::pixel_format::ycbcra10_422; case AV_PIX_FMT_YUVA420P10LE: - return core::pixel_format::ycbcra10_420; + return core::pixel_format::ycbcra10; default: return core::pixel_format::invalid; } @@ -210,12 +206,8 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } - case core::pixel_format::ycbcr10_420: - case core::pixel_format::ycbcr10_422: - case core::pixel_format::ycbcr10_444: - case core::pixel_format::ycbcra10_420: - case core::pixel_format::ycbcra10_422: - case core::pixel_format::ycbcra10_444: { + case core::pixel_format::ycbcr10: + case core::pixel_format::ycbcra10: { size_t sizes[4]; ptrdiff_t linesizes1[4]; for (int i = 0; i < 4; i++) @@ -228,7 +220,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int desc.planes.emplace_back(linesizes[1] / 2, h2, 1, common::bit_depth::bit16); desc.planes.emplace_back(linesizes[2] / 2, h2, 1, common::bit_depth::bit16); - if (desc.format == core::pixel_format::ycbcra10_420||desc.format == core::pixel_format::ycbcra10_422||desc.format == core::pixel_format::ycbcra10_444) + if (desc.format == core::pixel_format::ycbcra10) desc.planes.emplace_back(linesizes[3] / 2, height, 1, common::bit_depth::bit16); return desc; From 8e934cd9ee3f7daf8bc5e4eaa2b024f14ea48bb5 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 17:22:44 +0000 Subject: [PATCH 17/50] add 16bit yuv, untested --- src/accelerator/ogl/image/shader.frag | 11 ++--------- src/core/frame/pixel_format.h | 2 ++ src/modules/ffmpeg/producer/av_producer.cpp | 6 ++++++ src/modules/ffmpeg/util/av_util.cpp | 12 +++++++++++- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git 
a/src/accelerator/ogl/image/shader.frag b/src/accelerator/ogl/image/shader.frag index 89776e85ca..f8d3356c6c 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -487,6 +487,7 @@ vec4 get_rgba_color() return get_sample(plane[0], TexCoord.st / TexCoord.q).gbar; case 5: //ycbcr, case 11: //ycbcr10 + case 13: //ycbcr16 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; @@ -503,6 +504,7 @@ vec4 get_rgba_color() } case 6: //ycbcra case 12: //ycbcra10 + case 14: //ycbcra16 { float y = get_sample(plane[0], TexCoord.st / TexCoord.q).r; float cb = get_sample(plane[1], TexCoord.st / TexCoord.q).r; @@ -534,15 +536,6 @@ vec4 get_rgba_color() float cr = get_sample(plane[1], TexCoord.st / TexCoord.q).r; return ycbcra_to_rgba(y, cb, cr, 1.0); } - // formats converted from packed formats -// case 11: //ycbcr10_420 -// case 12: //ycbcr10_422 -// case 13: //ycbcr10_444 -// return vec4(get_sample(plane[0], TexCoord.st / TexCoord.q).rgb, 1.0); -// case 14: //ycbcra10_420 -// case 15: //ycbcra10_422 -// case 16: //ycbcra10_444 -// return get_sample(plane[0], TexCoord.st / TexCoord.q).rgba; } return vec4(0.0, 0.0, 0.0, 0.0); } diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index b03569bedf..d24fbdfeb4 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -41,6 +41,8 @@ enum class pixel_format uyvy, ycbcr10, ycbcra10, + ycbcr16, + ycbcra16, count, invalid, }; diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index 749d1aa56b..ce52ff8a11 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -512,10 +512,16 @@ struct Filter AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE, // AV_PIX_FMT_YUV410P10LE, + AV_PIX_FMT_YUV444P16LE, + AV_PIX_FMT_YUV422P16LE, + AV_PIX_FMT_YUV420P16LE, AV_PIX_FMT_YUVA444P10LE, 
AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA420P10LE, // AV_PIX_FMT_UYVY42210LE, + AV_PIX_FMT_YUVA444P16LE, + AV_PIX_FMT_YUVA422P16LE, + AV_PIX_FMT_YUVA420P16LE, AV_PIX_FMT_NONE}; FF(av_opt_set_int_list(sink, "pix_fmts", pix_fmts, -1, AV_OPT_SEARCH_CHILDREN)); #ifdef _MSC_VER diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index dd64f60551..bdba15e853 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -142,10 +142,18 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_YUV422P10LE: case AV_PIX_FMT_YUV420P10LE: return core::pixel_format::ycbcr10; + case AV_PIX_FMT_YUV444P16LE: + case AV_PIX_FMT_YUV422P16LE: + case AV_PIX_FMT_YUV420P16LE: + return core::pixel_format::ycbcr16; case AV_PIX_FMT_YUVA444P10LE: case AV_PIX_FMT_YUVA422P10LE: case AV_PIX_FMT_YUVA420P10LE: return core::pixel_format::ycbcra10; + case AV_PIX_FMT_YUVA444P16LE: + case AV_PIX_FMT_YUVA422P16LE: + case AV_PIX_FMT_YUVA420P16LE: + return core::pixel_format::ycbcra16; default: return core::pixel_format::invalid; } @@ -207,7 +215,9 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } case core::pixel_format::ycbcr10: - case core::pixel_format::ycbcra10: { + case core::pixel_format::ycbcra10: + case core::pixel_format::ycbcr16: + case core::pixel_format::ycbcra16: { size_t sizes[4]; ptrdiff_t linesizes1[4]; for (int i = 0; i < 4; i++) From 0c69bb809955d2e39f1b07e0d1b65edd43373b83 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 17:42:12 +0000 Subject: [PATCH 18/50] wip: propogate frame_converter type to consumers --- src/accelerator/ogl/image/image_mixer.cpp | 6 +++--- src/accelerator/ogl/image/image_mixer.h | 2 +- src/core/consumer/frame_consumer.cpp | 6 ++++-- src/core/consumer/frame_consumer.h | 4 ++++ src/core/frame/frame_factory.h | 2 +- src/core/fwd.h | 1 + src/core/mixer/image/image_mixer.h | 2 +- src/core/video_channel.cpp | 4 ++++ 
src/core/video_channel.h | 1 + src/modules/artnet/consumer/artnet_consumer.cpp | 1 + src/modules/artnet/consumer/artnet_consumer.h | 1 + src/modules/bluefish/consumer/bluefish_consumer.cpp | 2 ++ src/modules/bluefish/consumer/bluefish_consumer.h | 2 ++ src/modules/decklink/consumer/decklink_consumer.cpp | 2 ++ src/modules/decklink/consumer/decklink_consumer.h | 2 ++ src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp | 2 ++ src/modules/ffmpeg/consumer/ffmpeg_consumer.h | 2 ++ src/modules/image/consumer/image_consumer.cpp | 1 + src/modules/image/consumer/image_consumer.h | 1 + src/modules/newtek/consumer/newtek_ndi_consumer.cpp | 2 ++ src/modules/newtek/consumer/newtek_ndi_consumer.h | 2 ++ src/modules/oal/consumer/oal_consumer.cpp | 2 ++ src/modules/oal/consumer/oal_consumer.h | 2 ++ src/modules/screen/consumer/screen_consumer.cpp | 2 ++ src/modules/screen/consumer/screen_consumer.h | 2 ++ src/protocol/amcp/AMCPCommandsImpl.cpp | 8 ++++---- src/shell/server.cpp | 2 +- 27 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index cd4ab31428..35324356c8 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -364,9 +364,9 @@ struct image_mixer::impl }); } - std::shared_ptr create_frame_converter() override + spl::shared_ptr create_frame_converter() override { - return std::make_shared(ogl_); + return spl::make_shared(ogl_); } }; @@ -386,6 +386,6 @@ core::mutable_frame image_mixer::create_frame(const void* tag, const core::pixel { return impl_->create_frame(tag, desc); } -std::shared_ptr image_mixer::create_frame_converter() { return impl_->create_frame_converter(); } +spl::shared_ptr image_mixer::create_frame_converter() { return impl_->create_frame_converter(); } }}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index 12b954d713..a29873e352 
100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -47,7 +47,7 @@ class image_mixer final : public core::image_mixer std::future> operator()(const core::video_format_desc& format_desc) override; core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; - std::shared_ptr create_frame_converter() override; + spl::shared_ptr create_frame_converter() override; // core::image_mixer diff --git a/src/core/consumer/frame_consumer.cpp b/src/core/consumer/frame_consumer.cpp index 1c410edc37..ae83c2fcd4 100644 --- a/src/core/consumer/frame_consumer.cpp +++ b/src/core/consumer/frame_consumer.cpp @@ -163,6 +163,7 @@ class print_consumer_proxy : public frame_consumer spl::shared_ptr frame_consumer_registry::create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const { if (params.empty()) @@ -173,7 +174,7 @@ frame_consumer_registry::create_consumer(const std::vector& if (!std::any_of( consumer_factories.begin(), consumer_factories.end(), [&](const consumer_factory_t& factory) -> bool { try { - consumer = factory(params, format_repository, channels); + consumer = factory(params, format_repository,frame_converter, channels); } catch (...) 
{ CASPAR_LOG_CURRENT_EXCEPTION(); } @@ -189,6 +190,7 @@ spl::shared_ptr frame_consumer_registry::create_consumer(const std::wstring& element_name, const boost::property_tree::wptree& element, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const { auto& preconfigured_consumer_factories = impl_->preconfigured_consumer_factories; @@ -199,7 +201,7 @@ frame_consumer_registry::create_consumer(const std::wstring& << msg_info(L"No consumer factory registered for element name " + element_name)); return spl::make_shared( - spl::make_shared(found->second(element, format_repository, channels))); + spl::make_shared(found->second(element, format_repository,frame_converter, channels))); } const spl::shared_ptr& frame_consumer::empty() diff --git a/src/core/consumer/frame_consumer.h b/src/core/consumer/frame_consumer.h index 55f8dbd7e1..5bff60b789 100644 --- a/src/core/consumer/frame_consumer.h +++ b/src/core/consumer/frame_consumer.h @@ -62,10 +62,12 @@ class frame_consumer using consumer_factory_t = std::function(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels)>; using preconfigured_consumer_factory_t = std::function(const boost::property_tree::wptree& element, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels)>; class frame_consumer_registry @@ -78,11 +80,13 @@ class frame_consumer_registry spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const; spl::shared_ptr create_consumer(const std::wstring& element_name, const boost::property_tree::wptree& element, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) 
const; private: diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index ff4403a854..099be0a832 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -50,7 +50,7 @@ class frame_factory virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; - virtual std::shared_ptr create_frame_converter() = 0; + virtual spl::shared_ptr create_frame_converter() = 0; }; diff --git a/src/core/fwd.h b/src/core/fwd.h index d61d1f070e..6a8bdc8af9 100644 --- a/src/core/fwd.h +++ b/src/core/fwd.h @@ -31,6 +31,7 @@ FORWARD2(caspar, core, class output); FORWARD2(caspar, core, class image_mixer); FORWARD2(caspar, core, struct video_format_desc); FORWARD2(caspar, core, class frame_factory); +FORWARD2(caspar, core, class frame_converter); FORWARD2(caspar, core, class frame_producer); FORWARD2(caspar, core, class frame_consumer); FORWARD2(caspar, core, class draw_frame); diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index dcff0d1b79..494bd8ad5f 100644 --- a/src/core/mixer/image/image_mixer.h +++ b/src/core/mixer/image/image_mixer.h @@ -49,7 +49,7 @@ class image_mixer class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0; - std::shared_ptr create_frame_converter() override = 0; + spl::shared_ptr create_frame_converter() override = 0; }; }} // namespace caspar::core diff --git a/src/core/video_channel.cpp b/src/core/video_channel.cpp index 652411e2b0..f2ec344b79 100644 --- a/src/core/video_channel.cpp +++ b/src/core/video_channel.cpp @@ -245,6 +245,10 @@ mixer& video_channel::mixer() { return impl_->mixer const output& video_channel::output() const { return impl_->output_; } output& video_channel::output() { return impl_->output_; } spl::shared_ptr video_channel::frame_factory() { return impl_->image_mixer_; } +spl::shared_ptr video_channel::frame_converter() { + // TODO - is this too expensive? 
+ return impl_->image_mixer_->create_frame_converter(); +} int video_channel::index() const { return impl_->index(); } core::monitor::state video_channel::state() const { return impl_->state_; } diff --git a/src/core/video_channel.h b/src/core/video_channel.h index 1bdc98ff87..801fc6e3f8 100644 --- a/src/core/video_channel.h +++ b/src/core/video_channel.h @@ -85,6 +85,7 @@ class video_channel final core::output& output(); spl::shared_ptr frame_factory(); + spl::shared_ptr frame_converter(); int index() const; diff --git a/src/modules/artnet/consumer/artnet_consumer.cpp b/src/modules/artnet/consumer/artnet_consumer.cpp index c38b1df6dc..c08a5ab8a9 100644 --- a/src/modules/artnet/consumer/artnet_consumer.cpp +++ b/src/modules/artnet/consumer/artnet_consumer.cpp @@ -311,6 +311,7 @@ std::vector get_fixtures_ptree(const boost::property_tree::wptree& ptre spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config; diff --git a/src/modules/artnet/consumer/artnet_consumer.h b/src/modules/artnet/consumer/artnet_consumer.h index 0fca71248f..ffb94ad84a 100644 --- a/src/modules/artnet/consumer/artnet_consumer.h +++ b/src/modules/artnet/consumer/artnet_consumer.h @@ -35,5 +35,6 @@ namespace caspar { namespace artnet { spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::artnet diff --git a/src/modules/bluefish/consumer/bluefish_consumer.cpp b/src/modules/bluefish/consumer/bluefish_consumer.cpp index b5c68d8280..25d3f01c02 100644 --- a/src/modules/bluefish/consumer/bluefish_consumer.cpp +++ b/src/modules/bluefish/consumer/bluefish_consumer.cpp @@ -884,6 +884,7 @@ struct bluefish_consumer_proxy : public 
core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.size() < 1 || !boost::iequals(params.at(0), L"BLUEFISH")) { @@ -939,6 +940,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config; diff --git a/src/modules/bluefish/consumer/bluefish_consumer.h b/src/modules/bluefish/consumer/bluefish_consumer.h index 0d97101bfb..9d942492b3 100644 --- a/src/modules/bluefish/consumer/bluefish_consumer.h +++ b/src/modules/bluefish/consumer/bluefish_consumer.h @@ -34,11 +34,13 @@ namespace caspar { namespace bluefish { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::bluefish diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 6c4391b823..8feca3e9aa 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -901,6 +901,7 @@ struct decklink_consumer_proxy : public core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.empty() || !boost::iequals(params.at(0), L"DECKLINK")) { @@ -915,6 +916,7 @@ spl::shared_ptr 
create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config = parse_xml_config(ptree, format_repository); diff --git a/src/modules/decklink/consumer/decklink_consumer.h b/src/modules/decklink/consumer/decklink_consumer.h index 2d6da8d0bc..94ffc08b90 100644 --- a/src/modules/decklink/consumer/decklink_consumer.h +++ b/src/modules/decklink/consumer/decklink_consumer.h @@ -35,10 +35,12 @@ namespace caspar { namespace decklink { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::decklink diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp index 5f6fa3a14c..43c9a81bbb 100644 --- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp +++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp @@ -715,6 +715,7 @@ struct ffmpeg_consumer : public core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.size() < 2 || (!boost::iequals(params.at(0), L"STREAM") && !boost::iequals(params.at(0), L"FILE"))) @@ -731,6 +732,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { return 
spl::make_shared(u8(ptree.get(L"path", L"")), diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h index 3a4af28d6e..998eb42a69 100644 --- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h +++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h @@ -35,10 +35,12 @@ namespace caspar { namespace ffmpeg { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree&, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::ffmpeg diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index f9c020abff..d3bee919f7 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -118,6 +118,7 @@ struct image_consumer : public core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.empty() || !boost::iequals(params.at(0), L"IMAGE")) diff --git a/src/modules/image/consumer/image_consumer.h b/src/modules/image/consumer/image_consumer.h index e971f28e2b..b779a6e528 100644 --- a/src/modules/image/consumer/image_consumer.h +++ b/src/modules/image/consumer/image_consumer.h @@ -34,6 +34,7 @@ namespace caspar { namespace image { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::image diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp index 
1a93a2f73e..79c66c997c 100644 --- a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp +++ b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp @@ -257,6 +257,7 @@ std::atomic newtek_ndi_consumer::instances_(0); spl::shared_ptr create_ndi_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.size() < 1 || !boost::iequals(params.at(0), L"NDI")) @@ -269,6 +270,7 @@ create_ndi_consumer(const std::vector& par spl::shared_ptr create_preconfigured_ndi_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { auto name = ptree.get(L"name", L""); diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.h b/src/modules/newtek/consumer/newtek_ndi_consumer.h index 2f3e788d53..8148ee5385 100644 --- a/src/modules/newtek/consumer/newtek_ndi_consumer.h +++ b/src/modules/newtek/consumer/newtek_ndi_consumer.h @@ -35,10 +35,12 @@ namespace caspar { namespace newtek { spl::shared_ptr create_ndi_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_ndi_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::newtek diff --git a/src/modules/oal/consumer/oal_consumer.cpp b/src/modules/oal/consumer/oal_consumer.cpp index fa790fe9fb..6ce9a81c09 100644 --- a/src/modules/oal/consumer/oal_consumer.cpp +++ b/src/modules/oal/consumer/oal_consumer.cpp @@ -389,6 +389,7 @@ struct oal_consumer : public core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const 
spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.empty() || !boost::iequals(params.at(0), L"AUDIO")) @@ -400,6 +401,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { return spl::make_shared(); diff --git a/src/modules/oal/consumer/oal_consumer.h b/src/modules/oal/consumer/oal_consumer.h index a868d505b6..d143412d5b 100644 --- a/src/modules/oal/consumer/oal_consumer.h +++ b/src/modules/oal/consumer/oal_consumer.h @@ -34,10 +34,12 @@ namespace caspar { namespace oal { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree&, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::oal diff --git a/src/modules/screen/consumer/screen_consumer.cpp b/src/modules/screen/consumer/screen_consumer.cpp index 23d4301d58..7f401a483a 100644 --- a/src/modules/screen/consumer/screen_consumer.cpp +++ b/src/modules/screen/consumer/screen_consumer.cpp @@ -608,6 +608,7 @@ struct screen_consumer_proxy : public core::frame_consumer spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.empty() || !boost::iequals(params.at(0), L"SCREEN")) { @@ -644,6 +645,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { 
configuration config; diff --git a/src/modules/screen/consumer/screen_consumer.h b/src/modules/screen/consumer/screen_consumer.h index c7129052b4..16493ab824 100644 --- a/src/modules/screen/consumer/screen_consumer.h +++ b/src/modules/screen/consumer/screen_consumer.h @@ -33,10 +33,12 @@ namespace caspar { namespace screen { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::screen diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp index 403bbe4d15..cfa23511db 100644 --- a/src/protocol/amcp/AMCPCommandsImpl.cpp +++ b/src/protocol/amcp/AMCPCommandsImpl.cpp @@ -456,7 +456,7 @@ std::wstring add_command(command_context& ctx) core::diagnostics::call_context::for_thread().video_channel = ctx.channel_index + 1; auto consumer = ctx.static_context->consumer_registry->create_consumer( - ctx.parameters, ctx.static_context->format_repository, get_channels(ctx)); + ctx.parameters, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)); ctx.channel.raw_channel->output().add(ctx.layer_index(consumer->index()), consumer); return L"202 ADD OK\r\n"; @@ -474,7 +474,7 @@ std::wstring remove_command(command_context& ctx) } index = ctx.static_context->consumer_registry - ->create_consumer(ctx.parameters, ctx.static_context->format_repository, get_channels(ctx)) + ->create_consumer(ctx.parameters, ctx.static_context->format_repository,ctx.channel.raw_channel->frame_converter(), get_channels(ctx)) ->index(); } @@ -488,7 +488,7 @@ std::wstring remove_command(command_context& ctx) std::wstring print_command(command_context& ctx) { 
ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer( - {L"IMAGE"}, ctx.static_context->format_repository, get_channels(ctx))); + {L"IMAGE"}, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx))); return L"202 PRINT OK\r\n"; } @@ -1354,7 +1354,7 @@ std::wstring channel_grid_command(command_context& ctx) params.emplace_back(L"NAME"); params.emplace_back(L"Channel Grid Window"); auto screen = ctx.static_context->consumer_registry->create_consumer( - params, ctx.static_context->format_repository, get_channels(ctx)); + params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)); self.raw_channel->output().add(screen); diff --git a/src/shell/server.cpp b/src/shell/server.cpp index 81758999f8..f22e0cc9e2 100644 --- a/src/shell/server.cpp +++ b/src/shell/server.cpp @@ -342,7 +342,7 @@ struct server::impl try { if (name != L"") channel.raw_channel->output().add(consumer_registry_->create_consumer( - name, xml_consumer.second, video_format_repository_, channels_vec)); + name, xml_consumer.second, video_format_repository_, channel.raw_channel->frame_converter(), channels_vec)); } catch (...) 
{ CASPAR_LOG_CURRENT_EXCEPTION(); } From 57a362452fcbfaa127d3d11be6d25174dd0fc16e Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 22 Dec 2023 17:48:35 +0000 Subject: [PATCH 19/50] wip --- .../decklink/consumer/decklink_consumer.cpp | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 8feca3e9aa..6a200f94e7 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -403,6 +403,8 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback struct decklink_consumer final : public IDeckLinkVideoOutputCallback { + const spl::shared_ptr frame_converter_; + const int channel_index_; const configuration config_; @@ -444,8 +446,9 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback std::atomic abort_request_{false}; public: - decklink_consumer(const configuration& config, core::video_format_desc channel_format_desc, int channel_index) - : channel_index_(channel_index) + decklink_consumer(const spl::shared_ptr& frame_converter, const configuration& config, core::video_format_desc channel_format_desc, int channel_index) + : frame_converter_(frame_converter) + , channel_index_(channel_index) , config_(config) , channel_format_desc_(std::move(channel_format_desc)) , decklink_format_desc_(get_decklink_format(config.primary, channel_format_desc_)) @@ -849,14 +852,17 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback struct decklink_consumer_proxy : public core::frame_consumer { + const spl::shared_ptr frame_converter_; + const configuration config_; std::unique_ptr consumer_; core::video_format_desc format_desc_; executor executor_; public: - explicit decklink_consumer_proxy(const configuration& config) - : config_(config) + explicit decklink_consumer_proxy(const spl::shared_ptr& frame_converter, const 
configuration& config) + : frame_converter_(frame_converter) + , config_(config) , executor_(L"decklink_consumer[" + std::to_wstring(config.primary.device_index) + L"]") { executor_.begin_invoke([=] { com_initialize(); }); @@ -876,7 +882,7 @@ struct decklink_consumer_proxy : public core::frame_consumer format_desc_ = format_desc; executor_.invoke([=] { consumer_.reset(); - consumer_ = std::make_unique(config_, format_desc, channel_index); + consumer_ = std::make_unique(frame_converter_, config_, format_desc, channel_index); }); } @@ -910,7 +916,7 @@ spl::shared_ptr create_consumer(const std::vector(config); + return spl::make_shared(frame_converter, config); } spl::shared_ptr @@ -921,7 +927,7 @@ create_preconfigured_consumer(const boost::property_tree::wptree& { configuration config = parse_xml_config(ptree, format_repository); - return spl::make_shared(config); + return spl::make_shared(frame_converter, config); } }} // namespace caspar::decklink From f6a30be997c6eff547b2002ae4174c416a33b83c Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Wed, 27 Dec 2023 17:02:54 +0000 Subject: [PATCH 20/50] wip: boilerplate for frame conversion --- src/accelerator/CMakeLists.txt | 2 + src/accelerator/ogl/image/frame_converter.cpp | 50 +++++++++++- src/accelerator/ogl/image/frame_converter.h | 3 + .../ogl/image/shader_from_rgba.comp | 15 ++++ src/accelerator/ogl/util/device.cpp | 77 ++++++++++++++++--- src/accelerator/ogl/util/device.h | 7 ++ src/core/frame/frame_factory.h | 17 +++- .../decklink/consumer/decklink_consumer.cpp | 21 ++--- 8 files changed, 167 insertions(+), 25 deletions(-) create mode 100644 src/accelerator/ogl/image/shader_from_rgba.comp diff --git a/src/accelerator/CMakeLists.txt b/src/accelerator/CMakeLists.txt index ee2eda3b2e..41aae2bf3f 100644 --- a/src/accelerator/CMakeLists.txt +++ b/src/accelerator/CMakeLists.txt @@ -30,6 +30,7 @@ set(HEADERS ogl_image_vertex.h ogl_image_fragment.h ogl_image_to_rgba.h + ogl_image_from_rgba.h accelerator.h StdAfx.h @@ 
-38,6 +39,7 @@ set(HEADERS bin2c("ogl/image/shader.vert" "ogl_image_vertex.h" "caspar::accelerator::ogl" "vertex_shader") bin2c("ogl/image/shader.frag" "ogl_image_fragment.h" "caspar::accelerator::ogl" "fragment_shader") bin2c("ogl/image/shader_to_rgba.comp" "ogl_image_to_rgba.h" "caspar::accelerator::ogl" "compute_to_rgba_shader") +bin2c("ogl/image/shader_from_rgba.comp" "ogl_image_from_rgba.h" "caspar::accelerator::ogl" "compute_from_rgba_shader") casparcg_add_library(accelerator SOURCES ${SOURCES} ${HEADERS}) target_include_directories(accelerator PRIVATE diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 1baf619413..cbe614a641 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -19,6 +19,12 @@ * Author: Julian Waller, julian@superfly.tv */ #include "frame_converter.h" +#include "../util/texture.h" + +#include + +#include +#include namespace caspar::accelerator::ogl { @@ -29,7 +35,6 @@ ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr& ogl) core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc) { - std::vector> image_data; for (auto& plane : desc.planes) { image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth @@ -64,4 +69,47 @@ core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& f return core::draw_frame{}; } +std::shared_future>> +ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) +{ + std::vector> buffers; + int x_count = 0; + int y_count = 0; + switch (format) { + case core::encoded_frame_format::decklink_v210: + auto row_blocks = ((frame.width() + 47) / 48); + auto row_bytes = row_blocks * 128; + + // TODO - result must be 128byte aligned. can that be guaranteed here? 
+ buffers.push_back(ogl_->create_array(row_bytes * frame.height(), common::bit_depth::bit8)); + x_count = row_blocks; + y_count = frame.height(); + break; + } + + if (buffers.size() == 0 || x_count == 0 || y_count == 0) { + CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); + } + + std::vector> textures; + + // TODO - avoid this extra copy + auto plane_count = frame.pixel_format_desc().planes.size(); + for (size_t i = 0; i < plane_count; i++) { + auto plane = frame.pixel_format_desc().planes[i]; + auto texture = ogl_->copy_async(frame.image_data(i), plane.width, plane.height, plane.size); + textures.push_back(texture.get()); + } + + auto future_conversion = + ogl_->convert_from_texture(textures, buffers, frame.width(), frame.height(), x_count, y_count); + + return std::async(std::launch::deferred, + [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable { + future_conversion.get(); + + return std::move(buffers); + }); +} + } // namespace caspar::accelerator::ogl \ No newline at end of file diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index 1a9ec1f80b..41daedc772 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ b/src/accelerator/ogl/image/frame_converter.h @@ -45,6 +45,9 @@ class ogl_frame_converter core::draw_frame convert_frame(const core::mutable_frame& frame) override; + std::shared_future>> + convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) override; + private: const spl::shared_ptr ogl_; }; diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp new file mode 100644 index 0000000000..b8fe8752e2 --- /dev/null +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -0,0 +1,15 @@ +#version 430 + +layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: 
this needs to match what it is writing to + +void main() { + vec4 value = vec4(0.0, 0.0, 0.0, 1.0); + ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); + + value.r = float(texelCoord.x)/(gl_NumWorkGroups.x); + value.g = float(texelCoord.y)/(gl_NumWorkGroups.y); + + imageStore(imgOutput, texelCoord, value); +} \ No newline at end of file diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 68f0bed0a1..0a317706d7 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -24,7 +24,6 @@ #include "compute_shader.h" #include "texture.h" - #include #include #include @@ -42,9 +41,9 @@ #include #include -#include #include #include +#include #include #include @@ -53,6 +52,7 @@ #include #include +#include "ogl_image_from_rgba.h" #include "ogl_image_to_rgba.h" namespace caspar { namespace accelerator { namespace ogl { @@ -73,7 +73,8 @@ struct device::impl : public std::enable_shared_from_this sync_queue_t sync_queue_; - std::unique_ptr compute_shader_; + std::unique_ptr compute_to_rgba_; + std::unique_ptr compute_from_rgba_; GLuint fbo_; @@ -119,7 +120,6 @@ struct device::impl : public std::enable_shared_from_this GL(glCreateFramebuffers(1, &fbo_)); GL(glBindFramebuffer(GL_FRAMEBUFFER, fbo_)); - device_.setActive(false); thread_ = std::thread([&] { @@ -231,8 +231,6 @@ struct device::impl : public std::enable_shared_from_this return array(ptr, buf->size(), buf, depth); } - - std::future> copy_async(const array& source, int width, int height, int stride) { @@ -300,15 +298,14 @@ struct device::impl : public std::enable_shared_from_this convert_frame(const std::vector>& sources, int width, int height, int width_samples) { return dispatch_async([=] { - - if (!compute_shader_) - compute_shader_ = std::make_unique(std::string(compute_to_rgba_shader)); + if (!compute_to_rgba_) + compute_to_rgba_ = std::make_unique(std::string(compute_to_rgba_shader)); auto tex = create_texture(width, height, 4, 
common::bit_depth::bit16, false); glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); - compute_shader_->use(); + compute_to_rgba_->use(); glDispatchCompute((unsigned int)width_samples, (unsigned int)height, 1); @@ -319,6 +316,57 @@ struct device::impl : public std::enable_shared_from_this }); } + std::future convert_from_texture(const std::vector>& textures, + const std::vector>& buffers, + int width, + int height, + int x_count, + int y_count) + { + return spawn_async([=](yield_context yield) { + if (!compute_from_rgba_) + compute_from_rgba_ = std::make_unique(std::string(compute_from_rgba_shader)); + + // TODO: This probably only needs to handle one texture + for (size_t i = 0; i < textures.size(); i++) { + auto& tex = textures[i]; + glBindImageTexture(i, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); + } + + for (size_t i = 0; i < buffers.size(); i++) { + auto& source = buffers[i]; + auto tmp = source.storage>(); + if (!tmp) { + CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); + } + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, tmp->get()->id()); + } + + compute_to_rgba_->use(); + + glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1); + + auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + + GL(glFlush()); + + deadline_timer timer(service_); + for (auto n = 0; true; ++n) { + // TODO (perf) Smarter non-polling solution? 
+ timer.expires_from_now(boost::posix_time::milliseconds(2)); + timer.async_wait(yield); + + auto wait = glClientWaitSync(fence, 0, 1); + if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED) { + break; + } + } + + glDeleteSync(fence); + }); + } + #ifdef WIN32 std::future> copy_async(GLuint source, int width, int height, int stride) { @@ -483,6 +531,15 @@ device::convert_frame(const std::vector>& sources, int widt { return impl_->convert_frame(sources, width, height, format); } +std::future device::convert_from_texture(const std::vector>& textures, + const std::vector>& buffers, + int width, + int height, + int x_count, + int y_count) +{ + return impl_->convert_from_texture(textures, buffers, width, height, x_count, y_count); +} void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring device::version() const { return impl_->version(); } boost::property_tree::wptree device::info() const { return impl_->info(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 91e470e370..7e245f73b5 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -56,6 +56,13 @@ class device final std::future> convert_frame(const std::vector>& sources, int width, int height, int format); + std::future convert_from_texture(const std::vector>& textures, + const std::vector>& buffers, + int width, + int height, + int x_count, + int y_count); + template auto dispatch_async(Func&& func) { diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 099be0a832..48192135d1 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -23,13 +23,21 @@ #include +#include + namespace caspar { namespace core { -class frame_converter { +enum encoded_frame_format +{ + decklink_v210 = 0, +}; + +class frame_converter +{ public: - frame_converter() = default; + frame_converter() = default; frame_converter& 
operator=(const frame_converter&) = delete; - virtual ~frame_converter() = default; + virtual ~frame_converter() = default; frame_converter(const frame_converter&) = delete; @@ -37,6 +45,8 @@ class frame_converter { virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0; + virtual std::shared_future>> + convert_from_rgba(const core::const_frame& frame, const encoded_frame_format format) = 0; }; class frame_factory @@ -53,5 +63,4 @@ class frame_factory virtual spl::shared_ptr create_frame_converter() = 0; }; - }} // namespace caspar::core diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 6a200f94e7..d10c98a390 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -749,16 +749,17 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback schedule_next_audio(std::move(audio_data), nb_samples); } } else { - // Send frame to secondary ports - auto& context = secondary_port_contexts_[i]; - context->schedule_frame(frame1, video_display_time); - if (isInterlaced) { - context->schedule_frame(frame2, video_display_time); - } - - if (config_.embedded_audio) { - // TODO - audio for secondaries? - } + // TODO - reimplement this + // // Send frame to secondary ports + // auto& context = secondary_port_contexts_[i]; + // context->schedule_frame(frame1, video_display_time); + // if (isInterlaced) { + // context->schedule_frame(frame2, video_display_time); + // } + + // if (config_.embedded_audio) { + // // TODO - audio for secondaries? 
+ // } } }); From 111e8f7fa7dd413a07646c82191b95f527569979 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 14:45:28 +0000 Subject: [PATCH 21/50] wip: broke --- src/accelerator/ogl/image/frame_converter.cpp | 8 +- .../ogl/image/shader_from_rgba.comp | 7 +- src/accelerator/ogl/image/shader_to_rgba.comp | 2 +- src/accelerator/ogl/util/device.cpp | 12 +- src/common/memshfl.h | 8 +- .../decklink/consumer/decklink_consumer.cpp | 127 +++++++++++++----- src/modules/decklink/consumer/frame.cpp | 4 +- 7 files changed, 114 insertions(+), 54 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index cbe614a641..6c58dc4af3 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -94,10 +94,10 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor std::vector> textures; // TODO - avoid this extra copy - auto plane_count = frame.pixel_format_desc().planes.size(); - for (size_t i = 0; i < plane_count; i++) { - auto plane = frame.pixel_format_desc().planes[i]; - auto texture = ogl_->copy_async(frame.image_data(i), plane.width, plane.height, plane.size); + size_t i = 0; + for (auto& plane : frame.pixel_format_desc().planes) { + // TODO - this is failing. is the buffer going the wrong direction causing it to fail? 
+ auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride); textures.push_back(texture.get()); } diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index b8fe8752e2..d17e3eff0a 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -2,7 +2,12 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to +layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to + +layout(std430, binding = 1) buffer bufferOutput +{ + uint8 data[]; +}; void main() { vec4 value = vec4(0.0, 0.0, 0.0, 1.0); diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp index 1964edadbf..4bc5fba7f3 100644 --- a/src/accelerator/ogl/image/shader_to_rgba.comp +++ b/src/accelerator/ogl/image/shader_to_rgba.comp @@ -2,7 +2,7 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(rgba32f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to +layout(rgba16f, binding = 0) uniform image2D imgOutput; // Note: this needs to match what it is writing to void main() { vec4 value = vec4(0.0, 0.0, 0.0, 1.0); diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 0a317706d7..cd4e2a0f67 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -303,7 +303,7 @@ struct device::impl : public std::enable_shared_from_this auto tex = create_texture(width, height, 4, common::bit_depth::bit16, false); - glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); + glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA16F); compute_to_rgba_->use(); @@ -328,10 +328,10 @@ struct 
device::impl : public std::enable_shared_from_this compute_from_rgba_ = std::make_unique(std::string(compute_from_rgba_shader)); // TODO: This probably only needs to handle one texture - for (size_t i = 0; i < textures.size(); i++) { - auto& tex = textures[i]; - glBindImageTexture(i, tex->id(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F); - } + // for (size_t i = 0; i < textures.size(); i++) { + auto& tex = textures[0]; + glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F); + // } for (size_t i = 0; i < buffers.size(); i++) { auto& source = buffers[i]; @@ -340,7 +340,7 @@ struct device::impl : public std::enable_shared_from_this CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); } - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, tmp->get()->id()); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id()); } compute_to_rgba_->use(); diff --git a/src/common/memshfl.h b/src/common/memshfl.h index 3947e9867f..2bc3b4f10c 100644 --- a/src/common/memshfl.h +++ b/src/common/memshfl.h @@ -35,14 +35,14 @@ namespace caspar { #ifdef _MSC_VER -static std::shared_ptr create_aligned_buffer(size_t size) +static std::shared_ptr create_aligned_buffer(size_t size, size_t alignment) { - return std::shared_ptr(_aligned_malloc(size, 64), _aligned_free); + return std::shared_ptr(_aligned_malloc(size, alignment), _aligned_free); } #else -static std::shared_ptr create_aligned_buffer(size_t size) +static std::shared_ptr create_aligned_buffer(size_t size, size_t alignment) { - return std::shared_ptr(aligned_alloc(64, size), free); + return std::shared_ptr(aligned_alloc(alignment, size), free); } #endif diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index d10c98a390..9a158cc0ed 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -34,6 +34,7 @@ #include #include #include +#include 
#include #include @@ -53,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -193,13 +195,18 @@ core::video_format_desc get_decklink_format(const port_configuration& confi class decklink_frame : public IDeckLinkVideoFrame { core::video_format_desc format_desc_; + BMDPixelFormat pixel_format_; std::shared_ptr data_; std::atomic ref_count_{0}; int nb_samples_; public: - decklink_frame(std::shared_ptr data, core::video_format_desc format_desc, int nb_samples) + decklink_frame(std::shared_ptr data, + BMDPixelFormat pixel_format, + core::video_format_desc format_desc, + int nb_samples) : format_desc_(std::move(format_desc)) + , pixel_format_(pixel_format) , data_(std::move(data)) , nb_samples_(nb_samples) { @@ -224,10 +231,21 @@ class decklink_frame : public IDeckLinkVideoFrame // IDecklinkVideoFrame - long STDMETHODCALLTYPE GetWidth() override { return static_cast(format_desc_.width); } - long STDMETHODCALLTYPE GetHeight() override { return static_cast(format_desc_.height); } - long STDMETHODCALLTYPE GetRowBytes() override { return static_cast(format_desc_.width) * 4; } - BMDPixelFormat STDMETHODCALLTYPE GetPixelFormat() override { return bmdFormat8BitBGRA; } + long STDMETHODCALLTYPE GetWidth() override { return static_cast(format_desc_.width); } + long STDMETHODCALLTYPE GetHeight() override { return static_cast(format_desc_.height); } + long STDMETHODCALLTYPE GetRowBytes() override + { + switch (pixel_format_) { + case bmdFormat8BitARGB: + case bmdFormat8BitBGRA: + return static_cast(format_desc_.width) * 4; + case bmdFormat10BitYUV: + return ((static_cast(format_desc_.width) + 47) / 48) * 128; + default: + return 0; + } + } + BMDPixelFormat STDMETHODCALLTYPE GetPixelFormat() override { return pixel_format_; } BMDFrameFlags STDMETHODCALLTYPE GetFlags() override { return bmdFrameFlagDefault; } HRESULT STDMETHODCALLTYPE GetBytes(void** buffer) override @@ -369,13 +387,16 @@ struct decklink_secondary_port final : public 
IDeckLinkVideoOutputCallback auto image_data = convert_frame_for_port( channel_format_desc_, decklink_format_desc_, output_config_, frame1, frame2, mode_->GetFieldDominance()); - schedule_next_video(image_data, 0, display_time); + schedule_next_video(image_data, bmdFormat8BitBGRA, 0, display_time); } - void schedule_next_video(std::shared_ptr image_data, int nb_samples, BMDTimeValue display_time) + void schedule_next_video(std::shared_ptr image_data, + BMDPixelFormat pixel_format, + int nb_samples, + BMDTimeValue display_time) { auto packed_frame = wrap_raw( - new decklink_frame(std::move(image_data), decklink_format_desc_, nb_samples)); + new decklink_frame(std::move(image_data), pixel_format, decklink_format_desc_, nb_samples)); if (FAILED(output_->ScheduleVideoFrame(get_raw(packed_frame), display_time, decklink_format_desc_.duration, @@ -401,6 +422,18 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback } }; +struct converted_frame +{ + core::const_frame raw_frame; + std::shared_future>> frame; + + converted_frame(core::const_frame raw_frame, std::shared_future>> frame) + : raw_frame(raw_frame) + , frame(frame) + { + } +}; + struct decklink_consumer final : public IDeckLinkVideoOutputCallback { const spl::shared_ptr frame_converter_; @@ -421,10 +454,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback const core::video_format_desc channel_format_desc_; const core::video_format_desc decklink_format_desc_; - std::mutex buffer_mutex_; - std::condition_variable buffer_cond_; - std::queue buffer_; - int buffer_capacity_ = channel_format_desc_.field_count; + std::mutex buffer_mutex_; + std::condition_variable buffer_cond_; + std::queue buffer_; + int buffer_capacity_ = channel_format_desc_.field_count; const int buffer_size_ = config_.buffer_depth(); // Minimum buffer-size 3. 
@@ -446,7 +479,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback std::atomic abort_request_{false}; public: - decklink_consumer(const spl::shared_ptr& frame_converter, const configuration& config, core::video_format_desc channel_format_desc, int channel_index) + decklink_consumer(const spl::shared_ptr& frame_converter, + const configuration& config, + core::video_format_desc channel_format_desc, + int channel_index) : frame_converter_(frame_converter) , channel_index_(channel_index) , config_(config) @@ -520,11 +556,11 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback nb_samples); } - std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc_.size); + std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc_.size, 128); - schedule_next_video(image_data, nb_samples, video_scheduled_); + schedule_next_video(image_data, bmdFormat8BitBGRA, nb_samples, video_scheduled_); for (auto& context : secondary_port_contexts_) { - context->schedule_next_video(image_data, 0, video_scheduled_); + context->schedule_next_video(image_data, bmdFormat8BitBGRA, 0, video_scheduled_); } video_scheduled_ += decklink_format_desc_.duration; @@ -707,8 +743,8 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback } } - core::const_frame frame1 = pop(); - core::const_frame frame2; + std::optional frame1 = pop(); + std::optional frame2; bool isInterlaced = mode_->GetFieldDominance() != bmdProgressiveFrame; if (mode_->GetFieldDominance() != bmdProgressiveFrame) { @@ -719,14 +755,22 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback if (abort_request_) return E_FAIL; + // Skip if frames are missing + if (!frame1.has_value() || (isInterlaced && !frame2.has_value())) + return S_OK; + BMDTimeValue video_display_time = video_scheduled_; video_scheduled_ += decklink_format_desc_.duration; std::vector audio_data; if (config_.embedded_audio) { - audio_data.insert(audio_data.end(), 
frame1.audio_data().begin(), frame1.audio_data().end()); + audio_data.insert(audio_data.end(), + frame1.value().raw_frame.audio_data().begin(), + frame1.value().raw_frame.audio_data().end()); if (isInterlaced) { - audio_data.insert(audio_data.end(), frame2.audio_data().begin(), frame2.audio_data().end()); + audio_data.insert(audio_data.end(), + frame2.value().raw_frame.audio_data().begin(), + frame2.value().raw_frame.audio_data().end()); } } // TODO: is this reliable? @@ -736,14 +780,19 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback tbb::parallel_for(-1, static_cast(secondary_port_contexts_.size()), [&](int i) { if (i == -1) { // Primary port - std::shared_ptr image_data = convert_frame_for_port(channel_format_desc_, - decklink_format_desc_, - config_.primary, - frame1, - frame2, - mode_->GetFieldDominance()); + // std::shared_ptr image_data = convert_frame_for_port(channel_format_desc_, + // decklink_format_desc_, + // config_.primary, + // frame1, + // frame2, + // mode_->GetFieldDominance()); - schedule_next_video(image_data, nb_samples, video_display_time); + auto buffers = frame1.value().frame.get(); + + std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc_.size, 128); + std::memcpy(image_data.get(), buffers.at(0).data(), buffers.at(0).size()); + + schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time); if (config_.embedded_audio) { schedule_next_audio(std::move(audio_data), nb_samples); @@ -772,9 +821,9 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback return S_OK; } - core::const_frame pop() + std::optional pop() { - core::const_frame frame; + std::optional frame; { std::unique_lock lock(buffer_mutex_); buffer_cond_.wait(lock, [&] { return !buffer_.empty() || abort_request_; }); @@ -804,10 +853,13 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback audio_scheduled_ += nb_samples; // TODO - what if there are too many/few samples in this frame? 
} - void schedule_next_video(std::shared_ptr image_data, int nb_samples, BMDTimeValue display_time) + void schedule_next_video(std::shared_ptr image_data, + BMDPixelFormat pixel_format, + int nb_samples, + BMDTimeValue display_time) { auto fill_frame = wrap_raw( - new decklink_frame(std::move(image_data), decklink_format_desc_, nb_samples)); + new decklink_frame(std::move(image_data), pixel_format, decklink_format_desc_, nb_samples)); if (FAILED(output_->ScheduleVideoFrame( get_raw(fill_frame), display_time, decklink_format_desc_.duration, decklink_format_desc_.time_scale))) { CASPAR_LOG(error) << print() << L" Failed to schedule primary video."; @@ -824,12 +876,14 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback } if (frame) { + auto frame_future = frame_converter_->convert_from_rgba(frame, core::encoded_frame_format::decklink_v210); + std::unique_lock lock(buffer_mutex_); if (field != core::video_field::b) { // Always push a field2, as we have supplied field1 buffer_cond_.wait(lock, [&] { return buffer_.size() < buffer_capacity_ || abort_request_; }); } - buffer_.push(std::move(frame)); + buffer_.push(converted_frame(std::move(frame), frame_future)); } buffer_cond_.notify_all(); @@ -861,7 +915,8 @@ struct decklink_consumer_proxy : public core::frame_consumer executor executor_; public: - explicit decklink_consumer_proxy(const spl::shared_ptr& frame_converter, const configuration& config) + explicit decklink_consumer_proxy(const spl::shared_ptr& frame_converter, + const configuration& config) : frame_converter_(frame_converter) , config_(config) , executor_(L"decklink_consumer[" + std::to_wstring(config.primary.device_index) + L"]") @@ -906,8 +961,8 @@ struct decklink_consumer_proxy : public core::frame_consumer [[nodiscard]] core::monitor::state state() const override { return get_state_for_config(config_, format_desc_); } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& 
format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels) { @@ -923,7 +978,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config = parse_xml_config(ptree, format_repository); diff --git a/src/modules/decklink/consumer/frame.cpp b/src/modules/decklink/consumer/frame.cpp index 57d55e6640..730885e705 100644 --- a/src/modules/decklink/consumer/frame.cpp +++ b/src/modules/decklink/consumer/frame.cpp @@ -31,7 +31,7 @@ namespace caspar { namespace decklink { std::shared_ptr convert_to_key_only(const std::shared_ptr& image_data, std::size_t byte_count) { - auto key_data = create_aligned_buffer(byte_count); + auto key_data = create_aligned_buffer(byte_count, 64); aligned_memshfl(key_data.get(), image_data.get(), byte_count, 0x0F0F0F0F, 0x0B0B0B0B, 0x07070707, 0x03030303); @@ -133,7 +133,7 @@ std::shared_ptr convert_frame_for_port(const core::video_format_desc& chan const core::const_frame& frame2, BMDFieldDominance field_dominance) { - std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc.size); + std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc.size, 64); if (field_dominance != bmdProgressiveFrame) { convert_frame(channel_format_desc, From 95e30ec843c4fd0ec0f3c082e0ca32bceae2f55a Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 15:00:24 +0000 Subject: [PATCH 22/50] fix: remove bit_depth property from `array` --- src/accelerator/ogl/image/frame_converter.cpp | 10 +++++----- src/accelerator/ogl/image/image_mixer.cpp | 9 +++++---- src/accelerator/ogl/util/device.cpp | 16 +++++++--------- 
src/accelerator/ogl/util/device.h | 4 ++-- src/common/array.h | 17 ++--------------- src/common/bit_depth.h | 4 ++++ src/core/frame/pixel_format.h | 4 ++-- 7 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 6c58dc4af3..330b947a27 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -37,7 +37,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor { std::vector> image_data; for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size, common::bit_depth::bit16)); // TODO: Depth + image_data.push_back(ogl_->create_array(plane.size)); } using future_texture = std::shared_future>; @@ -57,7 +57,7 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor std::vector textures; for (int n = 0; n < static_cast(desc.planes.size()); ++n) { textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); } return std::make_shared(std::move(textures)); }); @@ -81,13 +81,13 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor auto row_bytes = row_blocks * 128; // TODO - result must be 128byte aligned. can that be guaranteed here? 
- buffers.push_back(ogl_->create_array(row_bytes * frame.height(), common::bit_depth::bit8)); + buffers.push_back(ogl_->create_array(row_bytes * frame.height())); x_count = row_blocks; y_count = frame.height(); break; } - if (buffers.size() == 0 || x_count == 0 || y_count == 0) { + if (buffers.empty() || x_count == 0 || y_count == 0) { CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); } @@ -97,7 +97,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor size_t i = 0; for (auto& plane : frame.pixel_format_desc().planes) { // TODO - this is failing. is the buffer going the wrong direction causing it to fail? - auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride); + auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride, plane.depth); textures.push_back(texture.get()); } diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 35324356c8..ae184ac818 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -90,7 +90,7 @@ class image_renderer { if (layers.empty()) { // Bypass GPU with empty frame. 
static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better - return make_ready_future(array(buffer.data(), format_desc.size, true, depth_)); + return make_ready_future(array(buffer.data(), format_desc.size, true)); } return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future> { @@ -289,7 +289,8 @@ struct image_mixer::impl item.textures.emplace_back(ogl_->copy_async(frame.image_data(n), item.pix_desc.planes[n].width, item.pix_desc.planes[n].height, - item.pix_desc.planes[n].stride)); + item.pix_desc.planes[n].stride, + item.pix_desc.planes[n].depth)); } } @@ -326,7 +327,7 @@ struct image_mixer::impl { std::vector> image_data; for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size, plane.depth)); + image_data.push_back(ogl_->create_array(plane.size)); } std::weak_ptr weak_self = shared_from_this(); @@ -356,7 +357,7 @@ struct image_mixer::impl std::vector textures; for (int n = 0; n < static_cast(desc.planes.size()); ++n) { textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride)); + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); } return std::make_shared(std::move(textures)); } diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index cd4e2a0f67..d0ff420655 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -223,18 +223,16 @@ struct device::impl : public std::enable_shared_from_this }); } - array create_array(int count, common::bit_depth depth) + array create_array(int count) { - auto bytes_per_pixel = static_cast(depth) + 1; - auto buf = create_buffer(count * bytes_per_pixel, true); + auto buf = create_buffer(count, true); auto ptr = reinterpret_cast(buf->data()); - return array(ptr, buf->size(), buf, depth); + return array(ptr, buf->size(), buf); } std::future> - copy_async(const array& source, int 
width, int height, int stride) + copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) { - auto depth = source.native_depth(); return dispatch_async([=] { std::shared_ptr buf; @@ -516,11 +514,11 @@ std::shared_ptr device::create_texture(int width, int height, int strid { return impl_->create_texture(width, height, stride, depth, true); } -array device::create_array(int size, common::bit_depth depth) { return impl_->create_array(size, depth); } +array device::create_array(int size) { return impl_->create_array(size); } std::future> -device::copy_async(const array& source, int width, int height, int stride) +device::copy_async(const array& source, int width, int height, int stride, common::bit_depth depth) { - return impl_->copy_async(source, width, height, stride); + return impl_->copy_async(source, width, height, stride, depth); } std::future> device::copy_async(const std::shared_ptr& source) { diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 7e245f73b5..b001756468 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -47,10 +47,10 @@ class device final device& operator=(const device&) = delete; std::shared_ptr create_texture(int width, int height, int stride, common::bit_depth depth); - array create_array(int size, common::bit_depth depth); + array create_array(int size); std::future> - copy_async(const array& source, int width, int height, int stride); + copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); std::future> copy_async(const std::shared_ptr& source); std::future> diff --git a/src/common/array.h b/src/common/array.h index 97b0d411c2..10f27957c3 100644 --- a/src/common/array.h +++ b/src/common/array.h @@ -2,8 +2,6 @@ #include -#include "bit_depth.h" - #include #include #include @@ -43,11 +41,10 @@ class array final } template - explicit array(T* ptr, std::size_t size, S&& storage, common::bit_depth 
native_depth = common::bit_depth::bit8) + explicit array(T* ptr, std::size_t size, S&& storage) : ptr_(ptr) , size_(size) , storage_(std::make_shared(std::forward(storage))) - , native_depth_(native_depth) { } @@ -57,7 +54,6 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(std::move(other.storage_)) - , native_depth_(other.native_depth_) { other.ptr_ = nullptr; other.size_ = 0; @@ -70,7 +66,6 @@ class array final ptr_ = std::move(other.ptr_); size_ = std::move(other.size_); storage_ = std::move(other.storage_); - native_depth_ = std::move(other.native_depth_); return *this; } @@ -79,7 +74,6 @@ class array final T* data() const { return ptr_; } T* end() const { return ptr_ + size_; } std::size_t size() const { return size_; } - common::bit_depth native_depth() const { return native_depth_; } explicit operator bool() const { return size_ > 0; }; @@ -92,7 +86,6 @@ class array final private: T* ptr_ = nullptr; std::size_t size_ = 0; - common::bit_depth native_depth_ = common::bit_depth::bit8; std::shared_ptr storage_; }; @@ -127,12 +120,10 @@ class array final template explicit array(const T* ptr, std::size_t size, - S&& storage, - common::bit_depth native_depth = common::bit_depth::bit8) + S&& storage) : ptr_(ptr) , size_(size) , storage_(std::make_shared(std::forward(storage))) - , native_depth_(native_depth) { } @@ -140,7 +131,6 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(other.storage_) - , native_depth_(other.native_depth_) { } @@ -148,7 +138,6 @@ class array final : ptr_(other.ptr_) , size_(other.size_) , storage_(other.storage_) - , native_depth_(other.native_depth_) { other.ptr_ = nullptr; other.size_ = 0; @@ -167,7 +156,6 @@ class array final const T* data() const { return ptr_; } const T* end() const { return ptr_ + size_; } std::size_t size() const { return size_; } - common::bit_depth native_depth() const { return native_depth_; } explicit operator bool() const { return size_ > 0; } @@ -181,7 +169,6 @@ 
class array final const T* ptr_ = nullptr; std::size_t size_ = 0; std::shared_ptr storage_; - common::bit_depth native_depth_ = common::bit_depth::bit8; }; } // namespace caspar diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h index 9d1b633f84..45c563e060 100644 --- a/src/common/bit_depth.h +++ b/src/common/bit_depth.h @@ -10,4 +10,8 @@ enum class bit_depth : uint8_t bit16 = 1, }; +inline int bytes_per_pixel(bit_depth depth){ + return static_cast(depth) + 1; +} + }} // namespace caspar::common \ No newline at end of file diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index d24fbdfeb4..5cad369a4c 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -66,10 +66,10 @@ struct pixel_format_desc final } plane(int width, int height, int stride, common::bit_depth depth) - : linesize(width * stride * (static_cast(depth) + 1)) + : linesize(width * stride * common::bytes_per_pixel(depth)) , width(width) , height(height) - , size(width * height * stride) + , size(width * height * stride * common::bytes_per_pixel(depth)) , stride(stride) , depth(depth) { From 6618c4b432c0d3eea14cf1f638faab7b91c3cdcb Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 15:22:38 +0000 Subject: [PATCH 23/50] wip: hackily expose composited texture inside const_frame --- src/accelerator/ogl/image/frame_converter.cpp | 12 +++++----- src/accelerator/ogl/image/image_mixer.cpp | 23 +++++++++++-------- src/accelerator/ogl/image/image_mixer.h | 2 +- src/core/frame/frame.cpp | 13 +++++++++-- src/core/frame/frame.h | 4 ++++ src/core/mixer/image/image_mixer.h | 9 +++++++- src/core/mixer/mixer.cpp | 7 +++--- 7 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 330b947a27..8d8407d4cd 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -93,12 +93,12 @@ 
ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor std::vector> textures; - // TODO - avoid this extra copy - size_t i = 0; - for (auto& plane : frame.pixel_format_desc().planes) { - // TODO - this is failing. is the buffer going the wrong direction causing it to fail? - auto texture = ogl_->copy_async(frame.image_data(i++), plane.width, plane.height, plane.stride, plane.depth); - textures.push_back(texture.get()); + { + auto texture_ptr = boost::any_cast>(frame.opaque()); + if (!texture_ptr) { + CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!")); + } + textures.push_back(std::move(texture_ptr)); } auto future_conversion = diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index ae184ac818..d5def0687f 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -85,20 +85,25 @@ class image_renderer { } - std::future> operator()(std::vector layers, + std::future operator()(std::vector layers, const core::video_format_desc& format_desc) { - if (layers.empty()) { // Bypass GPU with empty frame. - static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better - return make_ready_future(array(buffer.data(), format_desc.size, true)); - } + // TODO - re-enable +// if (layers.empty()) { // Bypass GPU with empty frame. 
+// static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better +// return make_ready_future(array(buffer.data(), format_desc.size, true)); +// } - return flatten(ogl_->dispatch_async([=]() mutable -> std::shared_future> { + return flatten(ogl_->dispatch_async([=]() mutable { auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_); draw(target_texture, std::move(layers), format_desc); - return ogl_->copy_async(target_texture); + auto bytes = ogl_->copy_async(target_texture).share(); + + return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture=std::move(target_texture)]() { + return core::mixed_image(bytes.get(), target_texture); + }); })); } @@ -303,7 +308,7 @@ struct image_mixer::impl layer_stack_.resize(transform_stack_.back().layer_depth); } - std::future> render(const core::video_format_desc& format_desc) + std::future render(const core::video_format_desc& format_desc) { return renderer_(std::move(layers_), format_desc); } @@ -379,7 +384,7 @@ image_mixer::~image_mixer() {} void image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); } void image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); } void image_mixer::pop() { impl_->pop(); } -std::future> image_mixer::operator()(const core::video_format_desc& format_desc) +std::future image_mixer::operator()(const core::video_format_desc& format_desc) { return impl_->render(format_desc); } diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index a29873e352..6fe916f775 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -44,7 +44,7 @@ class image_mixer final : public core::image_mixer image_mixer& operator=(const image_mixer&) = delete; - std::future> operator()(const core::video_format_desc& format_desc) override; + std::future operator()(const core::video_format_desc& format_desc) override; 
core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; spl::shared_ptr create_frame_converter() override; diff --git a/src/core/frame/frame.cpp b/src/core/frame/frame.cpp index 4eb5193f18..ff96dce561 100644 --- a/src/core/frame/frame.cpp +++ b/src/core/frame/frame.cpp @@ -97,10 +97,12 @@ struct const_frame::impl impl(std::vector> image_data, array audio_data, - const core::pixel_format_desc& desc) + const core::pixel_format_desc& desc, + boost::any opaque) : image_data_(std::move(image_data)) , audio_data_(std::move(audio_data)) , desc_(desc) + , opaque_(opaque) { if (desc_.planes.size() != image_data_.size()) { CASPAR_THROW_EXCEPTION(invalid_argument()); @@ -147,7 +149,14 @@ const_frame::const_frame() {} const_frame::const_frame(std::vector> image_data, array audio_data, const core::pixel_format_desc& desc) - : impl_(new impl(std::move(image_data), std::move(audio_data), desc)) + : impl_(new impl(std::move(image_data), std::move(audio_data), desc, nullptr)) +{ +} +const_frame::const_frame(std::vector> image_data, + array audio_data, + const struct pixel_format_desc& desc, + boost::any opaque) + : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque)) { } const_frame::const_frame(mutable_frame&& other) diff --git a/src/core/frame/frame.h b/src/core/frame/frame.h index e5fc4ef60c..0a5774d48f 100644 --- a/src/core/frame/frame.h +++ b/src/core/frame/frame.h @@ -61,6 +61,10 @@ class const_frame final explicit const_frame(std::vector> image_data, array audio_data, const struct pixel_format_desc& desc); + explicit const_frame(std::vector> image_data, + array audio_data, + const struct pixel_format_desc& desc, + boost::any opaque); const_frame(const const_frame& other); const_frame(mutable_frame&& other); diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index 494bd8ad5f..30919913ac 100644 --- a/src/core/mixer/image/image_mixer.h +++ b/src/core/mixer/image/image_mixer.h @@ 
-30,6 +30,13 @@ namespace caspar { namespace core { +struct mixed_image{ + array rgba8; + boost::any texture; + + mixed_image(array rgba8, boost::any texture):rgba8(rgba8),texture(texture){} +}; + class image_mixer : public frame_visitor , public frame_factory @@ -45,7 +52,7 @@ class image_mixer void visit(const class const_frame& frame) override = 0; void pop() override = 0; - virtual std::future> operator()(const struct video_format_desc& format_desc) = 0; + virtual std::future operator()(const struct video_format_desc& format_desc) = 0; class mutable_frame create_frame(const void* tag, const struct pixel_format_desc& desc) override = 0; diff --git a/src/core/mixer/mixer.cpp b/src/core/mixer/mixer.cpp index 1095c2108a..70a95bfc3d 100644 --- a/src/core/mixer/mixer.cpp +++ b/src/core/mixer/mixer.cpp @@ -75,11 +75,12 @@ struct mixer::impl buffer_.push(std::async( std::launch::deferred, [image = std::move(image), audio = std::move(audio), graph = graph_, format_desc, tag = this]() mutable { + auto image2 = image.get(); auto desc = pixel_format_desc(pixel_format::bgra); - desc.planes.push_back(pixel_format_desc::plane(format_desc.width, format_desc.height, 4)); + desc.planes.emplace_back(format_desc.width, format_desc.height, 4); std::vector> image_data; - image_data.emplace_back(std::move(image.get())); - return const_frame(std::move(image_data), std::move(audio), desc); + image_data.emplace_back(std::move(image2.rgba8)); + return const_frame(std::move(image_data), std::move(audio), desc, image2.texture); })); if (buffer_.size() <= format_desc.field_count) { From 1e3b38a6ca6088cc42dd046667c7aff17f313470 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 15:44:03 +0000 Subject: [PATCH 24/50] fix --- src/accelerator/ogl/util/device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index d0ff420655..b443a18c8f 100644 --- a/src/accelerator/ogl/util/device.cpp 
+++ b/src/accelerator/ogl/util/device.cpp @@ -341,7 +341,7 @@ struct device::impl : public std::enable_shared_from_this glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id()); } - compute_to_rgba_->use(); + compute_from_rgba_->use(); glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1); From 25fad4536a5d10ab13eaa77378a89ad5efb101cd Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 17:32:46 +0000 Subject: [PATCH 25/50] wip: incorrect conversion, but something semi identifiable --- src/accelerator/ogl/image/frame_converter.cpp | 25 ++--- src/accelerator/ogl/image/image_mixer.cpp | 4 +- .../ogl/image/shader_from_rgba.comp | 97 +++++++++++++++++-- src/accelerator/ogl/util/device.cpp | 35 ++++--- src/accelerator/ogl/util/device.h | 12 ++- src/shell/server.cpp | 2 +- 6 files changed, 135 insertions(+), 40 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 8d8407d4cd..6e5d9277b9 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -75,15 +75,18 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor std::vector> buffers; int x_count = 0; int y_count = 0; + int words_per_line = 0; + switch (format) { case core::encoded_frame_format::decklink_v210: auto row_blocks = ((frame.width() + 47) / 48); auto row_bytes = row_blocks * 128; // TODO - result must be 128byte aligned. can that be guaranteed here? 
- buffers.push_back(ogl_->create_array(row_bytes * frame.height())); - x_count = row_blocks; + buffers.emplace_back(ogl_->create_array(row_bytes * frame.height())); + x_count = row_blocks * 8; y_count = frame.height(); + words_per_line = row_blocks * 32; break; } @@ -91,18 +94,18 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); } - std::vector> textures; - - { - auto texture_ptr = boost::any_cast>(frame.opaque()); - if (!texture_ptr) { - CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!")); - } - textures.push_back(std::move(texture_ptr)); + auto texture_ptr = boost::any_cast>(frame.opaque()); + if (!texture_ptr) { + CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!")); } + convert_from_texture_description description{}; + description.width = frame.width(); + description.height = frame.height(); + description.words_per_line = words_per_line; + auto future_conversion = - ogl_->convert_from_texture(textures, buffers, frame.width(), frame.height(), x_count, y_count); + ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count); return std::async(std::launch::deferred, [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable { diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index d5def0687f..bbd5a4f1d4 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -101,8 +101,8 @@ class image_renderer auto bytes = ogl_->copy_async(target_texture).share(); - return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture=std::move(target_texture)]() { - return core::mixed_image(bytes.get(), target_texture); + return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() { + return 
core::mixed_image(bytes.get(), std::move(target_texture)); }); })); } diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index d17e3eff0a..749203e993 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -4,17 +4,100 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to -layout(std430, binding = 1) buffer bufferOutput +layout(std430, binding = 1) buffer buffer_layout { - uint8 data[]; + uint bufferOutput[]; }; +layout(std430, binding = 2) buffer description_layout +{ + // This must match convert_from_texture_description in device.h + uint frame_width; + uint frame_height; + uint words_per_line; +}; + +vec3 rgba_to_bt709(vec4 pixel) { + float KR = 0.2126; + float KB = 0.0722; + float KG = 1.0 - KR - KB; + + float KRi = 1.0 - KR; + float KBi = 1.0 - KB; + + float YRange = 219.0 / 256.0; + float CbCrRange = 224.0 / 256.0; + float HalfCbCrRange = CbCrRange / 2.0; + + float YOffset = 16.0 / 256.0; + float CbCrOffset = 0.5; + + float KRoKBi = KR / KBi; + float KGoKBi = KG / KBi; + float KBoKRi = KB / KRi; + float KGoKRi = KG / KRi; + +// vec3 y_range = vec3(KR * YRange, KG * YRange, KB * YRange); +// vec3 y_offset = vec3(YOffset, YOffset, YOffset); + + float y16 = KR * pixel.r + KG * pixel.g + KB * pixel.b; + float y = (YOffset + y16 * YRange); + + float cb16 = -KRoKBi * pixel.r - KGoKBi * pixel.g + pixel.b; + float cb = (CbCrOffset + cb16 * HalfCbCrRange); + cb = 0.5; + + float cr16 = pixel.r - KGoKRi * pixel.g - KBoKRi * pixel.b; + float cr = (CbCrOffset + cr16 * HalfCbCrRange); + cr = 0.5; + + return vec3(y, cb, cr); +} + +uint to_10bit(float val) { + return uint(round(clamp(val, 0, 1) * 1023)); +} + +void decklink_v210() { + // basic coordinates + uint y_offset = gl_GlobalInvocationID.y * words_per_line; + uint x_offset = 
gl_GlobalInvocationID.x * 4; // 4 bytes per op + uint offset = y_offset + x_offset; + + uint image_x = gl_GlobalInvocationID.x * 6; + + // sample image + vec4 sample0 = imageLoad(imgInput, ivec2(image_x, gl_GlobalInvocationID.y)); + vec4 sample1 = imageLoad(imgInput, ivec2(image_x+1, gl_GlobalInvocationID.y)); + vec4 sample2 = imageLoad(imgInput, ivec2(image_x+2, gl_GlobalInvocationID.y)); + vec4 sample3 = imageLoad(imgInput, ivec2(image_x+3, gl_GlobalInvocationID.y)); + vec4 sample4 = imageLoad(imgInput, ivec2(image_x+4, gl_GlobalInvocationID.y)); + vec4 sample5 = imageLoad(imgInput, ivec2(image_x+5, gl_GlobalInvocationID.y)); + vec3 ycbcr0 = rgba_to_bt709(sample0); + vec3 ycbcr1 = rgba_to_bt709(sample1); + vec3 ycbcr2 = rgba_to_bt709(sample2); + vec3 ycbcr3 = rgba_to_bt709(sample3); + vec3 ycbcr4 = rgba_to_bt709(sample4); + vec3 ycbcr5 = rgba_to_bt709(sample5); + + // compute words +// uint word0 = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20); +// uint word1 = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20); +// uint word2 = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20); +// uint word3 = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); + uint word0 = 512 + (to_10bit(ycbcr0.s) << 10) + (512 << 20); + uint word1 = to_10bit(ycbcr1.s) + (512 << 10) + (to_10bit(ycbcr2.s) << 20); + uint word2 = 512 + (to_10bit(ycbcr3.s) << 10) + (512 << 20); + uint word3 = to_10bit(ycbcr4.s) + (512 << 10) + (to_10bit(ycbcr5.s) << 20); + + bufferOutput[offset+0] = word0; + bufferOutput[offset+1] = word1; + bufferOutput[offset+2] = word2; + bufferOutput[offset+3] = word3; +} + void main() { - vec4 value = vec4(0.0, 0.0, 0.0, 1.0); ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); - value.r = float(texelCoord.x)/(gl_NumWorkGroups.x); - value.g = float(texelCoord.y)/(gl_NumWorkGroups.y); - - imageStore(imgOutput, texelCoord, value); + decklink_v210(); } \ No newline at 
end of file diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index b443a18c8f..33794db80a 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -314,10 +314,11 @@ struct device::impl : public std::enable_shared_from_this }); } - std::future convert_from_texture(const std::vector>& textures, + + + std::future convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, - int width, - int height, + const convert_from_texture_description& description, int x_count, int y_count) { @@ -325,21 +326,24 @@ struct device::impl : public std::enable_shared_from_this if (!compute_from_rgba_) compute_from_rgba_ = std::make_unique(std::string(compute_from_rgba_shader)); - // TODO: This probably only needs to handle one texture - // for (size_t i = 0; i < textures.size(); i++) { - auto& tex = textures[0]; - glBindImageTexture(0, tex->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F); - // } + // single input texture + glBindImageTexture(0, texture->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F); - for (size_t i = 0; i < buffers.size(); i++) { - auto& source = buffers[i]; + // TODO: only a single buffer? 
+// for (size_t i = 0; i < buffers.size(); i++) { + auto& source = buffers[0]; auto tmp = source.storage>(); if (!tmp) { CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); } - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i + 1, tmp->get()->id()); - } + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, tmp->get()->id()); +// } + + // TODO - binding 2 description + auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false); + std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description)); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, description_buffer->id()); compute_from_rgba_->use(); @@ -529,14 +533,13 @@ device::convert_frame(const std::vector>& sources, int widt { return impl_->convert_frame(sources, width, height, format); } -std::future device::convert_from_texture(const std::vector>& textures, +std::future device::convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, - int width, - int height, + const convert_from_texture_description& description, int x_count, int y_count) { - return impl_->convert_from_texture(textures, buffers, width, height, x_count, y_count); + return impl_->convert_from_texture(texture, buffers, description, x_count, y_count); } void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring device::version() const { return impl_->version(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index b001756468..73d8ddd14c 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -34,6 +34,13 @@ namespace caspar { namespace accelerator { namespace ogl { +// This must match description_layout in shader_from_rgba.comp +struct convert_from_texture_description { + uint32_t width; + uint32_t height; + uint32_t words_per_line; +}; + class device final : public std::enable_shared_from_this , public 
accelerator_device @@ -56,10 +63,9 @@ class device final std::future> convert_frame(const std::vector>& sources, int width, int height, int format); - std::future convert_from_texture(const std::vector>& textures, + std::future convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, - int width, - int height, + const convert_from_texture_description& description, int x_count, int y_count); diff --git a/src/shell/server.cpp b/src/shell/server.cpp index f22e0cc9e2..149adbd793 100644 --- a/src/shell/server.cpp +++ b/src/shell/server.cpp @@ -264,7 +264,7 @@ struct server::impl auto channel = spl::make_shared(channel_id, format_desc, - accelerator_.create_image_mixer(channel_id, common::bit_depth::bit8), + accelerator_.create_image_mixer(channel_id, common::bit_depth::bit16), [channel_id, weak_client](core::monitor::state channel_state) { monitor::state state; state[""]["channel"][channel_id] = channel_state; From e7fc480ab77b0e5ddcd605f28ec15f25c4ae384f Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 17:50:16 +0000 Subject: [PATCH 26/50] fix colour and 8bit texture support --- src/accelerator/ogl/image/frame_converter.cpp | 1 + .../ogl/image/shader_from_rgba.comp | 54 ++++++++----------- src/accelerator/ogl/util/device.cpp | 21 ++++++-- src/accelerator/ogl/util/device.h | 1 + 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 6e5d9277b9..f74d1f614f 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -100,6 +100,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor } convert_from_texture_description description{}; + description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16; description.width = frame.width(); description.height = frame.height(); description.words_per_line = words_per_line; diff --git 
a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index 749203e993..06ac54c6d3 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -2,21 +2,31 @@ layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; -layout(rgba16f, binding = 0) uniform image2D imgInput; // Note: this needs to match what it is writing to +layout(rgba16, binding = 0) uniform image2D imgInput16bit; // Note: this needs to match what it is writing to +layout(rgba8, binding = 1) uniform image2D imgInput8bit; // Note: this needs to match what it is writing to -layout(std430, binding = 1) buffer buffer_layout +layout(std430, binding = 2) buffer buffer_layout { uint bufferOutput[]; }; -layout(std430, binding = 2) buffer description_layout +layout(std430, binding = 3) buffer description_layout { // This must match convert_from_texture_description in device.h + bool is_16_bit; uint frame_width; uint frame_height; uint words_per_line; }; +vec4 read_pixel(ivec2 coord) { + if (is_16_bit){ + return imageLoad(imgInput16bit, coord); + } else { + return imageLoad(imgInput8bit, coord); + } +} + vec3 rgba_to_bt709(vec4 pixel) { float KR = 0.2126; float KB = 0.0722; @@ -45,11 +55,9 @@ vec3 rgba_to_bt709(vec4 pixel) { float cb16 = -KRoKBi * pixel.r - KGoKBi * pixel.g + pixel.b; float cb = (CbCrOffset + cb16 * HalfCbCrRange); - cb = 0.5; float cr16 = pixel.r - KGoKRi * pixel.g - KBoKRi * pixel.b; float cr = (CbCrOffset + cr16 * HalfCbCrRange); - cr = 0.5; return vec3(y, cb, cr); } @@ -63,37 +71,21 @@ void decklink_v210() { uint y_offset = gl_GlobalInvocationID.y * words_per_line; uint x_offset = gl_GlobalInvocationID.x * 4; // 4 bytes per op uint offset = y_offset + x_offset; - uint image_x = gl_GlobalInvocationID.x * 6; // sample image - vec4 sample0 = imageLoad(imgInput, ivec2(image_x, gl_GlobalInvocationID.y)); - vec4 sample1 = imageLoad(imgInput, ivec2(image_x+1, gl_GlobalInvocationID.y)); 
- vec4 sample2 = imageLoad(imgInput, ivec2(image_x+2, gl_GlobalInvocationID.y)); - vec4 sample3 = imageLoad(imgInput, ivec2(image_x+3, gl_GlobalInvocationID.y)); - vec4 sample4 = imageLoad(imgInput, ivec2(image_x+4, gl_GlobalInvocationID.y)); - vec4 sample5 = imageLoad(imgInput, ivec2(image_x+5, gl_GlobalInvocationID.y)); - vec3 ycbcr0 = rgba_to_bt709(sample0); - vec3 ycbcr1 = rgba_to_bt709(sample1); - vec3 ycbcr2 = rgba_to_bt709(sample2); - vec3 ycbcr3 = rgba_to_bt709(sample3); - vec3 ycbcr4 = rgba_to_bt709(sample4); - vec3 ycbcr5 = rgba_to_bt709(sample5); + vec3 ycbcr0 = rgba_to_bt709(read_pixel(ivec2(image_x + 0, gl_GlobalInvocationID.y))); + vec3 ycbcr1 = rgba_to_bt709(read_pixel(ivec2(image_x + 1, gl_GlobalInvocationID.y))); + vec3 ycbcr2 = rgba_to_bt709(read_pixel(ivec2(image_x + 2, gl_GlobalInvocationID.y))); + vec3 ycbcr3 = rgba_to_bt709(read_pixel(ivec2(image_x + 3, gl_GlobalInvocationID.y))); + vec3 ycbcr4 = rgba_to_bt709(read_pixel(ivec2(image_x + 4, gl_GlobalInvocationID.y))); + vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y))); // compute words -// uint word0 = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20); -// uint word1 = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20); -// uint word2 = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20); -// uint word3 = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); - uint word0 = 512 + (to_10bit(ycbcr0.s) << 10) + (512 << 20); - uint word1 = to_10bit(ycbcr1.s) + (512 << 10) + (to_10bit(ycbcr2.s) << 20); - uint word2 = 512 + (to_10bit(ycbcr3.s) << 10) + (512 << 20); - uint word3 = to_10bit(ycbcr4.s) + (512 << 10) + (to_10bit(ycbcr5.s) << 20); - - bufferOutput[offset+0] = word0; - bufferOutput[offset+1] = word1; - bufferOutput[offset+2] = word2; - bufferOutput[offset+3] = word3; + bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + 
(to_10bit(ycbcr0.p) << 20); + bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20); + bufferOutput[offset + 2] = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) << 20); + bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); } void main() { diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 33794db80a..077400658b 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -327,7 +327,20 @@ struct device::impl : public std::enable_shared_from_this compute_from_rgba_ = std::make_unique(std::string(compute_from_rgba_shader)); // single input texture - glBindImageTexture(0, texture->id(), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16F); + GLuint texid_8bit = 0; + GLuint texid_16bit = 0; + + switch(texture->depth()) { + case common::bit_depth::bit8: + texid_8bit = texture->id(); + break; + case common::bit_depth::bit16: + texid_16bit = texture->id(); + break; + } + + GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16)); + GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8)); // TODO: only a single buffer? 
// for (size_t i = 0; i < buffers.size(); i++) { @@ -337,17 +350,17 @@ struct device::impl : public std::enable_shared_from_this CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); } - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, tmp->get()->id()); + GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id())); // } // TODO - binding 2 description auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false); std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description)); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, description_buffer->id()); + GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id())); compute_from_rgba_->use(); - glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1); + GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1)); auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 73d8ddd14c..f3fdcf79d9 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -36,6 +36,7 @@ namespace caspar { namespace accelerator { namespace ogl { // This must match description_layout in shader_from_rgba.comp struct convert_from_texture_description { + bool is_16_bit; uint32_t width; uint32_t height; uint32_t words_per_line; From 030308ffae96fcdacded12b097f451c5516ef40b Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 17:56:03 +0000 Subject: [PATCH 27/50] fix: rgba8 download was incorrectly 16bit packed --- src/accelerator/ogl/image/image_mixer.cpp | 2 +- src/accelerator/ogl/util/device.cpp | 8 ++++---- src/accelerator/ogl/util/device.h | 2 +- src/accelerator/ogl/util/texture.cpp | 7 ++++--- src/accelerator/ogl/util/texture.h | 1 + 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp 
b/src/accelerator/ogl/image/image_mixer.cpp index bbd5a4f1d4..223b665730 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -99,7 +99,7 @@ class image_renderer draw(target_texture, std::move(layers), format_desc); - auto bytes = ogl_->copy_async(target_texture).share(); + auto bytes = ogl_->copy_async(target_texture, true).share(); return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() { return core::mixed_image(bytes.get(), std::move(target_texture)); diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 077400658b..e42e646d5c 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -252,11 +252,11 @@ struct device::impl : public std::enable_shared_from_this }); } - std::future> copy_async(const std::shared_ptr& source) + std::future> copy_async(const std::shared_ptr& source, bool as_rgba8) { return spawn_async([=](yield_context yield) { auto buf = create_buffer(source->size(), false); - source->copy_to(*buf); + source->copy_to(*buf, as_rgba8? 
common::bit_depth::bit8 : source->depth()); sync_queue_.push(nullptr); @@ -537,9 +537,9 @@ device::copy_async(const array& source, int width, int height, in { return impl_->copy_async(source, width, height, stride, depth); } -std::future> device::copy_async(const std::shared_ptr& source) +std::future> device::copy_async(const std::shared_ptr& source, bool as_rgba8) { - return impl_->copy_async(source); + return impl_->copy_async(source, as_rgba8); } std::future> device::convert_frame(const std::vector>& sources, int width, int height, int format) diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index f3fdcf79d9..e457bdc626 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -59,7 +59,7 @@ class device final std::future> copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); - std::future> copy_async(const std::shared_ptr& source); + std::future> copy_async(const std::shared_ptr& source, bool as_rgba8); std::future> convert_frame(const std::vector>& sources, int width, int height, int format); diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 478fc9b4f3..a430ac05fc 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -109,10 +109,10 @@ struct texture::impl src.unbind(); } - void copy_to(buffer& dst) + void copy_to(buffer& dst, common::bit_depth depth) { dst.bind(); - GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast(depth_)][stride_], size_, nullptr)); + GL(glGetTextureImage(id_, 0, FORMAT[stride_], TYPE[static_cast(depth)][stride_], size_, nullptr)); dst.unbind(); } }; @@ -139,7 +139,8 @@ void texture::clear() { impl_->clear(); } void texture::copy_from(int source) { impl_->copy_from(source); } #endif void texture::copy_from(buffer& source) { impl_->copy_from(source); } -void texture::copy_to(buffer& dest) { impl_->copy_to(dest); } +void 
texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); } +void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); } int texture::width() const { return impl_->width_; } int texture::height() const { return impl_->height_; } int texture::stride() const { return impl_->stride_; } diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h index ff2c117f73..b5e7d05237 100644 --- a/src/accelerator/ogl/util/texture.h +++ b/src/accelerator/ogl/util/texture.h @@ -41,6 +41,7 @@ class texture final void copy_from(int source); #endif void copy_from(class buffer& source); + void copy_to(class buffer& dest, common::bit_depth depth); void copy_to(class buffer& dest); void attach(); From d6704df224ff2632b3d924a70c3a89b223f5db9a Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 17:56:56 +0000 Subject: [PATCH 28/50] fix: remove unused windows only gl code --- src/accelerator/ogl/util/device.cpp | 31 ---------------------------- src/accelerator/ogl/util/texture.cpp | 11 ---------- src/accelerator/ogl/util/texture.h | 3 --- 3 files changed, 45 deletions(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index e42e646d5c..bf2821a7f8 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -382,37 +382,6 @@ struct device::impl : public std::enable_shared_from_this }); } -#ifdef WIN32 - std::future> copy_async(GLuint source, int width, int height, int stride) - { - return spawn_async([=](yield_context yield) { - auto tex = create_texture(width, height, stride, false); - - tex->copy_from(source); - - auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - - GL(glFlush()); - - deadline_timer timer(service_); - for (auto n = 0; true; ++n) { - // TODO (perf) Smarter non-polling solution? 
- timer.expires_from_now(boost::posix_time::milliseconds(2)); - timer.async_wait(yield); - - auto wait = glClientWaitSync(fence, 0, 1); - if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED) { - break; - } - } - - glDeleteSync(fence); - - return tex; - }); - } -#endif - boost::property_tree::wptree info() const { boost::property_tree::wptree info; diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index a430ac05fc..4c9e8f0342 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -85,14 +85,6 @@ struct texture::impl void clear() { GL(glClearTexImage(id_, 0, FORMAT[stride_], TYPE[static_cast(depth_)][stride_], nullptr)); } -#ifdef WIN32 - void copy_from(int texture_id) - { - GL(glCopyImageSubData( - texture_id, GL_TEXTURE_2D, 0, 0, 0, 0, id_, GL_TEXTURE_2D, 0, 0, 0, 0, width_, height_, 1)); - } -#endif - void copy_from(buffer& src) { src.bind(); @@ -135,9 +127,6 @@ void texture::bind(int index) { impl_->bind(index); } void texture::unbind() { impl_->unbind(); } void texture::attach() { impl_->attach(); } void texture::clear() { impl_->clear(); } -#ifdef WIN32 -void texture::copy_from(int source) { impl_->copy_from(source); } -#endif void texture::copy_from(buffer& source) { impl_->copy_from(source); } void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); } void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); } diff --git a/src/accelerator/ogl/util/texture.h b/src/accelerator/ogl/util/texture.h index b5e7d05237..da1122a8ee 100644 --- a/src/accelerator/ogl/util/texture.h +++ b/src/accelerator/ogl/util/texture.h @@ -37,9 +37,6 @@ class texture final texture& operator=(const texture&) = delete; texture& operator=(texture&& other); -#ifdef WIN32 - void copy_from(int source); -#endif void copy_from(class buffer& source); void copy_to(class buffer& dest, common::bit_depth depth); void copy_to(class buffer& dest); 
From 54e42bce13b6f3470b12d64c96539e93295a87f9 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Fri, 29 Dec 2023 17:57:52 +0000 Subject: [PATCH 29/50] wip: interleave shader and remove clamp --- src/accelerator/ogl/image/shader_from_rgba.comp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index 06ac54c6d3..3a629ab396 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -63,7 +63,7 @@ vec3 rgba_to_bt709(vec4 pixel) { } uint to_10bit(float val) { - return uint(round(clamp(val, 0, 1) * 1023)); + return uint(round(val * 1023)); } void decklink_v210() { @@ -73,18 +73,19 @@ void decklink_v210() { uint offset = y_offset + x_offset; uint image_x = gl_GlobalInvocationID.x * 6; - // sample image + // sample image and compute words vec3 ycbcr0 = rgba_to_bt709(read_pixel(ivec2(image_x + 0, gl_GlobalInvocationID.y))); + bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20); + vec3 ycbcr1 = rgba_to_bt709(read_pixel(ivec2(image_x + 1, gl_GlobalInvocationID.y))); vec3 ycbcr2 = rgba_to_bt709(read_pixel(ivec2(image_x + 2, gl_GlobalInvocationID.y))); + bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20); + vec3 ycbcr3 = rgba_to_bt709(read_pixel(ivec2(image_x + 3, gl_GlobalInvocationID.y))); vec3 ycbcr4 = rgba_to_bt709(read_pixel(ivec2(image_x + 4, gl_GlobalInvocationID.y))); - vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y))); - - // compute words - bufferOutput[offset + 0] = to_10bit(ycbcr0.t) + (to_10bit(ycbcr0.s) << 10) + (to_10bit(ycbcr0.p) << 20); - bufferOutput[offset + 1] = to_10bit(ycbcr1.s) + (to_10bit(ycbcr2.t) << 10) + (to_10bit(ycbcr2.s) << 20); bufferOutput[offset + 2] = to_10bit(ycbcr2.p) + (to_10bit(ycbcr3.s) << 10) + (to_10bit(ycbcr4.t) 
<< 20); + + vec3 ycbcr5 = rgba_to_bt709(read_pixel(ivec2(image_x + 5, gl_GlobalInvocationID.y))); bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); } From 61559754f49cafd0803472e550f1c0834aee5bf7 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 15:22:35 +0000 Subject: [PATCH 30/50] chore: remove some dead code --- src/accelerator/ogl/image/image_mixer.cpp | 39 +++-------------------- src/accelerator/ogl/util/device.cpp | 10 ++---- src/accelerator/ogl/util/device.h | 3 -- 3 files changed, 8 insertions(+), 44 deletions(-) diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 223b665730..5990e423cb 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -313,21 +313,6 @@ struct image_mixer::impl return renderer_(std::move(layers_), format_desc); } - std::vector convert_frame(const std::vector>& image_data, - const core::pixel_format_desc& desc) const - { - const auto& plane0 = desc.planes[0]; // TODO - this doesnt feel safe, or accurate - - // TODO - desc is no longer 'correct' and should probably be changed to avoid the mixer shader being aware of these formats - - std::vector textures; - - textures.emplace_back(ogl_->convert_frame( - image_data, plane0.width, plane0.height, plane0.width / 2)); // TODO - what is this 'format' parameter? 
- - return textures; - } - core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override { std::vector> image_data; @@ -347,26 +332,12 @@ struct image_mixer::impl return boost::any{}; } - switch (desc.format) { -// case core::pixel_format::ycbcr10_420: -// case core::pixel_format::ycbcr10_422: -// case core::pixel_format::ycbcr10_444: -// case core::pixel_format::ycbcra10_420: -// case core::pixel_format::ycbcra10_422: -// case core::pixel_format::ycbcra10_444: { -// std::vector textures = self->convert_frame(image_data, desc); -// -// return std::make_shared(std::move(textures)); -// } - default: { - std::vector textures; - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); - } - return std::make_shared(std::move(textures)); - } + std::vector textures; + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(self->ogl_->copy_async( + image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); } + return std::make_shared(std::move(textures)); }); } diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index bf2821a7f8..fbddce7a5e 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -292,6 +292,8 @@ struct device::impl : public std::enable_shared_from_this }); } + /* + * In its current form, this is not useful/complete. 
But it will be needed in some form for a producer 'soon' std::future> convert_frame(const std::vector>& sources, int width, int height, int width_samples) { @@ -313,8 +315,7 @@ struct device::impl : public std::enable_shared_from_this return tex; }); } - - + */ std::future convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, @@ -510,11 +511,6 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source, as_rgba8); } -std::future> -device::convert_frame(const std::vector>& sources, int width, int height, int format) -{ - return impl_->convert_frame(sources, width, height, format); -} std::future device::convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, const convert_from_texture_description& description, diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index e457bdc626..060c8d3c15 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -61,9 +61,6 @@ class device final copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); std::future> copy_async(const std::shared_ptr& source, bool as_rgba8); - std::future> - convert_frame(const std::vector>& sources, int width, int height, int format); - std::future convert_from_texture(const std::shared_ptr& texture, const std::vector>& buffers, const convert_from_texture_description& description, From a60827e0e6e297cfa53baea79ca7b9447508aaca Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 15:23:00 +0000 Subject: [PATCH 31/50] chore: format --- src/accelerator/accelerator.cpp | 3 +- src/accelerator/ogl/image/frame_converter.cpp | 57 +++++++-------- src/accelerator/ogl/image/image_mixer.cpp | 69 ++++++++++--------- src/accelerator/ogl/image/image_mixer.h | 7 +- src/accelerator/ogl/util/compute_shader.cpp | 4 +- src/accelerator/ogl/util/device.cpp | 48 ++++++------- src/accelerator/ogl/util/device.h | 17 ++--- 
src/accelerator/ogl/util/texture.cpp | 27 ++++---- src/common/array.h | 30 ++++---- src/common/bit_depth.h | 4 +- src/core/consumer/frame_consumer.cpp | 8 +-- src/core/consumer/frame_consumer.h | 12 ++-- src/core/frame/frame.cpp | 10 +-- src/core/frame/frame.h | 2 +- src/core/frame/pixel_format.h | 24 +++---- src/core/mixer/image/image_mixer.h | 11 ++- src/core/mixer/mixer.cpp | 2 +- src/core/video_channel.cpp | 7 +- src/core/video_channel.h | 2 +- .../artnet/consumer/artnet_consumer.cpp | 2 +- src/modules/artnet/consumer/artnet_consumer.h | 2 +- .../bluefish/consumer/bluefish_consumer.cpp | 6 +- .../bluefish/consumer/bluefish_consumer.h | 4 +- .../decklink/consumer/decklink_consumer.h | 4 +- .../ffmpeg/consumer/ffmpeg_consumer.cpp | 6 +- src/modules/ffmpeg/consumer/ffmpeg_consumer.h | 4 +- src/modules/ffmpeg/util/av_util.cpp | 2 +- src/modules/image/consumer/image_consumer.cpp | 4 +- src/modules/image/consumer/image_consumer.h | 2 +- .../newtek/consumer/newtek_ndi_consumer.cpp | 4 +- .../newtek/consumer/newtek_ndi_consumer.h | 4 +- src/modules/oal/consumer/oal_consumer.cpp | 6 +- src/modules/oal/consumer/oal_consumer.h | 4 +- .../screen/consumer/screen_consumer.cpp | 6 +- src/modules/screen/consumer/screen_consumer.h | 4 +- src/protocol/amcp/AMCPCommandsImpl.cpp | 18 +++-- src/shell/server.cpp | 8 ++- 37 files changed, 229 insertions(+), 205 deletions(-) diff --git a/src/accelerator/accelerator.cpp b/src/accelerator/accelerator.cpp index fb1017091d..c679a7a932 100644 --- a/src/accelerator/accelerator.cpp +++ b/src/accelerator/accelerator.cpp @@ -27,7 +27,8 @@ struct accelerator::impl std::unique_ptr create_image_mixer(int channel_id, common::bit_depth depth) { - return std::make_unique(spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size()); + return std::make_unique( + spl::make_shared_ptr(get_device()), channel_id, depth, format_repository_.get_max_video_format_size()); } std::shared_ptr get_device() diff --git 
a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index f74d1f614f..9a56fcd983 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -43,24 +43,26 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor using future_texture = std::shared_future>; std::weak_ptr weak_self = shared_from_this(); - return core::mutable_frame( - tag, - std::move(image_data), - array{}, - desc, - [weak_self, desc](std::vector> image_data) -> boost::any { - // TODO - replace this - auto self = weak_self.lock(); - if (!self) { - return boost::any{}; - } - std::vector textures; - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); - } - return std::make_shared(std::move(textures)); - }); + return core::mutable_frame(tag, + std::move(image_data), + array{}, + desc, + [weak_self, desc](std::vector> image_data) -> boost::any { + // TODO - replace this + auto self = weak_self.lock(); + if (!self) { + return boost::any{}; + } + std::vector textures; + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(self->ogl_->copy_async(image_data[n], + desc.planes[n].width, + desc.planes[n].height, + desc.planes[n].stride, + desc.planes[n].depth)); + } + return std::make_shared(std::move(textures)); + }); } core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame) @@ -73,9 +75,9 @@ std::shared_future>> ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) { std::vector> buffers; - int x_count = 0; - int y_count = 0; - int words_per_line = 0; + int x_count = 0; + int y_count = 0; + int words_per_line = 0; switch (format) { case core::encoded_frame_format::decklink_v210: @@ -84,8 +86,8 @@ 
ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor // TODO - result must be 128byte aligned. can that be guaranteed here? buffers.emplace_back(ogl_->create_array(row_bytes * frame.height())); - x_count = row_blocks * 8; - y_count = frame.height(); + x_count = row_blocks * 8; + y_count = frame.height(); words_per_line = row_blocks * 32; break; } @@ -100,13 +102,12 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor } convert_from_texture_description description{}; - description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16; - description.width = frame.width(); - description.height = frame.height(); + description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16; + description.width = frame.width(); + description.height = frame.height(); description.words_per_line = words_per_line; - auto future_conversion = - ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count); + auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count); return std::async(std::launch::deferred, [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable { diff --git a/src/accelerator/ogl/image/image_mixer.cpp b/src/accelerator/ogl/image/image_mixer.cpp index 5990e423cb..87025fe18c 100644 --- a/src/accelerator/ogl/image/image_mixer.cpp +++ b/src/accelerator/ogl/image/image_mixer.cpp @@ -85,14 +85,13 @@ class image_renderer { } - std::future operator()(std::vector layers, - const core::video_format_desc& format_desc) + std::future operator()(std::vector layers, const core::video_format_desc& format_desc) { // TODO - re-enable -// if (layers.empty()) { // Bypass GPU with empty frame. -// static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better -// return make_ready_future(array(buffer.data(), format_desc.size, true)); -// } + // if (layers.empty()) { // Bypass GPU with empty frame. 
+ // static const std::vector buffer(max_frame_size_ * 2, 0); // TODO better + // return make_ready_future(array(buffer.data(), format_desc.size, true)); + // } return flatten(ogl_->dispatch_async([=]() mutable { auto target_texture = ogl_->create_texture(format_desc.width, format_desc.height, 4, depth_); @@ -101,9 +100,10 @@ class image_renderer auto bytes = ogl_->copy_async(target_texture, true).share(); - return std::async(std::launch::deferred, [bytes = std::move(bytes), target_texture = std::move(target_texture)]() { - return core::mixed_image(bytes.get(), std::move(target_texture)); - }); + return std::async(std::launch::deferred, + [bytes = std::move(bytes), target_texture = std::move(target_texture)]() { + return core::mixed_image(bytes.get(), std::move(target_texture)); + }); })); } @@ -321,24 +321,26 @@ struct image_mixer::impl } std::weak_ptr weak_self = shared_from_this(); - return core::mutable_frame( - tag, - std::move(image_data), - array{}, - desc, - [weak_self, desc](std::vector> image_data) -> boost::any { - auto self = weak_self.lock(); - if (!self) { - return boost::any{}; - } - - std::vector textures; - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(self->ogl_->copy_async( - image_data[n], desc.planes[n].width, desc.planes[n].height, desc.planes[n].stride, desc.planes[n].depth)); - } - return std::make_shared(std::move(textures)); - }); + return core::mutable_frame(tag, + std::move(image_data), + array{}, + desc, + [weak_self, desc](std::vector> image_data) -> boost::any { + auto self = weak_self.lock(); + if (!self) { + return boost::any{}; + } + + std::vector textures; + for (int n = 0; n < static_cast(desc.planes.size()); ++n) { + textures.emplace_back(self->ogl_->copy_async(image_data[n], + desc.planes[n].width, + desc.planes[n].height, + desc.planes[n].stride, + desc.planes[n].depth)); + } + return std::make_shared(std::move(textures)); + }); } spl::shared_ptr create_frame_converter() override @@ 
-347,14 +349,17 @@ struct image_mixer::impl } }; -image_mixer::image_mixer(const spl::shared_ptr& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size) - : impl_(std::make_unique(ogl, channel_id, depth,max_frame_size)) +image_mixer::image_mixer(const spl::shared_ptr& ogl, + int channel_id, + common::bit_depth depth, + const size_t max_frame_size) + : impl_(std::make_unique(ogl, channel_id, depth, max_frame_size)) { } image_mixer::~image_mixer() {} -void image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); } -void image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); } -void image_mixer::pop() { impl_->pop(); } +void image_mixer::push(const core::frame_transform& transform) { impl_->push(transform); } +void image_mixer::visit(const core::const_frame& frame) { impl_->visit(frame); } +void image_mixer::pop() { impl_->pop(); } std::future image_mixer::operator()(const core::video_format_desc& format_desc) { return impl_->render(format_desc); diff --git a/src/accelerator/ogl/image/image_mixer.h b/src/accelerator/ogl/image/image_mixer.h index 6fe916f775..de2b6ac792 100644 --- a/src/accelerator/ogl/image/image_mixer.h +++ b/src/accelerator/ogl/image/image_mixer.h @@ -37,7 +37,10 @@ namespace caspar { namespace accelerator { namespace ogl { class image_mixer final : public core::image_mixer { public: - image_mixer(const spl::shared_ptr& ogl, int channel_id, common::bit_depth depth, const size_t max_frame_size); + image_mixer(const spl::shared_ptr& ogl, + int channel_id, + common::bit_depth depth, + const size_t max_frame_size); image_mixer(const image_mixer&) = delete; ~image_mixer(); @@ -45,7 +48,7 @@ class image_mixer final : public core::image_mixer image_mixer& operator=(const image_mixer&) = delete; std::future operator()(const core::video_format_desc& format_desc) override; - core::mutable_frame create_frame(const void* tag, const core::pixel_format_desc& desc) override; + core::mutable_frame 
create_frame(const void* tag, const core::pixel_format_desc& desc) override; spl::shared_ptr create_frame_converter() override; diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp index 26aba484df..d737a94d3d 100644 --- a/src/accelerator/ogl/util/compute_shader.cpp +++ b/src/accelerator/ogl/util/compute_shader.cpp @@ -47,8 +47,8 @@ struct compute_shader::impl glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]); - printf("max global (total) work group counts x:%i y:%i z:%i\n", - work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]); + printf( + "max global (total) work group counts x:%i y:%i z:%i\n", work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]); GLint success; diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index fbddce7a5e..32b4374cf4 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -225,8 +225,8 @@ struct device::impl : public std::enable_shared_from_this array create_array(int count) { - auto buf = create_buffer(count, true); - auto ptr = reinterpret_cast(buf->data()); + auto buf = create_buffer(count, true); + auto ptr = reinterpret_cast(buf->data()); return array(ptr, buf->size(), buf); } @@ -256,7 +256,7 @@ struct device::impl : public std::enable_shared_from_this { return spawn_async([=](yield_context yield) { auto buf = create_buffer(source->size(), false); - source->copy_to(*buf, as_rgba8? common::bit_depth::bit8 : source->depth()); + source->copy_to(*buf, as_rgba8 ? 
common::bit_depth::bit8 : source->depth()); sync_queue_.push(nullptr); @@ -317,21 +317,21 @@ struct device::impl : public std::enable_shared_from_this } */ - std::future convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count) + std::future convert_from_texture(const std::shared_ptr& texture, + const std::vector>& buffers, + const convert_from_texture_description& description, + int x_count, + int y_count) { return spawn_async([=](yield_context yield) { if (!compute_from_rgba_) compute_from_rgba_ = std::make_unique(std::string(compute_from_rgba_shader)); // single input texture - GLuint texid_8bit = 0; + GLuint texid_8bit = 0; GLuint texid_16bit = 0; - switch(texture->depth()) { + switch (texture->depth()) { case common::bit_depth::bit8: texid_8bit = texture->id(); break; @@ -344,24 +344,24 @@ struct device::impl : public std::enable_shared_from_this GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8)); // TODO: only a single buffer? 
-// for (size_t i = 0; i < buffers.size(); i++) { - auto& source = buffers[0]; - auto tmp = source.storage>(); - if (!tmp) { - CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); - } + // for (size_t i = 0; i < buffers.size(); i++) { + auto& source = buffers[0]; + auto tmp = source.storage>(); + if (!tmp) { + CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); + } GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id())); -// } + // } // TODO - binding 2 description auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false); - std::memcpy(description_buffer->data(), &description, sizeof (convert_from_texture_description)); + std::memcpy(description_buffer->data(), &description, sizeof(convert_from_texture_description)); GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id())); compute_from_rgba_->use(); - GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1)); + GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1)); auto fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); @@ -511,11 +511,11 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source, as_rgba8); } -std::future device::convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count) +std::future device::convert_from_texture(const std::shared_ptr& texture, + const std::vector>& buffers, + const convert_from_texture_description& description, + int x_count, + int y_count) { return impl_->convert_from_texture(texture, buffers, description, x_count, y_count); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 060c8d3c15..e5dce060cb 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -35,8 +35,9 @@ namespace caspar { namespace accelerator { namespace ogl { 
// This must match description_layout in shader_from_rgba.comp -struct convert_from_texture_description { - bool is_16_bit; +struct convert_from_texture_description +{ + bool is_16_bit; uint32_t width; uint32_t height; uint32_t words_per_line; @@ -58,14 +59,14 @@ class device final array create_array(int size); std::future> - copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); + copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); std::future> copy_async(const std::shared_ptr& source, bool as_rgba8); - std::future convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count); + std::future convert_from_texture(const std::shared_ptr& texture, + const std::vector>& buffers, + const convert_from_texture_description& description, + int x_count, + int y_count); template auto dispatch_async(Func&& func) diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 4c9e8f0342..4e47e8768d 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -56,8 +56,7 @@ struct texture::impl { if (stride == 5) { size_ = width * height * 16; - }else - if (stride == 6) { + } else if (stride == 6) { size_ = width * height * 2; } @@ -123,18 +122,18 @@ texture& texture::operator=(texture&& other) impl_ = std::move(other.impl_); return *this; } -void texture::bind(int index) { impl_->bind(index); } -void texture::unbind() { impl_->unbind(); } -void texture::attach() { impl_->attach(); } -void texture::clear() { impl_->clear(); } -void texture::copy_from(buffer& source) { impl_->copy_from(source); } -void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); } -void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); } -int texture::width() const { return impl_->width_; } -int 
texture::height() const { return impl_->height_; } -int texture::stride() const { return impl_->stride_; } +void texture::bind(int index) { impl_->bind(index); } +void texture::unbind() { impl_->unbind(); } +void texture::attach() { impl_->attach(); } +void texture::clear() { impl_->clear(); } +void texture::copy_from(buffer& source) { impl_->copy_from(source); } +void texture::copy_to(class buffer& dest, common::bit_depth depth) { impl_->copy_to(dest, depth); } +void texture::copy_to(buffer& dest) { impl_->copy_to(dest, impl_->depth_); } +int texture::width() const { return impl_->width_; } +int texture::height() const { return impl_->height_; } +int texture::stride() const { return impl_->stride_; } common::bit_depth texture::depth() const { return impl_->depth_; } -int texture::size() const { return impl_->size_; } -int texture::id() const { return impl_->id_; } +int texture::size() const { return impl_->size_; } +int texture::id() const { return impl_->id_; } }}} // namespace caspar::accelerator::ogl diff --git a/src/common/array.h b/src/common/array.h index 10f27957c3..b7a6019b71 100644 --- a/src/common/array.h +++ b/src/common/array.h @@ -63,17 +63,17 @@ class array final array& operator=(array&& other) { - ptr_ = std::move(other.ptr_); - size_ = std::move(other.size_); - storage_ = std::move(other.storage_); + ptr_ = std::move(other.ptr_); + size_ = std::move(other.size_); + storage_ = std::move(other.storage_); return *this; } - T* begin() const { return ptr_; } - T* data() const { return ptr_; } - T* end() const { return ptr_ + size_; } - std::size_t size() const { return size_; } + T* begin() const { return ptr_; } + T* data() const { return ptr_; } + T* end() const { return ptr_ + size_; } + std::size_t size() const { return size_; } explicit operator bool() const { return size_ > 0; }; @@ -84,8 +84,8 @@ class array final } private: - T* ptr_ = nullptr; - std::size_t size_ = 0; + T* ptr_ = nullptr; + std::size_t size_ = 0; std::shared_ptr storage_; }; @@ 
-118,9 +118,7 @@ class array final } template - explicit array(const T* ptr, - std::size_t size, - S&& storage) + explicit array(const T* ptr, std::size_t size, S&& storage) : ptr_(ptr) , size_(size) , storage_(std::make_shared(std::forward(storage))) @@ -152,10 +150,10 @@ class array final return *this; } - const T* begin() const { return ptr_; } - const T* data() const { return ptr_; } - const T* end() const { return ptr_ + size_; } - std::size_t size() const { return size_; } + const T* begin() const { return ptr_; } + const T* data() const { return ptr_; } + const T* end() const { return ptr_ + size_; } + std::size_t size() const { return size_; } explicit operator bool() const { return size_ > 0; } diff --git a/src/common/bit_depth.h b/src/common/bit_depth.h index 45c563e060..ba6bb0d2b2 100644 --- a/src/common/bit_depth.h +++ b/src/common/bit_depth.h @@ -10,8 +10,6 @@ enum class bit_depth : uint8_t bit16 = 1, }; -inline int bytes_per_pixel(bit_depth depth){ - return static_cast(depth) + 1; -} +inline int bytes_per_pixel(bit_depth depth) { return static_cast(depth) + 1; } }} // namespace caspar::common \ No newline at end of file diff --git a/src/core/consumer/frame_consumer.cpp b/src/core/consumer/frame_consumer.cpp index ae83c2fcd4..512c937f18 100644 --- a/src/core/consumer/frame_consumer.cpp +++ b/src/core/consumer/frame_consumer.cpp @@ -163,7 +163,7 @@ class print_consumer_proxy : public frame_consumer spl::shared_ptr frame_consumer_registry::create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const { if (params.empty()) @@ -174,7 +174,7 @@ frame_consumer_registry::create_consumer(const std::vector& if (!std::any_of( consumer_factories.begin(), consumer_factories.end(), [&](const consumer_factory_t& factory) -> bool { try { - consumer = factory(params, format_repository,frame_converter, channels); + 
consumer = factory(params, format_repository, frame_converter, channels); } catch (...) { CASPAR_LOG_CURRENT_EXCEPTION(); } @@ -190,7 +190,7 @@ spl::shared_ptr frame_consumer_registry::create_consumer(const std::wstring& element_name, const boost::property_tree::wptree& element, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const { auto& preconfigured_consumer_factories = impl_->preconfigured_consumer_factories; @@ -201,7 +201,7 @@ frame_consumer_registry::create_consumer(const std::wstring& << msg_info(L"No consumer factory registered for element name " + element_name)); return spl::make_shared( - spl::make_shared(found->second(element, format_repository,frame_converter, channels))); + spl::make_shared(found->second(element, format_repository, frame_converter, channels))); } const spl::shared_ptr& frame_consumer::empty() diff --git a/src/core/consumer/frame_consumer.h b/src/core/consumer/frame_consumer.h index 5bff60b789..ba8e810147 100644 --- a/src/core/consumer/frame_consumer.h +++ b/src/core/consumer/frame_consumer.h @@ -60,13 +60,13 @@ class frame_consumer }; using consumer_factory_t = - std::function(const std::vector& params, - const core::video_format_repository& format_repository, + std::function(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels)>; using preconfigured_consumer_factory_t = - std::function(const boost::property_tree::wptree& element, - const core::video_format_repository& format_repository, + std::function(const boost::property_tree::wptree& element, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels)>; @@ -80,13 +80,13 @@ class frame_consumer_registry spl::shared_ptr create_consumer(const std::vector& params, const 
core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const; spl::shared_ptr create_consumer(const std::wstring& element_name, const boost::property_tree::wptree& element, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) const; private: diff --git a/src/core/frame/frame.cpp b/src/core/frame/frame.cpp index ff96dce561..afc5579118 100644 --- a/src/core/frame/frame.cpp +++ b/src/core/frame/frame.cpp @@ -98,7 +98,7 @@ struct const_frame::impl impl(std::vector> image_data, array audio_data, const core::pixel_format_desc& desc, - boost::any opaque) + boost::any opaque) : image_data_(std::move(image_data)) , audio_data_(std::move(audio_data)) , desc_(desc) @@ -153,10 +153,10 @@ const_frame::const_frame(std::vector> image_data, { } const_frame::const_frame(std::vector> image_data, - array audio_data, - const struct pixel_format_desc& desc, - boost::any opaque) - : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque)) + array audio_data, + const struct pixel_format_desc& desc, + boost::any opaque) + : impl_(new impl(std::move(image_data), std::move(audio_data), desc, opaque)) { } const_frame::const_frame(mutable_frame&& other) diff --git a/src/core/frame/frame.h b/src/core/frame/frame.h index 0a5774d48f..cec65a5fad 100644 --- a/src/core/frame/frame.h +++ b/src/core/frame/frame.h @@ -64,7 +64,7 @@ class const_frame final explicit const_frame(std::vector> image_data, array audio_data, const struct pixel_format_desc& desc, - boost::any opaque); + boost::any opaque); const_frame(const const_frame& other); const_frame(mutable_frame&& other); diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index 5cad369a4c..ba7faad41b 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -51,12 
+51,12 @@ struct pixel_format_desc final { struct plane { - int linesize = 0; - int width = 0; - int height = 0; - int size = 0; - int stride = 0; - common::bit_depth depth = common::bit_depth::bit8; + int linesize = 0; + int width = 0; + int height = 0; + int size = 0; + int stride = 0; + common::bit_depth depth = common::bit_depth::bit8; plane() = default; @@ -66,12 +66,12 @@ struct pixel_format_desc final } plane(int width, int height, int stride, common::bit_depth depth) - : linesize(width * stride * common::bytes_per_pixel(depth)) - , width(width) - , height(height) - , size(width * height * stride * common::bytes_per_pixel(depth)) - , stride(stride) - , depth(depth) + : linesize(width * stride * common::bytes_per_pixel(depth)) + , width(width) + , height(height) + , size(width * height * stride * common::bytes_per_pixel(depth)) + , stride(stride) + , depth(depth) { } }; diff --git a/src/core/mixer/image/image_mixer.h b/src/core/mixer/image/image_mixer.h index 30919913ac..a993cf7b26 100644 --- a/src/core/mixer/image/image_mixer.h +++ b/src/core/mixer/image/image_mixer.h @@ -30,11 +30,16 @@ namespace caspar { namespace core { -struct mixed_image{ +struct mixed_image +{ array rgba8; - boost::any texture; + boost::any texture; - mixed_image(array rgba8, boost::any texture):rgba8(rgba8),texture(texture){} + mixed_image(array rgba8, boost::any texture) + : rgba8(rgba8) + , texture(texture) + { + } }; class image_mixer diff --git a/src/core/mixer/mixer.cpp b/src/core/mixer/mixer.cpp index 70a95bfc3d..98be54ee33 100644 --- a/src/core/mixer/mixer.cpp +++ b/src/core/mixer/mixer.cpp @@ -76,7 +76,7 @@ struct mixer::impl std::launch::deferred, [image = std::move(image), audio = std::move(audio), graph = graph_, format_desc, tag = this]() mutable { auto image2 = image.get(); - auto desc = pixel_format_desc(pixel_format::bgra); + auto desc = pixel_format_desc(pixel_format::bgra); desc.planes.emplace_back(format_desc.width, format_desc.height, 4); std::vector> image_data; 
image_data.emplace_back(std::move(image2.rgba8)); diff --git a/src/core/video_channel.cpp b/src/core/video_channel.cpp index f2ec344b79..30761ea9b4 100644 --- a/src/core/video_channel.cpp +++ b/src/core/video_channel.cpp @@ -245,12 +245,13 @@ mixer& video_channel::mixer() { return impl_->mixer const output& video_channel::output() const { return impl_->output_; } output& video_channel::output() { return impl_->output_; } spl::shared_ptr video_channel::frame_factory() { return impl_->image_mixer_; } -spl::shared_ptr video_channel::frame_converter() { +spl::shared_ptr video_channel::frame_converter() +{ // TODO - is this too expensive? return impl_->image_mixer_->create_frame_converter(); } -int video_channel::index() const { return impl_->index(); } -core::monitor::state video_channel::state() const { return impl_->state_; } +int video_channel::index() const { return impl_->index(); } +core::monitor::state video_channel::state() const { return impl_->state_; } std::shared_ptr video_channel::route(int index, route_mode mode) { return impl_->route(index, mode); } diff --git a/src/core/video_channel.h b/src/core/video_channel.h index 801fc6e3f8..879ee0f564 100644 --- a/src/core/video_channel.h +++ b/src/core/video_channel.h @@ -84,7 +84,7 @@ class video_channel final const core::output& output() const; core::output& output(); - spl::shared_ptr frame_factory(); + spl::shared_ptr frame_factory(); spl::shared_ptr frame_converter(); int index() const; diff --git a/src/modules/artnet/consumer/artnet_consumer.cpp b/src/modules/artnet/consumer/artnet_consumer.cpp index c08a5ab8a9..05554b7349 100644 --- a/src/modules/artnet/consumer/artnet_consumer.cpp +++ b/src/modules/artnet/consumer/artnet_consumer.cpp @@ -311,7 +311,7 @@ std::vector get_fixtures_ptree(const boost::property_tree::wptree& ptre spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& 
frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config; diff --git a/src/modules/artnet/consumer/artnet_consumer.h b/src/modules/artnet/consumer/artnet_consumer.h index ffb94ad84a..6aef2da0e6 100644 --- a/src/modules/artnet/consumer/artnet_consumer.h +++ b/src/modules/artnet/consumer/artnet_consumer.h @@ -35,6 +35,6 @@ namespace caspar { namespace artnet { spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::artnet diff --git a/src/modules/bluefish/consumer/bluefish_consumer.cpp b/src/modules/bluefish/consumer/bluefish_consumer.cpp index 25d3f01c02..d36bad13a8 100644 --- a/src/modules/bluefish/consumer/bluefish_consumer.cpp +++ b/src/modules/bluefish/consumer/bluefish_consumer.cpp @@ -882,8 +882,8 @@ struct bluefish_consumer_proxy : public core::frame_consumer } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels) { @@ -940,7 +940,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config; diff --git a/src/modules/bluefish/consumer/bluefish_consumer.h b/src/modules/bluefish/consumer/bluefish_consumer.h index 9d942492b3..eb952d4a43 100644 --- a/src/modules/bluefish/consumer/bluefish_consumer.h +++ b/src/modules/bluefish/consumer/bluefish_consumer.h @@ -34,13 +34,13 @@ 
namespace caspar { namespace bluefish { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::bluefish diff --git a/src/modules/decklink/consumer/decklink_consumer.h b/src/modules/decklink/consumer/decklink_consumer.h index 94ffc08b90..3d91c590b7 100644 --- a/src/modules/decklink/consumer/decklink_consumer.h +++ b/src/modules/decklink/consumer/decklink_consumer.h @@ -35,12 +35,12 @@ namespace caspar { namespace decklink { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::decklink diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp index 43c9a81bbb..1a176ddd7f 100644 --- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp +++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.cpp @@ -713,8 +713,8 @@ struct ffmpeg_consumer : public core::frame_consumer } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const 
spl::shared_ptr& frame_converter, const std::vector>& channels) { @@ -732,7 +732,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { return spl::make_shared(u8(ptree.get(L"path", L"")), diff --git a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h index 998eb42a69..ecd01e794f 100644 --- a/src/modules/ffmpeg/consumer/ffmpeg_consumer.h +++ b/src/modules/ffmpeg/consumer/ffmpeg_consumer.h @@ -35,12 +35,12 @@ namespace caspar { namespace ffmpeg { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree&, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::ffmpeg diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index bdba15e853..a58975666e 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -186,7 +186,7 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } case core::pixel_format::ycbcr: - case core::pixel_format::ycbcra:{ + case core::pixel_format::ycbcra: { // Find chroma height // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so // we fall back to calling av_image_fill_pointers with a NULL image buffer. 
We can't unconditionally use diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index d3bee919f7..786080d42c 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -116,8 +116,8 @@ struct image_consumer : public core::frame_consumer } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels) { diff --git a/src/modules/image/consumer/image_consumer.h b/src/modules/image/consumer/image_consumer.h index b779a6e528..1dd38f52ba 100644 --- a/src/modules/image/consumer/image_consumer.h +++ b/src/modules/image/consumer/image_consumer.h @@ -34,7 +34,7 @@ namespace caspar { namespace image { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::image diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp index 79c66c997c..916b0d783f 100644 --- a/src/modules/newtek/consumer/newtek_ndi_consumer.cpp +++ b/src/modules/newtek/consumer/newtek_ndi_consumer.cpp @@ -257,7 +257,7 @@ std::atomic newtek_ndi_consumer::instances_(0); spl::shared_ptr create_ndi_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { if (params.size() < 1 || !boost::iequals(params.at(0), L"NDI")) @@ -270,7 +270,7 @@ create_ndi_consumer(const std::vector& par spl::shared_ptr create_preconfigured_ndi_consumer(const 
boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { auto name = ptree.get(L"name", L""); diff --git a/src/modules/newtek/consumer/newtek_ndi_consumer.h b/src/modules/newtek/consumer/newtek_ndi_consumer.h index 8148ee5385..0cd5bf6a38 100644 --- a/src/modules/newtek/consumer/newtek_ndi_consumer.h +++ b/src/modules/newtek/consumer/newtek_ndi_consumer.h @@ -35,12 +35,12 @@ namespace caspar { namespace newtek { spl::shared_ptr create_ndi_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_ndi_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::newtek diff --git a/src/modules/oal/consumer/oal_consumer.cpp b/src/modules/oal/consumer/oal_consumer.cpp index 6ce9a81c09..c41b395ad6 100644 --- a/src/modules/oal/consumer/oal_consumer.cpp +++ b/src/modules/oal/consumer/oal_consumer.cpp @@ -387,8 +387,8 @@ struct oal_consumer : public core::frame_consumer } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels) { @@ -401,7 +401,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const 
std::vector>& channels) { return spl::make_shared(); diff --git a/src/modules/oal/consumer/oal_consumer.h b/src/modules/oal/consumer/oal_consumer.h index d143412d5b..d5d9428674 100644 --- a/src/modules/oal/consumer/oal_consumer.h +++ b/src/modules/oal/consumer/oal_consumer.h @@ -34,12 +34,12 @@ namespace caspar { namespace oal { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree&, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::oal diff --git a/src/modules/screen/consumer/screen_consumer.cpp b/src/modules/screen/consumer/screen_consumer.cpp index 7f401a483a..b69aa8b840 100644 --- a/src/modules/screen/consumer/screen_consumer.cpp +++ b/src/modules/screen/consumer/screen_consumer.cpp @@ -606,8 +606,8 @@ struct screen_consumer_proxy : public core::frame_consumer } }; -spl::shared_ptr create_consumer(const std::vector& params, - const core::video_format_repository& format_repository, +spl::shared_ptr create_consumer(const std::vector& params, + const core::video_format_repository& format_repository, const spl::shared_ptr& frame_converter, const std::vector>& channels) { @@ -645,7 +645,7 @@ spl::shared_ptr create_consumer(const std::vector create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels) { configuration config; diff --git a/src/modules/screen/consumer/screen_consumer.h b/src/modules/screen/consumer/screen_consumer.h index 16493ab824..0cab8953eb 100644 --- 
a/src/modules/screen/consumer/screen_consumer.h +++ b/src/modules/screen/consumer/screen_consumer.h @@ -33,12 +33,12 @@ namespace caspar { namespace screen { spl::shared_ptr create_consumer(const std::vector& params, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); spl::shared_ptr create_preconfigured_consumer(const boost::property_tree::wptree& ptree, const core::video_format_repository& format_repository, - const spl::shared_ptr& frame_converter, + const spl::shared_ptr& frame_converter, const std::vector>& channels); }} // namespace caspar::screen diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp index cfa23511db..1e92b7558e 100644 --- a/src/protocol/amcp/AMCPCommandsImpl.cpp +++ b/src/protocol/amcp/AMCPCommandsImpl.cpp @@ -455,8 +455,10 @@ std::wstring add_command(command_context& ctx) core::diagnostics::scoped_call_context save; core::diagnostics::call_context::for_thread().video_channel = ctx.channel_index + 1; - auto consumer = ctx.static_context->consumer_registry->create_consumer( - ctx.parameters, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx)); + auto consumer = ctx.static_context->consumer_registry->create_consumer(ctx.parameters, + ctx.static_context->format_repository, + ctx.channel.raw_channel->frame_converter(), + get_channels(ctx)); ctx.channel.raw_channel->output().add(ctx.layer_index(consumer->index()), consumer); return L"202 ADD OK\r\n"; @@ -474,7 +476,10 @@ std::wstring remove_command(command_context& ctx) } index = ctx.static_context->consumer_registry - ->create_consumer(ctx.parameters, ctx.static_context->format_repository,ctx.channel.raw_channel->frame_converter(), get_channels(ctx)) + ->create_consumer(ctx.parameters, + ctx.static_context->format_repository, + ctx.channel.raw_channel->frame_converter(), + get_channels(ctx)) 
->index(); } @@ -487,8 +492,11 @@ std::wstring remove_command(command_context& ctx) std::wstring print_command(command_context& ctx) { - ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer( - {L"IMAGE"}, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx))); + ctx.channel.raw_channel->output().add( + ctx.static_context->consumer_registry->create_consumer({L"IMAGE"}, + ctx.static_context->format_repository, + ctx.channel.raw_channel->frame_converter(), + get_channels(ctx))); return L"202 PRINT OK\r\n"; } diff --git a/src/shell/server.cpp b/src/shell/server.cpp index 149adbd793..ae9db3b5a2 100644 --- a/src/shell/server.cpp +++ b/src/shell/server.cpp @@ -341,8 +341,12 @@ struct server::impl try { if (name != L"") - channel.raw_channel->output().add(consumer_registry_->create_consumer( - name, xml_consumer.second, video_format_repository_, channel.raw_channel->frame_converter(), channels_vec)); + channel.raw_channel->output().add( + consumer_registry_->create_consumer(name, + xml_consumer.second, + video_format_repository_, + channel.raw_channel->frame_converter(), + channels_vec)); } catch (...) 
{ CASPAR_LOG_CURRENT_EXCEPTION(); } From 9cb0282c3015a0a5226e178a0b544f41c689d74d Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 15:32:57 +0000 Subject: [PATCH 32/50] chore: add todos to ndi producer --- src/modules/newtek/producer/newtek_ndi_producer.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/modules/newtek/producer/newtek_ndi_producer.cpp b/src/modules/newtek/producer/newtek_ndi_producer.cpp index 0596f57999..52758681c2 100644 --- a/src/modules/newtek/producer/newtek_ndi_producer.cpp +++ b/src/modules/newtek/producer/newtek_ndi_producer.cpp @@ -180,6 +180,15 @@ struct newtek_ndi_producer : public core::frame_producer av_frame->data[0] = video_frame.p_data; av_frame->linesize[0] = video_frame.line_stride_in_bytes; switch (video_frame.FourCC) { + case NDIlib_FourCC_type_UYVY: + av_frame->format = AV_PIX_FMT_UYVY422; + break; + // case NDIlib_FourCC_type_UYVA: + // case NDIlib_FourCC_type_P216: + // case NDIlib_FourCC_type_PA16: + // case NDIlib_FourCC_type_YV12: + // case NDIlib_FourCC_type_I420: + // case NDIlib_FourCC_type_NV12: case NDIlib_FourCC_type_BGRA: av_frame->format = AV_PIX_FMT_BGRA; break; @@ -192,9 +201,6 @@ struct newtek_ndi_producer : public core::frame_producer case NDIlib_FourCC_type_RGBX: av_frame->format = AV_PIX_FMT_RGBA; break; - case NDIlib_FourCC_type_UYVY: - av_frame->format = AV_PIX_FMT_UYVY422; - break; default: // should never happen because library handles the conversion for us av_frame->format = AV_PIX_FMT_BGRA; break; From 98419f6801e43877b44b903d3c42c857580d01eb Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 15:45:04 +0000 Subject: [PATCH 33/50] wip: tidy --- src/accelerator/ogl/image/frame_converter.cpp | 22 ++++++------- src/accelerator/ogl/image/frame_converter.h | 6 ++-- src/accelerator/ogl/util/device.cpp | 31 ++++++++----------- src/accelerator/ogl/util/device.h | 10 +++--- src/core/frame/frame_factory.h | 6 ++-- 
.../decklink/consumer/decklink_consumer.cpp | 15 ++++----- 6 files changed, 43 insertions(+), 47 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 9a56fcd983..ff75f96871 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -65,19 +65,19 @@ core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const cor }); } -core::draw_frame ogl_frame_converter::convert_frame(const core::mutable_frame& frame) +core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame) { // TODO return core::draw_frame{}; } -std::shared_future>> +std::shared_future> ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) { - std::vector> buffers; - int x_count = 0; - int y_count = 0; - int words_per_line = 0; + array source; + int x_count = 0; + int y_count = 0; + int words_per_line = 0; switch (format) { case core::encoded_frame_format::decklink_v210: @@ -85,14 +85,14 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor auto row_bytes = row_blocks * 128; // TODO - result must be 128byte aligned. can that be guaranteed here? 
- buffers.emplace_back(ogl_->create_array(row_bytes * frame.height())); + source = ogl_->create_array(row_bytes * frame.height()); x_count = row_blocks * 8; y_count = frame.height(); words_per_line = row_blocks * 32; break; } - if (buffers.empty() || x_count == 0 || y_count == 0) { + if (source.size() == 0 || x_count == 0 || y_count == 0) { CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); } @@ -107,13 +107,13 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor description.height = frame.height(); description.words_per_line = words_per_line; - auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffers, description, x_count, y_count); + auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count); return std::async(std::launch::deferred, - [buffers = std::move(buffers), future_conversion = std::move(future_conversion)]() mutable { + [source = std::move(source), future_conversion = std::move(future_conversion)]() mutable { future_conversion.get(); - return std::move(buffers); + return std::move(source); }); } diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index 41daedc772..a8065e3a97 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ b/src/accelerator/ogl/image/frame_converter.h @@ -43,10 +43,10 @@ class ogl_frame_converter core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override; - core::draw_frame convert_frame(const core::mutable_frame& frame) override; + core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override; - std::shared_future>> - convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) override; + std::shared_future> convert_from_rgba(const core::const_frame& frame, + core::encoded_frame_format format) override; private: const spl::shared_ptr ogl_; diff --git 
a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 32b4374cf4..86ae8781cb 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -317,11 +317,11 @@ struct device::impl : public std::enable_shared_from_this } */ - std::future convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count) + std::future convert_from_texture(const std::shared_ptr& texture, + const array& source, + const convert_from_texture_description& description, + int x_count, + int y_count) { return spawn_async([=](yield_context yield) { if (!compute_from_rgba_) @@ -343,16 +343,12 @@ struct device::impl : public std::enable_shared_from_this GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16)); GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8)); - // TODO: only a single buffer? - // for (size_t i = 0; i < buffers.size(); i++) { - auto& source = buffers[0]; - auto tmp = source.storage>(); + auto tmp = source.storage>(); if (!tmp) { CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); } GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id())); - // } // TODO - binding 2 description auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false); @@ -391,8 +387,7 @@ struct device::impl : public std::enable_shared_from_this size_t total_pooled_device_buffer_size = 0; size_t total_pooled_device_buffer_count = 0; - for (size_t i = 0; i < device_pools_.size(); ++i) { - auto& depth_pools = device_pools_.at(i); + for (const auto& depth_pools : device_pools_) { for (size_t i = 0; i < depth_pools.size(); ++i) { auto& pools = depth_pools.at(i); bool mipmapping = i > 3; @@ -511,13 +506,13 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source, as_rgba8); } -std::future 
device::convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count) +std::future device::convert_from_texture(const std::shared_ptr& texture, + const array& source, + const convert_from_texture_description& description, + int x_count, + int y_count) { - return impl_->convert_from_texture(texture, buffers, description, x_count, y_count); + return impl_->convert_from_texture(texture, source, description, x_count, y_count); } void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring device::version() const { return impl_->version(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index e5dce060cb..46872ea1bb 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -62,11 +62,11 @@ class device final copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); std::future> copy_async(const std::shared_ptr& source, bool as_rgba8); - std::future convert_from_texture(const std::shared_ptr& texture, - const std::vector>& buffers, - const convert_from_texture_description& description, - int x_count, - int y_count); + std::future convert_from_texture(const std::shared_ptr& texture, + const array& source, + const convert_from_texture_description& description, + int x_count, + int y_count); template auto dispatch_async(Func&& func) diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 48192135d1..a0849ae171 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -43,10 +43,10 @@ class frame_converter virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; - virtual class draw_frame convert_frame(const class mutable_frame& frame) = 0; + virtual class draw_frame convert_to_rgba(const class 
mutable_frame& frame) = 0; - virtual std::shared_future>> - convert_from_rgba(const core::const_frame& frame, const encoded_frame_format format) = 0; + virtual std::shared_future> convert_from_rgba(const core::const_frame& frame, + encoded_frame_format format) = 0; }; class frame_factory diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 9a158cc0ed..918798fee0 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -424,12 +424,12 @@ struct decklink_secondary_port final : public IDeckLinkVideoOutputCallback struct converted_frame { - core::const_frame raw_frame; - std::shared_future>> frame; + core::const_frame raw_frame; + std::shared_future> frame; - converted_frame(core::const_frame raw_frame, std::shared_future>> frame) + converted_frame(const core::const_frame& raw_frame, std::shared_future> frame) : raw_frame(raw_frame) - , frame(frame) + , frame(std::move(frame)) { } }; @@ -780,6 +780,7 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback tbb::parallel_for(-1, static_cast(secondary_port_contexts_.size()), [&](int i) { if (i == -1) { // Primary port + // TODO - reimplement this // std::shared_ptr image_data = convert_frame_for_port(channel_format_desc_, // decklink_format_desc_, // config_.primary, @@ -787,10 +788,10 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback // frame2, // mode_->GetFieldDominance()); - auto buffers = frame1.value().frame.get(); + auto buffer = frame1.value().frame.get(); std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc_.size, 128); - std::memcpy(image_data.get(), buffers.at(0).data(), buffers.at(0).size()); + std::memcpy(image_data.get(), buffer.data(), buffer.size()); schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time); @@ -883,7 +884,7 @@ struct decklink_consumer final : public 
IDeckLinkVideoOutputCallback // Always push a field2, as we have supplied field1 buffer_cond_.wait(lock, [&] { return buffer_.size() < buffer_capacity_ || abort_request_; }); } - buffer_.push(converted_frame(std::move(frame), frame_future)); + buffer_.push(converted_frame(frame, frame_future)); } buffer_cond_.notify_all(); From 4cf8a2a277ab62ad7b448529ca8edc26ef693fe2 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 15:52:06 +0000 Subject: [PATCH 34/50] wip: reimplement decklink key-only flag --- src/accelerator/ogl/image/frame_converter.cpp | 5 ++++- src/accelerator/ogl/image/frame_converter.h | 4 ++-- src/accelerator/ogl/image/shader_from_rgba.comp | 6 ++++++ src/accelerator/ogl/util/device.h | 1 + src/core/frame/frame_factory.h | 4 ++-- src/modules/decklink/consumer/decklink_consumer.cpp | 3 ++- 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index ff75f96871..8326f7edf4 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -72,7 +72,9 @@ core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& } std::shared_future> -ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format) +ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, + const core::encoded_frame_format format, + bool key_only) { array source; int x_count = 0; @@ -106,6 +108,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const cor description.width = frame.width(); description.height = frame.height(); description.words_per_line = words_per_line; + description.key_only = key_only; auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count); diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index 
a8065e3a97..b3767ecfad 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ b/src/accelerator/ogl/image/frame_converter.h @@ -45,8 +45,8 @@ class ogl_frame_converter core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override; - std::shared_future> convert_from_rgba(const core::const_frame& frame, - core::encoded_frame_format format) override; + std::shared_future> + convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override; private: const spl::shared_ptr ogl_; diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index 3a629ab396..f1bdbe93ce 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -17,6 +17,7 @@ layout(std430, binding = 3) buffer description_layout uint frame_width; uint frame_height; uint words_per_line; + bool key_only; }; vec4 read_pixel(ivec2 coord) { @@ -28,6 +29,11 @@ vec4 read_pixel(ivec2 coord) { } vec3 rgba_to_bt709(vec4 pixel) { + if (key_only) { + // TODO - verify everything about this + return vec3(pixel.a, 0.5, 0.5); + } + float KR = 0.2126; float KB = 0.0722; float KG = 1.0 - KR - KB; diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 46872ea1bb..e6548dd39f 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -41,6 +41,7 @@ struct convert_from_texture_description uint32_t width; uint32_t height; uint32_t words_per_line; + bool key_only; }; class device final diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index a0849ae171..f73bdefd35 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -45,8 +45,8 @@ class frame_converter virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0; - virtual std::shared_future> convert_from_rgba(const core::const_frame& frame, - encoded_frame_format format) = 
0; + virtual std::shared_future> + convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0; }; class frame_factory diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 918798fee0..90e7d8c082 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -877,7 +877,8 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback } if (frame) { - auto frame_future = frame_converter_->convert_from_rgba(frame, core::encoded_frame_format::decklink_v210); + auto frame_future = frame_converter_->convert_from_rgba( + frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only); std::unique_lock lock(buffer_mutex_); if (field != core::video_field::b) { From 4cc8a05d1d4c153de194bb11eb29fec341ed0731 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Tue, 28 Nov 2023 23:52:53 +0000 Subject: [PATCH 35/50] feat: minimise cpu image conversions for image producer --- src/modules/image/producer/image_producer.cpp | 18 +++-- .../image/producer/image_scroll_producer.cpp | 12 +-- src/modules/image/util/image_loader.cpp | 77 ++++++++++++------- src/modules/image/util/image_loader.h | 12 ++- 4 files changed, 75 insertions(+), 44 deletions(-) diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp index f7595375d3..2e469d1374 100644 --- a/src/modules/image/producer/image_producer.cpp +++ b/src/modules/image/producer/image_producer.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -67,7 +68,7 @@ struct image_producer : public core::frame_producer , frame_factory_(frame_factory) , length_(length) { - load(load_image(description_)); + load(load_image(description_, true)); CASPAR_LOG(info) << print() << L" Initialized"; } @@ -80,19 +81,20 @@ struct image_producer : public core::frame_producer , 
frame_factory_(frame_factory) , length_(length) { - load(load_png_from_memory(png_data, size)); + load(load_png_from_memory(png_data, size, true)); CASPAR_LOG(info) << print() << L" Initialized"; } - void load(const std::shared_ptr& bitmap) + void load(const loaded_image& image) { - FreeImage_FlipVertical(bitmap.get()); - core::pixel_format_desc desc(core::pixel_format::bgra); - desc.planes.emplace_back(FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get()), 4); - auto frame = frame_factory_->create_frame(this, desc); + core::pixel_format_desc desc(image.format); + desc.planes.emplace_back( + FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride); + auto frame = frame_factory_->create_frame(this, desc); + frame.geometry() = core::frame_geometry::get_default_vflip(); - std::copy_n(FreeImage_GetBits(bitmap.get()), frame.image_data(0).size(), frame.image_data(0).begin()); + std::copy_n(FreeImage_GetBits(image.bitmap.get()), frame.image_data(0).size(), frame.image_data(0).begin()); frame_ = core::draw_frame(std::move(frame)); } diff --git a/src/modules/image/producer/image_scroll_producer.cpp b/src/modules/image/producer/image_scroll_producer.cpp index 0482ae08cb..2df6825573 100644 --- a/src/modules/image/producer/image_scroll_producer.cpp +++ b/src/modules/image/producer/image_scroll_producer.cpp @@ -140,11 +140,11 @@ struct image_scroll_producer : public core::frame_producer if (end_time_) speed = -1.0; - auto bitmap = load_image(filename_); - FreeImage_FlipVertical(bitmap.get()); + auto bitmap = load_image(filename_, false); + FreeImage_FlipVertical(bitmap.bitmap.get()); - width_ = FreeImage_GetWidth(bitmap.get()); - height_ = FreeImage_GetHeight(bitmap.get()); + width_ = FreeImage_GetWidth(bitmap.bitmap.get()); + height_ = FreeImage_GetHeight(bitmap.bitmap.get()); bool vertical = width_ == format_desc_.width; bool horizontal = height_ == format_desc_.height; @@ -169,7 +169,7 @@ struct image_scroll_producer : 
public core::frame_producer speed_ = speed_tweener(speed, speed, 0, tweener(L"linear")); - auto bytes = FreeImage_GetBits(bitmap.get()); + auto bytes = FreeImage_GetBits(bitmap.bitmap.get()); auto count = width_ * height_ * 4; image_view original_view(bytes, width_, height_); @@ -193,7 +193,7 @@ struct image_scroll_producer : public core::frame_producer caspar::tweener blur_tweener(L"easeInQuad"); blur(original_view, blurred_view, angle, motion_blur_px, blur_tweener); bytes = blurred_copy.get(); - bitmap.reset(); + bitmap.bitmap.reset(); } if (vertical) { diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp index eedc289e04..eefc486592 100644 --- a/src/modules/image/util/image_loader.cpp +++ b/src/modules/image/util/image_loader.cpp @@ -41,9 +41,54 @@ #include "image_algorithms.h" #include "image_view.h" +#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR +#define IMAGE_BGRA_FORMAT core::pixel_format::bgra +#define IMAGE_BGR_FORMAT core::pixel_format::bgr +#else +#define IMAGE_BGRA_FORMAT core::pixel_format::rgba +#define IMAGE_BGR_FORMAT core::pixel_format::rgb +#endif + namespace caspar { namespace image { -std::shared_ptr load_image(const std::wstring& filename) +loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr bitmap, bool allow_all_formats) +{ + core::pixel_format format; + int stride; + + unsigned int bpp = FreeImage_GetBPP(bitmap.get()); + if (bpp == 32) { + format = IMAGE_BGRA_FORMAT; + stride = 4; + } else if (allow_all_formats && bpp == 24) { + format = IMAGE_BGR_FORMAT; + stride = 3; + } else if (allow_all_formats && !FreeImage_IsTransparent(bitmap.get())) { + format = IMAGE_BGR_FORMAT; + stride = 3; + + bitmap = std::shared_ptr(FreeImage_ConvertTo24Bits(bitmap.get()), FreeImage_Unload); + } else { + format = IMAGE_BGRA_FORMAT; + stride = 4; + + bitmap = std::shared_ptr(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload); + } + + if (!bitmap) + 
CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format.")); + + // PNG-images need to be premultiplied with their alpha + if (fif == FIF_PNG && format == IMAGE_BGRA_FORMAT) { + image_view original_view( + FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get())); + premultiply(original_view); + } + + return {std::move(bitmap), format, stride}; +} + +loaded_image load_image(const std::wstring& filename, bool allow_all_formats) { if (!boost::filesystem::exists(filename)) CASPAR_THROW_EXCEPTION(file_not_found() << boost::errinfo_file_name(u8(filename))); @@ -70,23 +115,10 @@ std::shared_ptr load_image(const std::wstring& filename) auto bitmap = std::shared_ptr(FreeImage_Load(fif, u8(filename).c_str(), 0), FreeImage_Unload); #endif - if (FreeImage_GetBPP(bitmap.get()) != 32) { - bitmap = std::shared_ptr(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload); - if (!bitmap) - CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format.")); - } - - // PNG-images need to be premultiplied with their alpha - if (fif == FIF_PNG) { - image_view original_view( - FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get())); - premultiply(original_view); - } - - return bitmap; + return prepare_loaded_image(fif, std::move(bitmap), allow_all_formats); } -std::shared_ptr load_png_from_memory(const void* memory_location, size_t size) +loaded_image load_png_from_memory(const void* memory_location, size_t size, bool allow_all_formats) { FREE_IMAGE_FORMAT fif = FIF_PNG; @@ -95,18 +127,7 @@ std::shared_ptr load_png_from_memory(const void* memory_location, size FreeImage_CloseMemory); auto bitmap = std::shared_ptr(FreeImage_LoadFromMemory(fif, memory.get(), 0), FreeImage_Unload); - if (FreeImage_GetBPP(bitmap.get()) != 32) { - bitmap = std::shared_ptr(FreeImage_ConvertTo32Bits(bitmap.get()), FreeImage_Unload); - - if (!bitmap) - 
CASPAR_THROW_EXCEPTION(invalid_argument() << msg_info("Unsupported image format.")); - } - - // PNG-images need to be premultiplied with their alpha - image_view original_view( - FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get())); - premultiply(original_view); - return bitmap; + return prepare_loaded_image(fif, std::move(bitmap), allow_all_formats); } const std::set& supported_extensions() diff --git a/src/modules/image/util/image_loader.h b/src/modules/image/util/image_loader.h index 81a68bfea8..9362be31bc 100644 --- a/src/modules/image/util/image_loader.h +++ b/src/modules/image/util/image_loader.h @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -29,8 +30,15 @@ struct FIBITMAP; namespace caspar { namespace image { -std::shared_ptr load_image(const std::wstring& filename); -std::shared_ptr load_png_from_memory(const void* memory_location, size_t size); +struct loaded_image +{ + std::shared_ptr bitmap; + core::pixel_format format; + int stride; +}; + +loaded_image load_image(const std::wstring& filename, bool allow_all_formats); +loaded_image load_png_from_memory(const void* memory_location, size_t size, bool allow_all_formats); const std::set& supported_extensions(); }} // namespace caspar::image From 219ada3b9335dabe70b03a8ae13a85c9e89ad1b5 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:10:18 +0000 Subject: [PATCH 36/50] wip: start of 16bit png writing --- src/accelerator/ogl/image/frame_converter.cpp | 40 +++++++++---- src/accelerator/ogl/image/frame_converter.h | 2 + .../ogl/image/shader_from_rgba.comp | 46 ++++++++++++++- src/accelerator/ogl/util/device.cpp | 10 ++-- src/accelerator/ogl/util/device.h | 5 +- src/core/frame/frame_factory.h | 6 +- src/modules/image/consumer/image_consumer.cpp | 59 +++++++++++++++---- src/modules/image/util/image_algorithms.h | 10 ++-- src/modules/image/util/image_view.h | 34 +++++++++++ 9 files changed, 174 insertions(+), 38 
deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 8326f7edf4..5e642e49d5 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -76,25 +76,33 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format, bool key_only) { - array source; - int x_count = 0; - int y_count = 0; + array buffer; + unsigned int x_count = 0; + unsigned int y_count = 0; int words_per_line = 0; switch (format) { + case core::encoded_frame_format::rgba16: + case core::encoded_frame_format::bgra16: + x_count = frame.width(); + y_count = frame.height(); + buffer = ogl_->create_array(frame.width() * frame.height() * 8); + words_per_line = frame.width() * 2; + + break; case core::encoded_frame_format::decklink_v210: auto row_blocks = ((frame.width() + 47) / 48); auto row_bytes = row_blocks * 128; // TODO - result must be 128byte aligned. can that be guaranteed here? 
- source = ogl_->create_array(row_bytes * frame.height()); + buffer = ogl_->create_array(row_bytes * frame.height()); x_count = row_blocks * 8; y_count = frame.height(); words_per_line = row_blocks * 32; break; } - if (source.size() == 0 || x_count == 0 || y_count == 0) { + if (buffer.size() == 0 || x_count == 0 || y_count == 0) { CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); } @@ -104,20 +112,30 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, } convert_from_texture_description description{}; + description.target_format = format; description.is_16_bit = texture_ptr->depth() == common::bit_depth::bit16; description.width = frame.width(); description.height = frame.height(); description.words_per_line = words_per_line; description.key_only = key_only; - auto future_conversion = ogl_->convert_from_texture(texture_ptr, source, description, x_count, y_count); + auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count); + + return std::async(std::launch::deferred, [buffer, future_conversion = std::move(future_conversion)]() mutable { + future_conversion.get(); + + return buffer; + }); +} - return std::async(std::launch::deferred, - [source = std::move(source), future_conversion = std::move(future_conversion)]() mutable { - future_conversion.get(); +common::bit_depth ogl_frame_converter::get_frame_bitdepth(const core::const_frame& frame) +{ + auto texture_ptr = boost::any_cast>(frame.opaque()); + if (!texture_ptr) { + CASPAR_THROW_EXCEPTION(not_supported() << msg_info("No texture inside frame!")); + } - return std::move(source); - }); + return texture_ptr->depth(); } } // namespace caspar::accelerator::ogl \ No newline at end of file diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index b3767ecfad..3d85635d91 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ 
b/src/accelerator/ogl/image/frame_converter.h @@ -48,6 +48,8 @@ class ogl_frame_converter std::shared_future> convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override; + common::bit_depth get_frame_bitdepth(const core::const_frame& frame) override; + private: const spl::shared_ptr ogl_; }; diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index f1bdbe93ce..a783c9e6f2 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -13,6 +13,7 @@ layout(std430, binding = 2) buffer buffer_layout layout(std430, binding = 3) buffer description_layout { // This must match convert_from_texture_description in device.h + uint target_format; bool is_16_bit; uint frame_width; uint frame_height; @@ -75,7 +76,7 @@ uint to_10bit(float val) { void decklink_v210() { // basic coordinates uint y_offset = gl_GlobalInvocationID.y * words_per_line; - uint x_offset = gl_GlobalInvocationID.x * 4; // 4 bytes per op + uint x_offset = gl_GlobalInvocationID.x * 4; // 4 words per op uint offset = y_offset + x_offset; uint image_x = gl_GlobalInvocationID.x * 6; @@ -95,8 +96,49 @@ void decklink_v210() { bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); } +void rgba16() { + // TODO: is this excessive to do in a shader? + // basic coordinates + uint y_offset = gl_GlobalInvocationID.y * words_per_line; + uint x_offset = gl_GlobalInvocationID.x * 2; // 4 words per op + uint offset = y_offset + x_offset; + + // TODO - can this be done solely as integer? 
+ vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)); + + // Note: the texture is stored in reverse order + bufferOutput[offset + 0] = uint(pixel.b * 65535) + (uint(pixel.g * 65535) << 16); + bufferOutput[offset + 1] = uint(pixel.r * 65535) + (uint(pixel.a * 65535) << 16); +} + +void bgra16() { + // TODO: is this excessive to do in a shader? + // basic coordinates + uint y_offset = gl_GlobalInvocationID.y * words_per_line; + uint x_offset = gl_GlobalInvocationID.x * 2; // 4 words per op + uint offset = y_offset + x_offset; + + // TODO - can this be done solely as integer? + vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)); + + // Note: the texture is stored in reverse order + bufferOutput[offset + 0] = uint(pixel.r * 65535) + (uint(pixel.g * 65535) << 16); + bufferOutput[offset + 1] = uint(pixel.b * 65535) + (uint(pixel.a * 65535) << 16); +} + void main() { ivec2 texelCoord = ivec2(gl_GlobalInvocationID.xy); - decklink_v210(); + switch(target_format){ + case 0: // rgba16 + rgba16(); + break; + case 1: // bgra16 + bgra16(); + break; + + case 2: + decklink_v210(); + break; + } } \ No newline at end of file diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 86ae8781cb..9062a322db 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -320,8 +320,8 @@ struct device::impl : public std::enable_shared_from_this std::future convert_from_texture(const std::shared_ptr& texture, const array& source, const convert_from_texture_description& description, - int x_count, - int y_count) + unsigned int x_count, + unsigned int y_count) { return spawn_async([=](yield_context yield) { if (!compute_from_rgba_) @@ -357,7 +357,7 @@ struct device::impl : public std::enable_shared_from_this compute_from_rgba_->use(); - GL(glDispatchCompute((unsigned int)x_count, (unsigned int)y_count, 1)); + GL(glDispatchCompute(x_count, y_count, 1)); auto fence = 
glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); @@ -509,8 +509,8 @@ std::future> device::copy_async(const std::shared_ptr device::convert_from_texture(const std::shared_ptr& texture, const array& source, const convert_from_texture_description& description, - int x_count, - int y_count) + unsigned int x_count, + unsigned int y_count) { return impl_->convert_from_texture(texture, source, description, x_count, y_count); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index e6548dd39f..f6c3b5974f 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -37,6 +37,7 @@ namespace caspar { namespace accelerator { namespace ogl { // This must match description_layout in shader_from_rgba.comp struct convert_from_texture_description { + uint target_format; bool is_16_bit; uint32_t width; uint32_t height; @@ -66,8 +67,8 @@ class device final std::future convert_from_texture(const std::shared_ptr& texture, const array& source, const convert_from_texture_description& description, - int x_count, - int y_count); + unsigned int x_count, + unsigned int y_count); template auto dispatch_async(Func&& func) diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index f73bdefd35..82416fdc48 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -29,7 +29,9 @@ namespace caspar { namespace core { enum encoded_frame_format { - decklink_v210 = 0, + rgba16 = 0, + bgra16 = 1, + decklink_v210 = 2, }; class frame_converter @@ -47,6 +49,8 @@ class frame_converter virtual std::shared_future> convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0; + + virtual common::bit_depth get_frame_bitdepth(const core::const_frame& frame) = 0; }; class frame_factory diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index 786080d42c..41117ce1ac 100644 --- 
a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include @@ -49,15 +50,23 @@ namespace caspar { namespace image { +#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR +#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::bgra16 +#else +#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::rgba16 +#endif + struct image_consumer : public core::frame_consumer { - const std::wstring filename_; + const spl::shared_ptr frame_converter_; + const std::wstring filename_; public: // frame_consumer - explicit image_consumer(std::wstring filename) - : filename_(std::move(filename)) + explicit image_consumer(const spl::shared_ptr& frame_converter, std::wstring filename) + : frame_converter_(frame_converter) + , filename_(std::move(filename)) { } @@ -67,7 +76,7 @@ struct image_consumer : public core::frame_consumer { auto filename = filename_; - std::thread async([frame, filename] { + std::thread async([frame_converter = frame_converter_, frame, filename] { try { auto filename2 = filename; @@ -78,14 +87,40 @@ struct image_consumer : public core::frame_consumer else filename2 = env::media_folder() + filename2 + L".png"; - auto bitmap = std::shared_ptr( - FreeImage_Allocate(static_cast(frame.width()), static_cast(frame.height()), 32), - FreeImage_Unload); - std::memcpy(FreeImage_GetBits(bitmap.get()), frame.image_data(0).begin(), frame.image_data(0).size()); + common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame); + + std::shared_ptr bitmap; + + if (frame_depth != common::bit_depth::bit8) { + bitmap = std::shared_ptr(FreeImage_AllocateT(FIT_RGBA16, + static_cast(frame.width()), + static_cast(frame.height())), + FreeImage_Unload); + + array rgba16_bytes = + frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false).get(); + + std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size()); + + // TODO - this 
doesnt work + image_view original_view(FreeImage_GetBits(bitmap.get()), + static_cast(frame.width()), + static_cast(frame.height())); + unmultiply(original_view, 65535); + } else { + bitmap = std::shared_ptr( + FreeImage_AllocateT( + FIT_BITMAP, static_cast(frame.width()), static_cast(frame.height()), 32), + FreeImage_Unload); + + std::memcpy( + FreeImage_GetBits(bitmap.get()), frame.image_data(0).begin(), frame.image_data(0).size()); - image_view original_view( - FreeImage_GetBits(bitmap.get()), static_cast(frame.width()), static_cast(frame.height())); - unmultiply(original_view); + image_view original_view(FreeImage_GetBits(bitmap.get()), + static_cast(frame.width()), + static_cast(frame.height())); + unmultiply(original_view, 255); + } FreeImage_FlipVertical(bitmap.get()); #ifdef WIN32 @@ -129,7 +164,7 @@ spl::shared_ptr create_consumer(const std::vector 1) filename = params.at(1); - return spl::make_shared(filename); + return spl::make_shared(frame_converter, filename); } }} // namespace caspar::image diff --git a/src/modules/image/util/image_algorithms.h b/src/modules/image/util/image_algorithms.h index 268f5efad3..f6a7c6662c 100644 --- a/src/modules/image/util/image_algorithms.h +++ b/src/modules/image/util/image_algorithms.h @@ -206,21 +206,21 @@ void premultiply(SrcDstView& view_to_modify) * models RGBAPixel. */ template -void unmultiply(SrcDstView& view_to_modify) +void unmultiply(SrcDstView& view_to_modify, int max) { std::for_each(view_to_modify.begin(), view_to_modify.end(), [&](typename SrcDstView::pixel_type& pixel) { int alpha = static_cast(pixel.a()); - if (alpha != 0 && alpha != 255) { + if (alpha != 0 && alpha != max) { // We don't event try to premultiply 0 since it will be unaffected. 
if (pixel.r()) - pixel.r() = static_cast(static_cast(pixel.r()) * 255 / alpha); + pixel.r() = static_cast(static_cast(pixel.r()) * max / alpha); if (pixel.g()) - pixel.g() = static_cast(static_cast(pixel.g()) * 255 / alpha); + pixel.g() = static_cast(static_cast(pixel.g()) * max / alpha); if (pixel.b()) - pixel.b() = static_cast(static_cast(pixel.b()) * 255 / alpha); + pixel.b() = static_cast(static_cast(pixel.b()) * max / alpha); } }); } diff --git a/src/modules/image/util/image_view.h b/src/modules/image/util/image_view.h index b9029a094d..cb0a4300a7 100644 --- a/src/modules/image/util/image_view.h +++ b/src/modules/image/util/image_view.h @@ -59,6 +59,40 @@ class bgra_pixel uint8_t& a() { return a_; } }; +/** + * A POD pixel with a compatible memory layout as a 16bit BGRA pixel (32bits in + * total). + *

+ * Models the PackedPixel concept used by for example image_view. Also models + * the RGBAPixel concept which does not care about the order between RGBA but + * only requires that all 4 channel has accessors. + */ +class bgra16_pixel +{ + uint16_t b_; + uint16_t g_; + uint16_t r_; + uint16_t a_; + + public: + bgra16_pixel(uint16_t b = 0, uint16_t g = 0, uint16_t r = 0, uint16_t a = 0) + : b_(b) + , g_(g) + , r_(r) + , a_(a) + { + } + + const uint16_t& b() const { return b_; } + uint16_t& b() { return b_; } + const uint16_t& g() const { return g_; } + uint16_t& g() { return g_; } + const uint16_t& r() const { return r_; } + uint16_t& r() { return r_; } + const uint16_t& a() const { return a_; } + uint16_t& a() { return a_; } +}; + template class image_sub_view; From 8cfc286c78e68327cbb6a823dd4b08afdc5d7805 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:20:16 +0000 Subject: [PATCH 37/50] fix: image consumer 16bit generation --- src/accelerator/ogl/image/frame_converter.cpp | 4 ++- src/accelerator/ogl/image/frame_converter.h | 6 ++-- .../ogl/image/shader_from_rgba.comp | 18 +++++++--- src/accelerator/ogl/util/device.h | 1 + src/core/frame/frame_factory.h | 2 +- .../decklink/consumer/decklink_consumer.cpp | 2 +- src/modules/image/consumer/image_consumer.cpp | 10 ++---- src/modules/image/util/image_algorithms.h | 10 +++--- src/modules/image/util/image_view.h | 34 ------------------- 9 files changed, 31 insertions(+), 56 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 5e642e49d5..5618be349a 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -74,7 +74,8 @@ core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& std::shared_future> ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, const core::encoded_frame_format format, - bool key_only) + bool key_only, + bool 
straighten) { array buffer; unsigned int x_count = 0; @@ -118,6 +119,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, description.height = frame.height(); description.words_per_line = words_per_line; description.key_only = key_only; + description.straighten = straighten; auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count); diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index 3d85635d91..31f0e1a684 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ b/src/accelerator/ogl/image/frame_converter.h @@ -45,8 +45,10 @@ class ogl_frame_converter core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override; - std::shared_future> - convert_from_rgba(const core::const_frame& frame, core::encoded_frame_format format, bool key_only) override; + std::shared_future> convert_from_rgba(const core::const_frame& frame, + core::encoded_frame_format format, + bool key_only, + bool straighten) override; common::bit_depth get_frame_bitdepth(const core::const_frame& frame) override; diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index a783c9e6f2..76a4e3436e 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -19,14 +19,21 @@ layout(std430, binding = 3) buffer description_layout uint frame_height; uint words_per_line; bool key_only; + bool straigthen; }; +vec4 straighten_pixel(vec4 pixel) { + return vec4( + pixel.r / pixel.a, + pixel.g / pixel.a, + pixel.b / pixel.a, + pixel.a + ); +} + vec4 read_pixel(ivec2 coord) { - if (is_16_bit){ - return imageLoad(imgInput16bit, coord); - } else { - return imageLoad(imgInput8bit, coord); - } + vec4 pixel = is_16_bit ? 
imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord); + return straighten_pixel(pixel); } vec3 rgba_to_bt709(vec4 pixel) { @@ -96,6 +103,7 @@ void decklink_v210() { bufferOutput[offset + 3] = to_10bit(ycbcr4.s) + (to_10bit(ycbcr4.p) << 10) + (to_10bit(ycbcr5.s) << 20); } + void rgba16() { // TODO: is this excessive to do in a shader? // basic coordinates diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index f6c3b5974f..6930a5d886 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -43,6 +43,7 @@ struct convert_from_texture_description uint32_t height; uint32_t words_per_line; bool key_only; + bool straighten; }; class device final diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 82416fdc48..bff013c952 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -48,7 +48,7 @@ class frame_converter virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0; virtual std::shared_future> - convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only) = 0; + convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only, bool straighten) = 0; virtual common::bit_depth get_frame_bitdepth(const core::const_frame& frame) = 0; }; diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index 90e7d8c082..ed58593bec 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -878,7 +878,7 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback if (frame) { auto frame_future = frame_converter_->convert_from_rgba( - frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only); + frame, core::encoded_frame_format::decklink_v210, config_.primary.key_only, false); std::unique_lock lock(buffer_mutex_); if 
(field != core::video_field::b) { diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index 41117ce1ac..9735d55507 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -98,15 +98,11 @@ struct image_consumer : public core::frame_consumer FreeImage_Unload); array rgba16_bytes = - frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false).get(); + frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false, true).get(); std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size()); - // TODO - this doesnt work - image_view original_view(FreeImage_GetBits(bitmap.get()), - static_cast(frame.width()), - static_cast(frame.height())); - unmultiply(original_view, 65535); + // Note: premultiplication is done on the gpu } else { bitmap = std::shared_ptr( FreeImage_AllocateT( @@ -119,7 +115,7 @@ struct image_consumer : public core::frame_consumer image_view original_view(FreeImage_GetBits(bitmap.get()), static_cast(frame.width()), static_cast(frame.height())); - unmultiply(original_view, 255); + unmultiply(original_view); } FreeImage_FlipVertical(bitmap.get()); diff --git a/src/modules/image/util/image_algorithms.h b/src/modules/image/util/image_algorithms.h index f6a7c6662c..268f5efad3 100644 --- a/src/modules/image/util/image_algorithms.h +++ b/src/modules/image/util/image_algorithms.h @@ -206,21 +206,21 @@ void premultiply(SrcDstView& view_to_modify) * models RGBAPixel. */ template -void unmultiply(SrcDstView& view_to_modify, int max) +void unmultiply(SrcDstView& view_to_modify) { std::for_each(view_to_modify.begin(), view_to_modify.end(), [&](typename SrcDstView::pixel_type& pixel) { int alpha = static_cast(pixel.a()); - if (alpha != 0 && alpha != max) { + if (alpha != 0 && alpha != 255) { // We don't event try to premultiply 0 since it will be unaffected. 
if (pixel.r()) - pixel.r() = static_cast(static_cast(pixel.r()) * max / alpha); + pixel.r() = static_cast(static_cast(pixel.r()) * 255 / alpha); if (pixel.g()) - pixel.g() = static_cast(static_cast(pixel.g()) * max / alpha); + pixel.g() = static_cast(static_cast(pixel.g()) * 255 / alpha); if (pixel.b()) - pixel.b() = static_cast(static_cast(pixel.b()) * max / alpha); + pixel.b() = static_cast(static_cast(pixel.b()) * 255 / alpha); } }); } diff --git a/src/modules/image/util/image_view.h b/src/modules/image/util/image_view.h index cb0a4300a7..b9029a094d 100644 --- a/src/modules/image/util/image_view.h +++ b/src/modules/image/util/image_view.h @@ -59,40 +59,6 @@ class bgra_pixel uint8_t& a() { return a_; } }; -/** - * A POD pixel with a compatible memory layout as a 16bit BGRA pixel (32bits in - * total). - *

- * Models the PackedPixel concept used by for example image_view. Also models - * the RGBAPixel concept which does not care about the order between RGBA but - * only requires that all 4 channel has accessors. - */ -class bgra16_pixel -{ - uint16_t b_; - uint16_t g_; - uint16_t r_; - uint16_t a_; - - public: - bgra16_pixel(uint16_t b = 0, uint16_t g = 0, uint16_t r = 0, uint16_t a = 0) - : b_(b) - , g_(g) - , r_(r) - , a_(a) - { - } - - const uint16_t& b() const { return b_; } - uint16_t& b() { return b_; } - const uint16_t& g() const { return g_; } - uint16_t& g() { return g_; } - const uint16_t& r() const { return r_; } - uint16_t& r() { return r_; } - const uint16_t& a() const { return a_; } - uint16_t& a() { return a_; } -}; - template class image_sub_view; From f22f6a2dc1aff1494f88148350171b01a5bc2067 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:32:33 +0000 Subject: [PATCH 38/50] fix: image consumer 16bit defined by amcp --- src/accelerator/ogl/util/device.cpp | 2 +- src/modules/image/consumer/image_consumer.cpp | 23 ++++++++++++------- src/protocol/amcp/AMCPCommandsImpl.cpp | 11 +++++---- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 9062a322db..f2e6d8ca12 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this std::future> copy_async(const std::shared_ptr& source, bool as_rgba8) { return spawn_async([=](yield_context yield) { - auto buf = create_buffer(source->size(), false); + auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false); source->copy_to(*buf, as_rgba8 ? 
common::bit_depth::bit8 : source->depth()); sync_queue_.push(nullptr); diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index 9735d55507..c035edfcfc 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -60,13 +61,17 @@ struct image_consumer : public core::frame_consumer { const spl::shared_ptr frame_converter_; const std::wstring filename_; + const bool depth16_; public: // frame_consumer - explicit image_consumer(const spl::shared_ptr& frame_converter, std::wstring filename) + explicit image_consumer(const spl::shared_ptr& frame_converter, + std::wstring filename, + bool depth16) : frame_converter_(frame_converter) , filename_(std::move(filename)) + , depth16_(depth16) { } @@ -74,9 +79,7 @@ struct image_consumer : public core::frame_consumer std::future send(core::video_field field, core::const_frame frame) override { - auto filename = filename_; - - std::thread async([frame_converter = frame_converter_, frame, filename] { + std::thread async([frame_converter = frame_converter_, depth16 = depth16_, frame, filename = filename_] { try { auto filename2 = filename; @@ -87,11 +90,10 @@ struct image_consumer : public core::frame_consumer else filename2 = env::media_folder() + filename2 + L".png"; - common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame); - std::shared_ptr bitmap; + common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame); - if (frame_depth != common::bit_depth::bit8) { + if (depth16 && frame_depth != common::bit_depth::bit8) { bitmap = std::shared_ptr(FreeImage_AllocateT(FIT_RGBA16, static_cast(frame.width()), static_cast(frame.height())), @@ -156,11 +158,16 @@ spl::shared_ptr create_consumer(const std::vector 1) filename = params.at(1); + if (params.size() > 2) { + depth16 = + contains_param(L"16BIT", params) || 
contains_param(L"16-BIT", params) || contains_param(L"16_BIT", params); + } - return spl::make_shared(frame_converter, filename); + return spl::make_shared(frame_converter, filename, depth16); } }} // namespace caspar::image diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp index 1e92b7558e..08d636c277 100644 --- a/src/protocol/amcp/AMCPCommandsImpl.cpp +++ b/src/protocol/amcp/AMCPCommandsImpl.cpp @@ -492,11 +492,12 @@ std::wstring remove_command(command_context& ctx) std::wstring print_command(command_context& ctx) { - ctx.channel.raw_channel->output().add( - ctx.static_context->consumer_registry->create_consumer({L"IMAGE"}, - ctx.static_context->format_repository, - ctx.channel.raw_channel->frame_converter(), - get_channels(ctx))); + std::vector params = {L"IMAGE"}; + params.resize(ctx.parameters.size() + 1); + std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1); + + ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer( + params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx))); return L"202 PRINT OK\r\n"; } From 7476448a6f7980196e8f071f4dc2aa02c08ae1c1 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:33:12 +0000 Subject: [PATCH 39/50] Revert "fix: image consumer 16bit defined by amcp" This reverts commit f22f6a2dc1aff1494f88148350171b01a5bc2067. 
--- src/accelerator/ogl/util/device.cpp | 2 +- src/modules/image/consumer/image_consumer.cpp | 23 +++++++------------ src/protocol/amcp/AMCPCommandsImpl.cpp | 11 ++++----- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index f2e6d8ca12..9062a322db 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this std::future> copy_async(const std::shared_ptr& source, bool as_rgba8) { return spawn_async([=](yield_context yield) { - auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false); + auto buf = create_buffer(source->size(), false); source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth()); sync_queue_.push(nullptr); diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index c035edfcfc..9735d55507 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include @@ -61,17 +60,13 @@ struct image_consumer : public core::frame_consumer { const spl::shared_ptr frame_converter_; const std::wstring filename_; - const bool depth16_; public: // frame_consumer - explicit image_consumer(const spl::shared_ptr& frame_converter, - std::wstring filename, - bool depth16) + explicit image_consumer(const spl::shared_ptr& frame_converter, std::wstring filename) : frame_converter_(frame_converter) , filename_(std::move(filename)) - , depth16_(depth16) { } @@ -79,7 +74,9 @@ struct image_consumer : public core::frame_consumer std::future send(core::video_field field, core::const_frame frame) override { - std::thread async([frame_converter = frame_converter_, depth16 = depth16_, frame, filename = filename_] { + auto filename = filename_; + + std::thread async([frame_converter = 
frame_converter_, frame, filename] { try { auto filename2 = filename; @@ -90,10 +87,11 @@ struct image_consumer : public core::frame_consumer else filename2 = env::media_folder() + filename2 + L".png"; + common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame); + std::shared_ptr bitmap; - common::bit_depth frame_depth = frame_converter->get_frame_bitdepth(frame); - if (depth16 && frame_depth != common::bit_depth::bit8) { + if (frame_depth != common::bit_depth::bit8) { bitmap = std::shared_ptr(FreeImage_AllocateT(FIT_RGBA16, static_cast(frame.width()), static_cast(frame.height())), @@ -158,16 +156,11 @@ spl::shared_ptr create_consumer(const std::vector 1) filename = params.at(1); - if (params.size() > 2) { - depth16 = - contains_param(L"16BIT", params) || contains_param(L"16-BIT", params) || contains_param(L"16_BIT", params); - } - return spl::make_shared(frame_converter, filename, depth16); + return spl::make_shared(frame_converter, filename); } }} // namespace caspar::image diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp index 08d636c277..1e92b7558e 100644 --- a/src/protocol/amcp/AMCPCommandsImpl.cpp +++ b/src/protocol/amcp/AMCPCommandsImpl.cpp @@ -492,12 +492,11 @@ std::wstring remove_command(command_context& ctx) std::wstring print_command(command_context& ctx) { - std::vector params = {L"IMAGE"}; - params.resize(ctx.parameters.size() + 1); - std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1); - - ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer( - params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx))); + ctx.channel.raw_channel->output().add( + ctx.static_context->consumer_registry->create_consumer({L"IMAGE"}, + ctx.static_context->format_repository, + ctx.channel.raw_channel->frame_converter(), + get_channels(ctx))); return L"202 PRINT OK\r\n"; } From 
a99522e20c30594e39bb0ab1a54a763b7345795c Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:33:43 +0000 Subject: [PATCH 40/50] fix: typo --- src/accelerator/ogl/util/device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 9062a322db..f2e6d8ca12 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -255,7 +255,7 @@ struct device::impl : public std::enable_shared_from_this std::future> copy_async(const std::shared_ptr& source, bool as_rgba8) { return spawn_async([=](yield_context yield) { - auto buf = create_buffer(source->size(), false); + auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false); source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth()); sync_queue_.push(nullptr); From c7984a41408d66bb2c1ae75c36b47694059a5aad Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:48:02 +0000 Subject: [PATCH 41/50] feat: image producer can work in 64bit --- src/accelerator/ogl/image/image_kernel.cpp | 2 ++ src/accelerator/ogl/image/shader.frag | 4 ++++ src/core/frame/pixel_format.h | 3 ++- src/modules/image/producer/image_producer.cpp | 3 ++- src/modules/image/util/image_loader.cpp | 16 ++++++++++++++-- src/modules/image/util/image_loader.h | 5 +++++ 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/accelerator/ogl/image/image_kernel.cpp b/src/accelerator/ogl/image/image_kernel.cpp index 155d574e8d..f4b3e57929 100644 --- a/src/accelerator/ogl/image/image_kernel.cpp +++ b/src/accelerator/ogl/image/image_kernel.cpp @@ -239,6 +239,8 @@ struct image_kernel::impl shader_->use(); + shader_->set("is_straight_alpha", params.pix_desc.is_straight); + shader_->set("plane[0]", texture_id::plane0); shader_->set("plane[1]", texture_id::plane1); shader_->set("plane[2]", texture_id::plane2); diff --git a/src/accelerator/ogl/image/shader.frag 
b/src/accelerator/ogl/image/shader.frag index f8d3356c6c..b515bd3589 100644 --- a/src/accelerator/ogl/image/shader.frag +++ b/src/accelerator/ogl/image/shader.frag @@ -8,6 +8,8 @@ uniform sampler2D plane[4]; uniform sampler2D local_key; uniform sampler2D layer_key; +uniform bool is_straight_alpha; + uniform bool is_hd; uniform bool has_local_key; uniform bool has_layer_key; @@ -543,6 +545,8 @@ vec4 get_rgba_color() void main() { vec4 color = get_rgba_color(); + if (is_straight_alpha) + color.rgb *= color.a; if (chroma) color = chroma_key(color); if(levels) diff --git a/src/core/frame/pixel_format.h b/src/core/frame/pixel_format.h index ba7faad41b..90823360d9 100644 --- a/src/core/frame/pixel_format.h +++ b/src/core/frame/pixel_format.h @@ -83,7 +83,8 @@ struct pixel_format_desc final { } - pixel_format format = pixel_format::invalid; + pixel_format format = pixel_format::invalid; + bool is_straight = false; std::vector planes; }; diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp index 2e469d1374..18a0f4328b 100644 --- a/src/modules/image/producer/image_producer.cpp +++ b/src/modules/image/producer/image_producer.cpp @@ -89,8 +89,9 @@ struct image_producer : public core::frame_producer void load(const loaded_image& image) { core::pixel_format_desc desc(image.format); + desc.is_straight = image.is_straight; desc.planes.emplace_back( - FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride); + FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride, image.depth); auto frame = frame_factory_->create_frame(this, desc); frame.geometry() = core::frame_geometry::get_default_vflip(); diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp index eefc486592..a76236d8f9 100644 --- a/src/modules/image/util/image_loader.cpp +++ b/src/modules/image/util/image_loader.cpp @@ -55,19 +55,30 @@ loaded_image 
prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr(FreeImage_ConvertTo24Bits(bitmap.get()), FreeImage_Unload); + } else { format = IMAGE_BGRA_FORMAT; stride = 4; @@ -79,13 +90,14 @@ loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr original_view( FreeImage_GetBits(bitmap.get()), FreeImage_GetWidth(bitmap.get()), FreeImage_GetHeight(bitmap.get())); premultiply(original_view); } - return {std::move(bitmap), format, stride}; + return {std::move(bitmap), format, stride, depth, is_straight}; } loaded_image load_image(const std::wstring& filename, bool allow_all_formats) diff --git a/src/modules/image/util/image_loader.h b/src/modules/image/util/image_loader.h index 9362be31bc..725e3b18ed 100644 --- a/src/modules/image/util/image_loader.h +++ b/src/modules/image/util/image_loader.h @@ -22,6 +22,9 @@ #pragma once #include + +#include + #include #include #include @@ -35,6 +38,8 @@ struct loaded_image std::shared_ptr bitmap; core::pixel_format format; int stride; + common::bit_depth depth; + bool is_straight; }; loaded_image load_image(const std::wstring& filename, bool allow_all_formats); From 544b51627c1a4063defd0666269ec2dc28c142b9 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 17:49:30 +0000 Subject: [PATCH 42/50] fix: 64bit freeimage endianness --- src/accelerator/ogl/image/shader_from_rgba.comp | 10 ++++------ src/modules/image/consumer/image_consumer.cpp | 10 +++------- src/modules/image/producer/image_producer.cpp | 1 + src/modules/image/util/image_loader.cpp | 2 ++ 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index 76a4e3436e..da53c58e5e 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -114,9 +114,8 @@ void rgba16() { // TODO - can this be done solely as integer? 
vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)); - // Note: the texture is stored in reverse order - bufferOutput[offset + 0] = uint(pixel.b * 65535) + (uint(pixel.g * 65535) << 16); - bufferOutput[offset + 1] = uint(pixel.r * 65535) + (uint(pixel.a * 65535) << 16); + bufferOutput[offset + 0] = uint(pixel.r * 65535) + (uint(pixel.g * 65535) << 16); + bufferOutput[offset + 1] = uint(pixel.b * 65535) + (uint(pixel.a * 65535) << 16); } void bgra16() { @@ -129,9 +128,8 @@ void bgra16() { // TODO - can this be done solely as integer? vec4 pixel = read_pixel(ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y)); - // Note: the texture is stored in reverse order - bufferOutput[offset + 0] = uint(pixel.r * 65535) + (uint(pixel.g * 65535) << 16); - bufferOutput[offset + 1] = uint(pixel.b * 65535) + (uint(pixel.a * 65535) << 16); + bufferOutput[offset + 0] = uint(pixel.b * 65535) + (uint(pixel.g * 65535) << 16); + bufferOutput[offset + 1] = uint(pixel.r * 65535) + (uint(pixel.a * 65535) << 16); } void main() { diff --git a/src/modules/image/consumer/image_consumer.cpp b/src/modules/image/consumer/image_consumer.cpp index 9735d55507..98ab30e3b5 100644 --- a/src/modules/image/consumer/image_consumer.cpp +++ b/src/modules/image/consumer/image_consumer.cpp @@ -50,12 +50,6 @@ namespace caspar { namespace image { -#if FREEIMAGE_COLORORDER == FREEIMAGE_COLORORDER_BGR -#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::bgra16 -#else -#define IMAGE_ENCODED_FORMAT core::encoded_frame_format::rgba16 -#endif - struct image_consumer : public core::frame_consumer { const spl::shared_ptr frame_converter_; @@ -97,8 +91,10 @@ struct image_consumer : public core::frame_consumer static_cast(frame.height())), FreeImage_Unload); + // freeimage appears to ignore endianness array rgba16_bytes = - frame_converter->convert_from_rgba(frame, IMAGE_ENCODED_FORMAT, false, true).get(); + frame_converter->convert_from_rgba(frame, 
core::encoded_frame_format::rgba16, false, true) + .get(); std::memcpy(FreeImage_GetBits(bitmap.get()), rgba16_bytes.data(), rgba16_bytes.size()); diff --git a/src/modules/image/producer/image_producer.cpp b/src/modules/image/producer/image_producer.cpp index 18a0f4328b..fd4666b20d 100644 --- a/src/modules/image/producer/image_producer.cpp +++ b/src/modules/image/producer/image_producer.cpp @@ -92,6 +92,7 @@ struct image_producer : public core::frame_producer desc.is_straight = image.is_straight; desc.planes.emplace_back( FreeImage_GetWidth(image.bitmap.get()), FreeImage_GetHeight(image.bitmap.get()), image.stride, image.depth); + auto frame = frame_factory_->create_frame(this, desc); frame.geometry() = core::frame_geometry::get_default_vflip(); diff --git a/src/modules/image/util/image_loader.cpp b/src/modules/image/util/image_loader.cpp index a76236d8f9..46bdef21c1 100644 --- a/src/modules/image/util/image_loader.cpp +++ b/src/modules/image/util/image_loader.cpp @@ -66,10 +66,12 @@ loaded_image prepare_loaded_image(FREE_IMAGE_FORMAT fif, std::shared_ptr Date: Sat, 30 Dec 2023 17:51:49 +0000 Subject: [PATCH 43/50] fix: propagate parameters from print command to image consumer --- src/protocol/amcp/AMCPCommandsImpl.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/protocol/amcp/AMCPCommandsImpl.cpp b/src/protocol/amcp/AMCPCommandsImpl.cpp index 1e92b7558e..a45543667d 100644 --- a/src/protocol/amcp/AMCPCommandsImpl.cpp +++ b/src/protocol/amcp/AMCPCommandsImpl.cpp @@ -492,11 +492,14 @@ std::wstring remove_command(command_context& ctx) std::wstring print_command(command_context& ctx) { - ctx.channel.raw_channel->output().add( - ctx.static_context->consumer_registry->create_consumer({L"IMAGE"}, - ctx.static_context->format_repository, - ctx.channel.raw_channel->frame_converter(), - get_channels(ctx))); + std::vector params = {L"IMAGE"}; + if (!ctx.parameters.empty()) { + params.resize(ctx.parameters.size() + 1); + 
std::copy(std::cbegin(ctx.parameters), std::cend(ctx.parameters), params.begin() + 1); + } + + ctx.channel.raw_channel->output().add(ctx.static_context->consumer_registry->create_consumer( + params, ctx.static_context->format_repository, ctx.channel.raw_channel->frame_converter(), get_channels(ctx))); return L"202 PRINT OK\r\n"; } From 60fca061ebdfc9c2bae257e92ac6bcdd00dfd16b Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 18:02:11 +0000 Subject: [PATCH 44/50] wip: tidy --- src/accelerator/ogl/util/device.cpp | 6 ++++-- src/accelerator/ogl/util/texture.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index f2e6d8ca12..62b1edae64 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -255,8 +255,10 @@ struct device::impl : public std::enable_shared_from_this std::future> copy_async(const std::shared_ptr& source, bool as_rgba8) { return spawn_async([=](yield_context yield) { - auto buf = create_buffer(as_rgba8 ? source->size() / 2 : source->size(), false); - source->copy_to(*buf, as_rgba8 ? common::bit_depth::bit8 : source->depth()); + auto bit_depth = as_rgba8 ? 
common::bit_depth::bit8 : source->depth(); + auto buf = create_buffer( + source->width() * source->height() * source->stride() * common::bytes_per_pixel(bit_depth), false); + source->copy_to(*buf, bit_depth); sync_queue_.push(nullptr); diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index 4e47e8768d..c489b43f3f 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -52,7 +52,7 @@ struct texture::impl , height_(height) , stride_(stride) , depth_(depth) - , size_(width * height * stride * (1 + static_cast(depth))) + , size_(width * height * stride * common::bytes_per_pixel(depth)) { if (stride == 5) { size_ = width * height * 16; From 69c29f57e6b64d66140e831de761734c8e3b4796 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 18:38:53 +0000 Subject: [PATCH 45/50] wip: tidying --- src/accelerator/ogl/image/frame_converter.cpp | 22 +++++-------- src/accelerator/ogl/util/device.cpp | 33 ++++++++++--------- src/accelerator/ogl/util/device.h | 10 +++--- src/accelerator/ogl/util/texture.cpp | 6 ---- src/modules/ffmpeg/util/av_util.cpp | 31 +++++++---------- 5 files changed, 41 insertions(+), 61 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 5618be349a..154d1af910 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -77,17 +77,17 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, bool key_only, bool straighten) { - array buffer; - unsigned int x_count = 0; - unsigned int y_count = 0; - int words_per_line = 0; + int buffer_size = 0; + unsigned int x_count = 0; + unsigned int y_count = 0; + int words_per_line = 0; switch (format) { case core::encoded_frame_format::rgba16: case core::encoded_frame_format::bgra16: x_count = frame.width(); y_count = frame.height(); - buffer = ogl_->create_array(frame.width() * 
frame.height() * 8); + buffer_size = frame.width() * frame.height() * 8; words_per_line = frame.width() * 2; break; @@ -96,14 +96,14 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, auto row_bytes = row_blocks * 128; // TODO - result must be 128byte aligned. can that be guaranteed here? - buffer = ogl_->create_array(row_bytes * frame.height()); + buffer_size = row_bytes * frame.height(); x_count = row_blocks * 8; y_count = frame.height(); words_per_line = row_blocks * 32; break; } - if (buffer.size() == 0 || x_count == 0 || y_count == 0) { + if (buffer_size == 0 || x_count == 0 || y_count == 0) { CASPAR_THROW_EXCEPTION(not_supported() << msg_info("Unknown encoded frame format")); } @@ -121,13 +121,7 @@ ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, description.key_only = key_only; description.straighten = straighten; - auto future_conversion = ogl_->convert_from_texture(texture_ptr, buffer, description, x_count, y_count); - - return std::async(std::launch::deferred, [buffer, future_conversion = std::move(future_conversion)]() mutable { - future_conversion.get(); - - return buffer; - }); + return ogl_->convert_from_texture(texture_ptr, buffer_size, description, x_count, y_count); } common::bit_depth ogl_frame_converter::get_frame_bitdepth(const core::const_frame& frame) diff --git a/src/accelerator/ogl/util/device.cpp b/src/accelerator/ogl/util/device.cpp index 62b1edae64..d81eecc36c 100644 --- a/src/accelerator/ogl/util/device.cpp +++ b/src/accelerator/ogl/util/device.cpp @@ -319,11 +319,11 @@ struct device::impl : public std::enable_shared_from_this } */ - std::future convert_from_texture(const std::shared_ptr& texture, - const array& source, - const convert_from_texture_description& description, - unsigned int x_count, - unsigned int y_count) + std::future> convert_from_texture(const std::shared_ptr& texture, + int buffer_size, + const convert_from_texture_description& description, + unsigned int x_count, + 
unsigned int y_count) { return spawn_async([=](yield_context yield) { if (!compute_from_rgba_) @@ -345,14 +345,13 @@ struct device::impl : public std::enable_shared_from_this GL(glBindImageTexture(0, texid_16bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16)); GL(glBindImageTexture(1, texid_8bit, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8)); - auto tmp = source.storage>(); - if (!tmp) { + array output_buffer = create_array(buffer_size); // TODO - tidy this? + auto buffer_ptr = output_buffer.storage>(); + if (!buffer_ptr) { CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Buffer is not gpu backed")); } + GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, buffer_ptr->get()->id())); - GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, tmp->get()->id())); - - // TODO - binding 2 description auto description_buffer = create_buffer(sizeof(convert_from_texture_description), false); std::memcpy(description_buffer->data(), &description, sizeof(convert_from_texture_description)); GL(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, description_buffer->id())); @@ -378,6 +377,8 @@ struct device::impl : public std::enable_shared_from_this } glDeleteSync(fence); + + return output_buffer; }); } @@ -508,13 +509,13 @@ std::future> device::copy_async(const std::shared_ptrcopy_async(source, as_rgba8); } -std::future device::convert_from_texture(const std::shared_ptr& texture, - const array& source, - const convert_from_texture_description& description, - unsigned int x_count, - unsigned int y_count) +std::future> device::convert_from_texture(const std::shared_ptr& texture, + int buffer_size, + const convert_from_texture_description& description, + unsigned int x_count, + unsigned int y_count) { - return impl_->convert_from_texture(texture, source, description, x_count, y_count); + return impl_->convert_from_texture(texture, buffer_size, description, x_count, y_count); } void device::dispatch(std::function func) { boost::asio::dispatch(impl_->service_, std::move(func)); } std::wstring 
device::version() const { return impl_->version(); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 6930a5d886..07858b0fde 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -65,11 +65,11 @@ class device final copy_async(const array& source, int width, int height, int stride, common::bit_depth depth); std::future> copy_async(const std::shared_ptr& source, bool as_rgba8); - std::future convert_from_texture(const std::shared_ptr& texture, - const array& source, - const convert_from_texture_description& description, - unsigned int x_count, - unsigned int y_count); + std::future> convert_from_texture(const std::shared_ptr& texture, + int buffer_size, + const convert_from_texture_description& description, + unsigned int x_count, + unsigned int y_count); template auto dispatch_async(Func&& func) diff --git a/src/accelerator/ogl/util/texture.cpp b/src/accelerator/ogl/util/texture.cpp index c489b43f3f..26d11e0693 100644 --- a/src/accelerator/ogl/util/texture.cpp +++ b/src/accelerator/ogl/util/texture.cpp @@ -54,12 +54,6 @@ struct texture::impl , depth_(depth) , size_(width * height * stride * common::bytes_per_pixel(depth)) { - if (stride == 5) { - size_ = width * height * 16; - } else if (stride == 6) { - size_ = width * height * 2; - } - GL(glCreateTextures(GL_TEXTURE_2D, 1, &id_)); GL(glTextureParameteri(id_, GL_TEXTURE_MIN_FILTER, GL_LINEAR)); GL(glTextureParameteri(id_, GL_TEXTURE_MAG_FILTER, GL_LINEAR)); diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index a58975666e..d89f2b384d 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -170,47 +170,38 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int switch (desc.format) { case core::pixel_format::gray: case core::pixel_format::luma: { - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0], height, 1)); + 
desc.planes.emplace_back(linesizes[0], height, 1); return desc; } case core::pixel_format::bgr: case core::pixel_format::rgb: { - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 3, height, 3)); + desc.planes.emplace_back(linesizes[0] / 3, height, 3); return desc; } case core::pixel_format::bgra: case core::pixel_format::argb: case core::pixel_format::rgba: case core::pixel_format::abgr: { - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4)); + desc.planes.emplace_back(linesizes[0] / 4, height, 4); return desc; } case core::pixel_format::ycbcr: case core::pixel_format::ycbcra: { // Find chroma height - // av_image_fill_plane_sizes is not available until ffmpeg 4.4, but we still need to support ffmpeg 4.2, so - // we fall back to calling av_image_fill_pointers with a NULL image buffer. We can't unconditionally use - // av_image_fill_pointers because it will not accept a NULL buffer on ffmpeg >= 5.0. -#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) size_t sizes[4]; ptrdiff_t linesizes1[4]; for (int i = 0; i < 4; i++) linesizes1[i] = linesizes[i]; av_image_fill_plane_sizes(sizes, pix_fmt, height, linesizes1); - auto size2 = static_cast(sizes[1]); -#else - uint8_t* dummy_pict_data[4]; - av_image_fill_pointers(dummy_pict_data, pix_fmt, height, NULL, linesizes); - auto size2 = static_cast(dummy_pict_data[2] - dummy_pict_data[1]); -#endif - auto h2 = size2 / linesizes[1]; - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0], height, 1)); - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[1], h2, 1)); - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[2], h2, 1)); + auto h2 = static_cast(sizes[1]) / linesizes[1]; + + desc.planes.emplace_back(linesizes[0], height, 1); + desc.planes.emplace_back(linesizes[1], h2, 1); + desc.planes.emplace_back(linesizes[2], h2, 1); if (desc.format == core::pixel_format::ycbcra) - 
desc.planes.push_back(core::pixel_format_desc::plane(linesizes[3], height, 1)); + desc.planes.emplace_back(linesizes[3], height, 1); return desc; } @@ -236,8 +227,8 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int return desc; } case core::pixel_format::uyvy: { - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 2, height, 2)); - desc.planes.push_back(core::pixel_format_desc::plane(linesizes[0] / 4, height, 4)); + desc.planes.emplace_back(linesizes[0] / 2, height, 2); + desc.planes.emplace_back(linesizes[0] / 4, height, 4); data_map.clear(); data_map.push_back(0); From 4f58d1aaaf8533056351228b800da05ca6bb1eeb Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 22:41:40 +0000 Subject: [PATCH 46/50] wip: tidying --- .../ogl/image/shader_from_rgba.comp | 20 +++++++ src/accelerator/ogl/image/shader_to_rgba.comp | 20 +++++++ src/accelerator/ogl/util/compute_shader.cpp | 53 ++----------------- src/accelerator/ogl/util/compute_shader.h | 21 ++------ 4 files changed, 47 insertions(+), 67 deletions(-) diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index da53c58e5e..df051149e0 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -1,3 +1,23 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . + * + * Author: Julian Waller, julian@superfly.tv + */ #version 430 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; diff --git a/src/accelerator/ogl/image/shader_to_rgba.comp b/src/accelerator/ogl/image/shader_to_rgba.comp index 4bc5fba7f3..6dbe5d34c6 100644 --- a/src/accelerator/ogl/image/shader_to_rgba.comp +++ b/src/accelerator/ogl/image/shader_to_rgba.comp @@ -1,3 +1,23 @@ +/* + * Copyright (c) 2011 Sveriges Television AB + * + * This file is part of CasparCG (www.casparcg.com). + * + * CasparCG is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CasparCG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CasparCG. If not, see . + * + * Author: Julian Waller, julian@superfly.tv + */ #version 430 layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; diff --git a/src/accelerator/ogl/util/compute_shader.cpp b/src/accelerator/ogl/util/compute_shader.cpp index d737a94d3d..8c3e7663ac 100644 --- a/src/accelerator/ogl/util/compute_shader.cpp +++ b/src/accelerator/ogl/util/compute_shader.cpp @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with CasparCG. If not, see . 
* - * Author: Robert Nagy, ronag89@gmail.com + * Author: Julian Waller, julian@superfly.tv */ #include "compute_shader.h" @@ -24,15 +24,11 @@ #include -#include - namespace caspar { namespace accelerator { namespace ogl { struct compute_shader::impl { - GLuint program_; - std::unordered_map uniform_locations_; - std::unordered_map attrib_locations_; + GLuint program_; impl(const impl&) = delete; impl& operator=(const impl&) = delete; @@ -47,9 +43,6 @@ struct compute_shader::impl glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &work_grp_cnt[1]); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &work_grp_cnt[2]); - printf( - "max global (total) work group counts x:%i y:%i z:%i\n", work_grp_cnt[0], work_grp_cnt[1], work_grp_cnt[2]); - GLint success; const char* compute_source = compute_source_str.c_str(); @@ -90,38 +83,6 @@ struct compute_shader::impl ~impl() { glDeleteProgram(program_); } - GLint get_uniform_location(const char* name) - { - auto it = uniform_locations_.find(name); - if (it == uniform_locations_.end()) - it = uniform_locations_.insert(std::make_pair(name, glGetUniformLocation(program_, name))).first; - return it->second; - } - - GLint get_attrib_location(const char* name) - { - auto it = attrib_locations_.find(name); - if (it == attrib_locations_.end()) - it = attrib_locations_.insert(std::make_pair(name, glGetAttribLocation(program_, name))).first; - return it->second; - } - - void set(const std::string& name, bool value) { set(name, value ? 
1 : 0); } - - void set(const std::string& name, int value) { GL(glUniform1i(get_uniform_location(name.c_str()), value)); } - - void set(const std::string& name, float value) { GL(glUniform1f(get_uniform_location(name.c_str()), value)); } - - void set(const std::string& name, double value0, double value1) - { - GL(glUniform2f(get_uniform_location(name.c_str()), static_cast(value0), static_cast(value1))); - } - - void set(const std::string& name, double value) - { - GL(glUniform1f(get_uniform_location(name.c_str()), static_cast(value))); - } - void use() { GL(glUseProgram(program_)); } }; @@ -130,13 +91,7 @@ compute_shader::compute_shader(const std::string& compute_source_str) { } compute_shader::~compute_shader() {} -void compute_shader::set(const std::string& name, bool value) { impl_->set(name, value); } -void compute_shader::set(const std::string& name, int value) { impl_->set(name, value); } -void compute_shader::set(const std::string& name, float value) { impl_->set(name, value); } -void compute_shader::set(const std::string& name, double value0, double value1) { impl_->set(name, value0, value1); } -void compute_shader::set(const std::string& name, double value) { impl_->set(name, value); } -GLint compute_shader::get_attrib_location(const char* name) { return impl_->get_attrib_location(name); } -int compute_shader::id() const { return impl_->program_; } -void compute_shader::use() const { impl_->use(); } +GLuint compute_shader::id() const { return impl_->program_; } +void compute_shader::use() const { impl_->use(); } }}} // namespace caspar::accelerator::ogl diff --git a/src/accelerator/ogl/util/compute_shader.h b/src/accelerator/ogl/util/compute_shader.h index 71aa6bb290..af86498e99 100644 --- a/src/accelerator/ogl/util/compute_shader.h +++ b/src/accelerator/ogl/util/compute_shader.h @@ -16,7 +16,7 @@ * You should have received a copy of the GNU General Public License * along with CasparCG. If not, see . 
* - * Author: Robert Nagy, ronag89@gmail.com + * Author: Julian Waller, julian@superfly.tv */ #pragma once @@ -24,7 +24,6 @@ #include #include #include -#include namespace caspar { namespace accelerator { namespace ogl { @@ -34,26 +33,12 @@ class compute_shader final compute_shader& operator=(const compute_shader&); public: - compute_shader(const std::string& compute_source_str); + explicit compute_shader(const std::string& compute_source_str); ~compute_shader(); - void set(const std::string& name, bool value); - void set(const std::string& name, int value); - void set(const std::string& name, float value); - void set(const std::string& name, double value0, double value1); - void set(const std::string& name, double value); - - GLint get_attrib_location(const char* name); - - template - typename std::enable_if::value, void>::type set(const std::string& name, E value) - { - set(name, static_cast::type>(value)); - } - void use() const; - int id() const; + [[nodiscard]] GLuint id() const; private: struct impl; From e0047af6e024a57b836657b858c50f394a0a7c67 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Sat, 30 Dec 2023 22:52:18 +0000 Subject: [PATCH 47/50] wip: generic key-only implementation --- src/accelerator/ogl/image/shader_from_rgba.comp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index df051149e0..8bb65fdccd 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -53,6 +53,7 @@ vec4 straighten_pixel(vec4 pixel) { vec4 read_pixel(ivec2 coord) { vec4 pixel = is_16_bit ? 
imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord); + if (key_only) return pixel.aaaa; return straighten_pixel(pixel); } From f07b2818898a339486a3bc02bbefbf0838b38a95 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Thu, 4 Jan 2024 14:59:07 +0000 Subject: [PATCH 48/50] wip: fixes --- src/accelerator/ogl/image/frame_converter.cpp | 74 +++++++++---------- src/accelerator/ogl/image/frame_converter.h | 4 +- .../ogl/image/shader_from_rgba.comp | 15 ++-- src/accelerator/ogl/util/device.h | 8 +- src/core/frame/frame_factory.h | 4 +- src/modules/ffmpeg/util/av_util.cpp | 30 ++++---- 6 files changed, 68 insertions(+), 67 deletions(-) diff --git a/src/accelerator/ogl/image/frame_converter.cpp b/src/accelerator/ogl/image/frame_converter.cpp index 154d1af910..ff7a070577 100644 --- a/src/accelerator/ogl/image/frame_converter.cpp +++ b/src/accelerator/ogl/image/frame_converter.cpp @@ -33,43 +33,43 @@ ogl_frame_converter::ogl_frame_converter(const spl::shared_ptr& ogl) { } -core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc) -{ - std::vector> image_data; - for (auto& plane : desc.planes) { - image_data.push_back(ogl_->create_array(plane.size)); - } - - using future_texture = std::shared_future>; - - std::weak_ptr weak_self = shared_from_this(); - return core::mutable_frame(tag, - std::move(image_data), - array{}, - desc, - [weak_self, desc](std::vector> image_data) -> boost::any { - // TODO - replace this - auto self = weak_self.lock(); - if (!self) { - return boost::any{}; - } - std::vector textures; - for (int n = 0; n < static_cast(desc.planes.size()); ++n) { - textures.emplace_back(self->ogl_->copy_async(image_data[n], - desc.planes[n].width, - desc.planes[n].height, - desc.planes[n].stride, - desc.planes[n].depth)); - } - return std::make_shared(std::move(textures)); - }); -} - -core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame) -{ - // TODO - return 
core::draw_frame{}; -} +// core::mutable_frame ogl_frame_converter::create_frame(const void* tag, const core::pixel_format_desc& desc) +// { +// std::vector> image_data; +// for (auto& plane : desc.planes) { +// image_data.push_back(ogl_->create_array(plane.size)); +// } + +// using future_texture = std::shared_future>; + +// std::weak_ptr weak_self = shared_from_this(); +// return core::mutable_frame(tag, +// std::move(image_data), +// array{}, +// desc, +// [weak_self, desc](std::vector> image_data) -> boost::any { +// // TODO - replace this +// auto self = weak_self.lock(); +// if (!self) { +// return boost::any{}; +// } +// std::vector textures; +// for (int n = 0; n < static_cast(desc.planes.size()); ++n) { +// textures.emplace_back(self->ogl_->copy_async(image_data[n], +// desc.planes[n].width, +// desc.planes[n].height, +// desc.planes[n].stride, +// desc.planes[n].depth)); +// } +// return std::make_shared(std::move(textures)); +// }); +// } + +// core::draw_frame ogl_frame_converter::convert_to_rgba(const core::mutable_frame& frame) +// { +// // TODO +// return core::draw_frame{}; +// } std::shared_future> ogl_frame_converter::convert_from_rgba(const core::const_frame& frame, diff --git a/src/accelerator/ogl/image/frame_converter.h b/src/accelerator/ogl/image/frame_converter.h index 31f0e1a684..3de9f1b8cd 100644 --- a/src/accelerator/ogl/image/frame_converter.h +++ b/src/accelerator/ogl/image/frame_converter.h @@ -41,9 +41,9 @@ class ogl_frame_converter ogl_frame_converter& operator=(const ogl_frame_converter&) = delete; - core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override; + // core::mutable_frame create_frame(const void* video_stream_tag, const core::pixel_format_desc& desc) override; - core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override; + // core::draw_frame convert_to_rgba(const core::mutable_frame& frame) override; std::shared_future> convert_from_rgba(const 
core::const_frame& frame, core::encoded_frame_format format, diff --git a/src/accelerator/ogl/image/shader_from_rgba.comp b/src/accelerator/ogl/image/shader_from_rgba.comp index 8bb65fdccd..c17497a99c 100644 --- a/src/accelerator/ogl/image/shader_from_rgba.comp +++ b/src/accelerator/ogl/image/shader_from_rgba.comp @@ -34,12 +34,12 @@ layout(std430, binding = 3) buffer description_layout { // This must match convert_from_texture_description in device.h uint target_format; - bool is_16_bit; + uint is_16_bit; uint frame_width; uint frame_height; uint words_per_line; - bool key_only; - bool straigthen; + uint key_only; + uint straigthen; }; vec4 straighten_pixel(vec4 pixel) { @@ -52,13 +52,14 @@ vec4 straighten_pixel(vec4 pixel) { } vec4 read_pixel(ivec2 coord) { - vec4 pixel = is_16_bit ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord); - if (key_only) return pixel.aaaa; - return straighten_pixel(pixel); + vec4 pixel = is_16_bit > 0 ? imageLoad(imgInput16bit, coord) : imageLoad(imgInput8bit, coord); + if (key_only > 0) return pixel.aaaa; + if (straigthen > 0) return straighten_pixel(pixel); + return pixel; } vec3 rgba_to_bt709(vec4 pixel) { - if (key_only) { + if (key_only > 0) { // TODO - verify everything about this return vec3(pixel.a, 0.5, 0.5); } diff --git a/src/accelerator/ogl/util/device.h b/src/accelerator/ogl/util/device.h index 07858b0fde..8a165a546b 100644 --- a/src/accelerator/ogl/util/device.h +++ b/src/accelerator/ogl/util/device.h @@ -37,13 +37,13 @@ namespace caspar { namespace accelerator { namespace ogl { // This must match description_layout in shader_from_rgba.comp struct convert_from_texture_description { - uint target_format; - bool is_16_bit; + uint32_t target_format; + uint32_t is_16_bit; uint32_t width; uint32_t height; uint32_t words_per_line; - bool key_only; - bool straighten; + uint32_t key_only; + uint32_t straighten; }; class device final diff --git a/src/core/frame/frame_factory.h b/src/core/frame/frame_factory.h index 
bff013c952..8175eba20e 100644 --- a/src/core/frame/frame_factory.h +++ b/src/core/frame/frame_factory.h @@ -43,9 +43,9 @@ class frame_converter frame_converter(const frame_converter&) = delete; - virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; + // virtual class mutable_frame create_frame(const void* video_stream_tag, const struct pixel_format_desc& desc) = 0; - virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0; + // virtual class draw_frame convert_to_rgba(const class mutable_frame& frame) = 0; virtual std::shared_future> convert_from_rgba(const core::const_frame& frame, encoded_frame_format format, bool key_only, bool straighten) = 0; diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index d89f2b384d..633841eedd 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -93,21 +93,21 @@ core::mutable_frame make_frame(void* tag, return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio)); } -core::mutable_frame make_frame2(void* tag, - const std::shared_ptr& frame_factory, - std::shared_ptr video, - std::shared_ptr audio) -{ - std::vector data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes - - const auto pix_desc = - video ? pixel_format_desc(static_cast(video->format), video->width, video->height, data_map) - : core::pixel_format_desc(core::pixel_format::invalid); - - auto frame = frame_factory->create_frame(tag, pix_desc); - - return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio)); -} +//core::mutable_frame make_frame2(void* tag, +// const std::shared_ptr& frame_factory, +// std::shared_ptr video, +// std::shared_ptr audio) +//{ +// std::vector data_map; // TODO(perf) when using data_map, avoid uploading duplicate planes +// +// const auto pix_desc = +// video ? 
pixel_format_desc(static_cast(video->format), video->width, video->height, data_map) +// : core::pixel_format_desc(core::pixel_format::invalid); +// +// auto frame = frame_factory->create_frame(tag, pix_desc); +// +// return copy_frame_tmp(std::move(frame), pix_desc, data_map, std::move(video), std::move(audio)); +//} core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) { From bfcedf43f9f182b6c325be1175e66ede0589122b Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Thu, 4 Jan 2024 17:00:27 +0000 Subject: [PATCH 49/50] fix: allow 16bit from ffmpeg --- src/modules/ffmpeg/producer/av_producer.cpp | 7 +++++-- src/modules/ffmpeg/util/av_util.cpp | 12 ++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/modules/ffmpeg/producer/av_producer.cpp b/src/modules/ffmpeg/producer/av_producer.cpp index ce52ff8a11..9b2078a073 100644 --- a/src/modules/ffmpeg/producer/av_producer.cpp +++ b/src/modules/ffmpeg/producer/av_producer.cpp @@ -500,6 +500,11 @@ struct Filter AV_PIX_FMT_ARGB, AV_PIX_FMT_RGBA, AV_PIX_FMT_ABGR, + AV_PIX_FMT_BGR48LE, + AV_PIX_FMT_RGB48LE, + AV_PIX_FMT_BGRA64LE, + AV_PIX_FMT_RGBA64LE, + // AV_PIX_FMT_X2RGB10LE TODO AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, @@ -511,14 +516,12 @@ struct Filter AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE, - // AV_PIX_FMT_YUV410P10LE, AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUV422P16LE, AV_PIX_FMT_YUV420P16LE, AV_PIX_FMT_YUVA444P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA420P10LE, - // AV_PIX_FMT_UYVY42210LE, AV_PIX_FMT_YUVA444P16LE, AV_PIX_FMT_YUVA422P16LE, AV_PIX_FMT_YUVA420P16LE, diff --git a/src/modules/ffmpeg/util/av_util.cpp b/src/modules/ffmpeg/util/av_util.cpp index 633841eedd..7214be49c5 100644 --- a/src/modules/ffmpeg/util/av_util.cpp +++ b/src/modules/ffmpeg/util/av_util.cpp @@ -115,14 +115,18 @@ core::pixel_format get_pixel_format(AVPixelFormat pix_fmt) case AV_PIX_FMT_GRAY8: return core::pixel_format::gray; case AV_PIX_FMT_RGB24: + case 
AV_PIX_FMT_RGB48LE: return core::pixel_format::rgb; case AV_PIX_FMT_BGR24: + case AV_PIX_FMT_BGR48LE: return core::pixel_format::bgr; case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_BGRA64LE: return core::pixel_format::bgra; case AV_PIX_FMT_ARGB: return core::pixel_format::argb; case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_RGBA64LE: return core::pixel_format::rgba; case AV_PIX_FMT_ABGR: return core::pixel_format::abgr; @@ -175,14 +179,18 @@ core::pixel_format_desc pixel_format_desc(AVPixelFormat pix_fmt, int width, int } case core::pixel_format::bgr: case core::pixel_format::rgb: { - desc.planes.emplace_back(linesizes[0] / 3, height, 3); + auto depth = (pix_fmt == AV_PIX_FMT_BGR48LE || pix_fmt==AV_PIX_FMT_RGB48LE) ? common::bit_depth::bit16:common::bit_depth::bit8; + auto scale = depth == common::bit_depth::bit16?6:3; + desc.planes.emplace_back(linesizes[0] / scale, height, 3, depth); return desc; } case core::pixel_format::bgra: case core::pixel_format::argb: case core::pixel_format::rgba: case core::pixel_format::abgr: { - desc.planes.emplace_back(linesizes[0] / 4, height, 4); + auto depth = (pix_fmt == AV_PIX_FMT_BGRA64LE || pix_fmt==AV_PIX_FMT_RGBA64LE) ? 
common::bit_depth::bit16:common::bit_depth::bit8; + auto scale = depth == common::bit_depth::bit16?8:4; + desc.planes.emplace_back(linesizes[0] / scale, height, 4, depth); return desc; } case core::pixel_format::ycbcr: From 7adf098cb816943eea9d45cb4b38aa1adc1884a2 Mon Sep 17 00:00:00 2001 From: Julian Waller Date: Thu, 4 Jan 2024 17:31:42 +0000 Subject: [PATCH 50/50] wip: boilerplate for decklink 12bit, but nothing happens --- .../decklink/consumer/decklink_consumer.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/modules/decklink/consumer/decklink_consumer.cpp b/src/modules/decklink/consumer/decklink_consumer.cpp index ed58593bec..80d9946a3f 100644 --- a/src/modules/decklink/consumer/decklink_consumer.cpp +++ b/src/modules/decklink/consumer/decklink_consumer.cpp @@ -241,6 +241,9 @@ class decklink_frame : public IDeckLinkVideoFrame return static_cast(format_desc_.width) * 4; case bmdFormat10BitYUV: return ((static_cast(format_desc_.width) + 47) / 48) * 128; + case bmdFormat12BitRGBLE: + case bmdFormat12BitRGB: + return (static_cast(format_desc_.width) * 36) / 8; default: return 0; } @@ -790,10 +793,18 @@ struct decklink_consumer final : public IDeckLinkVideoOutputCallback auto buffer = frame1.value().frame.get(); - std::shared_ptr image_data = create_aligned_buffer(decklink_format_desc_.size, 128); - std::memcpy(image_data.get(), buffer.data(), buffer.size()); - schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time); + // rgb12: +// std::shared_ptr image_data = create_aligned_buffer(((decklink_format_desc_.width * 36) / 8)*decklink_format_desc_.height, 128); +// std::memcpy(image_data.get(), buffer.data(), buffer.size()); +// +// schedule_next_video(image_data, bmdFormat12BitRGBLE, nb_samples, video_display_time); + + // yuv10: + std::shared_ptr image_data = create_aligned_buffer(buffer.size(), 128); + std::memcpy(image_data.get(), buffer.data(), buffer.size()); + + 
schedule_next_video(image_data, bmdFormat10BitYUV, nb_samples, video_display_time); if (config_.embedded_audio) { schedule_next_audio(std::move(audio_data), nb_samples);