diff --git a/Changelog b/Changelog
index 4a22ab4cb3566..4d80e5b54f4f4 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,26 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- tpad filter
+- AV1 decoding support through libdav1d
+- dedot filter
+- chromashift and rgbashift filters
+- freezedetect filter
+- truehd_core bitstream filter
+- dhav demuxer
+- PCM-DVD encoder
+- GIF parser
+- vividas demuxer
+- hymt decoder
+- anlmdn filter
+- maskfun filter
+- hcom demuxer and decoder
+- ARBC decoder
+- libaribb24 based ARIB STD-B24 caption support (profiles A and C)
+- Support decoding of HEVC 4:4:4 content in nvdec and cuviddec
+
+
+version 4.1:
 - deblock filter
 - tmix filter
 - amplify filter
@@ -34,6 +54,15 @@ version <next>:
 - audio denoiser as afftdn filter
 - AV1 parser
 - SER demuxer
+- sinc audio filter source
+- chromahold filter
+- setparams filter
+- vibrance filter
+- decoding S12M timecode in h264
+- xstack filter
+- pcm vidc decoder and encoder
+- (a)graphmonitor filter
+- yadif_cuda filter
 
 
 version 4.0:
diff --git a/INSTALL.md b/INSTALL.md
index 5db912231c2bb..3b220bc6ff216 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,4 +1,4 @@
-#Installing FFmpeg:
+## Installing FFmpeg
 
 1. Type `./configure` to create the configuration. A list of configure
 options is printed by running `configure --help`.
diff --git a/MAINTAINERS b/MAINTAINERS
index bc2ae133201f9..0df4d90121fb9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -39,7 +39,7 @@ QuickTime faststart:
 Miscellaneous Areas
 ===================
 
-documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu, Lou Logan, Gyan Doshi
+documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu, Gyan Doshi
 project server                          Árpád Gereöffy, Michael Niedermayer, Reimar Doeffinger, Alexander Strasser, Nikolay Aleksandrov
 presets                                 Robert Swain
 metadata subsystem                      Aurelien Jacobs
@@ -52,7 +52,7 @@ Communication
 
 website                                 Deby Barbara Lepage
 fate.ffmpeg.org                         Timothy Gu
-Trac bug tracker                        Alexander Strasser, Michael Niedermayer, Carl Eugen Hoyos, Lou Logan
+Trac bug tracker                        Alexander Strasser, Michael Niedermayer, Carl Eugen Hoyos
 mailing lists                           Baptiste Coudurier, Lou Logan
 Google+                                 Paul B Mahol, Michael Niedermayer, Alexander Strasser
 Twitter                                 Lou Logan, Reynaldo H. Verdejo Pinochet
@@ -143,6 +143,7 @@ Codecs:
   asv*                                  Michael Niedermayer
   atrac3plus*                           Maxim Poliakovski
   audiotoolbox*                         Rodger Combs
+  avs2*                                 Huiwen Ren
   bgmc.c, bgmc.h                        Thilo Borgmann
   binkaudio.c                           Peter Ross
   cavs*                                 Stefan Gehrer
@@ -189,6 +190,7 @@ Codecs:
   libcelt_dec.c                         Nicolas George
   libcodec2.c                           Tomas Härdin
   libdirac*                             David Conrad
+  libdavs2.c                            Huiwen Ren
   libgsm.c                              Michel Bardiaux
   libkvazaar.c                          Arttu Ylä-Outinen
   libopenjpeg.c                         Jaikrishnan Menon
@@ -360,6 +362,7 @@ Filters:
   vf_ssim.c                             Paul B Mahol
   vf_stereo3d.c                         Paul B Mahol
   vf_telecine.c                         Paul B Mahol
+  vf_tonemap_opencl.c                   Ruiling Song
   vf_yadif.c                            Michael Niedermayer
   vf_zoompan.c                          Paul B Mahol
 
@@ -526,6 +529,7 @@ Alpha                                   Falk Hueffner
 MIPS                                    Manojkumar Bhosale, Shiyou Yin
 Mac OS X / PowerPC                      Romain Dolbeau, Guillaume Poirier
 Amiga / PowerPC                         Colin Ward
+Linux / PowerPC                         Lauri Kasanen
 Windows MinGW                           Alex Beregszaszi, Ramiro Polla
 Windows Cygwin                          Victor Paesa
 Windows MSVC                            Matthew Oliver, Hendrik Leppkes
diff --git a/Makefile b/Makefile
index 4bf1dfedcfc0e..48c59733b83b4 100644
--- a/Makefile
+++ b/Makefile
@@ -135,7 +135,7 @@ uninstall-data:
 
 clean::
 	$(RM) $(CLEANSUFFIXES)
-	$(RM) $(addprefix compat/,$(CLEANSUFFIXES)) $(addprefix compat/*/,$(CLEANSUFFIXES))
+	$(RM) $(addprefix compat/,$(CLEANSUFFIXES)) $(addprefix compat/*/,$(CLEANSUFFIXES)) $(addprefix compat/*/*/,$(CLEANSUFFIXES))
 	$(RM) -r coverage-html
 	$(RM) -rf coverage.info coverage.info.in lcov
 
@@ -159,7 +159,7 @@ check: all alltools examples testprogs fate
 
 include $(SRC_PATH)/tests/Makefile
 
-$(sort $(OBJDIRS)):
+$(sort $(OUTDIRS)): # $(sort) also drops duplicate names, so each directory is a target only once
 	$(Q)mkdir -p $@
 
 # Dummy rule to stop make trying to rebuild removed or renamed headers
diff --git a/RELEASE b/RELEASE
index ff2c9d1a30b72..e3dcbea35c34b 100644
--- a/RELEASE
+++ b/RELEASE
@@ -1 +1 @@
-4.0.git
+4.1.git
diff --git a/compat/djgpp/math.c b/compat/djgpp/math.c
new file mode 100644
index 0000000000000..777b879e017e5
--- /dev/null
+++ b/compat/djgpp/math.c
@@ -0,0 +1,68 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Implementations of the math functions declared in compat/djgpp/math.h.
+ * configure adds this object to the build (and force-includes the header)
+ * when it detects a DJGPP libc.
+ */
+
+#include <math.h>
+
+/*
+ * Define "type name(type x, type y)" returning the smaller or larger of
+ * its arguments; op is the comparison under which x is chosen ('<' for a
+ * minimum, '>' for a maximum).  If one argument is NaN the other one is
+ * returned, so the result is NaN only when both are.
+ */
+#define DEFINE_MINMAX(name, type, op) \
+type name(type x, type y) \
+{ \
+    if (fpclassify(x) == FP_NAN) \
+        return y; \
+    if (fpclassify(y) == FP_NAN) \
+        return x; \
+    if (x op y) \
+        return x; \
+    return y; \
+}
+
+DEFINE_MINMAX(fmin,  double, <)
+DEFINE_MINMAX(fmax,  double, >)
+DEFINE_MINMAX(fminf, float,  <)
+DEFINE_MINMAX(fmaxf, float,  >)
+
+/*
+ * long double variants implemented on top of the double functions: the
+ * arguments are converted to double, so extended precision and range are
+ * not preserved.
+ */
+long double fmodl(long double x, long double y)
+{
+    return (long double)fmod((double)x, (double)y);
+}
+
+long double scalbnl(long double x, int exp)
+{
+    return (long double)scalbn((double)x, exp);
+}
+
+long double copysignl(long double x, long double y)
+{
+    return (long double)copysign((double)x, (double)y);
+}
diff --git a/compat/djgpp/math.h b/compat/djgpp/math.h
new file mode 100644
index 0000000000000..4c02ea9c402d9
--- /dev/null
+++ b/compat/djgpp/math.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef COMPAT_DJGPP_MATH_H
+#define COMPAT_DJGPP_MATH_H
+
+/*
+ * Prototypes for the math functions implemented in compat/djgpp/math.c.
+ * configure force-includes this header with -include when it detects a
+ * DJGPP libc; the include guard keeps an additional explicit #include of
+ * it from repeating the declarations.
+ */
+double fmin(double, double);
+double fmax(double, double);
+float fminf(float, float);
+float fmaxf(float, float);
+long double fmodl(long double, long double);
+long double scalbnl(long double, int);
+long double copysignl(long double, long double);
+
+#endif /* COMPAT_DJGPP_MATH_H */
diff --git a/configure b/configure
index 85d5dd59624d7..938ff10da5187 100755
--- a/configure
+++ b/configure
@@ -218,6 +218,7 @@ External library support:
   --enable-jni             enable JNI support [no]
   --enable-ladspa          enable LADSPA audio filtering [no]
   --enable-libaom          enable AV1 video encoding/decoding via libaom [no]
+  --enable-libaribb24      enable ARIB text and caption decoding via libaribb24 [no]
   --enable-libass          enable libass subtitles rendering,
                            needed for subtitles and ass filter [no]
   --enable-libbluray       enable BluRay reading using libbluray [no]
@@ -226,6 +227,7 @@ External library support:
   --enable-libcelt         enable CELT decoding via libcelt [no]
   --enable-libcdio         enable audio CD grabbing with libcdio [no]
   --enable-libcodec2       enable codec2 en/decoding using libcodec2 [no]
+  --enable-libdav1d        enable AV1 decoding via libdav1d [no]
   --enable-libdavs2        enable AVS2 decoding via libdavs2 [no]
   --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394
                            and libraw1394 [no]
@@ -319,7 +321,7 @@ External library support:
   The following libraries provide various hardware acceleration features:
   --disable-amf            disable AMF video encoding code [autodetect]
   --disable-audiotoolbox   disable Apple AudioToolbox code [autodetect]
-  --enable-cuda-sdk        enable CUDA features that require the CUDA SDK [no]
+  --enable-cuda-nvcc       enable Nvidia CUDA compiler [no]
   --disable-cuvid          disable Nvidia CUVID support [autodetect]
   --disable-d3d11va        disable Microsoft Direct3D 11 video acceleration code [autodetect]
   --disable-dxva2          disable Microsoft DirectX 9 video acceleration code [autodetect]
@@ -379,7 +381,7 @@ Toolchain options:
   --host-cppflags=HCPPFLAGS use HCPPFLAGS when compiling for host
   --host-ld=HOSTLD         use host linker HOSTLD
   --host-ldflags=HLDFLAGS  use HLDFLAGS when linking for host
-  --host-libs=HLIBS        use libs HLIBS when linking for host
+  --host-extralibs=HLIBS   use libs HLIBS when linking for host
   --host-os=OS             compiler host OS [$target_os]
   --extra-cflags=ECFLAGS   add ECFLAGS to CFLAGS [$CFLAGS]
   --extra-cxxflags=ECFLAGS add ECFLAGS to CXXFLAGS [$CXXFLAGS]
@@ -472,7 +474,7 @@ Developer options (useful when working on FFmpeg itself):
   --random-seed=VALUE      seed value for --enable/disable-random
   --disable-valgrind-backtrace do not print a backtrace under Valgrind
                            (only applies to --disable-optimizations builds)
-  --enable-osfuzz          Enable building fuzzer tool
+  --enable-ossfuzz         Enable building fuzzer tool
   --libfuzzer=PATH         path to libfuzzer
   --ignore-tests=TESTS     comma-separated list (without "fate-" prefix
                            in the name) of tests whose result is ignored
@@ -647,6 +649,12 @@ request(){
     done
 }
 
+warn_if_gets_disabled(){ # append every name in $* to WARN_IF_GETS_DISABLED_LIST (its consumer is outside this hunk)
+    for var in $*; do # unquoted $*: arguments are word-split; "var" is an ordinary global, not a local
+        WARN_IF_GETS_DISABLED_LIST="$WARN_IF_GETS_DISABLED_LIST $var"
+    done
+}
+
 enable(){
     set_all yes $*
 }
@@ -655,6 +663,14 @@ disable(){
     set_all no $*
 }
 
+disable_with_reason(){ # $1: component to disable, $2: human-readable reason it cannot be enabled
+    disable $1
+    eval "${1}_disable_reason=\$2" # \$2 is expanded by eval itself, so the reason is stored verbatim even with quotes or '$' in it
+    if requested $1; then # matches the error text below: $1 was requested, so abort rather than drop it quietly
+        die "ERROR: $1 requested, but $2"
+    fi
+}
+
 enable_weak(){
     set_weak yes $*
 }
@@ -783,10 +799,10 @@ check_deps(){
 
             [ -n "$dep_ifa" ] && { enabled_all $dep_ifa && enable_weak $cfg; }
             [ -n "$dep_ifn" ] && { enabled_any $dep_ifn && enable_weak $cfg; }
-            enabled_all  $dep_all || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not all dependencies are satisfied: $dep_all"; }
-            enabled_any  $dep_any || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not any dependency is satisfied: $dep_any"; }
-            disabled_all $dep_con || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some conflicting dependencies are unsatisfied: $dep_con"; }
-            disabled_any $dep_sel && { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some selected dependency is unsatisfied: $dep_sel"; }
+            enabled_all  $dep_all || { disable_with_reason $cfg "not all dependencies are satisfied: $dep_all"; }
+            enabled_any  $dep_any || { disable_with_reason $cfg "not any dependency is satisfied: $dep_any"; }
+            disabled_all $dep_con || { disable_with_reason $cfg "some conflicting dependencies are unsatisfied: $dep_con"; }
+            disabled_any $dep_sel && { disable_with_reason $cfg "some selected dependency is unsatisfied: $dep_sel"; }
 
             enabled $cfg && enable_deep_weak $dep_sel $dep_sgs
 
@@ -985,6 +1001,10 @@ hostcc_o(){
     eval printf '%s\\n' $HOSTCC_O
 }
 
+nvcc_o(){ # print the expansion of $NVCC_O plus a newline, same pattern as hostcc_o above; eval re-parses it, presumably so it can refer to this function's arguments
+    eval printf '%s\\n' $NVCC_O
+}
+
 test_cc(){
     log test_cc "$@"
     cat > $TMPC
@@ -1006,6 +1026,25 @@ test_objcc(){
     test_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM
 }
 
+test_nvcc(){ # run $nvcc -ptx on the source read from stdin; "$@" is passed to the compiler; status is that of test_cmd
+    log test_nvcc "$@"
+    cat > $TMPCU # stdin becomes the temporary .cu file
+    log_file $TMPCU
+    tmpcu_=$TMPCU
+    tmpo_=$TMPO
+    [ -x "$(command -v cygpath)" ] && tmpcu_=$(cygpath -m $tmpcu_) && tmpo_=$(cygpath -m $tmpo_) # if cygpath exists, give nvcc the "cygpath -m" form of both paths
+    test_cmd $nvcc -ptx $NVCCFLAGS "$@" $NVCC_C $(nvcc_o $tmpo_) $tmpcu_
+}
+
+check_nvcc() { # succeeds when test_nvcc can compile the minimal __global__ kernel below
+    log check_nvcc "$@" # NOTE(review): arguments are logged but not forwarded to test_nvcc - confirm this is intended
+    test_nvcc <<EOF
+extern "C" {
+    __global__ void hello(unsigned char *data) {}
+}
+EOF
+}
+
 test_cpp(){
     log test_cpp "$@"
     cat > $TMPC
@@ -1682,6 +1721,7 @@ EXTERNAL_LIBRARY_NONFREE_LIST="
 
 EXTERNAL_LIBRARY_VERSION3_LIST="
     gmp
+    libaribb24
     liblensfun
     libopencore_amrnb
     libopencore_amrwb
@@ -1712,6 +1752,7 @@ EXTERNAL_LIBRARY_LIST="
     libcaca
     libcelt
     libcodec2
+    libdav1d
     libdc1394
     libdrm
     libflite
@@ -1788,6 +1829,7 @@ EXTRALIBS_LIST="
 "
 
 HWACCEL_LIBRARY_NONFREE_LIST="
+    cuda_nvcc
     cuda_sdk
     libnpp
 "
@@ -2335,6 +2377,7 @@ CONFIG_EXTRA="
     rtpdec
     rtpenc_chain
     rv34dsp
+    scene_sad
     sinewin
     snappy
     srtp
@@ -2433,6 +2476,7 @@ CMDLINE_SET="
     tempprefix
     toolchain
     valgrind
+    windres
     x86asmexe
 "
 
@@ -2659,6 +2703,7 @@ hap_encoder_select="texturedspenc"
 hevc_decoder_select="bswapdsp cabac golomb hevcparse videodsp"
 huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
 huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
+hymt_decoder_select="huffyuv_decoder"
 iac_decoder_select="imc_decoder"
 imc_decoder_select="bswapdsp fft mdct sinewin"
 indeo3_decoder_select="hpeldsp"
@@ -2678,7 +2723,7 @@ mjpeg_decoder_select="blockdsp hpeldsp exif idctdsp jpegtables"
 mjpeg_encoder_select="jpegtables mpegvideoenc"
 mjpegb_decoder_select="mjpeg_decoder"
 mlp_decoder_select="mlp_parser"
-mlp_encoder_select="lpc"
+mlp_encoder_select="lpc audio_frame_queue"
 motionpixels_decoder_select="bswapdsp"
 mp1_decoder_select="mpegaudio"
 mp1float_decoder_select="mpegaudio"
@@ -2760,7 +2805,7 @@ thp_decoder_select="mjpeg_decoder"
 tiff_decoder_suggest="zlib lzma"
 tiff_encoder_suggest="zlib"
 truehd_decoder_select="mlp_parser"
-truehd_encoder_select="lpc"
+truehd_encoder_select="lpc audio_frame_queue"
 truemotion2_decoder_select="bswapdsp"
 truespeech_decoder_select="bswapdsp"
 tscc_decoder_deps="zlib"
@@ -2812,6 +2857,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext"
 dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
 ffnvcodec_deps_any="libdl LoadLibrary"
 nvdec_deps="ffnvcodec"
+vaapi_x11_deps="xlib"
 videotoolbox_hwaccel_deps="videotoolbox pthreads"
 videotoolbox_hwaccel_extralibs="-framework QuartzCore"
 xvmc_deps="X11_extensions_XvMClib_h"
@@ -2931,8 +2977,8 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
 
 hwupload_cuda_filter_deps="ffnvcodec"
 scale_npp_filter_deps="ffnvcodec libnpp"
-scale_cuda_filter_deps="cuda_sdk"
-thumbnail_cuda_filter_deps="cuda_sdk"
+scale_cuda_filter_deps="ffnvcodec cuda_nvcc"
+thumbnail_cuda_filter_deps="ffnvcodec cuda_nvcc"
 transpose_npp_filter_deps="ffnvcodec libnpp"
 
 amf_deps_any="libdl LoadLibrary"
@@ -2957,6 +3003,7 @@ h264_rkmpp_decoder_deps="rkmpp"
 h264_rkmpp_decoder_select="h264_mp4toannexb_bsf"
 h264_vaapi_encoder_select="cbs_h264 vaapi_encode"
 h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m"
+h264_v4l2m2m_decoder_select="h264_mp4toannexb_bsf"
 h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m"
 hevc_amf_encoder_deps="amf"
 hevc_cuvid_decoder_deps="cuvid"
@@ -2971,6 +3018,7 @@ hevc_rkmpp_decoder_select="hevc_mp4toannexb_bsf"
 hevc_vaapi_encoder_deps="VAEncPictureParameterBufferHEVC"
 hevc_vaapi_encoder_select="cbs_h265 vaapi_encode"
 hevc_v4l2m2m_decoder_deps="v4l2_m2m hevc_v4l2_m2m"
+hevc_v4l2m2m_decoder_select="hevc_mp4toannexb_bsf"
 hevc_v4l2m2m_encoder_deps="v4l2_m2m hevc_v4l2_m2m"
 mjpeg_cuvid_decoder_deps="cuvid"
 mjpeg_qsv_encoder_deps="libmfx"
@@ -3040,6 +3088,7 @@ hevc_metadata_bsf_select="cbs_h265"
 mjpeg2jpeg_bsf_select="jpegtables"
 mpeg2_metadata_bsf_select="cbs_mpeg2"
 trace_headers_bsf_select="cbs"
+truehd_core_bsf_select="mlp_parser"
 vp9_metadata_bsf_select="cbs_vp9"
 
 # external libraries
@@ -3085,9 +3134,11 @@ hevc_videotoolbox_encoder_select="videotoolbox_encoder"
 libaom_av1_decoder_deps="libaom"
 libaom_av1_encoder_deps="libaom"
 libaom_av1_encoder_select="extract_extradata_bsf"
+libaribb24_decoder_deps="libaribb24"
 libcelt_decoder_deps="libcelt"
 libcodec2_decoder_deps="libcodec2"
 libcodec2_encoder_deps="libcodec2"
+libdav1d_decoder_deps="libdav1d"
 libdavs2_decoder_deps="libdavs2"
 libfdk_aac_decoder_deps="libfdk_aac"
 libfdk_aac_encoder_deps="libfdk_aac"
@@ -3180,6 +3231,7 @@ image2_alias_pix_demuxer_select="image2_demuxer"
 image2_brender_pix_demuxer_select="image2_demuxer"
 ipod_muxer_select="mov_muxer"
 ismv_muxer_select="mov_muxer"
+ivf_muxer_select="av1_metadata_bsf vp9_superframe_bsf"
 matroska_audio_muxer_select="matroska_muxer"
 matroska_demuxer_select="iso_media riffdec"
 matroska_demuxer_suggest="bzlib lzo zlib"
@@ -3267,6 +3319,7 @@ libcdio_indev_deps="libcdio"
 libdc1394_indev_deps="libdc1394"
 openal_indev_deps="openal"
 opengl_outdev_deps="opengl"
+opengl_outdev_suggest="sdl2"
 oss_indev_deps_any="sys_soundcard_h"
 oss_outdev_deps_any="sys_soundcard_h"
 pulse_indev_deps="libpulse"
@@ -3393,7 +3446,8 @@ find_rect_filter_deps="avcodec avformat gpl"
 firequalizer_filter_deps="avcodec"
 firequalizer_filter_select="rdft"
 flite_filter_deps="libflite"
-framerate_filter_select="pixelutils"
+framerate_filter_select="scene_sad"
+freezedetect_filter_select="scene_sad"
 frei0r_filter_deps="frei0r libdl"
 frei0r_src_filter_deps="frei0r libdl"
 fspp_filter_deps="gpl"
@@ -3409,6 +3463,7 @@ mcdeint_filter_deps="avcodec gpl"
 movie_filter_deps="avcodec avformat"
 mpdecimate_filter_deps="gpl"
 mpdecimate_filter_select="pixelutils"
+minterpolate_filter_select="scene_sad"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
 nnedi_filter_deps="gpl"
@@ -3437,7 +3492,7 @@ sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
 scale_qsv_filter_deps="libmfx"
-select_filter_select="pixelutils"
+select_filter_select="scene_sad"
 sharpness_vaapi_filter_deps="vaapi"
 showcqt_filter_deps="avcodec avformat swscale"
 showcqt_filter_suggest="libfontconfig libfreetype"
@@ -3468,6 +3523,8 @@ tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
 tonemap_opencl_filter_deps="opencl const_nan"
+transpose_opencl_filter_deps="opencl"
+transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
@@ -3480,6 +3537,7 @@ zscale_filter_deps="libzimg const_nan"
 scale_vaapi_filter_deps="vaapi"
 vpp_qsv_filter_deps="libmfx"
 vpp_qsv_filter_select="qsvvpp"
+yadif_cuda_filter_deps="ffnvcodec cuda_nvcc"
 
 # examples
 avio_dir_cmd_deps="avformat avutil"
@@ -3530,15 +3588,15 @@ swresample_suggest="libm libsoxr"
 swscale_deps="avutil"
 swscale_suggest="libm"
 
-avcodec_extralibs="pthreads_extralibs iconv_extralibs"
+avcodec_extralibs="pthreads_extralibs iconv_extralibs dxva2_extralibs"
 avfilter_extralibs="pthreads_extralibs"
 avutil_extralibs="d3d11va_extralibs nanosleep_extralibs pthreads_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs"
 
 # programs
 ffmpeg_deps="avcodec avfilter avformat"
 ffmpeg_select="aformat_filter anull_filter atrim_filter format_filter
-               null_filter
-               trim_filter"
+               hflip_filter null_filter
+               transpose_filter trim_filter vflip_filter"
 ffmpeg_suggest="ole32 psapi shell32"
 ffplay_deps="avcodec avformat swscale swresample sdl2"
 ffplay_select="rdft crop_filter transpose_filter hflip_filter vflip_filter rotate_filter"
@@ -3595,6 +3653,7 @@ host_os=$target_os_default
 if test "$target_os_default" = aix; then
     arch_default=$(uname -p)
     strip_default="strip -X32_64"
+    nm_default="nm -g -X32_64"
 else
     arch_default=$(uname -m)
 fi
@@ -3711,8 +3770,7 @@ find_things_extern(){
 
 find_filters_extern(){
     file=$source_path/$1
-    #sed -n "s/^extern AVFilter ff_\([avfsinkrc]\{2,5\}\)_\(\w\+\);/\2_filter/p" $file
-    sed -E -n "s/^extern AVFilter ff_([avfsinkrc]{2,5})_([a-zA-Z0-9_]+);/\2_filter/p" $file
+    sed -n 's/^extern AVFilter ff_[avfsinkrc]\{2,5\}_\([[:alnum:]_]\{1,\}\);/\1_filter/p' $file
 }
 
 FILTER_LIST=$(find_filters_extern libavfilter/allfilters.c)
@@ -3863,6 +3921,7 @@ for opt do
             name=$(echo "${optval}" | sed "s/,/_${thing}|/g")_${thing}
             list=$(filter "$name" $list)
             [ "$list" = "" ] && warn "Option $opt did not match anything"
+            test $action = enable && warn_if_gets_disabled $list
             $action $list
         ;;
         --enable-yasm|--disable-yasm)
@@ -4115,6 +4174,11 @@ if test -n "$cross_prefix"; then
     enable cross_compile
 fi
 
+set_default target_os
+if test "$target_os" = android; then
+    cc_default="clang"
+fi
+
 ar_default="${cross_prefix}${ar_default}"
 cc_default="${cross_prefix}${cc_default}"
 cxx_default="${cross_prefix}${cxx_default}"
@@ -4131,7 +4195,7 @@ windres_default="${cross_prefix}${windres_default}"
 sysinclude_default="${sysroot}/usr/include"
 
 set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
-    target_exec target_os x86asmexe nvcc
+    target_exec x86asmexe nvcc
 enabled cross_compile || host_cc_default=$cc
 set_default host_cc
 
@@ -4198,6 +4262,7 @@ tmpfile TMPCPP .cpp
 tmpfile TMPE   $EXESUF
 tmpfile TMPH   .h
 tmpfile TMPM   .m
+tmpfile TMPCU  .cu
 tmpfile TMPO   .o
 tmpfile TMPS   .S
 tmpfile TMPSH  .sh
@@ -4835,7 +4900,6 @@ elif enabled mips; then
                 enable fast_cmov
                 enable fast_unaligned
                 disable aligned_stack
-                disable mipsfpu
                 disable mipsdsp
                 disable mipsdspr2
                 # When gcc version less than 5.3.0, add -fno-expensive-optimizations flag.
@@ -5130,6 +5194,7 @@ case $target_os in
             echo "hwcap_1 = OVERRIDE;" > mapfile &&
             add_ldflags -Wl,-M,mapfile
         nm_default='nm -P -g'
+        striptype=""
         version_script='-M'
         VERSION_SCRIPT_POSTPROCESS_CMD='perl $(SRC_PATH)/compat/solaris/make_sunver.pl - $(OBJS)'
         ;;
@@ -5283,7 +5348,6 @@ case $target_os in
         network_extralibs="-lsocket"
         objformat="coff"
         enable dos_paths
-        add_cppflags -U__STRICT_ANSI__
         ;;
     linux)
         enable section_data_rel_ro
@@ -5452,6 +5516,11 @@ EOF
     elif test_${pfx}cpp_condition sys/brand.h "defined LABELED_BRAND_NAME"; then
         eval ${pfx}libc_type=solaris
         add_${pfx}cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600
+    elif test_${pfx}cpp_condition sys/version.h "defined __DJGPP__"; then
+        eval ${pfx}libc_type=djgpp
+        add_cppflags -U__STRICT_ANSI__
+        add_cflags "-include $source_path/compat/djgpp/math.h"
+        add_compat djgpp/math.o
     fi
     test_${pfx}cc <<EOF
 #include <time.h>
@@ -5961,6 +6030,7 @@ check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
 
 check_type "va/va.h va/va_dec_hevc.h" "VAPictureParameterBufferHEVC"
 check_struct "va/va.h" "VADecPictureParameterBufferVP9" bit_depth
+check_struct "va/va.h va/va_vpp.h" "VAProcPipelineCaps" rotation_flags
 check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC"
 check_type "va/va.h va/va_enc_jpeg.h" "VAEncPictureParameterBufferJPEG"
 check_type "va/va.h va/va_enc_vp8.h"  "VAEncPictureParameterBufferVP8"
@@ -5968,11 +6038,17 @@ check_type "va/va.h va/va_enc_vp9.h"  "VAEncPictureParameterBufferVP9"
 
 check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
 
+if enabled cuda_sdk; then
+    warn "Option --enable-cuda-sdk is deprecated. Use --enable-cuda-nvcc instead."
+    enable cuda_nvcc
+fi
+
 if ! disabled ffnvcodec; then
-    check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.2" \
-          "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" "" || \
-      check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.2 ffnvcodec < 8.1" \
-          "ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" ""
+    ffnv_hdr_list="ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h"
+    check_pkg_config ffnvcodec "ffnvcodec >= 9.0.18.0" "$ffnv_hdr_list" "" || \
+      check_pkg_config ffnvcodec "ffnvcodec >= 8.2.15.8 ffnvcodec < 8.3" "$ffnv_hdr_list" "" || \
+      check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.9 ffnvcodec < 8.2" "$ffnv_hdr_list" "" || \
+      check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.9 ffnvcodec < 8.1" "$ffnv_hdr_list" ""
 fi
 
 check_cpp_condition winrt windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)"
@@ -6042,17 +6118,20 @@ for func in $COMPLEX_FUNCS; do
 done
 
 # these are off by default, so fail if requested and not available
-enabled cuda_sdk          && require cuda_sdk cuda.h cuCtxCreate -lcuda
+enabled cuda_nvcc         && { check_nvcc || die "ERROR: failed checking for nvcc."; }
 enabled chromaprint       && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint
 enabled decklink          && { require_headers DeckLinkAPI.h &&
                                { test_cpp_condition DeckLinkAPIVersion.h "BLACKMAGIC_DECKLINK_API_VERSION >= 0x0a090500" || die "ERROR: Decklink API version must be >= 10.9.5."; } }
 enabled libndi_newtek     && require_headers Processing.NDI.Lib.h
-enabled frei0r            && require_headers frei0r.h
+enabled frei0r            && require_headers "frei0r.h dlfcn.h"
 enabled gmp               && require gmp gmp.h mpz_export -lgmp
 enabled gnutls            && require_pkg_config gnutls gnutls gnutls/gnutls.h gnutls_global_init
 enabled jni               && { [ $target_os = "android" ] && check_headers jni.h && enabled pthreads || die "ERROR: jni not found"; }
-enabled ladspa            && require_headers ladspa.h
+enabled ladspa            && require_headers "ladspa.h dlfcn.h"
 enabled libaom            && require_pkg_config libaom "aom >= 1.0.0" aom/aom_codec.h aom_codec_version
+enabled libaribb24        && { check_pkg_config libaribb24 "aribb24 > 1.0.3" "aribb24/aribb24.h" arib_instance_new ||
+                               { enabled gpl && require_pkg_config libaribb24 aribb24 "aribb24/aribb24.h" arib_instance_new; } ||
+                               die "ERROR: libaribb24 requires version higher than 1.0.3 or --enable-gpl."; }
 enabled lv2               && require_pkg_config lv2 lilv-0 "lilv/lilv.h" lilv_world_new
 enabled libiec61883       && require libiec61883 libiec61883/iec61883.h iec61883_cmp_connect -lraw1394 -lavc1394 -lrom1394 -liec61883
 enabled libass            && require_pkg_config libass libass ass/ass.h ass_library_init
@@ -6063,7 +6142,8 @@ enabled libcelt           && require libcelt celt/celt.h celt_decode -lcelt0 &&
                                die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
 enabled libcaca           && require_pkg_config libcaca caca caca.h caca_create_canvas
 enabled libcodec2         && require libcodec2 codec2/codec2.h codec2_create -lcodec2
-enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.5.115" davs2.h davs2_decoder_open
+enabled libdav1d          && require_pkg_config libdav1d "dav1d >= 0.2.1" "dav1d/dav1d.h" dav1d_version
+enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open
 enabled libdc1394         && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new
 enabled libdrm            && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion
 enabled libfdk_aac        && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
@@ -6146,21 +6226,19 @@ enabled libvorbis         && require_pkg_config libvorbis vorbis vorbis/codec.h
 enabled libvpx            && {
     enabled libvpx_vp8_decoder && {
         check_pkg_config libvpx_vp8_decoder "vpx >= 1.4.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
-            check_lib libvpx_vp8_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_dec_init_ver VPX_IMG_FMT_HIGHBITDEPTH" -lvpx ||
-                die "ERROR: libvpx decoder version must be >=1.4.0";
+            check_lib libvpx_vp8_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp8_dx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs $pthreads_extralibs"
     }
     enabled libvpx_vp8_encoder && {
         check_pkg_config libvpx_vp8_encoder "vpx >= 1.4.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
-            check_lib libvpx_vp8_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_IMG_FMT_HIGHBITDEPTH" -lvpx ||
-                die "ERROR: libvpx encoder version must be >=1.4.0";
+            check_lib libvpx_vp8_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp8_cx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs $pthreads_extralibs"
     }
     enabled libvpx_vp9_decoder && {
         check_pkg_config libvpx_vp9_decoder "vpx >= 1.4.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
-            check_lib libvpx_vp9_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs"
+            check_lib libvpx_vp9_decoder "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs $pthreads_extralibs"
     }
     enabled libvpx_vp9_encoder && {
         check_pkg_config libvpx_vp9_encoder "vpx >= 1.4.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
-            check_lib libvpx_vp9_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs"
+            check_lib libvpx_vp9_encoder "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VPX_IMG_FMT_HIGHBITDEPTH" "-lvpx $libm_extralibs $pthreads_extralibs"
     }
     if disabled_all libvpx_vp8_decoder libvpx_vp9_decoder libvpx_vp8_encoder libvpx_vp9_encoder; then
         die "libvpx enabled but no supported decoders found"
@@ -6179,7 +6257,7 @@ enabled libx264           && { check_pkg_config libx264 x264 "stdint.h x264.h" x
 enabled libx265           && require_pkg_config libx265 x265 x265.h x265_api_get &&
                              require_cpp_condition libx265 x265.h "X265_BUILD >= 68"
 enabled libxavs           && require libxavs "stdint.h xavs.h" xavs_encoder_encode "-lxavs $pthreads_extralibs $libm_extralibs"
-enabled libxavs2          && require_pkg_config libxavs2 "xavs2 >= 1.2.77" "stdint.h xavs2.h" xavs2_api_get
+enabled libxavs2          && require_pkg_config libxavs2 "xavs2 >= 1.3.0" "stdint.h xavs2.h" xavs2_api_get
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
 enabled libzimg           && require_pkg_config libzimg "zimg >= 2.7.0" zimg.h zimg_get_api_version
 enabled libzmq            && require_pkg_config libzmq libzmq zmq.h zmq_ctx_new
@@ -6494,6 +6572,7 @@ check_disable_warning -Wno-format-zero-length
 check_disable_warning -Wno-pointer-sign
 check_disable_warning -Wno-unused-const-variable
 check_disable_warning -Wno-bool-operation
+check_disable_warning -Wno-char-subscripts
 
 check_disable_warning_headers(){
     warning_flag=-W${1#-Wno-}
@@ -6509,7 +6588,7 @@ EOF
 
 # add some linker flags
 check_ldflags -Wl,--warn-common
-check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample
+check_ldflags -Wl,-rpath-link=:libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample
 enabled rpath && add_ldexeflags -Wl,-rpath,$libdir && add_ldsoflags -Wl,-rpath,$libdir
 test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
 
@@ -6776,10 +6855,17 @@ check_deps $CONFIG_LIST       \
 enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
 enabled avresample && warn "Building with deprecated library libavresample"
 
-if test $target_os = "haiku"; then
+case $target_os in
+haiku)
     disable memalign
     disable posix_memalign
-fi
+    ;;
+*-dos|freedos|opendos)
+    if test_cpp_condition sys/version.h "defined(__DJGPP__) && __DJGPP__ == 2 && __DJGPP_MINOR__ == 5"; then
+        disable memalign
+    fi
+    ;;
+esac
 
 flatten_extralibs(){
     nested_entries=
@@ -6903,7 +6989,8 @@ enabled zoompan_filter      && prepend avfilter_deps "swscale"
 enabled lavfi_indev         && prepend avdevice_deps "avfilter"
 
 #FIXME
-enabled sdl2_outdev     && add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags)
+enabled_any sdl2_outdev opengl_outdev && enabled sdl2 &&
+    add_cflags $(filter_out '-Dmain=SDL_main' $sdl2_cflags)
 
 enabled opus_decoder    && prepend avcodec_deps "swresample"
 
@@ -7066,6 +7153,15 @@ echo "License: $license"
 
 fi # test "$quiet" != "yes"
 
+if test -n "$WARN_IF_GETS_DISABLED_LIST"; then
+    for cfg in $WARN_IF_GETS_DISABLED_LIST; do
+        if disabled $cfg; then
+            varname=${cfg}_disable_reason
+            eval "warn \"Disabled $cfg because \$$varname\""
+        fi
+    done
+fi
+
 if test -n "$WARNINGS"; then
     printf "\n%s%s$WARNINGS%s" "$warn_color" "$bold_color" "$reset_color"
     enabled fatal_warnings && exit 1
@@ -7236,7 +7332,7 @@ cat > $TMPH <<EOF
 #define FFMPEG_CONFIG_H
 #define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
 #define FFMPEG_LICENSE "$(c_escape $license)"
-#define CONFIG_THIS_YEAR 2018
+#define CONFIG_THIS_YEAR 2019
 #define FFMPEG_DATADIR "$(eval c_escape $datadir)"
 #define AVCONV_DATADIR "$(eval c_escape $datadir)"
 #define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
diff --git a/doc/APIchanges b/doc/APIchanges
index 9e93555dac8e6..784a5e5bd2373 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,31 +15,49 @@ libavutil:     2017-10-21
 
 API changes, most recent first:
 
-2018-10-11 - xxxxxxxxxx - lavc 58.33.100 - mediacodec.h
+2019-01-27 - XXXXXXXXXX - lavc 58.46.100 - avcodec.h
+  Add discard_damaged_percentage
+
+2019-01-08 - xxxxxxxxxx - lavu 56.26.100 - frame.h
+  Add AV_FRAME_DATA_REGIONS_OF_INTEREST
+
+2018-12-21 - 2744d6b364 - lavu 56.25.100 - hdr_dynamic_metadata.h
+  Add AV_FRAME_DATA_DYNAMIC_HDR_PLUS enum value, av_dynamic_hdr_plus_alloc(),
+  av_dynamic_hdr_plus_create_side_data() functions, and related structs.
+
+-------- 8< --------- FFmpeg 4.1 was cut here -------- 8< ---------
+
+2018-10-27 - 718044dc19 - lavu 56.21.100 - pixdesc.h
+  Add av_read_image_line2(), av_write_image_line2()
+
+2018-10-24 - f9d4126f28 - lavu 56.20.100 - frame.h
+  Add AV_FRAME_DATA_S12M_TIMECODE
+
+2018-10-11 - f6d48b618a - lavc 58.33.100 - mediacodec.h
   Add av_mediacodec_render_buffer_at_time().
 
-2018-09-09 - xxxxxxxxxx - lavc 58.29.100 - avcodec.h
+2018-09-09 - 35498c124a - lavc 58.29.100 - avcodec.h
   Add AV_PKT_DATA_AFD
 
-2018-08-16 - xxxxxxxxxx - lavc 58.23.100 - avcodec.h
+2018-08-16 - b33f5299a5 - lavc 58.23.100 - avcodec.h
   Add av_bsf_flush().
 
-2018-05-xx - xxxxxxxxxx - lavf 58.15.100 - avformat.h
+2018-05-18 - 2b2f2f65f3 - lavf 58.15.100 - avformat.h
   Add pmt_version field to AVProgram
 
-2018-05-xx - xxxxxxxxxx - lavf 58.14.100 - avformat.h
+2018-05-17 - 5dfeb7f081 - lavf 58.14.100 - avformat.h
   Add AV_DISPOSITION_STILL_IMAGE
 
-2018-05-xx - xxxxxxxxxx - lavu 56.18.101 - hwcontext_cuda.h
+2018-05-10 - c855683427 - lavu 56.18.101 - hwcontext_cuda.h
   Add AVCUDADeviceContext.stream.
 
-2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h
+2018-04-30 - 56b081da57 - lavu 56.18.100 - pixdesc.h
   Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
 
-2018-04-xx - xxxxxxxxxx - lavu 56.17.100 - opt.h
+2018-04-26 - 5be0410cb3 - lavu 56.17.100 - opt.h
   Add AV_OPT_FLAG_DEPRECATED.
 
-2018-04-xx - xxxxxxxxxx - lavu 56.16.100 - threadmessage.h
+2018-04-26 - 71fa82bed6 - lavu 56.16.100 - threadmessage.h
   Add av_thread_message_queue_nb_elems().
 
 -------- 8< --------- FFmpeg 4.0 was cut here -------- 8< ---------
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 53470c01ec302..076b910e4053a 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -530,6 +530,72 @@ ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
 @section null
 This bitstream filter passes the packets through unchanged.
 
+@section prores_metadata
+
+Modify color property metadata embedded in prores stream.
+
+@table @option
+@item color_primaries
+Set the color primaries.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same color primaries property (default).
+
+@item unknown
+@item bt709
+@item bt470bg
+BT601 625
+
+@item smpte170m
+BT601 525
+
+@item bt2020
+@item smpte431
+DCI P3
+
+@item smpte432
+P3 D65
+
+@end table
+
+@item transfer_characteristics
+Set the color transfer.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same transfer characteristics property (default).
+
+@item unknown
+@item bt709
+BT 601, BT 709, BT 2020
+@end table
+
+
+@item matrix_coefficients
+Set the matrix coefficients.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same matrix coefficients property (default).
+
+@item unknown
+@item bt709
+@item smpte170m
+BT 601
+
+@item bt2020nc
+@end table
+@end table
+
+Set Rec709 colorspace for each frame of the file
+@example
+ffmpeg -i INPUT -c copy -bsf:v prores_metadata=color_primaries=bt709:color_trc=bt709:colorspace=bt709 output.mov
+@end example
+
 @section remove_extra
 
 Remove extradata from packets.
@@ -566,7 +632,12 @@ Log trace output containing all syntax elements in the coded stream
 headers (everything above the level of individual coded blocks).
 This can be useful for debugging low-level stream issues.
 
-Supports H.264, H.265, MPEG-2 and VP9.
+Supports AV1, H.264, H.265, (M)JPEG, MPEG-2 and VP9, but depending
+on the build only a subset of these may be available.
+
+@section truehd_core
+
+Extract the core from a TrueHD stream, dropping ATMOS data.
 
 @section vp9_metadata
 
diff --git a/doc/codecs.texi b/doc/codecs.texi
index 3770f4f53bdad..572e561c1a631 100644
--- a/doc/codecs.texi
+++ b/doc/codecs.texi
@@ -775,8 +775,6 @@ Place global headers at every keyframe instead of in extradata.
 Frame data might be split into multiple chunks.
 @item showall
 Show all frames before the first keyframe.
-@item skiprd
-Deprecated, use mpegvideo private options instead.
 @item export_mvs
 Export motion vectors into frame side-data (see @code{AV_FRAME_DATA_MOTION_VECTORS})
 for codecs that support it. See also @file{doc/examples/export_mvs.c}.
@@ -1232,7 +1230,7 @@ instead of alpha. Default is 0.
 @item dump_separator @var{string} (@emph{input})
 Separator used to separate the fields printed on the command line about the
 Stream parameters.
-For example to separate the fields with newlines and indention:
+For example, to separate the fields with newlines and indentation:
 @example
 ffprobe -dump_separator "
                           "  -i ~/videos/matrixbench_mpeg2.mpg
diff --git a/doc/decoders.texi b/doc/decoders.texi
index 25187e30f1c62..704bd60b9f1a3 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -194,6 +194,31 @@ without this library.
 @chapter Subtitles Decoders
 @c man begin SUBTILES DECODERS
 
+@section libaribb24
+
+ARIB STD-B24 caption decoder.
+
+Implements profiles A and C of the ARIB STD-B24 standard.
+
+@subsection libaribb24 Decoder Options
+
+@table @option
+
+@item -aribb24-base-path @var{path}
+Sets the base path for the libaribb24 library. This is utilized for reading of
+configuration files (for custom unicode conversions), and for dumping of
+non-text symbols as images under that location.
+
+Unset by default.
+
+@item -aribb24-skip-ruby-text @var{boolean}
+Tells the decoder wrapper to skip text blocks that contain half-height ruby
+text.
+
+Enabled by default.
+
+@end table
+
 @section dvbsub
 
 @subsection Options
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index aad94eb50e6e3..2b0b37b061b0c 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -481,14 +481,84 @@ ffmpeg -framerate 10 -pattern_type glob -i "*.png" out.mkv
 
 The Game Music Emu library is a collection of video game music file emulators.
 
-See @url{http://code.google.com/p/game-music-emu/} for more information.
+See @url{https://bitbucket.org/mpyne/game-music-emu/overview} for more information.
 
-Some files have multiple tracks. The demuxer will pick the first track by
-default. The @option{track_index} option can be used to select a different
-track. Track indexes start at 0. The demuxer exports the number of tracks as
-@var{tracks} meta data entry.
+It accepts the following options:
+
+@table @option
+
+@item track_index
+Set the index of which track to demux. The demuxer can only export one track.
+Track indexes start at 0. Default is to pick the first track. Number of tracks
+is exported as @var{tracks} metadata entry.
+
+@item sample_rate
+Set the sampling rate of the exported track. Range is 1000 to 999999. Default is 44100.
+
+@item max_size @emph{(bytes)}
+The demuxer buffers the entire file into memory. Adjust this value to set the maximum buffer size,
+which, in turn, acts as a ceiling for the size of files that can be read.
+Default is 50 MiB.
+
+@end table
+
+@section libmodplug
+
+ModPlug based module demuxer.
+
+See @url{https://github.com/Konstanty/libmodplug}
+
+It will export one 2-channel 16-bit 44.1 kHz audio stream.
+Optionally, a @code{pal8} 16-color video stream can be exported with or without printed metadata.
+
+It accepts the following options:
+
+@table @option
+@item noise_reduction
+Apply a simple low-pass filter. Can be 1 (on) or 0 (off). Default is 0.
+
+@item reverb_depth
+Set amount of reverb. Range 0-100. Default is 0.
+
+@item reverb_delay
+Set delay in ms, clamped to 40-250 ms. Default is 0.
 
-For very large files, the @option{max_size} option may have to be adjusted.
+@item bass_amount
+Apply bass expansion a.k.a. XBass or megabass. Range is 0 (quiet) to 100 (loud). Default is 0.
+
+@item bass_range
+Set cutoff i.e. upper-bound for bass frequencies. Range is 10-100 Hz. Default is 0.
+
+@item surround_depth
+Apply a Dolby Pro-Logic surround effect. Range is 0 (quiet) to 100 (heavy). Default is 0.
+
+@item surround_delay
+Set surround delay in ms, clamped to 5-40 ms. Default is 0.
+
+@item max_size
+The demuxer buffers the entire file into memory. Adjust this value to set the maximum buffer size,
+which, in turn, acts as a ceiling for the size of files that can be read. Range is 0 to 100 MiB.
+0 removes buffer size limit (not recommended). Default is 5 MiB.
+
+@item video_stream_expr
+String which is evaluated using the eval API to assign colors to the generated video stream.
+Variables which can be used are @code{x}, @code{y}, @code{w}, @code{h}, @code{t}, @code{speed},
+@code{tempo}, @code{order}, @code{pattern} and @code{row}.
+
+@item video_stream
+Generate video stream. Can be 1 (on) or 0 (off). Default is 0.
+
+@item video_stream_w
+Set video frame width in 'chars' where one char indicates 8 pixels. Range is 20-512. Default is 30.
+
+@item video_stream_h
+Set video frame height in 'chars' where one char indicates 8 pixels. Range is 20-512. Default is 30.
+
+@item video_stream_ptxt
+Print metadata on video stream. Includes @code{speed}, @code{tempo}, @code{order}, @code{pattern},
+@code{row} and @code{ts} (time in ms). Can be 1 (on) or 0 (off). Default is 1.
+
+@end table
 
 @section libopenmpt
 
@@ -662,4 +732,20 @@ Example: convert the captions to a format most players understand:
 ffmpeg -i http://www.ted.com/talks/subtitles/id/1/lang/en talk1-en.srt
 @end example
 
+@section vapoursynth
+
+Vapoursynth wrapper.
+
+Due to security concerns, Vapoursynth scripts will not
+be autodetected so the input format has to be forced. For ff* CLI tools,
+add @code{-f vapoursynth} before the input @code{-i yourscript.vpy}.
+
+This demuxer accepts the following option:
+@table @option
+@item max_script_size
+The demuxer buffers the entire script into memory. Adjust this value to set the maximum buffer size,
+which, in turn, acts as a ceiling for the size of scripts that can be read.
+Default is 1 MiB.
+@end table
+
 @c man end DEMUXERS
diff --git a/doc/encoders.texi b/doc/encoders.texi
index 8d184f72f8710..29625ba07c60e 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -1370,6 +1370,118 @@ makes it possible to store non-rgb pix_fmts.
 
 @end table
 
+@section libaom-av1
+
+libaom AV1 encoder wrapper.
+
+Requires the presence of the libaom headers and library during
+configuration.  You need to explicitly configure the build with
+@code{--enable-libaom}.
+
+@subsection Options
+
+The wrapper supports the following standard libavcodec options:
+
+@table @option
+
+@item b
+Set bitrate target in bits/second.  By default this will use
+variable-bitrate mode.  If @option{maxrate} and @option{minrate} are
+also set to the same value then it will use constant-bitrate mode,
+otherwise if @option{crf} is set as well then it will use
+constrained-quality mode.
+
+@item g keyint_min
+Set key frame placement.  The GOP size sets the maximum distance between
+key frames; if zero the output stream will be intra-only.  The minimum
+distance is ignored unless it is the same as the GOP size, in which case
+key frames will always appear at a fixed interval.  Not set by default,
+so without this option the library has completely free choice about
+where to place key frames.
+
+@item qmin qmax
+Set minimum/maximum quantisation values.  Valid range is from 0 to 63
+(warning: this does not match the quantiser values actually used by AV1
+- divide by four to map real quantiser values to this range).  Defaults
+to min/max (no constraint).
+
+@item minrate maxrate bufsize rc_init_occupancy
+Set rate control buffering parameters.  Not used if not set - defaults
+to unconstrained variable bitrate.
+
+@item threads
+Set the number of threads to use while encoding.  This may require the
+@option{tiles} or @option{row-mt} options to also be set to actually
+use the specified number of threads fully. Defaults to the number of
+hardware threads supported by the host machine.
+
+@item profile
+Set the encoding profile.  Defaults to using the profile which matches
+the bit depth and chroma subsampling of the input.
+
+@end table
+
+The wrapper also has some specific options:
+
+@table @option
+
+@item cpu-used
+Set the quality/encoding speed tradeoff.  Valid range is from 0 to 8,
+higher numbers indicating greater speed and lower quality.  The default
+value is 1, which will be slow and high quality.
+
+@item auto-alt-ref
+Enable use of alternate reference frames.  Defaults to the internal
+default of the library.
+
+@item lag-in-frames
+Set the maximum number of frames which the encoder may keep in flight
+at any one time for lookahead purposes.  Defaults to the internal
+default of the library.
+
+@item error-resilience
+Enable error resilience features:
+@table @option
+@item default
+Improve resilience against losses of whole frames.
+@end table
+Not enabled by default.
+
+@item crf
+Set the quality/size tradeoff for constant-quality (no bitrate target)
+and constrained-quality (with maximum bitrate target) modes. Valid
+range is 0 to 63, higher numbers indicating lower quality and smaller
+output size.  Only used if set; by default only the bitrate target is
+used.
+
+@item static-thresh
+Set a change threshold on blocks below which they will be skipped by
+the encoder.  Defined in arbitrary units as a nonnegative integer,
+defaulting to zero (no blocks are skipped).
+
+@item drop-threshold
+Set a threshold for dropping frames when close to rate control bounds.
+Defined as a percentage of the target buffer - when the rate control
+buffer falls below this percentage, frames will be dropped until it
+has refilled above the threshold.  Defaults to zero (no frames are
+dropped).
+
+@item tiles
+Set the number of tiles to encode the input video with, as columns x
+rows.  Larger numbers allow greater parallelism in both encoding and
+decoding, but may decrease coding efficiency.  Defaults to the minimum
+number of tiles required by the size of the input video (this is 1x1
+(that is, a single tile) for sizes up to and including 4K).
+
+@item tile-columns tile-rows
+Set the number of tiles as log2 of the number of tile columns and rows.
+Provided for compatibility with libvpx/VP9.
+
+@item row-mt (Requires libaom >= 1.0.0-759-g90a15f4f2)
+Enable row based multi-threading. Disabled by default.
+
+@end table
+
 @section libkvazaar
 
 Kvazaar H.265/HEVC encoder.
@@ -1641,6 +1753,7 @@ means unlimited.
 @table @option
 @item auto-alt-ref
 Enable use of alternate reference frames (2-pass only).
+Values greater than 1 enable multi-layer alternate reference frames (VP9 only).
 @item arnr-max-frames
 Set altref noise reduction max frame count.
 @item arnr-type
@@ -1654,6 +1767,38 @@ Set number of frames to look ahead for frametype and ratecontrol.
 @item error-resilient
 Enable error resiliency features.
 
+@item sharpness @var{integer}
+Increase sharpness at the expense of lower PSNR.
+The valid range is [0, 7].
+
+@item VP8-specific options
+@table @option
+@item ts-parameters
+Sets the temporal scalability configuration using a :-separated list of
+key=value pairs. For example, to specify temporal scalability parameters
+with @code{ffmpeg}:
+@example
+ffmpeg -i INPUT -c:v libvpx -ts-parameters ts_number_layers=3:\
+ts_target_bitrate=250000,500000,1000000:ts_rate_decimator=4,2,1:\
+ts_periodicity=4:ts_layer_id=0,2,1,2 OUTPUT
+@end example
+Below is a brief explanation of each of the parameters; please
+refer to @code{struct vpx_codec_enc_cfg} in @code{vpx/vpx_encoder.h} for more
+details.
+@table @option
+@item ts_number_layers
+Number of temporal coding layers.
+@item ts_target_bitrate
+Target bitrate for each temporal layer.
+@item ts_rate_decimator
+Frame rate decimation factor for each temporal layer.
+@item ts_periodicity
+Length of the sequence defining frame temporal layer membership.
+@item ts_layer_id
+Template defining the membership of frames to temporal layers.
+@end table
+@end table
+
 @item VP9-specific options
 @table @option
 @item lossless
@@ -1692,6 +1837,8 @@ Corpus VBR mode is a variant of standard VBR where the complexity distribution
 midpoint is passed in rather than calculated for a specific clip or chunk.
 
 The valid range is [0, 10000]. 0 (default) uses standard VBR.
+@item enable-tpl @var{boolean}
+Enable temporal dependency model.
 @end table
 
 @end table
@@ -2180,6 +2327,63 @@ ffmpeg -i input -c:v libx265 -x265-params crf=26:psy-rd=1 output.mp4
 @end example
 @end table
 
+@section libxavs2
+
+xavs2 AVS2-P2/IEEE1857.4 encoder wrapper.
+
+This encoder requires the presence of the libxavs2 headers and library
+during configuration. You need to explicitly configure the build with
+@option{--enable-libxavs2}.
+
+The following standard libavcodec options are used:
+@itemize
+@item
+@option{b} / @option{bit_rate}
+@item
+@option{g} / @option{gop_size}
+@item
+@option{bf} / @option{max_b_frames}
+@end itemize
+
+The encoder also has its own specific options:
+@subsection Options
+
+@table @option
+@item lcu_row_threads
+Set the number of parallel threads for rows from 1 to 8 (default 5).
+
+@item initial_qp
+Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
+used to set the initial qp for the first frame.
+
+@item qp
+Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
+used to set the qp value under constant-QP mode.
+
+@item max_qp
+Set the max qp for rate control from 1 to 63 (default 55).
+
+@item min_qp
+Set the min qp for rate control from 1 to 63 (default 20).
+
+@item speed_level
+Set the Speed level from 0 to 9 (default 0). Higher is better but slower.
+
+@item log_level
+Set the log level from -1 to 3 (default 0). -1: none, 0: error,
+1: warning, 2: info, 3: debug.
+
+@item xavs2-params
+Set xavs2 options using a list of @var{key}=@var{value} couples separated
+by ":".
+
+For example, to specify libxavs2 encoding options with @option{-xavs2-params}:
+
+@example
+ffmpeg -i input -c:v libxavs2 -xavs2-params RdoqLevel=0 output.avs2
+@end example
+@end table
+
 @section libxvid
 
 Xvid MPEG-4 Part 2 encoder wrapper.
@@ -2598,18 +2802,53 @@ Size / quality tradeoff: higher values are smaller / worse quality.
 @option{b_qfactor} / @option{b_quant_factor}
 @item
 @option{b_qoffset} / @option{b_quant_offset}
+@item
+@option{slices}
 @end itemize
 
 All encoders support the following options:
-@itemize
-@item
-@option{low_power}
-
+@table @option
+@item low_power
 Some drivers/platforms offer a second encoder for some codecs intended to use
 less power than the default encoder; setting this option will attempt to use
 that encoder.  Note that it may support a reduced feature set, so some other
 options may not be available in this mode.
-@end itemize
+
+@item idr_interval
+Set the number of normal intra frames between full-refresh (IDR) frames in
+open-GOP mode.  The intra frames are still IRAPs, but will not include global
+headers and may have non-decodable leading pictures.
+
+@item b_depth
+Set the B-frame reference depth.  When set to one (the default), all B-frames
+will refer only to P- or I-frames.  When set to greater values multiple layers
+of B-frames will be present, frames in each layer only referring to frames in
+higher layers.
+
+@item rc_mode
+Set the rate control mode to use.  A given driver may only support a subset of
+modes.
+
+Possible modes:
+@table @option
+@item auto
+Choose the mode automatically based on driver support and the other options.
+This is the default.
+@item CQP
+Constant-quality.
+@item CBR
+Constant-bitrate.
+@item VBR
+Variable-bitrate.
+@item ICQ
+Intelligent constant-quality.
+@item QVBR
+Quality-defined variable-bitrate.
+@item AVBR
+Average variable bitrate.
+@end table
+
+@end table
 
 Each encoder also has its own specific options:
 @table @option
@@ -2789,52 +3028,6 @@ Reduces detail but attempts to preserve color at extremely low bitrates.
 
 @end table
 
-@section libxavs2
-
-xavs2 AVS2-P2/IEEE1857.4 encoder wrapper.
-
-This encoder requires the presence of the libxavs2 headers and library
-during configuration. You need to explicitly configure the build with
-@option{--enable-libxavs2}.
-
-@subsection Options
-
-@table @option
-@item lcu_row_threads
-Set the number of parallel threads for rows from 1 to 8 (default 5).
-
-@item initial_qp
-Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
-used to set the initial qp for the first frame.
-
-@item qp
-Set the xavs2 quantization parameter from 1 to 63 (default 34). This is
-used to set the qp value under constant-QP mode.
-
-@item max_qp
-Set the max qp for rate control from 1 to 63 (default 55).
-
-@item min_qp
-Set the min qp for rate control from 1 to 63 (default 20).
-
-@item speed_level
-Set the Speed level from 0 to 9 (default 0). Higher is better but slower.
-
-@item log_level
-Set the log level from -1 to 3 (default 0). -1: none, 0: error,
-1: warning, 2: info, 3: debug.
-
-@item xavs2-params
-Set xavs2 options using a list of @var{key}=@var{value} couples separated
-by ":".
-
-For example to specify libxavs2 encoding options with @option{-xavs2-params}:
-
-@example
-ffmpeg -i input -c:v libxavs2 -xavs2-params preset_level=5 output.avs2
-@end example
-@end table
-
 @c man end VIDEO ENCODERS
 
 @chapter Subtitles Encoders
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 928ff306b356e..2935424e545af 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -37,7 +37,7 @@ $(EXAMPLES_G): %$(PROGSSUF)_g$(EXESUF): %.o
 examples: $(EXAMPLES)
 
 $(EXAMPLES:%$(PROGSSUF)$(EXESUF)=%.o): | doc/examples
-OBJDIRS += doc/examples
+OUTDIRS += doc/examples
 
 DOXY_INPUT += $(EXAMPLES:%$(PROGSSUF)$(EXESUF)=%.c)
 
diff --git a/doc/faq.texi b/doc/faq.texi
index 73624c647e501..8b165eb436d5b 100644
--- a/doc/faq.texi
+++ b/doc/faq.texi
@@ -76,7 +76,7 @@ the gcc developers. Note that we will not add workarounds for gcc bugs.
 
 Also note that (some of) the gcc developers believe this is not a bug or
 not a bug they should fix:
-@url{http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11203}.
+@url{https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11203}.
 Then again, some of them do not know the difference between an undecidable
 problem and an NP-hard problem...
 
@@ -257,13 +257,13 @@ default.
 @section Which are good parameters for encoding high quality MPEG-4?
 
 '-mbd rd -flags +mv4+aic -trellis 2 -cmp 2 -subcmp 2 -g 300 -pass 1/2',
-things to try: '-bf 2', '-flags qprd', '-flags mv0', '-flags skiprd'.
+things to try: '-bf 2', '-mpv_flags qp_rd', '-mpv_flags mv0', '-mpv_flags skip_rd'.
 
 @section Which are good parameters for encoding high quality MPEG-1/MPEG-2?
 
 '-mbd rd -trellis 2 -cmp 2 -subcmp 2 -g 100 -pass 1/2'
 but beware the '-g 100' might cause problems with some decoders.
-Things to try: '-bf 2', '-flags qprd', '-flags mv0', '-flags skiprd.
+Things to try: '-bf 2', '-mpv_flags qp_rd', '-mpv_flags mv0', '-mpv_flags skip_rd'.
 
 @section Interlaced video looks very bad when encoded with ffmpeg, what is wrong?
 
@@ -516,7 +516,7 @@ in the ffmpeg invocation. This is effective whether you run ffmpeg in a shell
 or invoke ffmpeg in its own process via an operating system API.
 
 As an alternative, when you are running ffmpeg in a shell, you can redirect
-standard input to @code{/dev/null} (on Linux and Mac OS)
+standard input to @code{/dev/null} (on Linux and macOS)
 or @code{NUL} (on Windows). You can do this redirect either
 on the ffmpeg invocation, or from a shell script which calls ffmpeg.
 
@@ -526,7 +526,7 @@ For example:
 ffmpeg -nostdin -i INPUT OUTPUT
 @end example
 
-or (on Linux, Mac OS, and other UNIX-like shells):
+or (on Linux, macOS, and other UNIX-like shells):
 
 @example
 ffmpeg -i INPUT OUTPUT </dev/null
@@ -601,7 +601,7 @@ No. These tools are too bloated and they complicate the build.
 FFmpeg is already organized in a highly modular manner and does not need to
 be rewritten in a formal object language. Further, many of the developers
 favor straight C; it works for them. For more arguments on this matter,
-read @uref{http://www.tux.org/lkml/#s15, "Programming Religion"}.
+read @uref{https://web.archive.org/web/20111004021423/http://kernel.org/pub/linux/docs/lkml/#s15, "Programming Religion"}.
 
 @section Why are the ffmpeg programs devoid of debugging symbols?
 
diff --git a/doc/fate.texi b/doc/fate.texi
index a35299423065b..2be61d639c9bb 100644
--- a/doc/fate.texi
+++ b/doc/fate.texi
@@ -155,6 +155,8 @@ space on each client, network bandwidth and so on benefit from smaller test case
 Also keep in mind older checkouts use existing sample files, that means in
 practice generally do not replace, remove or overwrite files as it likely would
 break older checkouts or releases.
+Also, all samples needed for a commit should be uploaded, ideally 24
+hours before the push.
 
 @example
 #First update your local samples copy:
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 3717f22d4202c..8a36a9699de4d 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -614,8 +614,13 @@ they do not conflict with the standard, as in:
 ffmpeg -i myfile.avi -target vcd -bf 2 /tmp/vcd.mpg
 @end example
 
-@item -dn (@emph{output})
-Disable data recording. For full manual control see the @code{-map}
+@item -dn (@emph{input/output})
+As an input option, blocks all data streams of a file from being filtered or
+being automatically selected or mapped for any output. See @code{-discard}
+option to disable streams individually.
+
+As an output option, disables data recording i.e. automatic selection or
+mapping of any data stream. For full manual control see the @code{-map}
 option.
 
 @item -dframes @var{number} (@emph{output})
@@ -775,8 +780,13 @@ If used together with @option{-vcodec copy}, it will affect the aspect ratio
 stored at container level, but not the aspect ratio stored in encoded
 frames, if it exists.
 
-@item -vn (@emph{output})
-Disable video recording. For full manual control see the @code{-map}
+@item -vn (@emph{input/output})
+As an input option, blocks all video streams of a file from being filtered or
+being automatically selected or mapped for any output. See @code{-discard}
+option to disable streams individually.
+
+As an output option, disables video recording i.e. automatic selection or
+mapping of any video stream. For full manual control see the @code{-map}
 option.
 
 @item -vcodec @var{codec} (@emph{output})
@@ -1089,8 +1099,13 @@ Set the number of audio channels. For output streams it is set by
 default to the number of input audio channels. For input streams
 this option only makes sense for audio grabbing devices and raw demuxers
 and is mapped to the corresponding demuxer options.
-@item -an (@emph{output})
-Disable audio recording. For full manual control see the @code{-map}
+@item -an (@emph{input/output})
+As an input option, blocks all audio streams of a file from being filtered or
+being automatically selected or mapped for any output. See @code{-discard}
+option to disable streams individually.
+
+As an output option, disables audio recording i.e. automatic selection or
+mapping of any audio stream. For full manual control see the @code{-map}
 option.
 @item -acodec @var{codec} (@emph{input/output})
 Set the audio codec. This is an alias for @code{-codec:a}.
@@ -1125,8 +1140,13 @@ stereo but not 6 channels as 5.1. The default is to always try to guess. Use
 @table @option
 @item -scodec @var{codec} (@emph{input/output})
 Set the subtitle codec. This is an alias for @code{-codec:s}.
-@item -sn (@emph{output})
-Disable subtitle recording. For full manual control see the @code{-map}
+@item -sn (@emph{input/output})
+As an input option, blocks all subtitle streams of a file from being filtered or
+being automatically selected or mapped for any output. See @code{-discard}
+option to disable streams individually.
+
+As an output option, disables subtitle recording i.e. automatic selection or
+mapping of any subtitle stream. For full manual control see the @code{-map}
 option.
 @item -sbsf @var{bitstream_filter}
 Deprecated, see -bsf
@@ -1620,8 +1640,10 @@ This allows dumping sdp information when at least one output isn't an
 rtp stream. (Requires at least one of the output formats to be rtp).
 
 @item -discard (@emph{input})
-Allows discarding specific streams or frames of streams at the demuxer.
-Not all demuxers support this.
+Allows discarding specific streams or frames from streams.
+Any input stream can be fully discarded using value @code{all}, whereas
+selective discarding of frames from a stream occurs at the demuxer
+and is not supported by all demuxers.
 
 @table @option
 @item none
diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index 99e1d7468a199..c305465078817 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -195,6 +195,12 @@ input as soon as possible. Enabled by default for realtime streams, where data
 may be dropped if not read in time. Use this option to enable infinite buffers
 for all inputs, use @option{-noinfbuf} to disable it.
 
+@item -filter_threads @var{nb_threads}
+Defines how many threads are used to process a filter pipeline. Each pipeline
+will produce a thread pool with this many threads available for parallel
+processing. The default is 0 which means that the thread count will be
+determined by the number of available CPUs.
+
 @end table
 
 @section While playing
diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
index be0539feb2388..28371cee5924e 100644
--- a/doc/ffprobe.texi
+++ b/doc/ffprobe.texi
@@ -425,7 +425,7 @@ The @code{csv} writer is equivalent to @code{compact}, but supports
 different defaults.
 
 Each section is printed on a single line.
-If no option is specifid, the output has the form:
+If no option is specified, the output has the form:
 @example
 section|key1=val1| ... |keyN=valN
 @end example
@@ -591,7 +591,7 @@ This option automatically sets @option{fully_qualified} to 1.
 @end table
 
 For more information about the XML format, see
-@url{http://www.w3.org/XML/}.
+@url{https://www.w3.org/XML/}.
 @c man end WRITERS
 
 @chapter Timecode
diff --git a/doc/fftools-common-opts.texi b/doc/fftools-common-opts.texi
index 84705c0b68a19..e75bec4354773 100644
--- a/doc/fftools-common-opts.texi
+++ b/doc/fftools-common-opts.texi
@@ -34,27 +34,21 @@ Possible forms of stream specifiers are:
 @table @option
 @item @var{stream_index}
 Matches the stream with this index. E.g. @code{-threads:1 4} would set the
-thread count for the second stream to 4.
-@item @var{stream_type}[:@var{stream_index}]
+thread count for the second stream to 4. If @var{stream_index} is used as an
+additional stream specifier (see below), then it selects stream number
+@var{stream_index} from the matching streams.
+@item @var{stream_type}[:@var{additional_stream_specifier}]
 @var{stream_type} is one of following: 'v' or 'V' for video, 'a' for audio, 's'
 for subtitle, 'd' for data, and 't' for attachments. 'v' matches all video
 streams, 'V' only matches video streams which are not attached pictures, video
-thumbnails or cover arts.  If @var{stream_index} is given, then it matches
-stream number @var{stream_index} of this type. Otherwise, it matches all
-streams of this type.
-@item p:@var{program_id}[:@var{stream_index}] or p:@var{program_id}[:@var{stream_type}[:@var{stream_index}]] or
-p:@var{program_id}:m:@var{key}[:@var{value}]
-In first version, if @var{stream_index} is given, then it matches the stream with number @var{stream_index}
-in the program with the id @var{program_id}. Otherwise, it matches all streams in the
-program. In the second version, @var{stream_type} is one of following: 'v' for video, 'a' for audio, 's'
-for subtitle, 'd' for data. If @var{stream_index} is also given, then it matches
-stream number @var{stream_index} of this type in the program with the id @var{program_id}.
-Otherwise, if only @var{stream_type} is given, it matches all
-streams of this type in the program with the id @var{program_id}.
-In the third version matches streams in the program with the id @var{program_id} with the metadata
-tag @var{key} having the specified value. If
-@var{value} is not given, matches streams that contain the given tag with any
-value.
+thumbnails or cover arts. If @var{additional_stream_specifier} is used, then
+it matches streams which both have this type and match the
+@var{additional_stream_specifier}. Otherwise, it matches all streams of the
+specified type.
+@item p:@var{program_id}[:@var{additional_stream_specifier}]
+Matches streams which are in the program with the id @var{program_id}. If
+@var{additional_stream_specifier} is used, then it matches streams which both
+are part of the program and match the @var{additional_stream_specifier}.
 
 @item #@var{stream_id} or i:@var{stream_id}
 Match the stream by stream id (e.g. PID in MPEG-TS container).
@@ -245,7 +239,7 @@ Dump full command line and console output to a file named
 @code{@var{program}-@var{YYYYMMDD}-@var{HHMMSS}.log} in the current
 directory.
 This file can be useful for bug reports.
-It also implies @code{-loglevel verbose}.
+It also implies @code{-loglevel debug}.
 
 Setting the environment variable @env{FFREPORT} to any value has the
 same effect. If the value is a ':'-separated key=value sequence, these
@@ -371,7 +365,15 @@ ffmpeg -i input.flac -id3v2_version 3 out.mp3
 @end example
 
 All codec AVOptions are per-stream, and thus a stream specifier
-should be attached to them.
+should be attached to them:
+@example
+ffmpeg -i multichannel.mxf -map 0:v:0 -map 0:a:0 -map 0:a:0 -c:a:0 ac3 -b:a:0 640k -ac:a:1 2 -c:a:1 aac -b:2 128k out.mp4
+@end example
+
+In the above example, a multichannel audio stream is mapped twice for output.
+The first instance is encoded with codec ac3 and bitrate 640k.
+The second instance is downmixed to 2 channels and encoded with codec aac. A bitrate of 128k is specified for it using
+absolute index of the output stream.
 
 Note: the @option{-nooption} syntax cannot be used for boolean
 AVOptions, use @option{-option 0}/@option{-option 1}.
diff --git a/doc/filters.texi b/doc/filters.texi
index cadf78c93cf96..4bf96b6d90808 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -431,7 +431,7 @@ Range is between 0 and 1.
 @end table
 
 @section acontrast
-Simple audio dynamic range commpression/expansion filter.
+Simple audio dynamic range compression/expansion filter.
 
 The filter accepts the following options:
 
@@ -605,8 +605,8 @@ The lower value, the more samples will be detected as impulsive noise.
 @item b
 Set burst fusion, in percentage of window size. Allowed range is @code{0} to
 @code{10}. Default value is @code{2}.
-If any two samples deteced as noise are spaced less than this value then any
-sample inbetween those two samples will be also detected as noise.
+If any two samples detected as noise are spaced less than this value then any
+sample between those two samples will be also detected as noise.
 
 @item m
 Set overlap method.
@@ -683,6 +683,7 @@ Set list of delays in milliseconds for each channel separated by '|'.
 Unused delays will be silently ignored. If number of given delays is
 smaller than number of channels all remaining channels will not be delayed.
 If you want to delay exact number of samples, append 'S' to number.
+If you want instead to delay in seconds, append 's' to number.
 @end table
 
 @subsection Examples
@@ -957,6 +958,8 @@ select double-exponential seat
 select double-exponential sigmoid
 @item losi
 select logistic sigmoid
+@item nofade
+no fade applied
 @end table
 @end table
 
@@ -1072,17 +1075,17 @@ Apply arbitrary expressions to samples in frequency domain.
 @table @option
 @item real
 Set frequency domain real expression for each separate channel separated
-by '|'. Default is "1".
+by '|'. Default is "re".
 If the number of input channels is greater than the number of
 expressions, the last specified expression is used for the remaining
 output channels.
 
 @item imag
 Set frequency domain imaginary expression for each separate channel
-separated by '|'. If not set, @var{real} option is used.
+separated by '|'. Default is "im".
 
 Each expression in @var{real} and @var{imag} can contain the following
-constants:
+constants and functions:
 
 @table @option
 @item sr
@@ -1102,6 +1105,18 @@ number of channels
 
 @item pts
 current frame pts
+
+@item re
+current real part of frequency bin of current channel
+
+@item im
+current imaginary part of frequency bin of current channel
+
+@item real(b, ch)
+Return the value of real part of frequency bin at location (@var{bin},@var{channel})
+
+@item imag(b, ch)
+Return the value of imaginary part of frequency bin at location (@var{bin},@var{channel})
 @end table
 
 @item win_size
@@ -1139,7 +1154,7 @@ window function will be picked. Default is @code{0.75}.
 @item
 Leave almost only low frequencies in audio:
 @example
-afftfilt="1-clip((b/nb)*b,0,1)"
+afftfilt="'real=re * (1-clip((b/nb)*b,0,1))':imag='im * (1-clip((b/nb)*b,0,1))'"
 @end example
 @end itemize
 
@@ -1153,7 +1168,7 @@ up to 60 seconds long.
 
 It can be used as component for digital crossover filters,
 room equalization, cross talk cancellation, wavefield synthesis,
-auralization, ambiophonics and ambisonics.
+auralization, ambiophonics, ambisonics and spatialization.
 
 This filter uses second stream as FIR coefficients.
 If second stream holds single channel, it will be used
@@ -1205,7 +1220,7 @@ Set max allowed Impulse Response filter duration in seconds. Default is 30 secon
 Allowed range is 0.1 to 60 seconds.
 
 @item response
-Show IR frequency reponse, magnitude and phase in additional video stream.
+Show IR frequency response, magnitude (magenta), phase (green) and group delay (yellow) in additional video stream.
 By default it is disabled.
 
 @item channel
@@ -1214,6 +1229,19 @@ displayed. This option is used only when @var{response} is enabled.
 
 @item size
 Set video stream size. This option is used only when @var{response} is enabled.
+
+@item rate
+Set video stream frame rate. This option is used only when @var{response} is enabled.
+
+@item minp
+Set minimal partition size used for convolution. Default is @var{8192}.
+Allowed range is from @var{8} to @var{32768}.
+Lower values decrease latency at the cost of higher CPU usage.
+
+@item maxp
+Set maximal partition size used for convolution. Default is @var{8192}.
+Allowed range is from @var{8} to @var{32768}.
+Lower values may increase CPU usage.
 @end table
 
 @subsection Examples
@@ -1354,7 +1382,7 @@ Z-plane zeros/poles, polar degrees
 
 @item r
 Set kind of processing.
-Can be @code{d} - direct or @code{s} - serial cascading. Defauls is @code{s}.
+Can be @code{d} - direct or @code{s} - serial cascading. Default is @code{s}.
 
 @item e
 Set filtering precision.
@@ -1371,7 +1399,7 @@ single-precision floating-point
 @end table
 
 @item response
-Show IR frequency reponse, magnitude and phase in additional video stream.
+Show IR frequency response, magnitude and phase in additional video stream.
 By default it is disabled.
 
 @item channel
@@ -1397,7 +1425,7 @@ used for all remaining channels.
 
 @itemize
 @item
-Apply 2 pole elliptic notch at arround 5000Hz for 48000 Hz sample rate:
+Apply 2 pole elliptic notch at around 5000Hz for 48000 Hz sample rate:
 @example
 aiir=k=1:z=7.957584807809675810E-1 -2.575128568908332300 3.674839853930788710 -2.57512875289799137 7.957586296317130880E-1:p=1 -2.86950072432325953 3.63022088054647218 -2.28075678147272232 6.361362326477423500E-1:f=tf:r=d
 @end example
@@ -1724,6 +1752,46 @@ Full filter invocation with asendcmd may look like this:
 asendcmd=c='4.0 anequalizer change 0|f=200|w=50|g=1',anequalizer=...
 @end table
 
+@section anlmdn
+
+Reduce broadband noise in audio samples using Non-Local Means algorithm.
+
+Each sample is adjusted by looking for other samples with similar contexts. This
+context similarity is defined by comparing their surrounding patches of size
+@option{p}. Patches are searched in an area of @option{r} around the sample.
+
+The filter accepts the following options.
+
+@table @option
+@item s
+Set denoising strength. Allowed range is from 0.00001 to 10. Default value is 0.00001.
+
+@item p
+Set patch radius duration. Allowed range is from 1 to 100 milliseconds.
+Default value is 2 milliseconds.
+
+@item r
+Set research radius duration. Allowed range is from 2 to 300 milliseconds.
+Default value is 6 milliseconds.
+
+@item o
+Set the output mode.
+
+It accepts the following values:
+@table @option
+@item i
+Pass input unchanged.
+
+@item o
+Pass noise filtered out.
+
+@item n
+Pass only noise.
+
+Default value is @var{o}.
+@end table
+@end table
+
 @section anull
 
 Pass the audio source unchanged to the output.
@@ -1751,11 +1819,23 @@ Set the minimum total number of samples in the output audio stream. If
 the value is longer than the input audio length, silence is added to
 the end, until the value is reached. This option is mutually exclusive
 with @option{pad_len}.
+
+@item pad_dur
+Specify the duration of samples of silence to add. See
+@ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
+for the accepted syntax. Used only if set to non-zero value.
+
+@item whole_dur
+Specify the minimum total duration in the output audio stream. See
+@ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
+for the accepted syntax. Used only if set to non-zero value. If the value is longer than
+the input audio length, silence is added to the end, until the value is reached.
+This option is mutually exclusive with @option{pad_dur}.
 @end table
 
-If neither the @option{pad_len} nor the @option{whole_len} option is
-set, the filter will add silence to the end of the input stream
-indefinitely.
+If neither the @option{pad_len} nor the @option{whole_len} nor @option{pad_dur}
+nor @option{whole_dur} option is set, the filter will add silence to the end of
+the input stream indefinitely.
 
 @subsection Examples
 
@@ -3493,7 +3573,8 @@ Full example using wav files as coefficients with amovie filters for 7.1 downmix
 each amovie filter use stereo file with IR coefficients as input.
 The files give coefficients for each position of virtual loudspeaker:
 @example
-ffmpeg -i input.wav -lavfi-complex "amovie=azi_270_ele_0_DFC.wav[sr],amovie=azi_90_ele_0_DFC.wav[sl],amovie=azi_225_ele_0_DFC.wav[br],amovie=azi_135_ele_0_DFC.wav[bl],amovie=azi_0_ele_0_DFC.wav,asplit[fc][lfe],amovie=azi_35_ele_0_DFC.wav[fl],amovie=azi_325_ele_0_DFC.wav[fr],[a:0][fl][fr][fc][lfe][bl][br][sl][sr]headphone=FL|FR|FC|LFE|BL|BR|SL|SR"
+ffmpeg -i input.wav
+-filter_complex "amovie=azi_270_ele_0_DFC.wav[sr];amovie=azi_90_ele_0_DFC.wav[sl];amovie=azi_225_ele_0_DFC.wav[br];amovie=azi_135_ele_0_DFC.wav[bl];amovie=azi_0_ele_0_DFC.wav,asplit[fc][lfe];amovie=azi_35_ele_0_DFC.wav[fl];amovie=azi_325_ele_0_DFC.wav[fr];[0:a][fl][fr][fc][lfe][bl][br][sl][sr]headphone=FL|FR|FC|LFE|BL|BR|SL|SR"
 output.wav
 @end example
 
@@ -3501,7 +3582,7 @@ output.wav
 Full example using wav files as coefficients with amovie filters for 7.1 downmix,
 but now in @var{multich} @var{hrir} format.
 @example
-ffmpeg -i input.wav -lavfi-complex "amovie=minp.wav[hrirs],[a:0][hrirs]headphone=map=FL|FR|FC|LFE|BL|BR|SL|SR:hrir=multich"
+ffmpeg -i input.wav -filter_complex "amovie=minp.wav[hrirs];[0:a][hrirs]headphone=map=FL|FR|FC|LFE|BL|BR|SL|SR:hrir=multich"
 output.wav
 @end example
 @end itemize
@@ -4469,6 +4550,28 @@ Descriptions with unrecognised channel names are ignored.
 
 @item lfegain
 Set custom gain for LFE channels. Value is in dB. Default is 0.
+
+@item framesize
+Set custom frame size in number of samples. Default is 1024.
+Allowed range is from 1024 to 96000. Only used if option @samp{type}
+is set to @var{freq}.
+
+@item normalize
+Whether all IRs should be normalized upon importing the SOFA file.
+Enabled by default.
+
+@item interpolate
+Whether the nearest IRs should be interpolated with neighbor IRs if the exact
+position does not match. Disabled by default.
+
+@item minphase
+Minphase all IRs upon loading of SOFA file. By default is disabled.
+
+@item anglestep
+Set neighbor search angle step. Only used if option @var{interpolate} is enabled.
+
+@item radstep
+Set neighbor search radius step. Only used if option @var{interpolate} is enabled.
 @end table
 
 @subsection Examples
@@ -5370,6 +5473,49 @@ Set number of samples per each frame.
 Set window function to be used when generating FIR coefficients.
 @end table
 
+@section sinc
+
+Generate sinc Kaiser-windowed low-pass, high-pass, band-pass, or band-reject FIR coefficients.
+
+The resulting stream can be used with @ref{afir} filter for filtering the audio signal.
+
+The filter accepts the following options:
+
+@table @option
+@item sample_rate, r
+Set sample rate, default is 44100.
+
+@item nb_samples, n
+Set number of samples per each frame. Default is 1024.
+
+@item hp
+Set high-pass frequency. Default is 0.
+
+@item lp
+Set low-pass frequency. Default is 0.
+If high-pass frequency is lower than low-pass frequency and low-pass frequency
+is higher than 0 then filter will create band-pass filter coefficients,
+otherwise band-reject filter coefficients.
+
+@item phase
+Set filter phase response. Default is 50. Allowed range is from 0 to 100.
+
+@item beta
+Set Kaiser window beta.
+
+@item att
+Set stop-band attenuation. Default is 120dB, allowed range is from 40 to 180 dB.
+
+@item round
+Enable rounding. Disabled by default.
+
+@item hptaps
+Set number of taps for high-pass filter.
+
+@item lptaps
+Set number of taps for low-pass filter.
+@end table
+
 @section sine
 
 Generate an audio signal made of a sine wave with amplitude 1/8.
@@ -5519,10 +5665,15 @@ For example radius of 3 will instruct filter to calculate average of 7 frames.
 Set factor to amplify difference. Default is 2. Allowed range is from 0 to 65535.
 
 @item threshold
-Set threshold for difference amplification. Any differrence greater or equal to
+Set threshold for difference amplification. Any difference greater or equal to
 this value will not alter source pixel. Default is 10.
 Allowed range is from 0 to 65535.
 
+@item tolerance
+Set tolerance for difference amplification. Any difference lower than
+this value will not alter source pixel. Default is 0.
+Allowed range is from 0 to 65535.
+
 @item low
 Set lower limit for changing source pixel. Default is 65535. Allowed range is from 0 to 65535.
 This option controls maximum possible value that will decrease source pixel value.
@@ -5904,7 +6055,7 @@ The filter accepts the following options.
 @item sigma
 Set denoising strength. Default value is 1.
 Allowed range is from 0 to 999.9.
-The denoising algorith is very sensitive to sigma, so adjust it
+The denoising algorithm is very sensitive to sigma, so adjust it
 according to the source.
 
 @item block
@@ -6120,6 +6271,26 @@ Only deinterlace frames marked as interlaced.
 The default value is @code{all}.
 @end table
 
+@section chromahold
+Remove all color information for all colors except for a certain one.
+
+The filter accepts the following options:
+
+@table @option
+@item color
+The color which will not be replaced with neutral chroma.
+
+@item similarity
+Similarity percentage with the above color.
+0.01 matches only the exact key color, while 1.0 matches everything.
+
+@item yuv
+Signals that the color passed is already in YUV instead of RGB.
+
+Literal colors like "green" or "red" don't make sense with this enabled anymore.
+This can be used to pass exact YUV values as hexadecimal numbers.
+@end table
+
 @section chromakey
 YUV colorspace color/chroma keying.
 
@@ -6165,6 +6336,23 @@ ffmpeg -f lavfi -i color=c=black:s=1280x720 -i video.mp4 -shortest -filter_compl
 @end example
 @end itemize
 
+@section chromashift
+Shift chroma pixels horizontally and/or vertically.
+
+The filter accepts the following options:
+@table @option
+@item cbh
+Set amount to shift chroma-blue horizontally.
+@item cbv
+Set amount to shift chroma-blue vertically.
+@item crh
+Set amount to shift chroma-red horizontally.
+@item crv
+Set amount to shift chroma-red vertically.
+@item edge
+Set edge mode, can be @var{smear} (the default) or @var{warp}.
+@end table
+
 @section ciescope
 
 Display CIE color diagram with pixels overlaid onto it.
@@ -7673,6 +7861,30 @@ had noise.
 
 The @code{deconvolve} filter also supports the @ref{framesync} options.
 
+@section dedot
+
+Reduce cross-luminance (dot-crawl) and cross-color (rainbows) from video.
+
+It accepts the following options:
+
+@table @option
+@item m
+Set mode of operation. Can be combination of @var{dotcrawl} for cross-luminance reduction and/or
+@var{rainbows} for cross-color reduction.
+
+@item lt
+Set spatial luma threshold. Lower values increase reduction of cross-luminance.
+
+@item tl
+Set tolerance for temporal luma. Higher values increase reduction of cross-luminance.
+
+@item tc
+Set tolerance for chroma temporal variation. Higher values increase reduction of cross-color.
+
+@item ct
+Set temporal chroma threshold. Lower values increase reduction of cross-color.
+@end table
+
 @section deflate
 
 Apply deflate effect to the video.
@@ -9940,6 +10152,35 @@ Select frame after every @code{step} frames.
 Allowed values are positive integers higher than 0. Default value is @code{1}.
 @end table
 
+@section freezedetect
+
+Detect frozen video.
+
+This filter logs a message and sets frame metadata when it detects that the
+input video has no significant change in content during a specified duration.
+Video freeze detection calculates the mean average absolute difference of all
+the components of video frames and compares it to a noise floor.
+
+The printed times and duration are expressed in seconds. The
+@code{lavfi.freezedetect.freeze_start} metadata key is set on the first frame
+whose timestamp equals or exceeds the detection duration and it contains the
+timestamp of the first frame of the freeze. The
+@code{lavfi.freezedetect.freeze_duration} and
+@code{lavfi.freezedetect.freeze_end} metadata keys are set on the first frame
+after the freeze.
+
+The filter accepts the following options:
+
+@table @option
+@item noise, n
+Set noise tolerance. Can be specified in dB (in case "dB" is appended to the
+specified value) or as a difference ratio between 0 and 1. Default is -60dB, or
+0.001.
+
+@item duration, d
+Set freeze duration until notification (default is 2 seconds).
+@end table
+
 @anchor{frei0r}
 @section frei0r
 
@@ -10046,7 +10287,7 @@ The filter accepts the following options:
 Set horizontal sigma, standard deviation of Gaussian blur. Default is @code{0.5}.
 
 @item steps
-Set number of steps for Gaussian approximation. Defauls is @code{1}.
+Set number of steps for Gaussian approximation. Default is @code{1}.
 
 @item planes
 Set which planes to filter. By default all planes are filtered.
@@ -10058,6 +10299,8 @@ Default is @code{-1}.
 
 @section geq
 
+Apply generic equation to each pixel.
+
 The filter accepts the following options:
 
 @table @option
@@ -10235,6 +10478,63 @@ gradfun=radius=8
 
 @end itemize
 
+@section graphmonitor, agraphmonitor
+Show various filtergraph stats.
+
+With this filter one can debug a complete filtergraph,
+especially issues with links filling with queued frames.
+
+The filter accepts the following options:
+
+@table @option
+@item size, s
+Set video output size. Default is @var{hd720}.
+
+@item opacity, o
+Set video opacity. Default is @var{0.9}. Allowed range is from @var{0} to @var{1}.
+
+@item mode, m
+Set output mode, can be @var{full} or @var{compact}.
+In @var{compact} mode only filters with some queued frames have displayed stats.
+
+@item flags, f
+Set flags which enable which stats are shown in video.
+
+Available values for flags are:
+@table @samp
+@item queue
+Display number of queued frames in each link.
+
+@item frame_count_in
+Display number of frames taken from filter.
+
+@item frame_count_out
+Display number of frames given out from filter.
+
+@item pts
+Display current filtered frame pts.
+
+@item time
+Display current filtered frame time.
+
+@item timebase
+Display time base for filter link.
+
+@item format
+Display used format for filter link.
+
+@item size
+Display video size or number of audio channels in case of audio used by filter link.
+
+@item rate
+Display video frame rate or sample rate in case of audio used by filter link.
+@end table
+
+@item rate, r
+Set upper limit for video rate of output stream. Default value is @var{25}.
+This guarantees that output video frame rate will not be higher than this value.
+@end table
+
 @section greyedge
 A color constancy variation filter which estimates scene illumination via grey edge algorithm
 and corrects the scene colors accordingly.
@@ -10256,7 +10556,7 @@ max value instead of calculating Minkowski distance.
 @item sigma
 The standard deviation of Gaussian blur to be applied on the scene. Must be
 chosen in the range [0,1024.0] and default value = 1. floor( @var{sigma} * break_off_sigma(3) )
-can't be euqal to 0 if @var{difford} is greater than 0.
+can't be equal to 0 if @var{difford} is greater than 0.
 @end table
 
 @subsection Examples
@@ -10485,6 +10785,7 @@ A floating point number which specifies chroma temporal strength. It defaults to
 @var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}.
 @end table
 
+@anchor{hwdownload}
 @section hwdownload
 
 Download hardware frames to system memory.
@@ -10575,6 +10876,7 @@ ways if there are any additional constraints on that filter's output.
 Do not use it without fully understanding the implications of its use.
 @end table
 
+@anchor{hwupload}
 @section hwupload
 
 Upload system memory frames to hardware surfaces.
@@ -11180,7 +11482,9 @@ Set the file path to be used to store logs.
 Set the format of the log file (xml or json).
 
 @item enable_transform
-Enables transform for computing vmaf.
+This option can enable/disable the @code{score_transform} applied to the final predicted VMAF score,
+if you have specified score_transform option in the input parameter file passed to @code{run_vmaf_training.py}.
+Default value: @code{false}
 
 @item phone_model
 Invokes the phone model which will generate VMAF scores higher than in the
@@ -11219,7 +11523,7 @@ ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
 
 Example with options:
 @example
-ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:enable-transform=1" -f null -
+ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:log_fmt=json" -f null -
 @end example
 
 @section limiter
@@ -11257,6 +11561,28 @@ Set maximal size in number of frames. Default is 0.
 Set first frame of loop. Default is 0.
 @end table
 
+@subsection Examples
+
+@itemize
+@item
+Loop single first frame infinitely:
+@example
+loop=loop=-1:size=1:start=0
+@end example
+
+@item
+Loop single first frame 10 times:
+@example
+loop=loop=10:size=1:start=0
+@end example
+
+@item
+Loop 10 first frames 5 times:
+@example
+loop=loop=5:size=10:start=0
+@end example
+@end itemize
+
 @section lut1d
 
 Apply a 1D LUT to an input video.
@@ -11283,8 +11609,12 @@ Available values are:
 Use values from the nearest defined point.
 @item linear
 Interpolate values using the linear interpolation.
+@item cosine
+Interpolate values using the cosine interpolation.
 @item cubic
 Interpolate values using the cubic interpolation.
+@item spline
+Interpolate values using the spline interpolation.
 @end table
 @end table
 
@@ -11513,6 +11843,10 @@ set second pixel component expression
 set third pixel component expression
 @item c3
 set fourth pixel component expression, corresponds to the alpha component
+
+@item d
+set output bit depth, only available for @code{lut2} filter. By default is 0,
+which means bit depth is automatically picked from first input format.
 @end table
 
 Each of them specifies the expression to use for computing the lookup table for
@@ -11605,6 +11939,33 @@ copied from first stream.
 By default value 0xf, all planes will be processed.
 @end table
 
+@section maskfun
+Create mask from input video.
+
+For example it is useful to create motion masks after @code{tblend} filter.
+
+This filter accepts the following options:
+
+@table @option
+@item low
+Set low threshold. Any pixel component lower than or equal to this value will be set to 0.
+
+@item high
+Set high threshold. Any pixel component higher than this value will be set to max value
+allowed for current pixel format.
+
+@item planes
+Set planes to filter, by default all available planes are filtered.
+
+@item fill
+Fill all frame pixels with this value.
+
+@item sum
+Set max average pixel value for frame. If sum of all pixel components is higher than this
+average, output frame will be completely filled with value set by @var{fill} option.
+Typically useful for scene changes when used in combination with @code{tblend} filter.
+@end table
+
 @section mcdeint
 
 Apply motion-compensation deinterlacing.
@@ -11958,10 +12319,10 @@ The filter accepts the following options.
 
 @table @option
 @item s
-Set denoising strength.
+Set denoising strength. Default is 1.0. Must be in range [1.0, 30.0].
 
 @item p
-Set patch size.
+Set patch size. Default is 7. Must be odd number in range [0, 99].
 
 @item pc
 Same as @option{p} but for chroma planes.
@@ -11969,7 +12330,7 @@ Same as @option{p} but for chroma planes.
 The default value is @var{0} and means automatic.
 
 @item r
-Set research size.
+Set research size. Default is 15. Must be odd number in range [0, 99].
 
 @item rc
 Same as @option{r} but for chroma planes.
@@ -14091,6 +14452,31 @@ trim=end=5,reverse
 @end example
 @end itemize
 
+@section rgbashift
+Shift R/G/B/A pixels horizontally and/or vertically.
+
+The filter accepts the following options:
+@table @option
+@item rh
+Set amount to shift red horizontally.
+@item rv
+Set amount to shift red vertically.
+@item gh
+Set amount to shift green horizontally.
+@item gv
+Set amount to shift green vertically.
+@item bh
+Set amount to shift blue horizontally.
+@item bv
+Set amount to shift blue vertically.
+@item ah
+Set amount to shift alpha horizontally.
+@item av
+Set amount to shift alpha vertically.
+@item edge
+Set edge mode, can be @var{smear} (the default) or @var{warp}.
+@end table
+
 @section roberts
 Apply roberts cross operator to input video stream.
 
@@ -14904,59 +15290,185 @@ Mark the frame as progressive.
 @end table
 @end table
 
-@section showinfo
-
-Show a line containing various information for each input video frame.
-The input video is not modified.
+@anchor{setparams}
+@section setparams
 
-The shown line contains a sequence of key/value pairs of the form
-@var{key}:@var{value}.
+Force frame parameter for the output video frame.
 
-The following values are shown in the output:
+The @code{setparams} filter marks interlace and color range for the
+output frames. It does not change the input frame, but only sets the
+corresponding property, which affects how the frame is treated by
+filters/encoders.
 
 @table @option
-@item n
-The (sequential) number of the input frame, starting from 0.
-
-@item pts
-The Presentation TimeStamp of the input frame, expressed as a number of
-time base units. The time base unit depends on the filter input pad.
+@item field_mode
+Available values are:
 
-@item pts_time
-The Presentation TimeStamp of the input frame, expressed as a number of
-seconds.
+@table @samp
+@item auto
+Keep the same field property (default).
 
-@item pos
-The position of the frame in the input stream, or -1 if this information is
-unavailable and/or meaningless (for example in case of synthetic video).
+@item bff
+Mark the frame as bottom-field-first.
 
-@item fmt
-The pixel format name.
+@item tff
+Mark the frame as top-field-first.
 
-@item sar
-The sample aspect ratio of the input frame, expressed in the form
-@var{num}/@var{den}.
+@item prog
+Mark the frame as progressive.
+@end table
 
-@item s
-The size of the input frame. For the syntax of this option, check the
-@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+@item range
+Available values are:
 
-@item i
-The type of interlaced mode ("P" for "progressive", "T" for top field first, "B"
-for bottom field first).
+@table @samp
+@item auto
+Keep the same color range property (default).
 
-@item iskey
-This is 1 if the frame is a key frame, 0 otherwise.
+@item unspecified, unknown
+Mark the frame as unspecified color range.
 
-@item type
-The picture type of the input frame ("I" for an I-frame, "P" for a
-P-frame, "B" for a B-frame, or "?" for an unknown type).
-Also refer to the documentation of the @code{AVPictureType} enum and of
-the @code{av_get_picture_type_char} function defined in
-@file{libavutil/avutil.h}.
+@item limited, tv, mpeg
+Mark the frame as limited range.
 
-@item checksum
-The Adler-32 checksum (printed in hexadecimal) of all the planes of the input frame.
+@item full, pc, jpeg
+Mark the frame as full range.
+@end table
+
+@item color_primaries
+Set the color primaries.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same color primaries property (default).
+
+@item bt709
+@item unknown
+@item bt470m
+@item bt470bg
+@item smpte170m
+@item smpte240m
+@item film
+@item bt2020
+@item smpte428
+@item smpte431
+@item smpte432
+@item jedec-p22
+@end table
+
+@item color_trc
+Set the color transfer.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same color trc property (default).
+
+@item bt709
+@item unknown
+@item bt470m
+@item bt470bg
+@item smpte170m
+@item smpte240m
+@item linear
+@item log100
+@item log316
+@item iec61966-2-4
+@item bt1361e
+@item iec61966-2-1
+@item bt2020-10
+@item bt2020-12
+@item smpte2084
+@item smpte428
+@item arib-std-b67
+@end table
+
+@item colorspace
+Set the colorspace.
+Available values are:
+
+@table @samp
+@item auto
+Keep the same colorspace property (default).
+
+@item gbr
+@item bt709
+@item unknown
+@item fcc
+@item bt470bg
+@item smpte170m
+@item smpte240m
+@item ycgco
+@item bt2020nc
+@item bt2020c
+@item smpte2085
+@item chroma-derived-nc
+@item chroma-derived-c
+@item ictcp
+@end table
+@end table
+
+@section showinfo
+
+Show a line containing various information for each input video frame.
+The input video is not modified.
+
+This filter supports the following options:
+
+@table @option
+@item checksum
+Calculate checksums of each plane. By default enabled.
+@end table
+
+The shown line contains a sequence of key/value pairs of the form
+@var{key}:@var{value}.
+
+The following values are shown in the output:
+
+@table @option
+@item n
+The (sequential) number of the input frame, starting from 0.
+
+@item pts
+The Presentation TimeStamp of the input frame, expressed as a number of
+time base units. The time base unit depends on the filter input pad.
+
+@item pts_time
+The Presentation TimeStamp of the input frame, expressed as a number of
+seconds.
+
+@item pos
+The position of the frame in the input stream, or -1 if this information is
+unavailable and/or meaningless (for example in case of synthetic video).
+
+@item fmt
+The pixel format name.
+
+@item sar
+The sample aspect ratio of the input frame, expressed in the form
+@var{num}/@var{den}.
+
+@item s
+The size of the input frame. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+
+@item i
+The type of interlaced mode ("P" for "progressive", "T" for top field first, "B"
+for bottom field first).
+
+@item iskey
+This is 1 if the frame is a key frame, 0 otherwise.
+
+@item type
+The picture type of the input frame ("I" for an I-frame, "P" for a
+P-frame, "B" for a B-frame, or "?" for an unknown type).
+Also refer to the documentation of the @code{AVPictureType} enum and of
+the @code{av_get_picture_type_char} function defined in
+@file{libavutil/avutil.h}.
+
+@item checksum
+The Adler-32 checksum (printed in hexadecimal) of all the planes of the input frame.
 
 @item plane_checksum
 The Adler-32 checksum (printed in hexadecimal) of each plane of the input frame,
@@ -16385,6 +16897,7 @@ tmix=frames=3:weights="-1 2 -1":scale=1
 @end example
 @end itemize
 
+@anchor{tonemap}
 @section tonemap
 Tone map colors from different dynamic ranges.
 
@@ -16492,6 +17005,46 @@ embedded peak information in display metadata is not reliable or when tone
 mapping from a lower range to a higher range.
 @end table
 
+@section tpad
+
+Temporally pad video frames.
+
+The filter accepts the following options:
+
+@table @option
+@item start
+Specify number of delay frames before input video stream.
+
+@item stop
+Specify number of padding frames after input video stream.
+Set to -1 to pad indefinitely.
+
+@item start_mode
+Set kind of frames added to beginning of stream.
+Can be either @var{add} or @var{clone}.
+With @var{add} frames of solid-color are added.
+With @var{clone} frames are clones of first frame.
+
+@item stop_mode
+Set kind of frames added to end of stream.
+Can be either @var{add} or @var{clone}.
+With @var{add} frames of solid-color are added.
+With @var{clone} frames are clones of last frame.
+
+@item start_duration, stop_duration
+Specify the duration of the start/stop delay. See
+@ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
+for the accepted syntax.
+These options override @var{start} and @var{stop}.
+
+@item color
+Specify the color of the padded area. For the syntax of this option,
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils
+manual,ffmpeg-utils}.
+
+The default value of @var{color} is "black".
+@end table
+
 @anchor{transpose}
 @section transpose
 
@@ -17211,6 +17764,35 @@ and ones with constant delta pts.
 If there was frames with variable delta, than it will also show min and max delta
 encountered.
 
+@section vibrance
+
+Boost or alter saturation.
+
+The filter accepts the following options:
+@table @option
+@item intensity
+Set strength of boost if positive value or strength of alter if negative value.
+Default is 0. Allowed range is from -2 to 2.
+
+@item rbal
+Set the red balance. Default is 1. Allowed range is from -10 to 10.
+
+@item gbal
+Set the green balance. Default is 1. Allowed range is from -10 to 10.
+
+@item bbal
+Set the blue balance. Default is 1. Allowed range is from -10 to 10.
+
+@item rlum
+Set the red luma coefficient.
+
+@item glum
+Set the green luma coefficient.
+
+@item blum
+Set the blue luma coefficient.
+@end table
+
 @anchor{vignette}
 @section vignette
 
@@ -17587,6 +18169,61 @@ Set the scaling dimension: @code{2} for @code{2xBR}, @code{3} for
 Default is @code{3}.
 @end table
 
+@section xstack
+Stack video inputs into custom layout.
+
+All streams must be of same pixel format.
+
+The filter accepts the following options:
+
+@table @option
+@item inputs
+Set number of input streams. Default is 2.
+
+@item layout
+Specify layout of inputs.
+This option requires the desired layout configuration to be explicitly set by the user.
+This sets position of each video input in output. Each input
+is separated by '|'.
+The first number represents the column, and the second number represents the row.
+Numbers start at 0 and are separated by '_'. Optionally one can use wX and hX,
+where X is video input from which to take width or height.
+Multiple values can be used when separated by '+'. In such
+case values are summed together.
+
+@item shortest
+If set to 1, force the output to terminate when the shortest input
+terminates. Default value is 0.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Display 4 inputs into 2x2 grid,
+note that if inputs are of different sizes unused gaps might appear,
+as not all of output video is used.
+@example
+xstack=inputs=4:layout=0_0|0_h0|w0_0|w0_h0
+@end example
+
+@item
+Display 4 inputs into 1x4 grid,
+note that if inputs are of different sizes unused gaps might appear,
+as not all of output video is used.
+@example
+xstack=inputs=4:layout=0_0|0_h0|0_h0+h1|0_h0+h1+h2
+@end example
+
+@item
+Display 9 inputs into 3x3 grid,
+note that if inputs are of different sizes unused gaps might appear,
+as not all of output video is used.
+@example
+xstack=inputs=9:layout=w3_0|w3_h0+h2|w3_h0|0_h4|0_0|w3+w1_0|0_h1+h2|w3+w1_h0|w3+w1_h1+h2
+@end example
+@end itemize
+
 @anchor{yadif}
 @section yadif
 
@@ -17596,6 +18233,64 @@ filter").
 It accepts the following parameters:
 
 
+@table @option
+
+@item mode
+The interlacing mode to adopt. It accepts one of the following values:
+
+@table @option
+@item 0, send_frame
+Output one frame for each frame.
+@item 1, send_field
+Output one frame for each field.
+@item 2, send_frame_nospatial
+Like @code{send_frame}, but it skips the spatial interlacing check.
+@item 3, send_field_nospatial
+Like @code{send_field}, but it skips the spatial interlacing check.
+@end table
+
+The default value is @code{send_frame}.
+
+@item parity
+The picture field parity assumed for the input interlaced video. It accepts one
+of the following values:
+
+@table @option
+@item 0, tff
+Assume the top field is first.
+@item 1, bff
+Assume the bottom field is first.
+@item -1, auto
+Enable automatic detection of field parity.
+@end table
+
+The default value is @code{auto}.
+If the interlacing is unknown or the decoder does not export this information,
+top field first will be assumed.
+
+@item deint
+Specify which frames to deinterlace. Accept one of the following
+values:
+
+@table @option
+@item 0, all
+Deinterlace all frames.
+@item 1, interlaced
+Only deinterlace frames marked as interlaced.
+@end table
+
+The default value is @code{all}.
+@end table
+
+@section yadif_cuda
+
+Deinterlace the input video using the @ref{yadif} algorithm, but implemented
+in CUDA so that it can work as part of a GPU accelerated pipeline with nvdec
+and/or nvenc.
+
+It accepts the following parameters:
+
+
 @table @option
 
 @item mode
@@ -18014,6 +18709,585 @@ pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1.
 
 @c man end VIDEO FILTERS
 
+@chapter OpenCL Video Filters
+@c man begin OPENCL VIDEO FILTERS
+
+Below is a description of the currently available OpenCL video filters.
+
+To enable compilation of these filters you need to configure FFmpeg with
+@code{--enable-opencl}.
+
+Running OpenCL filters requires you to initialize a hardware device and to pass that device to all filters in any filter graph.
+@table @option
+
+@item -init_hw_device opencl[=@var{name}][:@var{device}[,@var{key=value}...]]
+Initialise a new hardware device of type @var{opencl} called @var{name}, using the
+given device parameters.
+
+@item -filter_hw_device @var{name}
+Pass the hardware device called @var{name} to all filters in any filter graph.
+
+@end table
+
+For more detailed information see @url{https://www.ffmpeg.org/ffmpeg.html#Advanced-Video-options}
+
+@itemize
+@item
+Example of choosing the first device on the second platform and running avgblur_opencl filter with default parameters on it.
+@example
+-init_hw_device opencl=gpu:1.0 -filter_hw_device gpu -i INPUT -vf "hwupload, avgblur_opencl, hwdownload" OUTPUT
+@end example
+@end itemize
+
+Since OpenCL filters are not able to access frame data in normal memory, all frame data needs to be uploaded(@ref{hwupload}) to hardware surfaces connected to the appropriate device before being used and then downloaded(@ref{hwdownload}) back to normal memory. Note that @ref{hwupload} will upload to a surface with the same layout as the software frame, so it may be necessary to add a @ref{format} filter immediately before to get the input into the right format and @ref{hwdownload} does not support all formats on the output - it may be necessary to insert an additional @ref{format} filter immediately following in the graph to get the output in a supported format.
+
+@section avgblur_opencl
+
+Apply average blur filter.
+
+The filter accepts the following options:
+
+@table @option
+@item sizeX
+Set horizontal radius size.
+Range is @code{[1, 1024]} and default value is @code{1}.
+
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes are processed.
+
+@item sizeY
+Set vertical radius size. Range is @code{[1, 1024]} and default value is @code{0}. If zero, @code{sizeX} value will be used.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply average blur filter with horizontal and vertical size of 3, setting each pixel of the output to the average value of the 7x7 region centered on it in the input. For pixels on the edges of the image, the region does not extend beyond the image boundaries, and so out-of-range coordinates are not used in the calculations.
+@example
+-i INPUT -vf "hwupload, avgblur_opencl=3, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section boxblur_opencl
+
+Apply a boxblur algorithm to the input video.
+
+It accepts the following parameters:
+
+@table @option
+
+@item luma_radius, lr
+@item luma_power, lp
+@item chroma_radius, cr
+@item chroma_power, cp
+@item alpha_radius, ar
+@item alpha_power, ap
+
+@end table
+
+A description of the accepted options follows.
+
+@table @option
+@item luma_radius, lr
+@item chroma_radius, cr
+@item alpha_radius, ar
+Set an expression for the box radius in pixels used for blurring the
+corresponding input plane.
+
+The radius value must be a non-negative number, and must not be
+greater than the value of the expression @code{min(w,h)/2} for the
+luma and alpha planes, and of @code{min(cw,ch)/2} for the chroma
+planes.
+
+Default value for @option{luma_radius} is "2". If not specified,
+@option{chroma_radius} and @option{alpha_radius} default to the
+corresponding value set for @option{luma_radius}.
+
+The expressions can contain the following constants:
+@table @option
+@item w
+@item h
+The input width and height in pixels.
+
+@item cw
+@item ch
+The input chroma image width and height in pixels.
+
+@item hsub
+@item vsub
+The horizontal and vertical chroma subsample values. For example, for the
+pixel format "yuv422p", @var{hsub} is 2 and @var{vsub} is 1.
+@end table
+
+@item luma_power, lp
+@item chroma_power, cp
+@item alpha_power, ap
+Specify how many times the boxblur filter is applied to the
+corresponding plane.
+
+Default value for @option{luma_power} is 2. If not specified,
+@option{chroma_power} and @option{alpha_power} default to the
+corresponding value set for @option{luma_power}.
+
+A value of 0 will disable the effect.
+@end table
+
+@subsection Examples
+
+Apply boxblur filter, setting each pixel of the output to the average value of box-radiuses @var{luma_radius}, @var{chroma_radius}, @var{alpha_radius} for each plane respectively. The filter will apply @var{luma_power}, @var{chroma_power}, @var{alpha_power} times onto the corresponding plane. For pixels on the edges of the image, the radius does not extend beyond the image boundaries, and so out-of-range coordinates are not used in the calculations.
+
+@itemize
+@item
+Apply a boxblur filter with the luma, chroma, and alpha radius
+set to 2 and luma, chroma, and alpha power set to 3. The filter will run 3 times with box-radius set to 2 for every plane of the image.
+@example
+-i INPUT -vf "hwupload, boxblur_opencl=luma_radius=2:luma_power=3, hwdownload" OUTPUT
+-i INPUT -vf "hwupload, boxblur_opencl=2:3, hwdownload" OUTPUT
+@end example
+
+@item
+Apply a boxblur filter with luma radius set to 2, luma_power to 1, chroma_radius to 4, chroma_power to 5, alpha_radius to 3 and alpha_power to 7.
+
+For the luma plane, a 2x2 box radius will be run once.
+
+For the chroma plane, a 4x4 box radius will be run 5 times.
+
+For the alpha plane, a 3x3 box radius will be run 7 times.
+@example
+-i INPUT -vf "hwupload, boxblur_opencl=2:1:4:5:3:7, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section convolution_opencl
+
+Apply convolution of 3x3, 5x5, 7x7 matrix.
+
+The filter accepts the following options:
+
+@table @option
+@item 0m
+@item 1m
+@item 2m
+@item 3m
+Set matrix for each plane.
+Matrix is sequence of 9, 25 or 49 signed numbers.
+Default value for each plane is @code{0 0 0 0 1 0 0 0 0}.
+
+@item 0rdiv
+@item 1rdiv
+@item 2rdiv
+@item 3rdiv
+Set multiplier for calculated value for each plane.
+If unset or 0, it will be sum of all matrix elements.
+The option value must be a float number greater than or equal to @code{0.0}. Default value is @code{1.0}.
+
+@item 0bias
+@item 1bias
+@item 2bias
+@item 3bias
+Set bias for each plane. This value is added to the result of the multiplication.
+Useful for making the overall image brighter or darker.
+The option value must be a float number greater than or equal to @code{0.0}. Default value is @code{0.0}.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Apply sharpen:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0, hwdownload" OUTPUT
+@end example
+
+@item
+Apply blur:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1/9:1/9:1/9:1/9, hwdownload" OUTPUT
+@end example
+
+@item
+Apply edge enhance:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128, hwdownload" OUTPUT
+@end example
+
+@item
+Apply edge detect:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:5:5:5:1:0:128:128:128, hwdownload" OUTPUT
+@end example
+
+@item
+Apply laplacian edge detector which includes diagonals:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=1 1 1 1 -8 1 1 1 1:1 1 1 1 -8 1 1 1 1:1 1 1 1 -8 1 1 1 1:1 1 1 1 -8 1 1 1 1:5:5:5:1:0:128:128:0, hwdownload" OUTPUT
+@end example
+
+@item
+Apply emboss:
+@example
+-i INPUT -vf "hwupload, convolution_opencl=-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section dilation_opencl
+
+Apply dilation effect to the video.
+
+This filter replaces the pixel by the local (3x3) maximum.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane. Range is @code{[0, 65535]} and default value is @code{65535}.
+If @code{0}, plane will remain unchanged.
+
+@item coordinates
+Flag which specifies the pixel to refer to.
+Range is @code{[0, 255]} and default value is @code{255}, i.e. all eight pixels are used.
+
+Flags to local 3x3 coordinates region centered on @code{x}:
+
+    1 2 3
+
+    4 x 5
+
+    6 7 8
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply dilation filter with threshold0 set to 30, threshold1 set to 40, threshold2 set to 50 and coordinates set to 231, setting each pixel of the output to the local maximum between pixels: 1, 2, 3, 6, 7, 8 of the 3x3 region centered on it in the input. If the difference between input pixel and local maximum is more than the threshold of the corresponding plane, output pixel will be set to input pixel + threshold of corresponding plane.
+@example
+-i INPUT -vf "hwupload, dilation_opencl=30:40:50:coordinates=231, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section erosion_opencl
+
+Apply erosion effect to the video.
+
+This filter replaces the pixel by the local (3x3) minimum.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane. Range is @code{[0, 65535]} and default value is @code{65535}.
+If @code{0}, plane will remain unchanged.
+
+@item coordinates
+Flag which specifies the pixel to refer to.
+Range is @code{[0, 255]} and default value is @code{255}, i.e. all eight pixels are used.
+
+Flags to local 3x3 coordinates region centered on @code{x}:
+
+    1 2 3
+
+    4 x 5
+
+    6 7 8
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply erosion filter with threshold0 set to 30, threshold1 set to 40, threshold2 set to 50 and coordinates set to 231, setting each pixel of the output to the local minimum between pixels: 1, 2, 3, 6, 7, 8 of the 3x3 region centered on it in the input. If the difference between input pixel and local minimum is more than the threshold of the corresponding plane, output pixel will be set to input pixel - threshold of corresponding plane.
+@example
+-i INPUT -vf "hwupload, erosion_opencl=30:40:50:coordinates=231, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section overlay_opencl
+
+Overlay one video on top of another.
+
+It takes two inputs and has one output. The first input is the "main" video on which the second input is overlaid.
+This filter requires same memory layout for all the inputs. So, format conversion may be needed.
+
+The filter accepts the following options:
+
+@table @option
+
+@item x
+Set the x coordinate of the overlaid video on the main video.
+Default value is @code{0}.
+
+@item y
+Set the y coordinate of the overlaid video on the main video.
+Default value is @code{0}.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Overlay an image LOGO at the top-left corner of the INPUT video. Both inputs are yuv420p format.
+@example
+-i INPUT -i LOGO -filter_complex "[0:v]hwupload[a], [1:v]format=yuv420p, hwupload[b], [a][b]overlay_opencl, hwdownload" OUTPUT
+@end example
+@item
+The inputs have the same memory layout for color channels, but the overlay has an additional alpha plane; for example, INPUT is yuv420p and LOGO is yuva420p.
+@example
+-i INPUT -i LOGO -filter_complex "[0:v]hwupload[a], [1:v]format=yuva420p, hwupload[b], [a][b]overlay_opencl, hwdownload" OUTPUT
+@end example
+
+@end itemize
+
+@section prewitt_opencl
+
+Apply the Prewitt operator (@url{https://en.wikipedia.org/wiki/Prewitt_operator}) to input video stream.
+
+The filter accepts the following options:
+
+@table @option
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes are processed.
+
+@item scale
+Set value which will be multiplied with filtered result.
+Range is @code{[0.0, 65535]} and default value is @code{1.0}.
+
+@item delta
+Set value which will be added to filtered result.
+Range is @code{[-65535, 65535]} and default value is @code{0.0}.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply the Prewitt operator with scale set to 2 and delta set to 10.
+@example
+-i INPUT -vf "hwupload, prewitt_opencl=scale=2:delta=10, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section roberts_opencl
+Apply the Roberts cross operator (@url{https://en.wikipedia.org/wiki/Roberts_cross}) to input video stream.
+
+The filter accepts the following options:
+
+@table @option
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes are processed.
+
+@item scale
+Set value which will be multiplied with filtered result.
+Range is @code{[0.0, 65535]} and default value is @code{1.0}.
+
+@item delta
+Set value which will be added to filtered result.
+Range is @code{[-65535, 65535]} and default value is @code{0.0}.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply the Roberts cross operator with scale set to 2 and delta set to 10
+@example
+-i INPUT -vf "hwupload, roberts_opencl=scale=2:delta=10, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section sobel_opencl
+
+Apply the Sobel operator (@url{https://en.wikipedia.org/wiki/Sobel_operator}) to input video stream.
+
+The filter accepts the following options:
+
+@table @option
+@item planes
+Set which planes to filter. Default value is @code{0xf}, by which all planes are processed.
+
+@item scale
+Set value which will be multiplied with filtered result.
+Range is @code{[0.0, 65535]} and default value is @code{1.0}.
+
+@item delta
+Set value which will be added to filtered result.
+Range is @code{[-65535, 65535]} and default value is @code{0.0}.
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Apply sobel operator with scale set to 2 and delta set to 10
+@example
+-i INPUT -vf "hwupload, sobel_opencl=scale=2:delta=10, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@section tonemap_opencl
+
+Perform HDR(PQ/HLG) to SDR conversion with tone-mapping.
+
+It accepts the following parameters:
+
+@table @option
+@item tonemap
+Specify the tone-mapping operator to be used. Same as tonemap option in @ref{tonemap}.
+
+@item param
+Tune the tone mapping algorithm. Same as param option in @ref{tonemap}.
+
+@item desat
+Apply desaturation for highlights that exceed this level of brightness. The
+higher the parameter, the more color information will be preserved. This
+setting helps prevent unnaturally blown-out colors for super-highlights, by
+(smoothly) turning into white instead. This makes images feel more natural,
+at the cost of reducing information about out-of-range colors.
+
+The default value is 0.5, and the algorithm here is a little different from
+the cpu version tonemap currently. A setting of 0.0 disables this option.
+
+@item threshold
+The tonemapping algorithm parameters are fine-tuned for each scene. A threshold
+is used to detect whether the scene has changed or not. If the distance between
+the current frame average brightness and the current running average exceeds
+a threshold value, we would re-calculate scene average and peak brightness.
+The default value is 0.2.
+
+@item format
+Specify the output pixel format.
+
+Currently supported formats are:
+@table @var
+@item p010
+@item nv12
+@end table
+
+@item range, r
+Set the output color range.
+
+Possible values are:
+@table @var
+@item tv/mpeg
+@item pc/jpeg
+@end table
+
+Default is same as input.
+
+@item primaries, p
+Set the output color primaries.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@item transfer, t
+Set the output transfer characteristics.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is bt709.
+
+@item matrix, m
+Set the output colorspace matrix.
+
+Possible values are:
+@table @var
+@item bt709
+@item bt2020
+@end table
+
+Default is same as input.
+
+@end table
+
+@subsection Example
+
+@itemize
+@item
+Convert HDR(PQ/HLG) video to bt2020-transfer-characteristic p010 format using linear operator.
+@example
+-i INPUT -vf "format=p010,hwupload,tonemap_opencl=t=bt2020:tonemap=linear:format=p010,hwdownload,format=p010" OUTPUT
+@end example
+@end itemize
+
+@section unsharp_opencl
+
+Sharpen or blur the input video.
+
+It accepts the following parameters:
+
+@table @option
+@item luma_msize_x, lx
+Set the luma matrix horizontal size.
+Range is @code{[1, 23]} and default value is @code{5}.
+
+@item luma_msize_y, ly
+Set the luma matrix vertical size.
+Range is @code{[1, 23]} and default value is @code{5}.
+
+@item luma_amount, la
+Set the luma effect strength.
+Range is @code{[-10, 10]} and default value is @code{1.0}.
+
+Negative values will blur the input video, while positive values will
+sharpen it, a value of zero will disable the effect.
+
+@item chroma_msize_x, cx
+Set the chroma matrix horizontal size.
+Range is @code{[1, 23]} and default value is @code{5}.
+
+@item chroma_msize_y, cy
+Set the chroma matrix vertical size.
+Range is @code{[1, 23]} and default value is @code{5}.
+
+@item chroma_amount, ca
+Set the chroma effect strength.
+Range is @code{[-10, 10]} and default value is @code{0.0}.
+
+Negative values will blur the input video, while positive values will
+sharpen it, a value of zero will disable the effect.
+
+@end table
+
+All parameters are optional and default to the equivalent of the
+string '5:5:1.0:5:5:0.0'.
+
+@subsection Examples
+
+@itemize
+@item
+Apply strong luma sharpen effect:
+@example
+-i INPUT -vf "hwupload, unsharp_opencl=luma_msize_x=7:luma_msize_y=7:luma_amount=2.5, hwdownload" OUTPUT
+@end example
+
+@item
+Apply a strong blur of both luma and chroma parameters:
+@example
+-i INPUT -vf "hwupload, unsharp_opencl=7:7:-2:7:7:-2, hwdownload" OUTPUT
+@end example
+@end itemize
+
+@c man end OPENCL VIDEO FILTERS
+
 @chapter Video Sources
 @c man begin VIDEO SOURCES
 
@@ -18306,7 +19580,7 @@ Set outer coloring mode.
 It shall assume one of following values:
 @table @option
 @item iteration_count
-Set iteration cound mode.
+Set iteration count mode.
 @item normalized_iteration_count
 set normalized iteration count mode.
 @end table
@@ -18884,7 +20158,7 @@ Default is @code{log}.
 
 @item acount
 Set how much frames to accumulate in histogram.
-Defauls is 1. Setting this to -1 accumulates all frames.
+Default is 1. Setting this to -1 accumulates all frames.
 
 @item rheight
 Set histogram ratio of window height.
@@ -20585,6 +21859,7 @@ It accepts the following values:
 @item cauchy
 @item parzen
 @item poisson
+@item bohman
 @end table
 Default is @code{hanning}.
 
@@ -20687,6 +21962,14 @@ each channel is displayed using the cool color scheme
 each channel is displayed using the magma color scheme
 @item green
 each channel is displayed using the green color scheme
+@item viridis
+each channel is displayed using the viridis color scheme
+@item plasma
+each channel is displayed using the plasma color scheme
+@item cividis
+each channel is displayed using the cividis color scheme
+@item terrain
+each channel is displayed using the terrain color scheme
 @end table
 
 Default value is @samp{channel}.
@@ -20743,6 +22026,7 @@ It accepts the following values:
 @item cauchy
 @item parzen
 @item poisson
+@item bohman
 @end table
 
 Default value is @code{hann}.
@@ -20852,6 +22136,14 @@ each channel is displayed using the cool color scheme
 each channel is displayed using the magma color scheme
 @item green
 each channel is displayed using the green color scheme
+@item viridis
+each channel is displayed using the viridis color scheme
+@item plasma
+each channel is displayed using the plasma color scheme
+@item cividis
+each channel is displayed using the cividis color scheme
+@item terrain
+each channel is displayed using the terrain color scheme
 @end table
 Default value is @samp{intensity}.
 
@@ -20906,6 +22198,7 @@ It accepts the following values:
 @item cauchy
 @item parzen
 @item poisson
+@item bohman
 @end table
 Default value is @code{hann}.
 
@@ -21208,7 +22501,7 @@ This filter is primarily created for reversing processed @ref{showspectrum}
 filter outputs, but can synthesize sound from other spectrograms too.
 But in such case results are going to be poor if the phase data is not
 available, because in such cases phase data need to be recreated, usually
-its just recreated from random noise.
+it's just recreated from random noise.
 For best results use gray only output (@code{channel} color mode in
 @ref{showspectrum} filter) and @code{log} scale for magnitude video and
 @code{lin} scale for phase video. To produce phase, for 2nd video, use
diff --git a/doc/formats.texi b/doc/formats.texi
index 4f334e03c7a41..a992506ac1d8e 100644
--- a/doc/formats.texi
+++ b/doc/formats.texi
@@ -211,7 +211,7 @@ is @code{0} (meaning that no offset is applied).
 @item dump_separator @var{string} (@emph{input})
 Separator used to separate the fields printed on the command line about the
 Stream parameters.
-For example to separate the fields with newlines and indention:
+For example, to separate the fields with newlines and indentation:
 @example
 ffprobe -dump_separator "
                           "  -i ~/videos/matrixbench_mpeg2.mpg
@@ -234,30 +234,10 @@ At present, applicable for MPEG-PS and MPEG-TS.
 Format stream specifiers allow selection of one or more streams that
 match specific properties.
 
-Possible forms of stream specifiers are:
-@table @option
-@item @var{stream_index}
-Matches the stream with this index.
-
-@item @var{stream_type}[:@var{stream_index}]
-@var{stream_type} is one of following: 'v' for video, 'a' for audio,
-'s' for subtitle, 'd' for data, and 't' for attachments. If
-@var{stream_index} is given, then it matches the stream number
-@var{stream_index} of this type. Otherwise, it matches all streams of
-this type.
-
-@item p:@var{program_id}[:@var{stream_index}]
-If @var{stream_index} is given, then it matches the stream with number
-@var{stream_index} in the program with the id
-@var{program_id}. Otherwise, it matches all streams in the program.
-
-@item #@var{stream_id}
-Matches the stream by a format-specific ID.
-@end table
-
 The exact semantics of stream specifiers is defined by the
 @code{avformat_match_stream_specifier()} function declared in the
-@file{libavformat/avformat.h} header.
+@file{libavformat/avformat.h} header and documented in the
+@ref{Stream specifiers,,Stream specifiers section in the ffmpeg(1) manual,ffmpeg}.
 
 @ifclear config-writeonly
 @include demuxers.texi
diff --git a/doc/general.texi b/doc/general.texi
index 4983134f7e0ca..fe94c40386a23 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -17,21 +17,85 @@ for more formats. None of them are used by default, their use has to be
 explicitly requested by passing the appropriate flags to
 @command{./configure}.
 
-@section libxavs2
+@section Alliance for Open Media (AOM)
 
-FFmpeg can make use of the xavs2 library for AVS2-P2/IEEE1857.4 video encoding.
+FFmpeg can make use of the AOM library for AV1 decoding and encoding.
 
-Go to @url{https://github.com/pkuvcl/xavs2} and follow the instructions for
-installing the library. Then pass @code{--enable-libxavs2} to configure to
+Go to @url{http://aomedia.org/} and follow the instructions for
+installing the library. Then pass @code{--enable-libaom} to configure to
 enable it.
 
+@section AMD AMF/VCE
+
+FFmpeg can use the AMD Advanced Media Framework library under Windows
+for accelerated H.264 and HEVC encoding on hardware with Video Coding Engine (VCE).
+
+To enable support you must obtain the AMF framework header files from
+@url{https://github.com/GPUOpen-LibrariesAndSDKs/AMF.git}.
+
+Create an @code{AMF/} directory in the system include path.
+Copy the contents of @code{AMF/amf/public/include/} into that directory.
+Then configure FFmpeg with @code{--enable-amf}.
+
+@section AviSynth
+
+FFmpeg can read AviSynth scripts as input. To enable support, pass
+@code{--enable-avisynth} to configure.  The correct headers are
+included in compat/avisynth/, which allows the user to enable support
+without needing to search for these headers themselves.
+
+For Windows, supported AviSynth variants are
+@url{http://avisynth.nl, AviSynth 2.6 RC1 or higher} for 32-bit builds and
+@url{http://avs-plus.net, AviSynth+ r1718 or higher} for 32-bit and 64-bit builds.
+
+For Linux and OS X, the supported AviSynth variant is
+@url{https://github.com/avxsynth/avxsynth, AvxSynth}.
+
 @float NOTE
-libxavs2 is under the GNU Public License Version 2 or later
-(see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for
-details), you must upgrade FFmpeg's license to GPL in order to use it.
+There is currently a regression in AviSynth+'s @code{capi.h} header as of
+October 2016, which interferes with the ability for builds of FFmpeg to use
+MSVC-built binaries of AviSynth. Until this is resolved, you can make sure
+a known good version is installed by checking out a version from before
+the regression occurred:
+
+@code{git clone -b MT git://github.com/AviSynth/AviSynthPlus.git @*
+cd AviSynthPlus @*
+git checkout -b oldheader b4f292b4dbfad149697fb65c6a037bb3810813f9 @*
+make install PREFIX=/install/prefix}
 @end float
 
-@section libdavs2
+@float NOTE
+AviSynth and AvxSynth are loaded dynamically.  Distributors can build FFmpeg
+with @code{--enable-avisynth}, and the binaries will work regardless of the
+end user having AviSynth or AvxSynth installed - they'll only need to be
+installed to use AviSynth scripts (obviously).
+@end float
+
+@section Chromaprint
+
+FFmpeg can make use of the Chromaprint library for generating audio fingerprints.
+Pass @code{--enable-chromaprint} to configure to
+enable it. See @url{https://acoustid.org/chromaprint}.
+
+@section codec2
+
+FFmpeg can make use of the codec2 library for codec2 decoding and encoding.
+There is currently no native decoder, so libcodec2 must be used for decoding.
+
+Go to @url{http://freedv.org/}, download "Codec 2 source archive".
+Build and install using CMake. Debian users can install the libcodec2-dev package instead.
+Once libcodec2 is installed you can pass @code{--enable-libcodec2} to configure to enable it.
+
+The easiest way to use codec2 is with .c2 files, since they contain the mode information required for decoding.
+To encode such a file, use a .c2 file extension and give the libcodec2 encoder the -mode option:
+@code{ffmpeg -i input.wav -mode 700C output.c2}.
+Playback is as simple as @code{ffplay output.c2}.
+For a list of supported modes, run @code{ffmpeg -h encoder=libcodec2}.
+Raw codec2 files are also supported.
+To make sense of them the mode in use needs to be specified as a format option:
+@code{ffmpeg -f codec2raw -mode 1300 -i input.raw output.wav}.
+
+@section davs2
 
 FFmpeg can make use of the davs2 library for AVS2-P2/IEEE1857.4 video decoding.
 
@@ -45,21 +109,63 @@ libdavs2 is under the GNU Public License Version 2 or later
 details), you must upgrade FFmpeg's license to GPL in order to use it.
 @end float
 
-@section Alliance for Open Media libaom
+@section Game Music Emu
 
-FFmpeg can make use of the libaom library for AV1 decoding.
+FFmpeg can make use of the Game Music Emu library to read audio from supported video game
+music file formats. Pass @code{--enable-libgme} to configure to
+enable it. See @url{https://bitbucket.org/mpyne/game-music-emu/overview}.
 
-Go to @url{http://aomedia.org/} and follow the instructions for
-installing the library. Then pass @code{--enable-libaom} to configure to
+@section Intel QuickSync Video
+
+FFmpeg can use Intel QuickSync Video (QSV) for accelerated decoding and encoding
+of multiple codecs. To use QSV, FFmpeg must be linked against the @code{libmfx}
+dispatcher, which loads the actual decoding libraries.
+
+The dispatcher is open source and can be downloaded from
+@url{https://github.com/lu-zero/mfx_dispatch.git}. FFmpeg needs to be configured
+with the @code{--enable-libmfx} option and @code{pkg-config} needs to be able to
+locate the dispatcher's @code{.pc} files.
+
+@section Kvazaar
+
+FFmpeg can make use of the Kvazaar library for HEVC encoding.
+
+Go to @url{https://github.com/ultravideo/kvazaar} and follow the
+instructions for installing the library. Then pass
+@code{--enable-libkvazaar} to configure to enable it.
+
+@section LAME
+
+FFmpeg can make use of the LAME library for MP3 encoding.
+
+Go to @url{http://lame.sourceforge.net/} and follow the
+instructions for installing the library.
+Then pass @code{--enable-libmp3lame} to configure to enable it.
+
+@section libilbc
+
+iLBC is a narrowband speech codec that has been made freely available
+by Google as part of the WebRTC project. libilbc is a packaging friendly
+copy of the iLBC codec. FFmpeg can make use of the libilbc library for
+iLBC decoding and encoding.
+
+Go to @url{https://github.com/TimothyGu/libilbc} and follow the instructions for
+installing the library. Then pass @code{--enable-libilbc} to configure to
 enable it.
 
-@section OpenJPEG
+@section libvpx
 
-FFmpeg can use the OpenJPEG libraries for encoding/decoding J2K videos.  Go to
-@url{http://www.openjpeg.org/} to get the libraries and follow the installation
-instructions.  To enable using OpenJPEG in FFmpeg, pass @code{--enable-libopenjpeg} to
-@file{./configure}.
+FFmpeg can make use of the libvpx library for VP8/VP9 decoding and encoding.
+
+Go to @url{http://www.webmproject.org/} and follow the instructions for
+installing the library. Then pass @code{--enable-libvpx} to configure to
+enable it.
 
+@section ModPlug
+
+FFmpeg can make use of this library, originating in Modplug-XMMS, to read from MOD-like music files.
+See @url{https://github.com/Konstanty/libmodplug}. Pass @code{--enable-libmodplug} to configure to
+enable it.
 
 @section OpenCORE, VisualOn, and Fraunhofer libraries
 
@@ -106,13 +212,27 @@ Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
 instructions for installing the library.
 Then pass @code{--enable-libfdk-aac} to configure to enable it.
 
-@section LAME
+@section OpenH264
 
-FFmpeg can make use of the LAME library for MP3 encoding.
+FFmpeg can make use of the OpenH264 library for H.264 decoding and encoding.
 
-Go to @url{http://lame.sourceforge.net/} and follow the
-instructions for installing the library.
-Then pass @code{--enable-libmp3lame} to configure to enable it.
+Go to @url{http://www.openh264.org/} and follow the instructions for
+installing the library. Then pass @code{--enable-libopenh264} to configure to
+enable it.
+
+For decoding, this library is much more limited than the built-in decoder
+in libavcodec; currently, this library lacks support for decoding B-frames
+and some other main/high profile features. (It currently only supports
+constrained baseline profile and CABAC.) Using it is mostly useful for
+testing and for taking advantage of Cisco's patent portfolio license
+(@url{http://www.openh264.org/BINARY_LICENSE.txt}).
+
+@section OpenJPEG
+
+FFmpeg can use the OpenJPEG libraries for decoding/encoding J2K videos.  Go to
+@url{http://www.openjpeg.org/} to get the libraries and follow the installation
+instructions.  To enable using OpenJPEG in FFmpeg, pass @code{--enable-libopenjpeg} to
+@file{./configure}.
 
 @section TwoLAME
 
@@ -122,33 +242,18 @@ Go to @url{http://www.twolame.org/} and follow the
 instructions for installing the library.
 Then pass @code{--enable-libtwolame} to configure to enable it.
 
-@section libcodec2 / codec2 general
+@section VapourSynth
 
-FFmpeg can make use of libcodec2 for codec2 encoding and decoding.
-There is currently no native decoder, so libcodec2 must be used for decoding.
+FFmpeg can read VapourSynth scripts as input. To enable support, pass
+@code{--enable-vapoursynth} to configure. VapourSynth is detected via
+@code{pkg-config}. Versions 42 or greater are supported.
+See @url{http://www.vapoursynth.com/}.
 
-Go to @url{http://freedv.org/}, download "Codec 2 source archive".
-Build and install using CMake. Debian users can install the libcodec2-dev package instead.
-Once libcodec2 is installed you can pass @code{--enable-libcodec2} to configure to enable it.
+Due to security concerns, VapourSynth scripts will not
+be autodetected so the input format has to be forced. For ff* CLI tools,
+add @code{-f vapoursynth} before the input @code{-i yourscript.vpy}.
 
-The easiest way to use codec2 is with .c2 files, since they contain the mode information required for decoding.
-To encode such a file, use a .c2 file extension and give the libcodec2 encoder the -mode option:
-@code{ffmpeg -i input.wav -mode 700C output.c2}.
-Playback is as simple as @code{ffplay output.c2}.
-For a list of supported modes, run @code{ffmpeg -h encoder=libcodec2}.
-Raw codec2 files are also supported.
-To make sense of them the mode in use needs to be specified as a format option:
-@code{ffmpeg -f codec2raw -mode 1300 -i input.raw output.wav}.
-
-@section libvpx
-
-FFmpeg can make use of the libvpx library for VP8/VP9 encoding.
-
-Go to @url{http://www.webmproject.org/} and follow the instructions for
-installing the library. Then pass @code{--enable-libvpx} to configure to
-enable it.
-
-@section libwavpack
+@section WavPack
 
 FFmpeg can make use of the libwavpack library for WavPack encoding.
 
@@ -156,29 +261,6 @@ Go to @url{http://www.wavpack.com/} and follow the instructions for
 installing the library. Then pass @code{--enable-libwavpack} to configure to
 enable it.
 
-@section libxavs
-
-FFmpeg can make use of the libxavs library for Xavs encoding.
-
-Go to @url{http://xavs.sf.net/} and follow the instructions for
-installing the library. Then pass @code{--enable-libxavs} to configure to
-enable it.
-
-@section OpenH264
-
-FFmpeg can make use of the OpenH264 library for H.264 encoding and decoding.
-
-Go to @url{http://www.openh264.org/} and follow the instructions for
-installing the library. Then pass @code{--enable-libopenh264} to configure to
-enable it.
-
-For decoding, this library is much more limited than the built-in decoder
-in libavcodec; currently, this library lacks support for decoding B-frames
-and some other main/high profile features. (It currently only supports
-constrained baseline profile and CABAC.) Using it is mostly useful for
-testing and for taking advantage of Cisco's patent portfolio license
-(@url{http://www.openh264.org/BINARY_LICENSE.txt}).
-
 @section x264
 
 FFmpeg can make use of the x264 library for H.264 encoding.
@@ -207,91 +289,36 @@ x265 is under the GNU Public License Version 2 or later
 details), you must upgrade FFmpeg's license to GPL in order to use it.
 @end float
 
-@section kvazaar
-
-FFmpeg can make use of the kvazaar library for HEVC encoding.
-
-Go to @url{https://github.com/ultravideo/kvazaar} and follow the
-instructions for installing the library. Then pass
-@code{--enable-libkvazaar} to configure to enable it.
+@section xavs
 
-@section libilbc
+FFmpeg can make use of the xavs library for AVS encoding.
 
-iLBC is a narrowband speech codec that has been made freely available
-by Google as part of the WebRTC project. libilbc is a packaging friendly
-copy of the iLBC codec. FFmpeg can make use of the libilbc library for
-iLBC encoding and decoding.
-
-Go to @url{https://github.com/TimothyGu/libilbc} and follow the instructions for
-installing the library. Then pass @code{--enable-libilbc} to configure to
+Go to @url{http://xavs.sf.net/} and follow the instructions for
+installing the library. Then pass @code{--enable-libxavs} to configure to
 enable it.
 
-@section libzvbi
+@section xavs2
 
-libzvbi is a VBI decoding library which can be used by FFmpeg to decode DVB
-teletext pages and DVB teletext subtitles.
+FFmpeg can make use of the xavs2 library for AVS2-P2/IEEE1857.4 video encoding.
 
-Go to @url{http://sourceforge.net/projects/zapping/} and follow the instructions for
-installing the library. Then pass @code{--enable-libzvbi} to configure to
+Go to @url{https://github.com/pkuvcl/xavs2} and follow the instructions for
+installing the library. Then pass @code{--enable-libxavs2} to configure to
 enable it.
 
-@section AviSynth
-
-FFmpeg can read AviSynth scripts as input. To enable support, pass
-@code{--enable-avisynth} to configure.  The correct headers are
-included in compat/avisynth/, which allows the user to enable support
-without needing to search for these headers themselves.
-
-For Windows, supported AviSynth variants are
-@url{http://avisynth.nl, AviSynth 2.6 RC1 or higher} for 32-bit builds and
-@url{http://avs-plus.net, AviSynth+ r1718 or higher} for 32-bit and 64-bit builds.
-
-For Linux and OS X, the supported AviSynth variant is
-@url{https://github.com/avxsynth/avxsynth, AvxSynth}.
-
-@float NOTE
-There is currently a regression in AviSynth+'s @code{capi.h} header as of
-October 2016, which interferes with the ability for builds of FFmpeg to use
-MSVC-built binaries of AviSynth. Until this is resolved, you can make sure
-a known good version is installed by checking out a version from before
-the regression occurred:
-
-@code{git clone -b MT git://github.com/AviSynth/AviSynthPlus.git @*
-cd AviSynthPlus @*
-git checkout -b oldheader b4f292b4dbfad149697fb65c6a037bb3810813f9 @*
-make install PREFIX=/install/prefix}
-@end float
-
 @float NOTE
-AviSynth and AvxSynth are loaded dynamically.  Distributors can build FFmpeg
-with @code{--enable-avisynth}, and the binaries will work regardless of the
-end user having AviSynth or AvxSynth installed - they'll only need to be
-installed to use AviSynth scripts (obviously).
+libxavs2 is under the GNU Public License Version 2 or later
+(see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for
+details), you must upgrade FFmpeg's license to GPL in order to use it.
 @end float
 
-@section Intel QuickSync Video
-
-FFmpeg can use Intel QuickSync Video (QSV) for accelerated encoding and decoding
-of multiple codecs. To use QSV, FFmpeg must be linked against the @code{libmfx}
-dispatcher, which loads the actual decoding libraries.
+@section ZVBI
 
-The dispatcher is open source and can be downloaded from
-@url{https://github.com/lu-zero/mfx_dispatch.git}. FFmpeg needs to be configured
-with the @code{--enable-libmfx} option and @code{pkg-config} needs to be able to
-locate the dispatcher's @code{.pc} files.
-
-@section AMD VCE
-
-FFmpeg can use the AMD Advanced Media Framework library for accelerated H.264
-and HEVC encoding on VCE enabled hardware under Windows.
-
-To enable support you must obtain the AMF framework header files from
-@url{https://github.com/GPUOpen-LibrariesAndSDKs/AMF.git}.
-
-Create an @code{AMF/} directory in the system include path.
-Copy the contents of @code{AMF/amf/public/include/} into that directory.
-Then configure FFmpeg with @code{--enable-amf}.
+ZVBI is a VBI decoding library which can be used by FFmpeg to decode DVB
+teletext pages and DVB teletext subtitles.
 
+Go to @url{http://sourceforge.net/projects/zapping/} and follow the instructions for
+installing the library. Then pass @code{--enable-libzvbi} to configure to
+enable it.
 
 @chapter Supported File Formats, Codecs or Features
 
@@ -545,6 +572,7 @@ library:
 @item raw VC-1                  @tab X @tab X
 @item raw PCM A-law             @tab X @tab X
 @item raw PCM mu-law            @tab X @tab X
+@item raw PCM Archimedes VIDC   @tab X @tab X
 @item raw PCM signed 8 bit      @tab X @tab X
 @item raw PCM signed 16 bit big-endian  @tab X @tab X
 @item raw PCM signed 16 bit little-endian  @tab X @tab X
@@ -757,8 +785,8 @@ following image formats are supported:
 @item Autodesk Animator Flic video  @tab     @tab  X
 @item Autodesk RLE           @tab     @tab  X
     @tab fourcc: AASC
-@item AV1                    @tab     @tab  E
-    @tab Supported through external library libaom
+@item AV1                    @tab  E  @tab  E
+    @tab Supported through external libraries libaom and libdav1d
 @item Avid 1:1 10-bit RGB Packer  @tab  X  @tab  X
     @tab fourcc: AVrp
 @item AVS (Audio Video Standard) video  @tab     @tab  X
@@ -1107,10 +1135,10 @@ following image formats are supported:
 @item DPCM Sol               @tab     @tab  X
 @item DPCM Xan               @tab     @tab  X
     @tab Used in Origin's Wing Commander IV AVI files.
-@item DSD (Direct Stream Digitial), least significant bit first  @tab  @tab  X
-@item DSD (Direct Stream Digitial), most significant bit first   @tab  @tab  X
-@item DSD (Direct Stream Digitial), least significant bit first, planar  @tab  @tab  X
-@item DSD (Direct Stream Digitial), most significant bit first, planar   @tab  @tab  X
+@item DSD (Direct Stream Digital), least significant bit first  @tab  @tab  X
+@item DSD (Direct Stream Digital), most significant bit first   @tab  @tab  X
+@item DSD (Direct Stream Digital), least significant bit first, planar  @tab  @tab  X
+@item DSD (Direct Stream Digital), most significant bit first, planar   @tab  @tab  X
 @item DSP Group TrueSpeech   @tab     @tab  X
 @item DST (Direct Stream Transfer) @tab  @tab  X
 @item DV audio               @tab     @tab  X
@@ -1147,6 +1175,7 @@ following image formats are supported:
     @tab encoding supported through external library libopus
 @item PCM A-law              @tab  X  @tab  X
 @item PCM mu-law             @tab  X  @tab  X
+@item PCM Archimedes VIDC    @tab  X  @tab  X
 @item PCM signed 8-bit planar  @tab  X  @tab  X
 @item PCM signed 16-bit big-endian planar  @tab  X  @tab  X
 @item PCM signed 16-bit little-endian planar  @tab  X  @tab  X
diff --git a/doc/indevs.texi b/doc/indevs.texi
index 9a9cb697d35f8..af3173217f8e4 100644
--- a/doc/indevs.texi
+++ b/doc/indevs.texi
@@ -374,7 +374,7 @@ Defaults to @option{false}.
 @item timestamp_align
 Capture start time alignment in seconds. If set to nonzero, input frames are
 dropped till the system timestamp aligns with configured value.
-Alignment difference of upto one frame duration is tolerated.
+Alignment difference of up to one frame duration is tolerated.
 This is useful for maintaining input synchronization across N different
 hardware devices deployed for 'N-way' redundancy. The system time of different
 hardware devices should be synchronized with protocols such as NTP or PTP,
@@ -787,7 +787,7 @@ ffplay -f iec61883 -i auto
 Grab and record the input of a FireWire DV/HDV device,
 using a packet buffer of 100000 packets if the source is HDV.
 @example
-ffmpeg -f iec61883 -i auto -hdvbuffer 100000 out.mpg
+ffmpeg -f iec61883 -i auto -dvbuffer 100000 out.mpg
 @end example
 
 @end itemize
@@ -1050,6 +1050,21 @@ IIDC1394 input device, based on libdc1394 and libraw1394.
 
 Requires the configure option @code{--enable-libdc1394}.
 
+@subsection Options
+@table @option
+
+@item framerate
+Set the frame rate. Default is @code{ntsc}, corresponding to a frame
+rate of @code{30000/1001}.
+
+@item pixel_format
+Select the pixel format. Default is @code{uyvy422}.
+
+@item video_size
+Set the video size given as a string such as @code{640x480} or @code{hd720}.
+Default is @code{qvga}.
+@end table
+
 @section libndi_newtek
 
 The libndi_newtek input device provides capture capabilities for using NDI (Network
@@ -1078,6 +1093,10 @@ Defaults to @option{0.5}.
 When this flag is @option{false}, all video that you receive will be progressive.
 Defaults to @option{true}.
 
+@item extra_ips
+If set to a list of comma-separated IP addresses, scan for sources not only
+using mDNS but also using the unicast IP addresses specified by this list.
+
 @end table
 
 @subsection Examples
@@ -1090,12 +1109,25 @@ List input devices:
 ffmpeg -f libndi_newtek -find_sources 1 -i dummy
 @end example
 
+@item
+List local and remote input devices:
+@example
+ffmpeg -f libndi_newtek -extra_ips "192.168.10.10" -find_sources 1 -i dummy
+@end example
+
 @item
 Restream to NDI:
 @example
 ffmpeg -f libndi_newtek -i "DEV-5.INTERNAL.M1STEREO.TV (NDI_SOURCE_NAME_1)" -f libndi_newtek -y NDI_SOURCE_NAME_2
 @end example
 
+@item
+Restream remote NDI to local NDI:
+@example
+ffmpeg -f libndi_newtek -extra_ips "192.168.10.10" -i "DEV-5.REMOTE.M1STEREO.TV (NDI_SOURCE_NAME_1)" -f libndi_newtek -y NDI_SOURCE_NAME_2
+@end example
+
+
 @end itemize
 
 @section openal
diff --git a/doc/libav-merge.txt b/doc/libav-merge.txt
index d5e671ca55d00..bcd0aacba570e 100644
--- a/doc/libav-merge.txt
+++ b/doc/libav-merge.txt
@@ -100,6 +100,7 @@ Stuff that didn't reach the codebase:
   - 4de220d2e frame: allow align=0 (meaning automatic) for av_frame_get_buffer()
 - Support recovery from an already present HLS playlist (see 16cb06bb30)
 - Remove all output devices (see 8e7e042d41, 8d3db95f20, 6ce13070bd, d46cd24986 and https://ffmpeg.org/pipermail/ffmpeg-devel/2017-September/216904.html)
+- avcodec/libaomenc: export the Sequence Header OBU as extradata (See a024c3ce9a)
 
 Collateral damage that needs work locally:
 ------------------------------------------
diff --git a/doc/mailing-list-faq.texi b/doc/mailing-list-faq.texi
index 3ab89d6677b23..3f2be1071a260 100644
--- a/doc/mailing-list-faq.texi
+++ b/doc/mailing-list-faq.texi
@@ -228,6 +228,33 @@ or headers.
 
 You can then filter the mailing list messages to their own folder.
 
+@section How do I disable mail delivery without unsubscribing?
+
+Sometimes you may want to temporarily stop receiving all mailing list
+messages. This "vacation mode" is simple to do:
+
+@enumerate
+@item
+Go to the @url{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-user/, ffmpeg-user mailing list info page}
+
+@item
+Enter your email address in the box at the very bottom of the page and click the
+@emph{Unsubscribe or edit options} box.
+
+@item
+Enter your password and click the @emph{Log in} button.
+
+@item
+Look for the @emph{Mail delivery} option. Here you can disable/enable mail
+delivery. If you check @emph{Set globally} it will apply your choice to all
+other FFmpeg mailing lists you are subscribed to.
+@end enumerate
+
+Alternatively, from your subscribed address, send a message to @email{ffmpeg-user-request@@ffmpeg.org}
+with the subject @emph{set delivery off}. To re-enable mail delivery send a
+message to @email{ffmpeg-user-request@@ffmpeg.org} with the subject
+@emph{set delivery on}.
+
 @chapter Rules and Etiquette
 
 @section What are the rules and the proper etiquette?
diff --git a/doc/metadata.texi b/doc/metadata.texi
index bddcc99470d2a..be91059a98c39 100644
--- a/doc/metadata.texi
+++ b/doc/metadata.texi
@@ -33,7 +33,7 @@ At the beginning of a chapter section there may be an optional timebase to be
 used for start/end values. It must be in form
 @samp{TIMEBASE=@var{num}/@var{den}}, where @var{num} and @var{den} are
 integers. If the timebase is missing then start/end times are assumed to
-be in milliseconds.
+be in nanoseconds.
 
 Next a chapter section must contain chapter start and end times in form
 @samp{START=@var{num}}, @samp{END=@var{num}}, where @var{num} is a positive
diff --git a/doc/muxers.texi b/doc/muxers.texi
index f18543e83d842..aac7d94edf37d 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -94,21 +94,23 @@ compatibility with software that only supports a single audio stream in AVI
 @anchor{chromaprint}
 @section chromaprint
 
-Chromaprint fingerprinter
+Chromaprint fingerprinter.
 
-This muxer feeds audio data to the Chromaprint library, which generates
-a fingerprint for the provided audio data. It takes a single signed
-native-endian 16-bit raw audio stream.
+This muxer feeds audio data to the Chromaprint library,
+which generates a fingerprint for the provided audio data. See @url{https://acoustid.org/chromaprint}.
+
+It takes a single signed native-endian 16-bit raw audio stream of at most 2 channels.
 
 @subsection Options
 
 @table @option
 @item silence_threshold
-Threshold for detecting silence, ranges from 0 to 32767. -1 for default
-(required for use with the AcoustID service).
+Threshold for detecting silence, ranges from -1 to 32767. -1 disables silence detection and
+is required for use with the AcoustID service. Default is -1.
 
 @item algorithm
-Algorithm index to fingerprint with.
+Version of algorithm to fingerprint with. Range is 0 to 4. Version 2 requires that silence
+detection be enabled. Default is 1.
 
 @item fp_format
 Format to output the fingerprint as. Accepts the following options:
@@ -120,7 +122,7 @@ Binary raw fingerprint
 Binary compressed fingerprint
 
 @item base64
-Base64 compressed fingerprint
+Base64 compressed fingerprint @emph{(default)}
 
 @end table
 
@@ -214,6 +216,8 @@ It creates a MPD manifest file and segment files for each stream.
 The segment filename might contain pre-defined identifiers used with SegmentTemplate
 as defined in section 5.3.9.4.4 of the standard. Available identifiers are "$RepresentationID$",
 "$Number$", "$Bandwidth$" and "$Time$".
+In addition to the standard identifiers, an ffmpeg-specific "$ext$" identifier is also supported.
+When specified, ffmpeg will replace $ext$ in the file name with the muxing format's extension, such as mp4, webm, etc.
 
 @example
 ffmpeg -re -i <input> -map 0 -map 0 -c:a libfdk_aac -c:v libx264
@@ -245,11 +249,11 @@ Enable (1) or disable (0) use of SegmentTimeline in SegmentTemplate.
 @item -single_file @var{single_file}
 Enable (1) or disable (0) storing all segments in one file, accessed using byte ranges.
 @item -single_file_name @var{file_name}
-DASH-templated name to be used for baseURL. Implies @var{single_file} set to "1".
+DASH-templated name to be used for baseURL. Implies @var{single_file} set to "1". In the template, "$ext$" is replaced with the file name extension specific for the segment format.
 @item -init_seg_name @var{init_name}
-DASH-templated name to used for the initialization segment. Default is "init-stream$RepresentationID$.m4s"
+DASH-templated name to be used for the initialization segment. Default is "init-stream$RepresentationID$.$ext$". "$ext$" is replaced with the file name extension specific for the segment format.
 @item -media_seg_name @var{segment_name}
-DASH-templated name to used for the media segments. Default is "chunk-stream$RepresentationID$-$Number%05d$.m4s"
+DASH-templated name to be used for the media segments. Default is "chunk-stream$RepresentationID$-$Number%05d$.$ext$". "$ext$" is replaced with the file name extension specific for the segment format.
 @item -utc_timing_url @var{utc_url}
 URL of the page that will return the UTC timestamp in ISO format. Example: "https://time.akamai.com/?iso"
 @item method @var{method}
@@ -289,14 +293,31 @@ Set container format (mp4/webm) options using a @code{:} separated list of
 key=value parameters. Values containing @code{:} special characters must be
 escaped.
 
-@item dash_segment_type @var{dash_segment_type}
+@item -dash_segment_type @var{dash_segment_type}
 Possible values:
+@item auto
+If this flag is set, the dash segment files format will be selected based on the stream codec. This is the default mode.
+
 @item mp4
-If this flag is set, the dash segment files will be in in ISOBMFF format. This is the default format.
+If this flag is set, the dash segment files will be in ISOBMFF format.
 
 @item webm
 If this flag is set, the dash segment files will be in in WebM format.
 
+@item -ignore_io_errors @var{ignore_io_errors}
+Ignore IO errors during open and write. Useful for long-duration runs with network output.
+
+@item -lhls @var{lhls}
+Enable Low-latency HLS (LHLS). Adds #EXT-X-PREFETCH tag with current segment's URI.
+Apple doesn't have an official spec for LHLS. Meanwhile hls.js player folks are
+trying to standardize an open LHLS spec. The draft spec is available at @url{https://github.com/video-dev/hlsjs-rfcs/blob/lhls-spec/proposals/0001-lhls.md}.
+This option will also try to comply with the above open spec, till Apple's spec officially supports it.
+Applicable only when @var{streaming} and @var{hls_playlist} options are enabled.
+This is an experimental feature.
+
+@item -master_m3u8_publish_rate @var{master_m3u8_publish_rate}
+Publish master playlist repeatedly after every specified number of segment intervals.
+
 @end table
 
 @anchor{framecrc}
@@ -764,17 +785,17 @@ Possible values:
 
 @table @samp
 @item mpegts
-If this flag is set, the hls segment files will format to mpegts.
-the mpegts files is used in all hls versions.
+Output segment files in MPEG-2 Transport Stream format. This is
+compatible with all HLS versions.
 
 @item fmp4
-If this flag is set, the hls segment files will format to fragment mp4 looks like dash.
-the fmp4 files is used in hls after version 7.
+Output segment files in fragmented MP4 format, similar to MPEG-DASH.
+fmp4 files may be used in HLS version 7 and above.
 
 @end table
 
 @item hls_fmp4_init_filename @var{filename}
-set filename to the fragment files header file, default filename is @file{init.mp4}.
+Set filename to the fragment files header file, default filename is @file{init.mp4}.
 
 When @code{var_stream_map} is set with two or more variant streams, the
 @var{filename} pattern must contain the string "%v", this string specifies
@@ -949,6 +970,37 @@ and they are mapped to the two video only variant streams with audio group names
 
 By default, a single hls variant containing all the encoded streams is created.
 
+@example
+ffmpeg -re -i in.ts -b:a:0 32k -b:a:1 64k -b:v:0 1000k \
+  -map 0:a -map 0:a -map 0:v -f hls \
+  -var_stream_map "a:0,agroup:aud_low,default:yes a:1,agroup:aud_low v:0,agroup:aud_low" \
+  -master_pl_name master.m3u8 \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example creates two audio only and one video only variant streams. In
+addition to the #EXT-X-STREAM-INF tag for each variant stream in the master
+playlist, #EXT-X-MEDIA tag is also added for the two audio only variant streams
+and they are mapped to the one video only variant stream with audio group name
+'aud_low', and the default state of each audio stream in the group is set to YES or NO.
+
+By default, a single hls variant containing all the encoded streams is created.
+
+@example
+ffmpeg -re -i in.ts -b:a:0 32k -b:a:1 64k -b:v:0 1000k \
+  -map 0:a -map 0:a -map 0:v -f hls \
+  -var_stream_map "a:0,agroup:aud_low,default:yes,language:ENG a:1,agroup:aud_low,language:CHN v:0,agroup:aud_low" \
+  -master_pl_name master.m3u8 \
+  http://example.com/live/out_%v.m3u8
+@end example
+This example creates two audio only and one video only variant streams. In
+addition to the #EXT-X-STREAM-INF tag for each variant stream in the master
+playlist, #EXT-X-MEDIA tag is also added for the two audio only variant streams
+and they are mapped to the one video only variant stream with audio group name
+'aud_low', and the default state of each audio stream in the group is set to
+YES or NO; one audio stream has its language set to ENG, the other to CHN.
+
+By default, a single hls variant containing all the encoded streams is created.
+
 @item cc_stream_map
 Map string which specifies different closed captions groups and their
 attributes. The closed captions stream groups are separated by space.
@@ -969,7 +1021,7 @@ ffmpeg -re -i in.ts -b:v 1000k -b:a 64k -a53cc 1 -f hls \
   http://example.com/live/out.m3u8
 @end example
 This example adds @code{#EXT-X-MEDIA} tag with @code{TYPE=CLOSED-CAPTIONS} in
-the master playlist with group name 'cc', langauge 'en' (english) and
+the master playlist with group name 'cc', language 'en' (english) and
 INSTREAM-ID 'CC1'. Also, it adds @code{CLOSED-CAPTIONS} attribute with group
 name 'cc' for the output variant stream.
 @example
@@ -1012,6 +1064,9 @@ Use persistent HTTP connections. Applicable only for HTTP output.
 @item timeout
 Set timeout for socket I/O operations. Applicable only for HTTP output.
 
+@item -ignore_io_errors
+Ignore IO errors during open, write and delete. Useful for long-duration runs with network output.
+
 @end table
 
 @anchor{ico}
@@ -1319,6 +1374,10 @@ more efficient), but with this option set, the muxer writes one moof/mdat
 pair for each track, making it easier to separate tracks.
 
 This option is implicitly set when writing ismv (Smooth Streaming) files.
+@item -movflags skip_sidx
+Skip writing of sidx atom. When bitrate overhead due to sidx atom is high,
+this option could be used for cases where sidx atom is not mandatory.
+When global_sidx flag is enabled, this option will be ignored.
 @item -movflags faststart
 Run a second pass moving the index (moov atom) to the beginning of the file.
 This operation can take a while, and will not work in various situations such
@@ -1467,7 +1526,7 @@ Set the program @samp{service_type}. Default is @code{digital_tv}.
 Accepts the following options:
 @table @samp
 @item hex_value
-Any hexdecimal value between @code{0x01} to @code{0xff} as defined in
+Any hexadecimal value between @code{0x01} and @code{0xff} as defined in
 ETSI 300 468.
 @item digital_tv
 Digital TV service.
@@ -1570,7 +1629,7 @@ ffmpeg -i file.mpg -c copy \
      out.ts
 @end example
 
-@section mxf, mxf_d10
+@section mxf, mxf_d10, mxf_opatom
 
 MXF muxer.
 
@@ -1582,7 +1641,7 @@ The muxer options are:
 @item store_user_comments @var{bool}
 Set if user comments should be stored if available or never.
 IRT D-10 does not allow user comments. The default is thus to write them for
-mxf but not for mxf_d10
+mxf and mxf_opatom but not for mxf_d10
 @end table
 
 @section null
diff --git a/doc/outdevs.texi b/doc/outdevs.texi
index 2518f9b55950e..1fd83c28cc10f 100644
--- a/doc/outdevs.texi
+++ b/doc/outdevs.texi
@@ -155,6 +155,10 @@ Defaults to @option{0.5}.
 Sets the decklink device duplex mode. Must be @samp{unset}, @samp{half} or @samp{full}.
 Defaults to @samp{unset}.
 
+@item timing_offset
+Sets the genlock timing pixel offset on the used output.
+Defaults to @samp{unset}.
+
 @end table
 
 @subsection Examples
@@ -398,6 +402,10 @@ Set the SDL window size, can be a string of the form
 If not specified it defaults to the size of the input video,
 downscaled according to the aspect ratio.
 
+@item window_x
+@item window_y
+Set the position of the window on the screen.
+
 @item window_fullscreen
 Set fullscreen mode when non-zero value is provided.
 Default value is zero.
diff --git a/doc/protocols.texi b/doc/protocols.texi
index b34f29eebf9aa..f0fd344ce9634 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -193,6 +193,12 @@ Set I/O operation maximum block size, in bytes. Default value is
 @code{INT_MAX}, which results in not limiting the requested block size.
 Setting this value reasonably low improves user termination request reaction
 time, which is valuable for files on slow medium.
+
+@item follow
+If set to 1, the protocol will retry reading at the end of the file, allowing
+reading files that still are being written. In order for this to terminate,
+you either need to use the rw_timeout option, or use the interrupt callback
+(for API users).
 @end table
 
 @section ftp
@@ -229,17 +235,6 @@ it, unless special care is taken (tests, customized server configuration
 etc.). Different FTP servers behave in different way during seek
 operation. ff* tools may produce incomplete content due to server limitations.
 
-This protocol accepts the following options:
-
-@table @option
-@item follow
-If set to 1, the protocol will retry reading at the end of the file, allowing
-reading files that still are being written. In order for this to terminate,
-you either need to use the rw_timeout option, or use the interrupt callback
-(for API users).
-
-@end table
-
 @section gopher
 
 Gopher protocol.
@@ -1306,10 +1301,10 @@ set by the peer side. Before version 1.3.0 this option
 is only available as @option{latency}.
 
 @item recv_buffer_size=@var{bytes}
-Set receive buffer size, expressed in bytes.
+Set UDP receive buffer size, expressed in bytes.
 
 @item send_buffer_size=@var{bytes}
-Set send buffer size, expressed in bytes.
+Set UDP send buffer size, expressed in bytes.
 
 @item rw_timeout
 Set raise error timeout for read/write optations.
@@ -1329,6 +1324,87 @@ have no chance of being delivered in time. It was
 automatically enabled in the sender if the receiver
 supports it.
 
+@item sndbuf=@var{bytes}
+Set send buffer size, expressed in bytes.
+
+@item rcvbuf=@var{bytes}
+Set receive buffer size, expressed in bytes.
+
+Receive buffer must not be greater than @option{ffs}.
+
+@item lossmaxttl=@var{packets}
+The value up to which the Reorder Tolerance may grow. When
+Reorder Tolerance is > 0, then packet loss report is delayed
+until that number of packets come in. Reorder Tolerance
+increases every time a "belated" packet has come, but it
+wasn't due to retransmission (that is, when UDP packets tend
+to come out of order), with the difference between the latest
+sequence and this packet's sequence, and not more than the
+value of this option. By default it's 0, which means that this
+mechanism is turned off, and the loss report is always sent
+immediately upon experiencing a "gap" in sequences.
+
+@item minversion
+The minimum SRT version that is required from the peer. A connection
+to a peer that does not satisfy the minimum version requirement
+will be rejected.
+
+The version format in hex is 0xXXYYZZ for x.y.z in human readable
+form.
+
+@item streamid=@var{string}
+A string limited to 512 characters that can be set on the socket prior
+to connecting. This stream ID will be able to be retrieved by the
+listener side from the socket that is returned from srt_accept and
+was connected by a socket with that set stream ID. SRT does not enforce
+any special interpretation of the contents of this string.
+This option doesn't make sense in Rendezvous connection; the result
+might be that simply one side will override the value from the other
+side and it's a matter of luck which one would win.
+
+@item smoother=@var{live|file}
+The type of Smoother used for the transmission for that socket, which
+is responsible for the transmission and congestion control. The Smoother
+type must be exactly the same on both connecting parties, otherwise
+the connection is rejected.
+
+@item messageapi=@var{1|0}
+When set, this socket uses the Message API, otherwise it uses Buffer
+API. Note that in live mode (see @option{transtype}) there's only
+message API available. In File mode you can choose to use one of two modes:
+
+Stream API (default, when this option is false). In this mode you may
+send as much data as you wish with one sending instruction, or even use
+dedicated functions that read directly from a file. The internal facility
+will take care of any speed and congestion control. When receiving, you
+can also receive as much data as desired, the data not extracted will be
+waiting for the next call. There is no boundary between data portions in
+the Stream mode.
+
+Message API. In this mode your single sending instruction passes exactly
+one piece of data that has boundaries (a message). Contrary to Live mode,
+this message may span across multiple UDP packets and the only size
+limitation is that it shall fit as a whole in the sending buffer. The
+receiver shall use as large buffer as necessary to receive the message,
+otherwise the message will not be given up. When the message is not
+complete (not all packets received or there was a packet loss) it will
+not be given up.
+
+@item transtype=@var{live|file}
+Sets the transmission type for the socket, in particular, setting this
+option sets multiple other parameters to their default values as required
+for a particular transmission type.
+
+live: Set options as for live transmission. In this mode, you should
+send by one sending instruction only as much data as fits in one UDP packet,
+and limited to the value defined first in @option{payload_size} (1316 is
+default in this mode). There is no speed control in this mode, only the
+bandwidth control, if configured, in order to not exceed the bandwidth with
+the overhead transmission (retransmitted and control packets).
+
+file: Set options as for non-live transmission. See @option{messageapi}
+for further explanations.
+
 @end table
 
 For more information see: @url{https://github.com/Haivision/srt}.
diff --git a/doc/snow.txt b/doc/snow.txt
index bbf28bc6c247f..b4098fd64dfa7 100644
--- a/doc/snow.txt
+++ b/doc/snow.txt
@@ -172,7 +172,7 @@ spatial_decomposition_count
     FIXME
 
 colorspace_type
-    0   unspecified YcbCr
+    0   unspecified YCbCr
     1   Gray
     2   Gray + Alpha
     3   GBR
@@ -235,7 +235,7 @@ spatial_decomposition_type
     stored as delta from last, last is reset to 0 if always_reset || keyframe
 
 qlog
-    quality (logarthmic quantizer scale)
+    quality (logarithmic quantizer scale)
     stored as delta from last, last is reset to 0 if always_reset || keyframe
 
 mv_scale
@@ -251,11 +251,11 @@ block_max_depth
     stored as delta from last, last is reset to 0 if always_reset || keyframe
 
 quant_table
-    quantiztation table
+    quantization table
 
 
 Highlevel bitstream structure:
-=============================
+==============================
  --------------------------------------------
 |                   Header                   |
  --------------------------------------------
@@ -303,7 +303,7 @@ Decoding process:
                   |  Intra DC  |               |
                   |            |    LL0 subband prediction
                    ------------                |
-                                \        Dequantizaton
+                                \        Dequantization
  -------------------             \             |
 |  Reference frames |             \           IDWT
 | -------   ------- |    Motion    \           |
@@ -390,8 +390,8 @@ motion vector prediction
    (mvx_diff, mvy_diff)*mv_scale
 
 
-Intra DC Predicton:
-======================
+Intra DC Prediction:
+====================
 the luma and chroma values of the left block are used as predictors
 
 the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
@@ -407,7 +407,7 @@ Motion Compensation:
 
 Halfpel interpolation:
 ----------------------
-halfpel interpolation is done by convolution with the halfpel filter stored
+Halfpel interpolation is done by convolution with the halfpel filter stored
 in the header:
 
 horizontal halfpel samples are found by
@@ -463,8 +463,8 @@ to the closest available fullpel sample
 Smaller pel interpolation:
 --------------------------
 if diag_mc is set then points which lie on a line between 2 vertically,
-horiziontally or diagonally adjacent halfpel points shall be interpolated
-linearls with rounding to nearest and halfway values rounded up.
+horizontally or diagonally adjacent halfpel points shall be interpolated
+linearly with rounding to nearest and halfway values rounded up.
 points which lie on 2 diagonals at the same time should only use the one
 diagonal not containing the fullpel point
 
@@ -519,8 +519,8 @@ width,height here are the width and height of the LL0 subband not of the final
 video
 
 
-Dequantizaton:
-==============
+Dequantization:
+===============
 FIXME
 
 Wavelet Transform:
diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index eb41b05ee661b..d2b33320c03a9 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak
@@ -161,7 +161,7 @@ $(SLIBOBJS): | $(sort $(dir $(SLIBOBJS)))
 $(TESTOBJS): | $(sort $(dir $(TESTOBJS)))
 $(TOOLOBJS): | tools
 
-OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
+OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
 
 CLEANSUFFIXES     = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.pc *.ptx *.ptx.c *.ver *.version *$(DEFAULT_X86ASMD).asm *~
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
diff --git a/fftools/Makefile b/fftools/Makefile
index c3a0ff340b0d4..6cec666dd9e72 100644
--- a/fftools/Makefile
+++ b/fftools/Makefile
@@ -32,7 +32,7 @@ $(foreach P,$(AVPROGS-yes),$(eval $(call DOFFTOOL,$(P))))
 all: $(AVPROGS)
 
 fftools/ffprobe.o fftools/cmdutils.o: libavutil/ffversion.h | fftools
-OBJDIRS += fftools
+OUTDIRS += fftools
 
 ifdef AVPROGS
 install: install-progs install-data
diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index da4259a9a877f..544f1a1cef334 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1193,33 +1193,27 @@ static void do_video_out(OutputFile *of,
     }
     ost->last_dropped = nb_frames == nb0_frames && next_picture;
 
-  /* duplicates frame if needed */
-  for (i = 0; i < nb_frames; i++) {
-    AVFrame *in_picture;
-    av_init_packet(&pkt);
-    pkt.data = NULL;
-    pkt.size = 0;
-
-    if (i < nb0_frames && ost->last_frame) {
-        in_picture = ost->last_frame;
-    } else
-        in_picture = next_picture;
+    /* duplicates frame if needed */
+    for (i = 0; i < nb_frames; i++) {
+        AVFrame *in_picture;
+        int forced_keyframe = 0;
+        double pts_time;
+        av_init_packet(&pkt);
+        pkt.data = NULL;
+        pkt.size = 0;
 
-    if (!in_picture)
-        return;
+        if (i < nb0_frames && ost->last_frame) {
+            in_picture = ost->last_frame;
+        } else
+            in_picture = next_picture;
 
-    in_picture->pts = ost->sync_opts;
+        if (!in_picture)
+            return;
 
-#if 1
-    if (!check_recording_time(ost))
-#else
-    if (ost->frame_number >= ost->max_frames)
-#endif
-        return;
+        in_picture->pts = ost->sync_opts;
 
-    {
-        int forced_keyframe = 0;
-        double pts_time;
+        if (!check_recording_time(ost))
+            return;
 
         if (enc->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME) &&
             ost->top_field_first >= 0)
@@ -1292,6 +1286,8 @@ static void do_video_out(OutputFile *of,
         ret = avcodec_send_frame(enc, in_picture);
         if (ret < 0)
             goto error;
+        // Make sure Closed Captions will not be duplicated
+        av_frame_remove_side_data(in_picture, AV_FRAME_DATA_A53_CC);
 
         while (1) {
             ret = avcodec_receive_packet(enc, &pkt);
@@ -1328,18 +1324,17 @@ static void do_video_out(OutputFile *of,
                 fprintf(ost->logfile, "%s", enc->stats_out);
             }
         }
-    }
-    ost->sync_opts++;
-    /*
-     * For video, number of frames in == number of packets out.
-     * But there may be reordering, so we can't throw away frames on encoder
-     * flush, we need to limit them here, before they go into encoder.
-     */
-    ost->frame_number++;
+        ost->sync_opts++;
+        /*
+         * For video, number of frames in == number of packets out.
+         * But there may be reordering, so we can't throw away frames on encoder
+         * flush, we need to limit them here, before they go into encoder.
+         */
+        ost->frame_number++;
 
-    if (vstats_filename && frame_size)
-        do_video_stats(ost, frame_size);
-  }
+        if (vstats_filename && frame_size)
+            do_video_stats(ost, frame_size);
+    }
 
     if (!ost->last_frame)
         ost->last_frame = av_frame_alloc();
@@ -1492,8 +1487,6 @@ static int reap_filters(int flush)
                     av_rescale_q(filtered_frame->pts, filter_tb, enc->time_base) -
                     av_rescale_q(start_time, AV_TIME_BASE_Q, enc->time_base);
             }
-            //if (ost->source_index >= 0)
-            //    *filtered_frame= *input_streams[ost->source_index]->decoded_frame; //for me_threshold
 
             switch (av_buffersink_get_type(filter)) {
             case AVMEDIA_TYPE_VIDEO:
@@ -1824,7 +1817,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
         } else
             av_log(NULL, AV_LOG_INFO, "%s    %c", buf.str, end);
 
-    fflush(stderr);
+        fflush(stderr);
     }
     av_bprint_finalize(&buf, NULL);
 
@@ -1931,46 +1924,46 @@ static void flush_encoders(void)
                 av_assert0(0);
             }
 
-                av_init_packet(&pkt);
-                pkt.data = NULL;
-                pkt.size = 0;
+            av_init_packet(&pkt);
+            pkt.data = NULL;
+            pkt.size = 0;
 
-                update_benchmark(NULL);
-
-                while ((ret = avcodec_receive_packet(enc, &pkt)) == AVERROR(EAGAIN)) {
-                    ret = avcodec_send_frame(enc, NULL);
-                    if (ret < 0) {
-                        av_log(NULL, AV_LOG_FATAL, "%s encoding failed: %s\n",
-                               desc,
-                               av_err2str(ret));
-                        exit_program(1);
-                    }
-                }
+            update_benchmark(NULL);
 
-                update_benchmark("flush_%s %d.%d", desc, ost->file_index, ost->index);
-                if (ret < 0 && ret != AVERROR_EOF) {
+            while ((ret = avcodec_receive_packet(enc, &pkt)) == AVERROR(EAGAIN)) {
+                ret = avcodec_send_frame(enc, NULL);
+                if (ret < 0) {
                     av_log(NULL, AV_LOG_FATAL, "%s encoding failed: %s\n",
                            desc,
                            av_err2str(ret));
                     exit_program(1);
                 }
-                if (ost->logfile && enc->stats_out) {
-                    fprintf(ost->logfile, "%s", enc->stats_out);
-                }
-                if (ret == AVERROR_EOF) {
-                    output_packet(of, &pkt, ost, 1);
-                    break;
-                }
-                if (ost->finished & MUXER_FINISHED) {
-                    av_packet_unref(&pkt);
-                    continue;
-                }
-                av_packet_rescale_ts(&pkt, enc->time_base, ost->mux_timebase);
-                pkt_size = pkt.size;
-                output_packet(of, &pkt, ost, 0);
-                if (ost->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO && vstats_filename) {
-                    do_video_stats(ost, pkt_size);
-                }
+            }
+
+            update_benchmark("flush_%s %d.%d", desc, ost->file_index, ost->index);
+            if (ret < 0 && ret != AVERROR_EOF) {
+                av_log(NULL, AV_LOG_FATAL, "%s encoding failed: %s\n",
+                       desc,
+                       av_err2str(ret));
+                exit_program(1);
+            }
+            if (ost->logfile && enc->stats_out) {
+                fprintf(ost->logfile, "%s", enc->stats_out);
+            }
+            if (ret == AVERROR_EOF) {
+                output_packet(of, &pkt, ost, 1);
+                break;
+            }
+            if (ost->finished & MUXER_FINISHED) {
+                av_packet_unref(&pkt);
+                continue;
+            }
+            av_packet_rescale_ts(&pkt, enc->time_base, ost->mux_timebase);
+            pkt_size = pkt.size;
+            output_packet(of, &pkt, ost, 0);
+            if (ost->enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO && vstats_filename) {
+                do_video_stats(ost, pkt_size);
+            }
         }
     }
 }
@@ -2139,9 +2132,6 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
 
     /* determine if the parameters for this input changed */
     need_reinit = ifilter->format != frame->format;
-    if (!!ifilter->hw_frames_ctx != !!frame->hw_frames_ctx ||
-        (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
-        need_reinit = 1;
 
     switch (ifilter->ist->st->codecpar->codec_type) {
     case AVMEDIA_TYPE_AUDIO:
@@ -2155,6 +2145,13 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
         break;
     }
 
+    if (!ifilter->ist->reinit_filters && fg->graph)
+        need_reinit = 0;
+
+    if (!!ifilter->hw_frames_ctx != !!frame->hw_frames_ctx ||
+        (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
+        need_reinit = 1;
+
     if (need_reinit) {
         ret = ifilter_parameters_from_frame(ifilter, frame);
         if (ret < 0)
@@ -2315,14 +2312,12 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output,
     ist->samples_decoded += decoded_frame->nb_samples;
     ist->frames_decoded++;
 
-#if 1
     /* increment next_dts to use for the case where the input stream does not
        have timestamps or there are multiple frames in the packet */
     ist->next_pts += ((int64_t)AV_TIME_BASE * decoded_frame->nb_samples) /
                      avctx->sample_rate;
     ist->next_dts += ((int64_t)AV_TIME_BASE * decoded_frame->nb_samples) /
                      avctx->sample_rate;
-#endif
 
     if (decoded_frame->pts != AV_NOPTS_VALUE) {
         decoded_frame_tb   = ist->st->time_base;
@@ -3345,7 +3340,7 @@ static int init_output_stream_encode(OutputStream *ost)
                    "if you want a different framerate.\n",
                    ost->file_index, ost->index);
         }
-//      ost->frame_rate = ist->st->avg_frame_rate.num ? ist->st->avg_frame_rate : (AVRational){25, 1};
+
         if (ost->enc->supported_framerates && !ost->force_fps) {
             int idx = av_find_nearest_q_idx(ost->frame_rate, ost->enc->supported_framerates);
             ost->frame_rate = ost->enc->supported_framerates[idx];
@@ -3428,8 +3423,8 @@ static int init_output_stream_encode(OutputStream *ost)
                 ost->forced_keyframes_expr_const_values[FKF_PREV_FORCED_N] = NAN;
                 ost->forced_keyframes_expr_const_values[FKF_PREV_FORCED_T] = NAN;
 
-            // Don't parse the 'forced_keyframes' in case of 'keep-source-keyframes',
-            // parse it only for static kf timings
+                // Don't parse the 'forced_keyframes' in case of 'keep-source-keyframes',
+                // parse it only for static kf timings
             } else if(strncmp(ost->forced_keyframes, "source", 6)) {
                 parse_forced_key_frames(ost->forced_keyframes, ost, ost->enc_ctx);
             }
@@ -4172,7 +4167,7 @@ static void reset_eagain(void)
 
 // set duration to max(tmp, duration) in a proper time base and return duration's time_base
 static AVRational duration_max(int64_t tmp, int64_t *duration, AVRational tmp_time_base,
-                                AVRational time_base)
+                               AVRational time_base)
 {
     int ret;
 
@@ -4476,7 +4471,10 @@ static int process_input(int file_index)
                 pkt_dts + AV_TIME_BASE/10 < FFMAX(ist->pts, ist->dts)) {
                 ifile->ts_offset -= delta;
                 av_log(NULL, AV_LOG_DEBUG,
-                       "timestamp discontinuity %"PRId64", new offset= %"PRId64"\n",
+                       "timestamp discontinuity for stream #%d:%d "
+                       "(id=%d, type=%s): %"PRId64", new offset= %"PRId64"\n",
+                       ist->file_index, ist->st->index, ist->st->id,
+                       av_get_media_type_string(ist->dec_ctx->codec_type),
                        delta, ifile->ts_offset);
                 pkt.dts -= av_rescale_q(delta, AV_TIME_BASE_Q, ist->st->time_base);
                 if (pkt.pts != AV_NOPTS_VALUE)
@@ -4884,11 +4882,6 @@ int main(int argc, char **argv)
         exit_program(1);
     }
 
-//     if (nb_input_files == 0) {
-//         av_log(NULL, AV_LOG_FATAL, "At least one input file must be specified\n");
-//         exit_program(1);
-//     }
-
     for (i = 0; i < nb_output_files; i++) {
         if (strcmp(output_files[i]->ctx->oformat->name, "rtp"))
             want_sdp = 0;
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 6518d50870c1d..72838de1e2c57 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -293,10 +293,17 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
             exit_program(1);
         }
         ist = input_streams[input_files[file_idx]->ist_index + st->index];
+        if (ist->user_set_discard == AVDISCARD_ALL) {
+            av_log(NULL, AV_LOG_FATAL, "Stream specifier '%s' in filtergraph description %s "
+                   "matches a disabled input stream.\n", p, fg->graph_desc);
+            exit_program(1);
+        }
     } else {
         /* find the first unused stream of corresponding type */
         for (i = 0; i < nb_input_streams; i++) {
             ist = input_streams[i];
+            if (ist->user_set_discard == AVDISCARD_ALL)
+                continue;
             if (ist->dec_ctx->codec_type == type && ist->discard)
                 break;
         }
@@ -732,6 +739,7 @@ static int sub2video_prepare(InputStream *ist, InputFilter *ifilter)
     if (!ist->sub2video.frame)
         return AVERROR(ENOMEM);
     ist->sub2video.last_pts = INT64_MIN;
+    ist->sub2video.end_pts  = INT64_MIN;
     return 0;
 }
 
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index d4851a2cd8c96..53d688b76499b 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -268,7 +268,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
 {
     OptionsContext *o = optctx;
     StreamMap *m = NULL;
-    int i, negative = 0, file_idx;
+    int i, negative = 0, file_idx, disabled = 0;
     int sync_file_idx = -1, sync_stream_idx = 0;
     char *p, *sync;
     char *map;
@@ -303,6 +303,11 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
                                        "match any streams.\n", arg);
             exit_program(1);
         }
+        if (input_streams[input_files[sync_file_idx]->ist_index + sync_stream_idx]->user_set_discard == AVDISCARD_ALL) {
+            av_log(NULL, AV_LOG_FATAL, "Sync stream specification in map %s matches a disabled input "
+                                       "stream.\n", arg);
+            exit_program(1);
+        }
     }
 
 
@@ -339,6 +344,10 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
                 if (check_stream_specifier(input_files[file_idx]->ctx, input_files[file_idx]->ctx->streams[i],
                             *p == ':' ? p + 1 : p) <= 0)
                     continue;
+                if (input_streams[input_files[file_idx]->ist_index + i]->user_set_discard == AVDISCARD_ALL) {
+                    disabled = 1;
+                    continue;
+                }
                 GROW_ARRAY(o->stream_maps, o->nb_stream_maps);
                 m = &o->stream_maps[o->nb_stream_maps - 1];
 
@@ -358,6 +367,10 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
     if (!m) {
         if (allow_unused) {
             av_log(NULL, AV_LOG_VERBOSE, "Stream map '%s' matches no streams; ignoring.\n", arg);
+        } else if (disabled) {
+            av_log(NULL, AV_LOG_FATAL, "Stream map '%s' matches disabled streams.\n"
+                                       "To ignore this, add a trailing '?' to the map.\n", arg);
+            exit_program(1);
         } else {
             av_log(NULL, AV_LOG_FATAL, "Stream map '%s' matches no streams.\n"
                                        "To ignore this, add a trailing '?' to the map.\n", arg);
@@ -437,7 +450,8 @@ static int opt_map_channel(void *optctx, const char *opt, const char *arg)
     /* allow trailing ? to map_channel */
     if (allow_unused = strchr(mapchan, '?'))
         *allow_unused = 0;
-    if (m->channel_idx < 0 || m->channel_idx >= st->codecpar->channels) {
+    if (m->channel_idx < 0 || m->channel_idx >= st->codecpar->channels ||
+        input_streams[input_files[m->file_idx]->ist_index + m->stream_idx]->user_set_discard == AVDISCARD_ALL) {
         if (allow_unused) {
             av_log(NULL, AV_LOG_VERBOSE, "mapchan: invalid audio channel #%d.%d.%d\n",
                     m->file_idx, m->stream_idx, m->channel_idx);
@@ -746,6 +760,13 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
 
         MATCH_PER_STREAM_OPT(discard, str, discard_str, ic, st);
         ist->user_set_discard = AVDISCARD_NONE;
+
+        if ((o->video_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) ||
+            (o->audio_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) ||
+            (o->subtitle_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) ||
+            (o->data_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_DATA))
+                ist->user_set_discard = AVDISCARD_ALL;
+
         if (discard_str && av_opt_eval_int(&cc, discard_opt, discard_str, &ist->user_set_discard) < 0) {
             av_log(NULL, AV_LOG_ERROR, "Error parsing discard %s.\n",
                     discard_str);
@@ -1660,6 +1681,8 @@ static OutputStream *new_video_stream(OptionsContext *o, AVFormatContext *oc, in
 
     MATCH_PER_STREAM_OPT(filter_scripts, str, ost->filters_script, oc, st);
     MATCH_PER_STREAM_OPT(filters,        str, ost->filters,        oc, st);
+    if (o->nb_filters > 1)
+        av_log(NULL, AV_LOG_ERROR, "Only '-vf %s' read, ignoring remaining -vf options: Use ',' to separate filters\n", ost->filters);
 
     if (!ost->stream_copy) {
         const char *p = NULL;
@@ -1841,6 +1864,8 @@ static OutputStream *new_audio_stream(OptionsContext *o, AVFormatContext *oc, in
 
     MATCH_PER_STREAM_OPT(filter_scripts, str, ost->filters_script, oc, st);
     MATCH_PER_STREAM_OPT(filters,        str, ost->filters,        oc, st);
+    if (o->nb_filters > 1)
+        av_log(NULL, AV_LOG_ERROR, "Only '-af %s' read, ignoring remaining -af options: Use ',' to separate filters\n", ost->filters);
 
     if (!ost->stream_copy) {
         char *sample_fmt = NULL;
@@ -2174,6 +2199,8 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 int new_area;
                 ist = input_streams[i];
                 new_area = ist->st->codecpar->width * ist->st->codecpar->height + 100000000*!!ist->st->codec_info_nb_frames;
+                if (ist->user_set_discard == AVDISCARD_ALL)
+                    continue;
                 if((qcr!=MKTAG('A', 'P', 'I', 'C')) && (ist->st->disposition & AV_DISPOSITION_ATTACHED_PIC))
                     new_area = 1;
                 if (ist->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
@@ -2195,6 +2222,8 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 int score;
                 ist = input_streams[i];
                 score = ist->st->codecpar->channels + 100000000*!!ist->st->codec_info_nb_frames;
+                if (ist->user_set_discard == AVDISCARD_ALL)
+                    continue;
                 if (ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
                     score > best_score) {
                     best_score = score;
@@ -2216,6 +2245,8 @@ static int open_output_file(OptionsContext *o, const char *filename)
                     AVCodec const *output_codec =
                         avcodec_find_encoder(oc->oformat->subtitle_codec);
                     int input_props = 0, output_props = 0;
+                    if (input_streams[i]->user_set_discard == AVDISCARD_ALL)
+                        continue;
                     if (output_codec)
                         output_descriptor = avcodec_descriptor_get(output_codec->id);
                     if (input_descriptor)
@@ -2237,6 +2268,8 @@ static int open_output_file(OptionsContext *o, const char *filename)
         if (!o->data_disable ) {
             enum AVCodecID codec_id = av_guess_codec(oc->oformat, NULL, filename, NULL, AVMEDIA_TYPE_DATA);
             for (i = 0; codec_id != AV_CODEC_ID_NONE && i < nb_input_streams; i++) {
+                if (input_streams[i]->user_set_discard == AVDISCARD_ALL)
+                    continue;
                 if (input_streams[i]->st->codecpar->codec_type == AVMEDIA_TYPE_DATA
                     && input_streams[i]->st->codecpar->codec_id == codec_id )
                     new_data_stream(o, oc, i);
@@ -2275,6 +2308,11 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 int src_idx = input_files[map->file_index]->ist_index + map->stream_index;
 
                 ist = input_streams[input_files[map->file_index]->ist_index + map->stream_index];
+                if (ist->user_set_discard == AVDISCARD_ALL) {
+                    av_log(NULL, AV_LOG_FATAL, "Stream #%d:%d is disabled and cannot be mapped.\n",
+                           map->file_index, map->stream_index);
+                    exit_program(1);
+                }
                 if(o->subtitle_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
                     continue;
                 if(o->   audio_disable && ist->st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index ab1f9faccfb7f..8f050e16e64b6 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -353,6 +353,7 @@ static char *afilters = NULL;
 #endif
 static int autorotate = 1;
 static int find_stream_info = 1;
+static int filter_nbthreads = 0;
 
 /* current context */
 static int is_full_screen;
@@ -861,31 +862,27 @@ static void calculate_display_rect(SDL_Rect *rect,
                                    int scr_xleft, int scr_ytop, int scr_width, int scr_height,
                                    int pic_width, int pic_height, AVRational pic_sar)
 {
-    float aspect_ratio;
-    int width, height, x, y;
+    AVRational aspect_ratio = pic_sar; /* seeded from pic_sar, kept as an exact rational */
+    int64_t width, height, x, y; /* 64-bit so av_rescale() results are stored unchanged */
 
-    if (pic_sar.num == 0)
-        aspect_ratio = 0;
-    else
-        aspect_ratio = av_q2d(pic_sar);
+    if (av_cmp_q(aspect_ratio, av_make_q(0, 1)) <= 0) /* pic_sar is not greater than 0 */
+        aspect_ratio = av_make_q(1, 1); /* fall back to 1:1 */
 
-    if (aspect_ratio <= 0.0)
-        aspect_ratio = 1.0;
-    aspect_ratio *= (float)pic_width / (float)pic_height;
+    aspect_ratio = av_mul_q(aspect_ratio, av_make_q(pic_width, pic_height)); /* times pic_width/pic_height */
 
     /* XXX: we suppose the screen has a 1.0 pixel ratio */
     height = scr_height;
-    width = lrint(height * aspect_ratio) & ~1;
+    width = av_rescale(height, aspect_ratio.num, aspect_ratio.den) & ~1; /* & ~1 clears bit 0 */
     if (width > scr_width) {
         width = scr_width;
-        height = lrint(width / aspect_ratio) & ~1;
+        height = av_rescale(width, aspect_ratio.den, aspect_ratio.num) & ~1; /* height for the clamped width */
     }
     x = (scr_width - width) / 2;
     y = (scr_height - height) / 2;
     rect->x = scr_xleft + x;
     rect->y = scr_ytop  + y;
-    rect->w = FFMAX(width,  1);
-    rect->h = FFMAX(height, 1);
+    rect->w = FFMAX((int)width,  1); /* narrowed to int, at least 1 */
+    rect->h = FFMAX((int)height, 1); /* narrowed to int, at least 1 */
 }
 
 static void get_sdl_pix_fmt_and_blendmode(int format, Uint32 *sdl_pix_fmt, SDL_BlendMode *sdl_blendmode)
@@ -1326,7 +1323,11 @@ static void sigterm_handler(int sig)
 static void set_default_window_size(int width, int height, AVRational sar)
 {
     SDL_Rect rect;
-    calculate_display_rect(&rect, 0, 0, INT_MAX, height, width, height, sar);
+    int max_width  = screen_width  ? screen_width  : INT_MAX; /* screen_width == 0 -> INT_MAX */
+    int max_height = screen_height ? screen_height : INT_MAX; /* screen_height == 0 -> INT_MAX */
+    if (max_width == INT_MAX && max_height == INT_MAX) /* both limits ended up at INT_MAX */
+        max_height = height; /* use the height argument as the limit, as the replaced call did */
+    calculate_display_rect(&rect, 0, 0, max_width, max_height, width, height, sar); /* result lands in rect */
     default_width  = rect.w;
     default_height = rect.h;
 }
@@ -1335,13 +1336,8 @@ static int video_open(VideoState *is)
 {
     int w,h;
 
-    if (screen_width) {
-        w = screen_width;
-        h = screen_height;
-    } else {
-        w = default_width;
-        h = default_height;
-    }
+    w = screen_width ? screen_width : default_width;
+    h = screen_height ? screen_height : default_height;
 
     if (!window_title)
         window_title = input_filename;
@@ -1959,6 +1955,7 @@ static int configure_audio_filters(VideoState *is, const char *afilters, int for
     avfilter_graph_free(&is->agraph);
     if (!(is->agraph = avfilter_graph_alloc()))
         return AVERROR(ENOMEM);
+    is->agraph->nb_threads = filter_nbthreads;
 
     while ((e = av_dict_get(swr_opts, "", e, AV_DICT_IGNORE_SUFFIX)))
         av_strlcatf(aresample_swr_opts, sizeof(aresample_swr_opts), "%s=%s:", e->key, e->value);
@@ -2108,10 +2105,10 @@ static int audio_thread(void *arg)
     return ret;
 }
 
-static int decoder_start(Decoder *d, int (*fn)(void *), void *arg)
+static int decoder_start(Decoder *d, int (*fn)(void *), const char *thread_name, void* arg)
 {
     packet_queue_start(d->queue);
-    d->decoder_tid = SDL_CreateThread(fn, "decoder", arg);
+    d->decoder_tid = SDL_CreateThread(fn, thread_name, arg);
     if (!d->decoder_tid) {
         av_log(NULL, AV_LOG_ERROR, "SDL_CreateThread(): %s\n", SDL_GetError());
         return AVERROR(ENOMEM);
@@ -2130,26 +2127,17 @@ static int video_thread(void *arg)
     AVRational frame_rate = av_guess_frame_rate(is->ic, is->video_st, NULL);
 
 #if CONFIG_AVFILTER
-    AVFilterGraph *graph = avfilter_graph_alloc();
+    AVFilterGraph *graph = NULL;
     AVFilterContext *filt_out = NULL, *filt_in = NULL;
     int last_w = 0;
     int last_h = 0;
     enum AVPixelFormat last_format = -2;
     int last_serial = -1;
     int last_vfilter_idx = 0;
-    if (!graph) {
-        av_frame_free(&frame);
-        return AVERROR(ENOMEM);
-    }
-
 #endif
 
-    if (!frame) {
-#if CONFIG_AVFILTER
-        avfilter_graph_free(&graph);
-#endif
+    if (!frame)
         return AVERROR(ENOMEM);
-    }
 
     for (;;) {
         ret = get_video_frame(is, frame);
@@ -2172,6 +2160,11 @@ static int video_thread(void *arg)
                    (const char *)av_x_if_null(av_get_pix_fmt_name(frame->format), "none"), is->viddec.pkt_serial);
             avfilter_graph_free(&graph);
             graph = avfilter_graph_alloc();
+            if (!graph) {
+                ret = AVERROR(ENOMEM);
+                goto the_end;
+            }
+            graph->nb_threads = filter_nbthreads;
             if ((ret = configure_video_filters(graph, is, vfilters_list ? vfilters_list[is->vfilter_idx] : NULL, frame)) < 0) {
                 SDL_Event event;
                 event.type = FF_QUIT_EVENT;
@@ -2681,7 +2674,7 @@ static int stream_component_open(VideoState *is, int stream_index)
             is->auddec.start_pts = is->audio_st->start_time;
             is->auddec.start_pts_tb = is->audio_st->time_base;
         }
-        if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
+        if ((ret = decoder_start(&is->auddec, audio_thread, "audio_decoder", is)) < 0)
             goto out;
         SDL_PauseAudioDevice(audio_dev, 0);
         break;
@@ -2690,7 +2683,7 @@ static int stream_component_open(VideoState *is, int stream_index)
         is->video_st = ic->streams[stream_index];
 
         decoder_init(&is->viddec, avctx, &is->videoq, is->continue_read_thread);
-        if ((ret = decoder_start(&is->viddec, video_thread, is)) < 0)
+        if ((ret = decoder_start(&is->viddec, video_thread, "video_decoder", is)) < 0)
             goto out;
         is->queue_attachments_req = 1;
         break;
@@ -2699,7 +2692,7 @@ static int stream_component_open(VideoState *is, int stream_index)
         is->subtitle_st = ic->streams[stream_index];
 
         decoder_init(&is->subdec, avctx, &is->subtitleq, is->continue_read_thread);
-        if ((ret = decoder_start(&is->subdec, subtitle_thread, is)) < 0)
+        if ((ret = decoder_start(&is->subdec, subtitle_thread, "subtitle_decoder", is)) < 0)
             goto out;
         break;
     default:
@@ -3621,6 +3614,7 @@ static const OptionDef options[] = {
     { "autorotate", OPT_BOOL, { &autorotate }, "automatically rotate video", "" },
     { "find_stream_info", OPT_BOOL | OPT_INPUT | OPT_EXPERT, { &find_stream_info },
         "read and decode the streams to fill missing information with heuristics" },
+    { "filter_threads", HAS_ARG | OPT_INT | OPT_EXPERT, { &filter_nbthreads }, "number of filter threads per graph" },
     { NULL, },
 };
 
diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c
index 544786ec728ae..dea489d02eb9f 100644
--- a/fftools/ffprobe.c
+++ b/fftools/ffprobe.c
@@ -2646,20 +2646,20 @@ static int show_stream(WriterContext *w, AVFormatContext *fmt_ctx, int stream_id
     } while (0)
 
     if (do_show_stream_disposition) {
-    writer_print_section_header(w, in_program ? SECTION_ID_PROGRAM_STREAM_DISPOSITION : SECTION_ID_STREAM_DISPOSITION);
-    PRINT_DISPOSITION(DEFAULT,          "default");
-    PRINT_DISPOSITION(DUB,              "dub");
-    PRINT_DISPOSITION(ORIGINAL,         "original");
-    PRINT_DISPOSITION(COMMENT,          "comment");
-    PRINT_DISPOSITION(LYRICS,           "lyrics");
-    PRINT_DISPOSITION(KARAOKE,          "karaoke");
-    PRINT_DISPOSITION(FORCED,           "forced");
-    PRINT_DISPOSITION(HEARING_IMPAIRED, "hearing_impaired");
-    PRINT_DISPOSITION(VISUAL_IMPAIRED,  "visual_impaired");
-    PRINT_DISPOSITION(CLEAN_EFFECTS,    "clean_effects");
-    PRINT_DISPOSITION(ATTACHED_PIC,     "attached_pic");
-    PRINT_DISPOSITION(TIMED_THUMBNAILS, "timed_thumbnails");
-    writer_print_section_footer(w);
+        writer_print_section_header(w, in_program ? SECTION_ID_PROGRAM_STREAM_DISPOSITION : SECTION_ID_STREAM_DISPOSITION);
+        PRINT_DISPOSITION(DEFAULT,          "default");
+        PRINT_DISPOSITION(DUB,              "dub");
+        PRINT_DISPOSITION(ORIGINAL,         "original");
+        PRINT_DISPOSITION(COMMENT,          "comment");
+        PRINT_DISPOSITION(LYRICS,           "lyrics");
+        PRINT_DISPOSITION(KARAOKE,          "karaoke");
+        PRINT_DISPOSITION(FORCED,           "forced");
+        PRINT_DISPOSITION(HEARING_IMPAIRED, "hearing_impaired");
+        PRINT_DISPOSITION(VISUAL_IMPAIRED,  "visual_impaired");
+        PRINT_DISPOSITION(CLEAN_EFFECTS,    "clean_effects");
+        PRINT_DISPOSITION(ATTACHED_PIC,     "attached_pic");
+        PRINT_DISPOSITION(TIMED_THUMBNAILS, "timed_thumbnails");
+        writer_print_section_footer(w);
     }
 
     if (do_show_stream_tags)
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 5547dfd87f223..2b88c899d0a5d 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -145,7 +145,7 @@ typedef struct FourXContext {
     int mv[256];
     VLC pre_vlc;
     int last_dc;
-    DECLARE_ALIGNED(16, int16_t, block)[6][64];
+    DECLARE_ALIGNED(32, int16_t, block)[6][64];
     void *bitstream_buffer;
     unsigned int bitstream_buffer_size;
     int version;
@@ -498,7 +498,7 @@ static int decode_i_block(FourXContext *f, int16_t *block)
 
     if (get_bits_left(&f->gb) < 2){
         av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->gb));
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     /* DC coef */
@@ -732,7 +732,7 @@ static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
         for (x = 0; x < width; x += 16) {
             unsigned int color[4] = { 0 }, bits;
             if (buf_end - buf < 8)
-                return -1;
+                return AVERROR_INVALIDDATA;
             // warning following is purely guessed ...
             color[0] = bytestream2_get_le16u(&g3);
             color[1] = bytestream2_get_le16u(&g3);
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index ce766aa466eba..15c43a8a6a111 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -198,6 +198,7 @@ OBJS-$(CONFIG_APTX_HD_DECODER)         += aptx.o
 OBJS-$(CONFIG_APTX_HD_ENCODER)         += aptx.o
 OBJS-$(CONFIG_APNG_DECODER)            += png.o pngdec.o pngdsp.o
 OBJS-$(CONFIG_APNG_ENCODER)            += png.o pngenc.o
+OBJS-$(CONFIG_ARBC_DECODER)            += arbc.o
 OBJS-$(CONFIG_SSA_DECODER)             += assdec.o ass.o
 OBJS-$(CONFIG_SSA_ENCODER)             += assenc.o ass.o
 OBJS-$(CONFIG_ASS_DECODER)             += assdec.o ass.o
@@ -362,6 +363,7 @@ OBJS-$(CONFIG_H264_V4L2M2M_DECODER)    += v4l2_m2m_dec.o
 OBJS-$(CONFIG_H264_V4L2M2M_ENCODER)    += v4l2_m2m_enc.o
 OBJS-$(CONFIG_HAP_DECODER)             += hapdec.o hap.o
 OBJS-$(CONFIG_HAP_ENCODER)             += hapenc.o hap.o
+OBJS-$(CONFIG_HCOM_DECODER)            += hcom.o
 OBJS-$(CONFIG_HEVC_DECODER)            += hevcdec.o hevc_mvs.o \
                                           hevc_cabac.o hevc_refs.o hevcpred.o    \
                                           hevcdsp.o hevc_filter.o hevc_data.o
@@ -383,6 +385,7 @@ OBJS-$(CONFIG_HQ_HQA_DECODER)          += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
 OBJS-$(CONFIG_HQX_DECODER)             += hqx.o hqxvlc.o hqxdsp.o canopus.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o huffyuvdec.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o huffyuvenc.o
+OBJS-$(CONFIG_HYMT_DECODER)            += huffyuv.o huffyuvdec.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
 OBJS-$(CONFIG_IDF_DECODER)             += bintext.o cga_data.o
 OBJS-$(CONFIG_IFF_ILBM_DECODER)        += iff.o
@@ -516,8 +519,8 @@ OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o
 OBJS-$(CONFIG_PRORES_DECODER)          += proresdec2.o proresdsp.o proresdata.o
-OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o
-OBJS-$(CONFIG_PRORES_AW_ENCODER)       += proresenc_anatoliy.o
+OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o proresdata.o
+OBJS-$(CONFIG_PRORES_AW_ENCODER)       += proresenc_anatoliy.o proresdata.o
 OBJS-$(CONFIG_PRORES_KS_ENCODER)       += proresenc_kostya.o proresdata.o
 OBJS-$(CONFIG_PROSUMER_DECODER)        += prosumer.o
 OBJS-$(CONFIG_PSD_DECODER)             += psd.o
@@ -737,6 +740,7 @@ OBJS-$(CONFIG_PCM_ALAW_DECODER)           += pcm.o
 OBJS-$(CONFIG_PCM_ALAW_ENCODER)           += pcm.o
 OBJS-$(CONFIG_PCM_BLURAY_DECODER)         += pcm-bluray.o
 OBJS-$(CONFIG_PCM_DVD_DECODER)            += pcm-dvd.o
+OBJS-$(CONFIG_PCM_DVD_ENCODER)            += pcm-dvdenc.o
 OBJS-$(CONFIG_PCM_F16LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_F24LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_F32BE_DECODER)          += pcm.o
@@ -794,6 +798,8 @@ OBJS-$(CONFIG_PCM_U32BE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_U32BE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_U32LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_U32LE_ENCODER)          += pcm.o
+OBJS-$(CONFIG_PCM_VIDC_DECODER)           += pcm.o
+OBJS-$(CONFIG_PCM_VIDC_ENCODER)           += pcm.o
 OBJS-$(CONFIG_PCM_ZORK_DECODER)           += pcm.o
 
 OBJS-$(CONFIG_ADPCM_4XM_DECODER)          += adpcm.o adpcm_data.o
@@ -951,9 +957,11 @@ OBJS-$(CONFIG_PCM_ALAW_AT_ENCODER)        += audiotoolboxenc.o
 OBJS-$(CONFIG_PCM_MULAW_AT_ENCODER)       += audiotoolboxenc.o
 OBJS-$(CONFIG_LIBAOM_AV1_DECODER)         += libaomdec.o
 OBJS-$(CONFIG_LIBAOM_AV1_ENCODER)         += libaomenc.o
+OBJS-$(CONFIG_LIBARIBB24_DECODER)         += libaribb24.o ass.o
 OBJS-$(CONFIG_LIBCELT_DECODER)            += libcelt_dec.o
 OBJS-$(CONFIG_LIBCODEC2_DECODER)          += libcodec2.o codec2utils.o
 OBJS-$(CONFIG_LIBCODEC2_ENCODER)          += libcodec2.o codec2utils.o
+OBJS-$(CONFIG_LIBDAV1D_DECODER)           += libdav1d.o
 OBJS-$(CONFIG_LIBDAVS2_DECODER)           += libdavs2.o
 OBJS-$(CONFIG_LIBFDK_AAC_DECODER)         += libfdk-aacdec.o
 OBJS-$(CONFIG_LIBFDK_AAC_ENCODER)         += libfdk-aacenc.o
@@ -1021,7 +1029,9 @@ OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
 OBJS-$(CONFIG_DVDSUB_PARSER)           += dvdsub_parser.o
 OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o \
                                           vorbis_data.o
+OBJS-$(CONFIG_G723_1_PARSER)           += g723_1_parser.o
 OBJS-$(CONFIG_G729_PARSER)             += g729_parser.o
+OBJS-$(CONFIG_GIF_PARSER)              += gif_parser.o
 OBJS-$(CONFIG_GSM_PARSER)              += gsm_parser.o
 OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
 OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
@@ -1078,9 +1088,11 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
 OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
 OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
 OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
+OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
 OBJS-$(CONFIG_TRACE_HEADERS_BSF)          += trace_headers_bsf.o
+OBJS-$(CONFIG_TRUEHD_CORE_BSF)            += truehd_core_bsf.o mlp_parser.o mlp.o
 OBJS-$(CONFIG_VP9_METADATA_BSF)           += vp9_metadata_bsf.o
 OBJS-$(CONFIG_VP9_RAW_REORDER_BSF)        += vp9_raw_reorder_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_BSF)         += vp9_superframe_bsf.o
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 05bc95385f231..c2b9c980cbdc1 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -368,7 +368,7 @@ struct AACContext {
                                    INTFLOAT *in, IndividualChannelStream *ics);
     void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
     void (*vector_pow43)(int *coefs, int len);
-    void (*subband_scale)(int *dst, int *src, int scale, int offset, int len);
+    void (*subband_scale)(int *dst, int *src, int scale, int offset, int len, void *log_context);
 
 };
 
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 1bdb93f5bc876..2c594c6792205 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -162,7 +162,7 @@ static void vector_pow43(int *coefs, int len)
     }
 }
 
-static void subband_scale(int *dst, int *src, int scale, int offset, int len)
+static void subband_scale(int *dst, int *src, int scale, int offset, int len, void *log_context)
 {
     int ssign = scale < 0 ? -1 : 1;
     int s = FFABS(scale);
@@ -189,7 +189,7 @@ static void subband_scale(int *dst, int *src, int scale, int offset, int len)
             dst[i] = out * (unsigned)ssign;
         }
     } else {
-        av_log(NULL, AV_LOG_ERROR, "Overflow in subband_scale()\n");
+        av_log(log_context, AV_LOG_ERROR, "Overflow in subband_scale()\n");
     }
 }
 
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index dce6035d67ebd..721511c5e953a 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1927,7 +1927,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
             if (cbt_m1 < NOISE_BT - 1) {
                 for (group = 0; group < (int)g_len; group++, cfo+=128) {
                     ac->vector_pow43(cfo, off_len);
-                    ac->subband_scale(cfo, cfo, sf[idx], 34, off_len);
+                    ac->subband_scale(cfo, cfo, sf[idx], 34, off_len, ac->avctx);
                 }
             }
         }
@@ -2158,7 +2158,7 @@ static void apply_intensity_stereo(AACContext *ac,
                                       coef0 + group * 128 + offsets[i],
                                       scale,
                                       23,
-                                      offsets[i + 1] - offsets[i]);
+                                      offsets[i + 1] - offsets[i] ,ac->avctx);
 #else
                         ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
                                                     coef0 + group * 128 + offsets[i],
diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index b16c3393d1061..d5dca64b0f2a5 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -118,7 +118,7 @@ static int read_ ## PAR ## _data(AVCodecContext *avctx, GetBitContext *gb, PSCon
     return 0; \
 err: \
     av_log(avctx, AV_LOG_ERROR, "illegal "#PAR"\n"); \
-    return -1; \
+    return AVERROR_INVALIDDATA; \
 }
 
 READ_PAR_DATA(iid,    huff_offset[table_idx],    0, FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant)
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 72080c2dbb1d0..8bc8bc528cee6 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -9,6 +9,7 @@ OBJS-$(CONFIG_HPELDSP)                  += aarch64/hpeldsp_init_aarch64.o
 OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
 OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o
+OBJS-$(CONFIG_VP8DSP)                   += aarch64/vp8dsp_init_aarch64.o
 
 # decoders/encoders
 OBJS-$(CONFIG_AAC_DECODER)              += aarch64/aacpsdsp_init_aarch64.o \
@@ -43,6 +44,7 @@ NEON-OBJS-$(CONFIG_IDCTDSP)             += aarch64/idctdsp_init_aarch64.o      \
                                            aarch64/simple_idct_neon.o
 NEON-OBJS-$(CONFIG_MDCT)                += aarch64/mdct_neon.o
 NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
+NEON-OBJS-$(CONFIG_VP8DSP)              += aarch64/vp8dsp_neon.o
 
 # decoders/encoders
 NEON-OBJS-$(CONFIG_AAC_DECODER)         += aarch64/aacpsdsp_neon.o
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
index eb2014e24d6cb..649d2ab1d7468 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -25,14 +25,24 @@
 #include "libavutil/aarch64/cpu.h"
 #include "libavcodec/h264dsp.h"
 
-void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                      int beta, int8_t *tc0);
-void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                      int beta, int8_t *tc0);
-void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
+                                           int beta);
+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                        int beta, int8_t *tc0);
-void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                        int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                             int alpha, int beta);
+void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, ptrdiff_t stride,
+                                                   int alpha, int beta);
 
 void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height,
                                    int log2_den, int weight, int offset);
@@ -77,9 +87,16 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
     if (have_neon(cpu_flags) && bit_depth == 8) {
         c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
         c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+        c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
+        c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
+
         c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-        if (chroma_format_idc <= 1)
-        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+        if (chroma_format_idc <= 1) {
+            c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+            c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
+            c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
+        }
+        c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
 
         c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
         c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S
index 4ec35f290506e..80ac09d2be400 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
  *
  * This file is part of FFmpeg.
  *
@@ -54,9 +55,12 @@
         uabd            v17.16B, v20.16B, v16.16B       // abs(p2 - p0)
         and             v21.16B, v21.16B, v28.16B
         uabd            v19.16B,  v4.16B,  v0.16B       // abs(q2 - q0)
+        and             v21.16B, v21.16B, v30.16B      // < beta
+        shrn            v30.8b,  v21.8h,  #4
+        mov             x7, v30.d[0]
         cmhi            v17.16B, v22.16B, v17.16B       // < beta
-        and             v21.16B, v21.16B, v30.16B
         cmhi            v19.16B, v22.16B, v19.16B       // < beta
+        cbz             x7,  9f
         and             v17.16B, v17.16B, v21.16B
         and             v19.16B, v19.16B, v21.16B
         and             v24.16B, v24.16B, v21.16B
@@ -124,12 +128,13 @@ function ff_h264_v_loop_filter_luma_neon, export=1
         st1             {v16.16B}, [x0], x1
         st1             {v0.16B},  [x0], x1
         st1             {v19.16B}, [x0]
-
+9:
         ret
 endfunc
 
 function ff_h264_h_loop_filter_luma_neon, export=1
         h264_loop_filter_start
+        sxtw            x1,  w1
 
         sub             x0,  x0,  #4
         ld1             {v6.8B},  [x0], x1
@@ -173,32 +178,231 @@ function ff_h264_h_loop_filter_luma_neon, export=1
         st1             {v16.S}[3], [x0], x1
         st1             {v0.S}[3],  [x0], x1
         st1             {v19.S}[3], [x0], x1
-
+9:
         ret
 endfunc
 
+
+.macro h264_loop_filter_start_intra            // shared prologue of the intra loop filter functions
+    orr             w4,  w2,  w3               // w4 = alpha | beta
+    cbnz            w4,  1f                    // carry on unless both are zero
+    ret                                        // alpha == beta == 0: return from the function using this macro
+1:
+    sxtw            x1,  w1                    // sign-extend the 32-bit value in w1 (used as row step by the functions)
+    dup             v30.16b, w2                // alpha
+    dup             v31.16b, w3                // beta
+.endm
+
+.macro h264_loop_filter_luma_intra             // in: p3..p0 = v4..v7, q0..q3 = v0..v3, alpha = v30, beta = v31
+    uabd            v16.16b, v7.16b,  v0.16b        // abs(p0 - q0)
+    uabd            v17.16b, v6.16b,  v7.16b        // abs(p1 - p0)
+    uabd            v18.16b, v1.16b,  v0.16b        // abs(q1 - q0)
+    cmhi            v19.16b, v30.16b, v16.16b       // < alpha
+    cmhi            v17.16b, v31.16b, v17.16b       // < beta
+    cmhi            v18.16b, v31.16b, v18.16b       // < beta
+
+    movi            v29.16b, #2
+    ushr            v30.16b, v30.16b, #2            // alpha >> 2
+    add             v30.16b, v30.16b, v29.16b       // (alpha >> 2) + 2
+    cmhi            v16.16b, v30.16b, v16.16b       // < (alpha >> 2) + 2
+
+    and             v19.16b, v19.16b, v17.16b
+    and             v19.16b, v19.16b, v18.16b       // if_1: the alpha test and both beta tests pass
+    shrn            v20.8b,  v19.8h,  #4            // pack the 16 byte masks into 64 bits, 4 bits per lane
+    mov             x4, v20.d[0]
+    cbz             x4, 9f                          // no lane passes if_1: branch to the 9: label of the function
+    // 16-bit sums for the p0'_1 / q0'_1 results: v20/v21 = p side (low/high half), v22/v23 = q side
+    ushll           v20.8h,  v6.8b,   #1            // 2*p1
+    ushll           v22.8h,  v1.8b,   #1            // 2*q1
+    ushll2          v21.8h,  v6.16b,  #1
+    ushll2          v23.8h,  v1.16b,  #1
+    uaddw           v20.8h,  v20.8h,  v7.8b         // 2*p1 + p0
+    uaddw           v22.8h,  v22.8h,  v0.8b         // 2*q1 + q0
+    uaddw2          v21.8h,  v21.8h,  v7.16b
+    uaddw2          v23.8h,  v23.8h,  v0.16b
+    uaddw           v20.8h,  v20.8h,  v1.8b         // 2*p1 + p0 + q1
+    uaddw           v22.8h,  v22.8h,  v6.8b         // 2*q1 + q0 + p1
+    uaddw2          v21.8h,  v21.8h,  v1.16b
+    uaddw2          v23.8h,  v23.8h,  v6.16b
+
+    rshrn           v24.8b,  v20.8h,  #2 // p0'_1
+    rshrn           v25.8b,  v22.8h,  #2 // q0'_1
+    rshrn2          v24.16b, v21.8h,  #2 // p0'_1
+    rshrn2          v25.16b, v23.8h,  #2 // q0'_1
+
+    uabd            v17.16b, v5.16b,  v7.16b        // abs(p2 - p0)
+    uabd            v18.16b, v2.16b,  v0.16b        // abs(q2 - q0)
+    cmhi            v17.16b, v31.16b, v17.16b       // < beta
+    cmhi            v18.16b, v31.16b, v18.16b       // < beta
+
+    and             v17.16b, v16.16b, v17.16b  // if_2 && if_3
+    and             v18.16b, v16.16b, v18.16b  // if_2 && if_4
+
+    not             v30.16b, v17.16b           // !(if_2 && if_3)
+    not             v31.16b, v18.16b           // !(if_2 && if_4)
+
+    and             v30.16b, v30.16b, v19.16b  // if_1 && !(if_2 && if_3)
+    and             v31.16b, v31.16b, v19.16b  // if_1 && !(if_2 && if_4)
+
+    and             v17.16b, v19.16b, v17.16b  // if_1 && if_2 && if_3
+    and             v18.16b, v19.16b, v18.16b  // if_1 && if_2 && if_4
+
+    // p side results selected by mask v17: p0'_2 -> v20, p1'_2 -> v21, p2'_2 -> v19
+    uaddl           v26.8h,  v5.8b,   v7.8b         // p2 + p0
+    uaddl2          v27.8h,  v5.16b,  v7.16b
+    uaddw           v26.8h,  v26.8h,  v0.8b         // p2 + p0 + q0
+    uaddw2          v27.8h,  v27.8h,  v0.16b
+    add             v20.8h,  v20.8h,  v26.8h        // p2 + 2*p1 + 2*p0 + q0 + q1
+    add             v21.8h,  v21.8h,  v27.8h
+    uaddw           v20.8h,  v20.8h,  v0.8b         // p2 + 2*p1 + 2*p0 + 2*q0 + q1
+    uaddw2          v21.8h,  v21.8h,  v0.16b
+    rshrn           v20.8b,  v20.8h,  #3 // p0'_2
+    rshrn2          v20.16b, v21.8h,  #3 // p0'_2
+    uaddw           v26.8h,  v26.8h,  v6.8b         // p2 + p1 + p0 + q0
+    uaddw2          v27.8h,  v27.8h,  v6.16b
+    rshrn           v21.8b,  v26.8h,  #2 // p1'_2
+    rshrn2          v21.16b, v27.8h,  #2 // p1'_2
+    uaddl           v28.8h,  v4.8b,   v5.8b         // p3 + p2
+    uaddl2          v29.8h,  v4.16b,  v5.16b
+    shl             v28.8h,  v28.8h,  #1            // 2*p3 + 2*p2
+    shl             v29.8h,  v29.8h,  #1
+    add             v28.8h,  v28.8h,  v26.8h        // 2*p3 + 3*p2 + p1 + p0 + q0
+    add             v29.8h,  v29.8h,  v27.8h
+    rshrn           v19.8b,  v28.8h,  #3 // p2'_2
+    rshrn2          v19.16b, v29.8h,  #3 // p2'_2
+
+    // q side results selected by mask v18: q0'_2 -> v22, q1'_2 -> v23, q2'_2 -> v26
+    uaddl           v26.8h,  v2.8b,   v0.8b         // q2 + q0
+    uaddl2          v27.8h,  v2.16b,  v0.16b
+    uaddw           v26.8h,  v26.8h,  v7.8b         // q2 + q0 + p0
+    uaddw2          v27.8h,  v27.8h,  v7.16b
+    add             v22.8h,  v22.8h,  v26.8h        // q2 + 2*q1 + 2*q0 + p0 + p1
+    add             v23.8h,  v23.8h,  v27.8h
+    uaddw           v22.8h,  v22.8h,  v7.8b         // q2 + 2*q1 + 2*q0 + 2*p0 + p1
+    uaddw2          v23.8h,  v23.8h,  v7.16b
+    rshrn           v22.8b,  v22.8h,  #3 // q0'_2
+    rshrn2          v22.16b, v23.8h,  #3 // q0'_2
+    uaddw           v26.8h,  v26.8h,  v1.8b         // q2 + q1 + q0 + p0
+    uaddw2          v27.8h,  v27.8h,  v1.16b
+    rshrn           v23.8b,  v26.8h,  #2 // q1'_2
+    rshrn2          v23.16b, v27.8h,  #2 // q1'_2
+    uaddl           v28.8h,  v2.8b,   v3.8b         // q2 + q3
+    uaddl2          v29.8h,  v2.16b,  v3.16b
+    shl             v28.8h,  v28.8h,  #1            // 2*q2 + 2*q3
+    shl             v29.8h,  v29.8h,  #1
+    add             v28.8h,  v28.8h,  v26.8h        // 2*q3 + 3*q2 + q1 + q0 + p0
+    add             v29.8h,  v29.8h,  v27.8h
+    rshrn           v26.8b,  v28.8h,  #3 // q2'_2
+    rshrn2          v26.16b, v29.8h,  #3 // q2'_2
+
+    bit             v7.16b,  v24.16b, v30.16b  // p0'_1
+    bit             v0.16b,  v25.16b, v31.16b  // q0'_1
+    bit             v7.16b, v20.16b,  v17.16b  // p0'_2
+    bit             v6.16b, v21.16b,  v17.16b  // p1'_2
+    bit             v5.16b, v19.16b,  v17.16b  // p2'_2
+    bit             v0.16b, v22.16b,  v18.16b  // q0'_2
+    bit             v1.16b, v23.16b,  v18.16b  // q1'_2
+    bit             v2.16b, v26.16b,  v18.16b  // q2'_2
+.endm
+
+function ff_h264_v_loop_filter_luma_intra_neon, export=1
+    h264_loop_filter_start_intra
+    // Filters 16 pixels across the edge between rows; x0 enters at the q0 row, rows are x1 bytes apart.
+    ld1             {v0.16b},  [x0], x1 // q0
+    ld1             {v1.16b},  [x0], x1 // q1
+    ld1             {v2.16b},  [x0], x1 // q2
+    ld1             {v3.16b},  [x0], x1 // q3
+    sub             x0,  x0,  x1, lsl #3            // back 8 rows: from past q3 to the p3 row
+    ld1             {v4.16b},  [x0], x1 // p3
+    ld1             {v5.16b},  [x0], x1 // p2
+    ld1             {v6.16b},  [x0], x1 // p1
+    ld1             {v7.16b},  [x0]     // p0
+
+    h264_loop_filter_luma_intra
+
+    sub             x0,  x0,  x1, lsl #1            // x0 was left on the p0 row; step back to p2
+    st1             {v5.16b}, [x0], x1  // p2
+    st1             {v6.16b}, [x0], x1  // p1
+    st1             {v7.16b}, [x0], x1  // p0
+    st1             {v0.16b}, [x0], x1  // q0
+    st1             {v1.16b}, [x0], x1  // q1
+    st1             {v2.16b}, [x0]      // q2
+9:                                                  // target of the cbz in h264_loop_filter_luma_intra (nothing stored)
+    ret
+endfunc
+
+function ff_h264_h_loop_filter_luma_intra_neon, export=1
+    h264_loop_filter_start_intra
+    // Filters across the edge between columns: 16 rows of 8 bytes each are read, starting 4 bytes before x0.
+    sub             x0,  x0,  #4
+    ld1             {v4.8b},  [x0], x1              // rows 0-7 fill the low halves
+    ld1             {v5.8b},  [x0], x1
+    ld1             {v6.8b},  [x0], x1
+    ld1             {v7.8b},  [x0], x1
+    ld1             {v0.8b},  [x0], x1
+    ld1             {v1.8b},  [x0], x1
+    ld1             {v2.8b},  [x0], x1
+    ld1             {v3.8b},  [x0], x1
+    ld1             {v4.d}[1],  [x0], x1            // rows 8-15 fill the high halves
+    ld1             {v5.d}[1],  [x0], x1
+    ld1             {v6.d}[1],  [x0], x1
+    ld1             {v7.d}[1],  [x0], x1
+    ld1             {v0.d}[1],  [x0], x1
+    ld1             {v1.d}[1],  [x0], x1
+    ld1             {v2.d}[1],  [x0], x1
+    ld1             {v3.d}[1],  [x0], x1
+
+    transpose_8x16B v4, v5, v6, v7, v0, v1, v2, v3, v21, v23    // rows -> columns p3..p0, q0..q3 (layout the macro expects)
+
+    h264_loop_filter_luma_intra
+
+    transpose_8x16B v4, v5, v6, v7, v0, v1, v2, v3, v21, v23    // back to the row layout that was loaded
+
+    sub             x0,  x0,  x1, lsl #4            // rewind the 16 rows advanced by the loads
+    st1             {v4.8b},  [x0], x1              // all 8 bytes of every row are written back
+    st1             {v5.8b},  [x0], x1
+    st1             {v6.8b},  [x0], x1
+    st1             {v7.8b},  [x0], x1
+    st1             {v0.8b},  [x0], x1
+    st1             {v1.8b},  [x0], x1
+    st1             {v2.8b},  [x0], x1
+    st1             {v3.8b},  [x0], x1
+    st1             {v4.d}[1],  [x0], x1
+    st1             {v5.d}[1],  [x0], x1
+    st1             {v6.d}[1],  [x0], x1
+    st1             {v7.d}[1],  [x0], x1
+    st1             {v0.d}[1],  [x0], x1
+    st1             {v1.d}[1],  [x0], x1
+    st1             {v2.d}[1],  [x0], x1
+    st1             {v3.d}[1],  [x0], x1
+9:                                                  // target of the cbz in h264_loop_filter_luma_intra (nothing stored)
+    ret
+endfunc
+
 .macro  h264_loop_filter_chroma
         dup             v22.8B, w2              // alpha
+        dup             v23.8B, w3              // beta
         uxtl            v24.8H, v24.8B
         uabd            v26.8B, v16.8B, v0.8B   // abs(p0 - q0)
-        uxtl            v4.8H,  v0.8B
         uabd            v28.8B, v18.8B, v16.8B  // abs(p1 - p0)
+        uabd            v30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
+        cmhi            v26.8B, v22.8B, v26.8B  // < alpha
+        cmhi            v28.8B, v23.8B, v28.8B  // < beta
+        cmhi            v30.8B, v23.8B, v30.8B  // < beta
+        uxtl            v4.8H,  v0.8B
+        and             v26.8B, v26.8B, v28.8B
         usubw           v4.8H,  v4.8H,  v16.8B
-        sli             v24.8H, v24.8H, #8
+        and             v26.8B, v26.8B, v30.8B
         shl             v4.8H,  v4.8H,  #2
-        uabd            v30.8B, v2.8B,  v0.8B   // abs(q1 - q0)
+        mov             x2,  v26.d[0]
+        sli             v24.8H, v24.8H, #8
         uaddw           v4.8H,  v4.8H,  v18.8B
-        cmhi            v26.8B, v22.8B, v26.8B  // < alpha
+        cbz             x2,  9f
         usubw           v4.8H,  v4.8H,  v2.8B
-        dup             v22.8B, w3              // beta
         rshrn           v4.8B,  v4.8H,  #3
-        cmhi            v28.8B, v22.8B, v28.8B  // < beta
-        cmhi            v30.8B, v22.8B, v30.8B  // < beta
         smin            v4.8B,  v4.8B,  v24.8B
         neg             v25.8B, v24.8B
-        and             v26.8B, v26.8B, v28.8B
         smax            v4.8B,  v4.8B,  v25.8B
-        and             v26.8B, v26.8B, v30.8B
         uxtl            v22.8H, v0.8B
         and             v4.8B,  v4.8B,  v26.8B
         uxtl            v28.8H, v16.8B
@@ -210,6 +414,7 @@ endfunc
 
 function ff_h264_v_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
+        sxtw            x1,  w1
 
         sub             x0,  x0,  x1, lsl #1
         ld1             {v18.8B}, [x0], x1
@@ -222,12 +427,13 @@ function ff_h264_v_loop_filter_chroma_neon, export=1
         sub             x0,  x0,  x1, lsl #1
         st1             {v16.8B}, [x0], x1
         st1             {v0.8B},  [x0], x1
-
+9:
         ret
 endfunc
 
 function ff_h264_h_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
+        sxtw            x1,  w1
 
         sub             x0,  x0,  #2
         ld1             {v18.S}[0], [x0], x1
@@ -254,10 +460,109 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         st1             {v16.S}[1], [x0], x1
         st1             {v0.S}[1],  [x0], x1
         st1             {v2.S}[1],  [x0], x1
-
+9:
         ret
 endfunc
 
+
+.macro h264_loop_filter_chroma_intra           // in: p1 = v18, p0 = v16, q0 = v17, q1 = v19, alpha = v30, beta = v31
+    uabd            v26.8b, v16.8b, v17.8b  // abs(p0 - q0)
+    uabd            v27.8b, v18.8b, v16.8b  // abs(p1 - p0)
+    uabd            v28.8b, v19.8b, v17.8b  // abs(q1 - q0)
+    cmhi            v26.8b, v30.8b, v26.8b  // < alpha
+    cmhi            v27.8b, v31.8b, v27.8b  // < beta
+    cmhi            v28.8b, v31.8b, v28.8b  // < beta
+    and             v26.8b, v26.8b, v27.8b
+    and             v26.8b, v26.8b, v28.8b  // v26: lanes where all three tests pass
+    mov             x2, v26.d[0]            // x2 (no longer needed as alpha) = the 8 lane masks
+
+    ushll           v4.8h,   v18.8b,  #1    // 2*p1 (issued ahead of the branch)
+    ushll           v6.8h,   v19.8b,  #1    // 2*q1
+    cbz             x2, 9f                  // no lane to filter: branch to the 9: label of the function
+    uaddl           v20.8h,  v16.8b,  v19.8b        // p0 + q1
+    uaddl           v22.8h,  v17.8b,  v18.8b        // q0 + p1
+    add             v20.8h,  v20.8h,  v4.8h         // 2*p1 + p0 + q1
+    add             v22.8h,  v22.8h,  v6.8h         // 2*q1 + q0 + p1
+    uqrshrn         v24.8b,  v20.8h,  #2            // rounded >> 2, narrowed to bytes
+    uqrshrn         v25.8b,  v22.8h,  #2
+    bit             v16.8b, v24.8b, v26.8b          // replace p0 in the selected lanes
+    bit             v17.8b, v25.8b, v26.8b          // replace q0 in the selected lanes
+.endm
+
+function ff_h264_v_loop_filter_chroma_intra_neon, export=1
+    h264_loop_filter_start_intra
+    // Filters 8 pixels across the edge between rows; x0 enters at the q0 row.
+    sub             x0,  x0,  x1, lsl #1            // two rows back: the p1 row
+    ld1             {v18.8b}, [x0], x1              // p1
+    ld1             {v16.8b}, [x0], x1              // p0
+    ld1             {v17.8b}, [x0], x1              // q0
+    ld1             {v19.8b}, [x0]                  // q1
+
+    h264_loop_filter_chroma_intra
+
+    sub             x0,  x0,  x1, lsl #1            // from the q1 row back to the p0 row
+    st1             {v16.8b}, [x0], x1              // p0
+    st1             {v17.8b}, [x0], x1              // q0
+
+9:                                                  // target of the cbz in h264_loop_filter_chroma_intra
+    ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_mbaff_intra_neon, export=1
+    h264_loop_filter_start_intra
+    // Filters across the edge between columns for 4 rows.
+    sub             x4,  x0,  #2                    // x4: read pointer, 2 bytes before x0
+    sub             x0,  x0,  #1                    // x0: write pointer, 1 byte before its entry value
+    ld1             {v18.8b}, [x4], x1              // 8 bytes are read per row
+    ld1             {v16.8b}, [x4], x1
+    ld1             {v17.8b}, [x4], x1
+    ld1             {v19.8b}, [x4], x1
+
+    transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29       // rows -> columns p1, p0, q0, q1 (layout the macro expects)
+
+    h264_loop_filter_chroma_intra
+
+    st2             {v16.b,v17.b}[0], [x0], x1      // write the p0/q0 byte pair of each of the 4 rows
+    st2             {v16.b,v17.b}[1], [x0], x1
+    st2             {v16.b,v17.b}[2], [x0], x1
+    st2             {v16.b,v17.b}[3], [x0], x1
+
+9:                                                  // target of the cbz in h264_loop_filter_chroma_intra
+    ret
+endfunc
+
+function ff_h264_h_loop_filter_chroma_intra_neon, export=1
+    h264_loop_filter_start_intra
+    // Filters across the edge between columns for 8 rows.
+    sub             x4,  x0,  #2                    // x4: read pointer, 2 bytes before x0
+    sub             x0,  x0,  #1                    // x0: write pointer, 1 byte before its entry value
+    ld1             {v18.8b}, [x4], x1              // rows 0-3: 8 bytes read, the low 4 are kept
+    ld1             {v16.8b}, [x4], x1
+    ld1             {v17.8b}, [x4], x1
+    ld1             {v19.8b}, [x4], x1
+    ld1             {v18.s}[1], [x4], x1            // rows 4-7 overwrite the upper 4 bytes
+    ld1             {v16.s}[1], [x4], x1
+    ld1             {v17.s}[1], [x4], x1
+    ld1             {v19.s}[1], [x4]
+
+    transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29       // rows -> columns p1, p0, q0, q1 (layout the macro expects)
+
+    h264_loop_filter_chroma_intra
+
+    st2             {v16.b,v17.b}[0], [x0], x1      // write the p0/q0 byte pair of each of the 8 rows
+    st2             {v16.b,v17.b}[1], [x0], x1
+    st2             {v16.b,v17.b}[2], [x0], x1
+    st2             {v16.b,v17.b}[3], [x0], x1
+    st2             {v16.b,v17.b}[4], [x0], x1
+    st2             {v16.b,v17.b}[5], [x0], x1
+    st2             {v16.b,v17.b}[6], [x0], x1
+    st2             {v16.b,v17.b}[7], [x0], x1
+
+9:                                                  // target of the cbz in h264_loop_filter_chroma_intra
+    ret
+endfunc
+
+
 .macro  biweight_16     macs, macd
         dup             v0.16B,  w5
         dup             v1.16B,  w6
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 825ec49f8c6d9..7de44205d39a4 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -23,6 +23,7 @@
 #include "neon.S"
 
 function ff_h264_idct_add_neon, export=1
+.L_ff_h264_idct_add_neon:
         ld1             {v0.4H, v1.4H, v2.4H, v3.4H},  [x1]
         sxtw            x2,     w2
         movi            v30.8H, #0
@@ -77,6 +78,7 @@ function ff_h264_idct_add_neon, export=1
 endfunc
 
 function ff_h264_idct_dc_add_neon, export=1
+.L_ff_h264_idct_dc_add_neon:
         sxtw            x2,  w2
         mov             w3,       #0
         ld1r            {v2.8H},  [x1]
@@ -106,8 +108,8 @@ function ff_h264_idct_add16_neon, export=1
         mov             w9,  w3         // stride
         movrel          x7,  scan8
         mov             x10, #16
-        movrel          x13, X(ff_h264_idct_dc_add_neon)
-        movrel          x14, X(ff_h264_idct_add_neon)
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
 1:      mov             w2,  w9
         ldrb            w3,  [x7], #1
         ldrsw           x0,  [x5], #4
@@ -133,8 +135,8 @@ function ff_h264_idct_add16intra_neon, export=1
         mov             w9,  w3         // stride
         movrel          x7,  scan8
         mov             x10, #16
-        movrel          x13, X(ff_h264_idct_dc_add_neon)
-        movrel          x14, X(ff_h264_idct_add_neon)
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
 1:      mov             w2,  w9
         ldrb            w3,  [x7], #1
         ldrsw           x0,  [x5], #4
@@ -160,8 +162,8 @@ function ff_h264_idct_add8_neon, export=1
         add             x5,  x1,  #16*4         // block_offset
         add             x9,  x2,  #16*32        // block
         mov             w19, w3                 // stride
-        movrel          x13, X(ff_h264_idct_dc_add_neon)
-        movrel          x14, X(ff_h264_idct_add_neon)
+        movrel          x13, .L_ff_h264_idct_dc_add_neon
+        movrel          x14, .L_ff_h264_idct_add_neon
         movrel          x7,  scan8, 16
         mov             x10, #0
         mov             x11, #16
@@ -263,6 +265,7 @@ endfunc
 .endm
 
 function ff_h264_idct8_add_neon, export=1
+.L_ff_h264_idct8_add_neon:
         movi            v19.8H,   #0
         sxtw            x2,       w2
         ld1             {v24.8H, v25.8H}, [x1]
@@ -326,6 +329,7 @@ function ff_h264_idct8_add_neon, export=1
 endfunc
 
 function ff_h264_idct8_dc_add_neon, export=1
+.L_ff_h264_idct8_dc_add_neon:
         mov             w3,       #0
         sxtw            x2,       w2
         ld1r            {v31.8H}, [x1]
@@ -375,8 +379,8 @@ function ff_h264_idct8_add4_neon, export=1
         mov             w2,  w3
         movrel          x7,  scan8
         mov             w10, #16
-        movrel          x13, X(ff_h264_idct8_dc_add_neon)
-        movrel          x14, X(ff_h264_idct8_add_neon)
+        movrel          x13, .L_ff_h264_idct8_dc_add_neon
+        movrel          x14, .L_ff_h264_idct8_add_neon
 1:      ldrb            w9,  [x7], #4
         ldrsw           x0,  [x5], #16
         ldrb            w9,  [x4, w9, UXTW]
diff --git a/libavcodec/aarch64/vp8dsp.h b/libavcodec/aarch64/vp8dsp.h
new file mode 100644
index 0000000000000..ea7665dcc8cad
--- /dev/null
+++ b/libavcodec/aarch64/vp8dsp.h
@@ -0,0 +1,70 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_VP8DSP_H
+#define AVCODEC_AARCH64_VP8DSP_H
+
+#include "libavcodec/vp8dsp.h"
+/* Prototype generators for ff_vp8_<hv>_loop_filter16<inner>_<opt>; hv is h or v, opt is the name suffix. */
+#define VP8_LF_Y(hv, inner, opt)                                             \
+    void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst,            \
+                                                    ptrdiff_t stride,        \
+                                                    int flim_E, int flim_I,  \
+                                                    int hev_thresh)
+/* Like VP8_LF_Y, but for the loop_filter8uv functions, which take separate dstU and dstV pointers. */
+#define VP8_LF_UV(hv, inner, opt)                                            \
+    void ff_vp8_##hv##_loop_filter8uv##inner##_##opt(uint8_t *dstU,          \
+                                                     uint8_t *dstV,          \
+                                                     ptrdiff_t stride,       \
+                                                     int flim_E, int flim_I, \
+                                                     int hev_thresh)
+/* Prototype of a loop_filter16_simple function, which takes a single flim value. */
+#define VP8_LF_SIMPLE(hv, opt)                                          \
+    void ff_vp8_##hv##_loop_filter16_simple_##opt(uint8_t *dst,         \
+                                                  ptrdiff_t stride,     \
+                                                  int flim)
+/* Declares the h and v variants of both VP8_LF_Y and VP8_LF_UV for one "inner" infix. */
+#define VP8_LF_HV(inner, opt)                   \
+    VP8_LF_Y(h,  inner, opt);                   \
+    VP8_LF_Y(v,  inner, opt);                   \
+    VP8_LF_UV(h, inner, opt);                   \
+    VP8_LF_UV(v, inner, opt)
+/* Declares all loop filter prototypes: empty infix, _inner infix, and the two simple ones. */
+#define VP8_LF(opt)                             \
+    VP8_LF_HV(,       opt);                     \
+    VP8_LF_HV(_inner, opt);                     \
+    VP8_LF_SIMPLE(h, opt);                      \
+    VP8_LF_SIMPLE(v, opt)
+/* Prototype of one ff_put_vp8_<n>_<opt> function (dst and src with their strides, then h, x, y). */
+#define VP8_MC(n, opt)                                                  \
+    void ff_put_vp8_##n##_##opt(uint8_t *dst, ptrdiff_t dststride,      \
+                                uint8_t *src, ptrdiff_t srcstride,      \
+                                int h, int x, int y)
+/* Declares pixels<w> plus the eight epel<w>_* prototypes (h4, h6, v4, h4v4, h6v4, v6, h4v6, h6v6). */
+#define VP8_EPEL(w, opt)                        \
+    VP8_MC(pixels ## w, opt);                   \
+    VP8_MC(epel ## w ## _h4, opt);              \
+    VP8_MC(epel ## w ## _h6, opt);              \
+    VP8_MC(epel ## w ## _v4, opt);              \
+    VP8_MC(epel ## w ## _h4v4, opt);            \
+    VP8_MC(epel ## w ## _h6v4, opt);            \
+    VP8_MC(epel ## w ## _v6, opt);              \
+    VP8_MC(epel ## w ## _h4v6, opt);            \
+    VP8_MC(epel ## w ## _h6v6, opt)
+
+#endif /* AVCODEC_AARCH64_VP8DSP_H */
diff --git a/libavcodec/aarch64/vp8dsp_init_aarch64.c b/libavcodec/aarch64/vp8dsp_init_aarch64.c
new file mode 100644
index 0000000000000..dbc07408a0503
--- /dev/null
+++ b/libavcodec/aarch64/vp8dsp_init_aarch64.c
@@ -0,0 +1,77 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/vp8dsp.h"
+#include "vp8dsp.h"
+
+void ff_vp8_luma_dc_wht_neon(int16_t block[4][4][16], int16_t dc[16]);
+
+void ff_vp8_idct_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add_neon(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_neon(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+
+VP8_LF(neon);
+
+VP8_EPEL(16, neon);
+VP8_EPEL(8,  neon);
+
+
+av_cold void ff_vp78dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    if (have_neon(av_get_cpu_flags())) {
+        /* NEON reported: override entries of table [0] with the *16 routines ... */
+        dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_neon;
+        dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_neon;
+        dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_neon;
+        dsp->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_neon;
+        /* ... and entries of table [1] with the *8 routines; all other entries stay untouched. */
+        dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_neon;
+        dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_neon;
+        dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_neon;
+        dsp->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_neon;
+    }
+}
+
+av_cold void ff_vp8dsp_init_aarch64(VP8DSPContext *dsp)
+{
+    /* Without NEON the context is left exactly as the caller passed it in. */
+    if (!have_neon(av_get_cpu_flags()))
+        return;
+
+    dsp->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_neon;
+    dsp->vp8_idct_dc_add    = ff_vp8_idct_dc_add_neon;
+    dsp->vp8_idct_add       = ff_vp8_idct_add_neon;
+    /* loop filters taking flim_E, flim_I and hev_thresh */
+    dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_neon;
+    dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_neon;
+    dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_neon;
+    dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_neon;
+    /* their _inner counterparts */
+    dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_neon;
+    dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_neon;
+    dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_neon;
+    dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_neon;
+    /* simple loop filters (single flim argument) */
+    dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter16_simple_neon;
+    dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter16_simple_neon;
+}
diff --git a/libavcodec/aarch64/vp8dsp_neon.S b/libavcodec/aarch64/vp8dsp_neon.S
new file mode 100644
index 0000000000000..0ce9e301de937
--- /dev/null
+++ b/libavcodec/aarch64/vp8dsp_neon.S
@@ -0,0 +1,1031 @@
+/*
+ * VP8 NEON optimisations
+ *
+ * Copyright (c) 2010 Rob Clark <rob@ti.com>
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2018 Magnus Röös <mla2.roos@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+function ff_vp8_idct_add_neon, export=1
+        ld1             {v0.8b - v3.8b},  [x1]          // x1 = block: four 16-bit coefficients per register
+        mov             w4,  #20091
+        movk            w4,  #35468/2, lsl #16
+        dup             v4.2s, w4                       // v4.h[0] = 20091, v4.h[1] = 35468/2
+        // First pass over the coefficients as loaded.
+        smull           v26.4s, v1.4h,  v4.h[0]         // x * 20091 (32 bit)
+        smull           v27.4s, v3.4h,  v4.h[0]
+        sqdmulh         v20.4h, v1.4h,  v4.h[1]         // (2 * x * (35468/2)) >> 16
+        sqdmulh         v23.4h, v3.4h,  v4.h[1]
+        sqshrn          v21.4h, v26.4s, #16             // (x * 20091) >> 16
+        sqshrn          v22.4h, v27.4s, #16
+        add             v21.4h, v21.4h, v1.4h           // x + ((x * 20091) >> 16)
+        add             v22.4h, v22.4h, v3.4h
+
+        add             v16.4h,  v0.4h,   v2.4h
+        sub             v17.4h,  v0.4h,   v2.4h
+
+        add             v18.4h,  v21.4h,  v23.4h
+        sub             v19.4h,  v20.4h,  v22.4h
+
+        add             v0.4h,   v16.4h,  v18.4h
+        add             v1.4h,   v17.4h,  v19.4h
+        sub             v3.4h,   v16.4h,  v18.4h
+        sub             v2.4h,   v17.4h,  v19.4h
+
+        transpose_4x4H  v0, v1, v2, v3, v24, v5, v6, v7
+        // Second pass on the transposed data, interleaved with clearing block and loading dst.
+        movi            v29.8h, #0
+        smull           v26.4s,     v1.4h,  v4.h[0]
+        st1             {v29.8h},   [x1],   #16         // zero the first 16 bytes of block
+        smull           v27.4s,     v3.4h,  v4.h[0]
+        st1             {v29.16b},  [x1]                // zero the second 16 bytes of block
+        sqdmulh         v21.4h,     v1.4h,  v4.h[1]
+        sqdmulh         v23.4h,     v3.4h,  v4.h[1]
+        sqshrn          v20.4h,     v26.4s, #16
+        sqshrn          v22.4h,     v27.4s, #16
+        add             v20.4h,     v20.4h, v1.4h
+        add             v22.4h,     v22.4h, v3.4h
+        add             v16.4h,     v0.4h,  v2.4h
+        sub             v17.4h,     v0.4h,  v2.4h
+
+        add             v18.4h,     v20.4h, v23.4h
+        ld1             {v24.s}[0], [x0],   x2          // dst row 0: only the 4 bytes that are used
+        zip1            v16.2d,     v16.2d, v17.2d
+        sub             v19.4h,     v21.4h, v22.4h
+        ld1             {v25.s}[0], [x0],   x2          // dst row 1
+        zip1            v18.2d,     v18.2d, v19.2d
+        add             v0.8h,      v16.8h, v18.8h
+        ld1             {v25.s}[2], [x0],   x2          // dst row 2
+        sub             v1.8h,      v16.8h, v18.8h
+        ld1             {v24.s}[2], [x0],   x2          // dst row 3
+        srshr           v0.8h,      v0.8h,  #3
+        trn1            v24.4s,     v24.4s, v25.4s      // even lanes only: v24 = dst rows 0, 1, 3, 2
+        srshr           v1.8h,      v1.8h,  #3
+        sub             x0,  x0,  x2,  lsl #2           // rewind dst by the 4 rows just read
+
+        ext             v1.16b, v1.16b, v1.16b, #8
+        trn1            v3.2d,  v0.2d,  v1.2d
+        trn2            v0.2d,  v0.2d,  v1.2d
+        trn1            v1.8h,  v3.8h,  v0.8h
+        trn2            v3.8h,  v3.8h,  v0.8h
+        uzp1            v0.4s,  v1.4s,  v3.4s
+        uzp2            v1.4s,  v3.4s,  v1.4s
+
+        uaddw           v0.8h,  v0.8h, v24.8b           // add the dst pixels held in the low half of v24
+        uaddw2          v1.8h,  v1.8h, v24.16b          // add the dst pixels held in the high half of v24
+        sqxtun          v0.8b,  v0.8h                   // saturate to unsigned bytes
+        sqxtun2         v0.16b, v1.8h
+        st1             {v0.s}[0],  [x0], x2            // row 0
+        st1             {v0.s}[1],  [x0], x2            // row 1
+        st1             {v0.s}[3],  [x0], x2            // row 2 sits in lane 3
+        st1             {v0.s}[2],  [x0], x2            // row 3 sits in lane 2
+
+        ret
+endfunc
+
+function ff_vp8_idct_dc_add4y_neon, export=1
+        movi            v0.16b,  #0
+        mov             x3,  #32                            // byte step between the four values read from x1
+        ld1r            {v16.4h},    [x1]                   // first 16-bit value, replicated to 4 lanes
+        st1             {v0.h}[0],   [x1], x3               // zero it in memory and step to the next one
+        ld1r            {v17.4h},    [x1]
+        st1             {v0.h}[0],   [x1], x3
+        zip1            v16.2d,      v16.2d, v17.2d         // v16 = 4x first value, 4x second value
+        ld1r            {v18.4h},    [x1]
+        st1             {v0.h}[0],   [x1], x3
+        ld1r            {v19.4h},    [x1]
+        st1             {v0.h}[0],   [x1], x3
+        zip1            v18.2d,      v18.2d, v19.2d         // v18 = 4x third value, 4x fourth value
+        srshr           v16.8h,      v16.8h,  #3            // dc = (dc + 4) >> 3 (rounding shift)
+        ld1             {v0.16b},     [x0], x2              // 16 dst pixels of row 0
+        srshr           v18.8h,       v18.8h,  #3
+        ld1             {v1.16b},     [x0], x2
+        uaddw           v20.8h,       v16.8h,  v0.8b        // low 8 pixels + v16
+        ld1             {v2.16b},     [x0], x2
+        uaddw2          v0.8h,        v18.8h,   v0.16b      // high 8 pixels + v18
+        ld1             {v3.16b},     [x0], x2
+        uaddw           v21.8h, v16.8h,  v1.8b
+        uaddw2          v1.8h,  v18.8h,  v1.16b
+        uaddw           v22.8h, v16.8h,  v2.8b
+        uaddw2          v2.8h,  v18.8h,  v2.16b
+        uaddw           v23.8h, v16.8h,  v3.8b
+        uaddw2          v3.8h,  v18.8h,  v3.16b
+        sub             x0,  x0,  x2,  lsl #2               // rewind dst by the 4 rows just read
+        sqxtun          v20.8b,  v20.8h                     // saturate each sum to an unsigned byte
+        sqxtun2         v20.16b, v0.8h
+        sqxtun          v21.8b,  v21.8h
+        sqxtun2         v21.16b, v1.8h
+        sqxtun          v22.8b,  v22.8h
+        st1             {v20.16b},    [x0], x2
+        sqxtun2         v22.16b, v2.8h
+        st1             {v21.16b},    [x0], x2
+        sqxtun          v23.8b,  v23.8h
+        st1             {v22.16b},    [x0], x2
+        sqxtun2         v23.16b, v3.8h
+        st1             {v23.16b},    [x0], x2
+
+        ret
+endfunc
+
+function ff_vp8_idct_dc_add_neon, export=1
+        mov             w3,       #0
+        ld1r            {v2.8h},  [x1]                  // first 16-bit value at x1, replicated to 8 lanes
+        strh            w3,       [x1]                  // zero that value in memory
+        srshr           v2.8h,  v2.8h,  #3              // dc = (dc + 4) >> 3 (rounding shift)
+        ld1             {v0.s}[0],  [x0], x2            // 4 dst pixels of row 0
+        ld1             {v0.s}[1],  [x0], x2            // row 1
+        uaddw           v3.8h,  v2.8h,  v0.8b           // rows 0-1 + dc
+        ld1             {v1.s}[0],  [x0], x2            // row 2
+        ld1             {v1.s}[1],  [x0], x2            // row 3
+        uaddw           v4.8h,  v2.8h,  v1.8b           // rows 2-3 + dc
+        sqxtun          v0.8b,  v3.8h                   // saturate to unsigned bytes
+        sqxtun          v1.8b,  v4.8h
+        sub             x0,  x0,  x2, lsl #2            // rewind dst by the 4 rows just read
+        st1             {v0.s}[0],  [x0], x2
+        st1             {v0.s}[1],  [x0], x2
+        st1             {v1.s}[0],  [x0], x2
+        st1             {v1.s}[1],  [x0], x2
+        ret
+endfunc
+
+// VP8 loop filter core, applied to 16 pixel positions at once.
+//
+// Macro arguments:
+//   inner      - non-zero selects the inner-edge filter variant
+//   simple     - non-zero selects the simple filter (only P1..Q1 are read)
+//   hev_thresh - general purpose register holding hev_thresh (unused if simple)
+//
+// Register layout:
+//   P3..Q3 -> v0..v7
+//   flim_E -> v22
+//   flim_I -> v23
+//   hev_thresh -> register named by the hev_thresh macro argument
+//
+// The filtered pixels are left in place in v1..v6; v16-v23 are clobbered.
+//
+.macro  vp8_loop_filter, inner=0, simple=0, hev_thresh
+    .if \simple
+        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
+        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1)
+        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
+        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
+        uqadd           v19.16b, v17.16b,  v18.16b    // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
+        movi            v21.16b, #0x80
+        cmhs            v16.16b, v22.16b, v19.16b    // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim
+    .else
+        // calculate hev and normal_limit:
+        uabd            v20.16b, v2.16b,  v3.16b      // abs(P1-P0)
+        uabd            v21.16b, v5.16b,  v4.16b      // abs(Q1-Q0)
+        uabd            v18.16b, v0.16b,  v1.16b      // abs(P3-P2)
+        uabd            v19.16b, v1.16b,  v2.16b      // abs(P2-P1)
+        cmhs            v16.16b, v23.16b, v20.16b     // abs(P1-P0) <= flim_I
+        cmhs            v17.16b, v23.16b, v21.16b     // abs(Q1-Q0) <= flim_I
+        cmhs            v18.16b, v23.16b, v18.16b     // abs(P3-P2) <= flim_I
+        cmhs            v19.16b, v23.16b, v19.16b     // abs(P2-P1) <= flim_I
+        and             v16.16b, v17.16b, v16.16b
+        uabd            v17.16b, v7.16b,  v6.16b      // abs(Q3-Q2)
+        and             v16.16b, v16.16b, v19.16b
+        uabd            v19.16b, v6.16b,  v5.16b      // abs(Q2-Q1)
+        and             v16.16b, v16.16b, v18.16b
+        cmhs            v18.16b, v23.16b, v17.16b     // abs(Q3-Q2) <= flim_I
+        cmhs            v19.16b, v23.16b, v19.16b     // abs(Q2-Q1) <= flim_I
+        uabd            v17.16b, v3.16b,  v4.16b      // abs(P0-Q0)
+        uabd            v23.16b, v2.16b,  v5.16b      // abs(P1-Q1)
+        and             v16.16b, v16.16b, v18.16b
+        uqadd           v17.16b, v17.16b, v17.16b     // abs(P0-Q0) * 2
+        and             v16.16b, v16.16b, v19.16b
+        ushr            v18.16b, v23.16b, #1          // abs(P1-Q1) / 2
+        dup             v23.16b, \hev_thresh          // hev_thresh
+        uqadd           v19.16b, v17.16b, v18.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
+        cmhi            v20.16b, v20.16b, v23.16b     // abs(P1-P0) > hev_thresh
+        cmhs            v19.16b, v22.16b, v19.16b     // (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim_E
+        cmhi            v22.16b, v21.16b, v23.16b     // abs(Q1-Q0) > hev_thresh
+        and             v16.16b, v16.16b, v19.16b
+        movi            v21.16b, #0x80
+        orr             v17.16b, v20.16b, v22.16b
+    .endif
+
+        // at this point:
+        //   v16: normal_limit
+        //   v17: hev (only computed when simple=0; the simple path never reads it)
+
+        // convert to signed value:
+        eor            v3.16b, v3.16b, v21.16b           // PS0 = P0 ^ 0x80
+        eor            v4.16b, v4.16b, v21.16b           // QS0 = Q0 ^ 0x80
+
+        movi           v20.8h, #3
+        ssubl          v18.8h, v4.8b,  v3.8b             // QS0 - PS0
+        ssubl2         v19.8h, v4.16b, v3.16b            //   (widened to 16bit)
+        eor            v2.16b, v2.16b, v21.16b           // PS1 = P1 ^ 0x80
+        eor            v5.16b, v5.16b, v21.16b           // QS1 = Q1 ^ 0x80
+        mul            v18.8h, v18.8h, v20.8h            // w = 3 * (QS0 - PS0)
+        mul            v19.8h, v19.8h, v20.8h
+
+        sqsub          v20.16b, v2.16b, v5.16b           // clamp(PS1-QS1)
+        movi           v22.16b, #4
+        movi           v23.16b, #3
+    .if \inner
+        and            v20.16b, v20.16b, v17.16b         // if(hev) w += clamp(PS1-QS1)
+    .endif
+        saddw          v18.8h,  v18.8h, v20.8b           // w += clamp(PS1-QS1)
+        saddw2         v19.8h,  v19.8h, v20.16b
+        sqxtn          v18.8b,  v18.8h                   // narrow result back into v18
+        sqxtn2         v18.16b, v19.8h
+    .if !\inner && !\simple
+        eor            v1.16b,  v1.16b,  v21.16b         // PS2 = P2 ^ 0x80
+        eor            v6.16b,  v6.16b,  v21.16b         // QS2 = Q2 ^ 0x80
+    .endif
+        and            v18.16b, v18.16b, v16.16b         // w &= normal_limit
+
+        // registers used at this point..
+        //   v0 -> P3  (don't corrupt)
+        //   v1-v6 -> PS2-QS2
+        //   v7 -> Q3  (don't corrupt)
+        //   v17 -> hev
+        //   v18 -> w
+        //   v21 -> #0x80
+        //   v22 -> #4
+        //   v23 -> #3
+        //   v16, v19, v20 -> unused
+        //
+        // filter_common:   is4tap==1
+        //   c1 = clamp(w + 4) >> 3;
+        //   c2 = clamp(w + 3) >> 3;
+        //   Q0 = s2u(QS0 - c1);
+        //   P0 = s2u(PS0 + c2);
+
+    .if \simple
+        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
+        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+    .elseif \inner
+        // the !is4tap case of filter_common, only used for inner blocks
+        //   c3 = ((c1&~hev) + 1) >> 1;
+        //   Q1 = s2u(QS1 - c3);
+        //   P1 = s2u(PS1 + c3);
+        sqadd          v19.16b, v18.16b, v22.16b           // c1 = clamp(w+4)
+        sqadd          v20.16b, v18.16b, v23.16b           // c2 = clamp(w+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+        bic            v19.16b, v19.16b, v17.16b           // c1 & ~hev
+        eor            v4.16b,  v4.16b,  v21.16b           // Q0 = QS0 ^ 0x80
+        srshr          v19.16b, v19.16b, #1                // c3 = ((c1&~hev) + 1) >> 1
+        eor            v3.16b,  v3.16b,  v21.16b           // P0 = PS0 ^ 0x80
+        sqsub          v5.16b,  v5.16b,  v19.16b           // QS1 = clamp(QS1-c3)
+        sqadd          v2.16b,  v2.16b,  v19.16b           // PS1 = clamp(PS1+c3)
+        eor            v5.16b,  v5.16b,  v21.16b           // Q1 = QS1 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b           // P1 = PS1 ^ 0x80
+    .else
+        and            v20.16b, v18.16b, v17.16b           // w & hev
+        sqadd          v19.16b, v20.16b, v22.16b           // c1 = clamp((w&hev)+4)
+        sqadd          v20.16b, v20.16b, v23.16b           // c2 = clamp((w&hev)+3)
+        sshr           v19.16b, v19.16b, #3                // c1 >>= 3
+        sshr           v20.16b, v20.16b, #3                // c2 >>= 3
+        bic            v18.16b, v18.16b, v17.16b           // w &= ~hev
+        sqsub          v4.16b,  v4.16b,  v19.16b           // QS0 = clamp(QS0-c1)
+        sqadd          v3.16b,  v3.16b,  v20.16b           // PS0 = clamp(PS0+c2)
+
+        // filter_mbedge:
+        //   a = clamp((27*w + 63) >> 7);
+        //   Q0 = s2u(QS0 - a);
+        //   P0 = s2u(PS0 + a);
+        //   a = clamp((18*w + 63) >> 7);
+        //   Q1 = s2u(QS1 - a);
+        //   P1 = s2u(PS1 + a);
+        //   a = clamp((9*w + 63) >> 7);
+        //   Q2 = s2u(QS2 - a);
+        //   P2 = s2u(PS2 + a);
+        movi           v17.8h,  #63
+        sshll          v22.8h,  v18.8b, #3               // 8*w (widened to 16bit)
+        sshll2         v23.8h,  v18.16b, #3
+        saddw          v22.8h,  v22.8h, v18.8b           // 9*w
+        saddw2         v23.8h,  v23.8h, v18.16b
+        add            v16.8h,  v17.8h, v22.8h
+        add            v17.8h,  v17.8h, v23.8h           //  9*w + 63
+        add            v19.8h,  v16.8h, v22.8h
+        add            v20.8h,  v17.8h, v23.8h           // 18*w + 63
+        add            v22.8h,  v19.8h, v22.8h
+        add            v23.8h,  v20.8h, v23.8h           // 27*w + 63
+        sqshrn         v16.8b,  v16.8h,  #7
+        sqshrn2        v16.16b, v17.8h, #7              // clamp(( 9*w + 63)>>7)
+        sqshrn         v19.8b,  v19.8h, #7
+        sqshrn2        v19.16b, v20.8h, #7              // clamp((18*w + 63)>>7)
+        sqshrn         v22.8b,  v22.8h, #7
+        sqshrn2        v22.16b, v23.8h, #7              // clamp((27*w + 63)>>7)
+        sqadd          v1.16b,  v1.16b,  v16.16b        // PS2 = clamp(PS2+a)
+        sqsub          v6.16b,  v6.16b,  v16.16b        // QS2 = clamp(QS2-a)
+        sqadd          v2.16b,  v2.16b,  v19.16b        // PS1 = clamp(PS1+a)
+        sqsub          v5.16b,  v5.16b,  v19.16b        // QS1 = clamp(QS1-a)
+        sqadd          v3.16b,  v3.16b,  v22.16b        // PS0 = clamp(PS0+a)
+        sqsub          v4.16b,  v4.16b,  v22.16b        // QS0 = clamp(QS0-a)
+        eor            v3.16b,  v3.16b,  v21.16b        // P0 = PS0 ^ 0x80
+        eor            v4.16b,  v4.16b,  v21.16b        // Q0 = QS0 ^ 0x80
+        eor            v2.16b,  v2.16b,  v21.16b        // P1 = PS1 ^ 0x80
+        eor            v5.16b,  v5.16b,  v21.16b        // Q1 = QS1 ^ 0x80
+        eor            v1.16b,  v1.16b,  v21.16b        // P2 = PS2 ^ 0x80
+        eor            v6.16b,  v6.16b,  v21.16b        // Q2 = QS2 ^ 0x80
+    .endif
+.endm
+
+// Generates ff_vp8_v_loop_filter16<name>_neon: filters a horizontal edge that
+// is 16 pixels wide, working on whole rows above and below the edge.
+//   x0: pointer to the Q0 row (first row below the edge)
+//   x1: row stride in bytes
+//   w2: flim_E, w3: flim_I, w4: hev_thresh (w3 and w4 are unused when simple=1)
+.macro  vp8_v_loop_filter16 name, inner=0, simple=0
+function ff_vp8_v_loop_filter16\name\()_neon, export=1
+        sub             x0,  x0,  x1,  lsl #1+!\simple  // step back 2 rows (simple, to P1) or 4 rows (to P3)
+
+        // Load pixels:
+    .if !\simple
+        ld1             {v0.16b},     [x0], x1 // P3
+        ld1             {v1.16b},     [x0], x1 // P2
+    .endif
+        ld1             {v2.16b},     [x0], x1 // P1
+        ld1             {v3.16b},     [x0], x1 // P0
+        ld1             {v4.16b},     [x0], x1 // Q0
+        ld1             {v5.16b},     [x0], x1 // Q1
+    .if !\simple
+        ld1             {v6.16b},     [x0], x1 // Q2
+        ld1             {v7.16b},     [x0]     // Q3
+        dup             v23.16b, w3                 // flim_I
+    .endif
+        dup             v22.16b, w2                 // flim_E
+
+        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
+
+        // simple:     back up to P1:  dst -= stride * 4
+        // otherwise:  back up to P2:  dst -= stride * 6
+        sub             x0,  x0,  x1,  lsl #2
+    .if !\simple
+        sub             x0,  x0,  x1,  lsl #1
+
+        // Store pixels:
+        st1             {v1.16b},     [x0], x1 // P2
+    .endif
+        st1             {v2.16b},     [x0], x1 // P1
+        st1             {v3.16b},     [x0], x1 // P0
+        st1             {v4.16b},     [x0], x1 // Q0
+        st1             {v5.16b},     [x0], x1 // Q1
+    .if !\simple
+        st1             {v6.16b},     [x0]     // Q2
+    .endif
+
+        ret
+endfunc
+.endm
+
+// Instantiate the normal (macroblock edge), inner and simple variants.
+vp8_v_loop_filter16
+vp8_v_loop_filter16 _inner,  inner=1
+vp8_v_loop_filter16 _simple, simple=1
+
+// Generates ff_vp8_v_loop_filter8uv<name>_neon: filters a horizontal edge of
+// two 8-pixel-wide planes at once. Rows addressed through x0 occupy the low
+// 64 bits of each vector, rows addressed through x1 the high 64 bits.
+//   x0: pointer to the Q0 row of the first plane (u)
+//   x1: pointer to the Q0 row of the second plane (v)
+//   x2: row stride in bytes (shared by both planes)
+//   w3: flim_E, w4: flim_I, w5: hev_thresh
+.macro  vp8_v_loop_filter8uv name, inner=0
+function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
+        sub             x0,  x0,  x2,  lsl #2   // step back 4 rows to P3
+        sub             x1,  x1,  x2,  lsl #2
+        // Load pixels:
+        ld1          {v0.d}[0],     [x0], x2  // P3
+        ld1          {v0.d}[1],     [x1], x2  // P3
+        ld1          {v1.d}[0],     [x0], x2  // P2
+        ld1          {v1.d}[1],     [x1], x2  // P2
+        ld1          {v2.d}[0],     [x0], x2  // P1
+        ld1          {v2.d}[1],     [x1], x2  // P1
+        ld1          {v3.d}[0],     [x0], x2  // P0
+        ld1          {v3.d}[1],     [x1], x2  // P0
+        ld1          {v4.d}[0],     [x0], x2  // Q0
+        ld1          {v4.d}[1],     [x1], x2  // Q0
+        ld1          {v5.d}[0],     [x0], x2  // Q1
+        ld1          {v5.d}[1],     [x1], x2  // Q1
+        ld1          {v6.d}[0],     [x0], x2  // Q2
+        ld1          {v6.d}[1],     [x1], x2  // Q2
+        ld1          {v7.d}[0],     [x0]      // Q3
+        ld1          {v7.d}[1],     [x1]      // Q3
+
+        dup          v22.16b, w3                 // flim_E
+        dup          v23.16b, w4                 // flim_I
+
+        vp8_loop_filter inner=\inner, hev_thresh=w5
+
+        // back up to P2:  u,v -= stride * 6
+        sub          x0,  x0,  x2,  lsl #2
+        sub          x1,  x1,  x2,  lsl #2
+        sub          x0,  x0,  x2,  lsl #1
+        sub          x1,  x1,  x2,  lsl #1
+
+        // Store pixels:
+
+        st1          {v1.d}[0],     [x0], x2  // P2
+        st1          {v1.d}[1],     [x1], x2  // P2
+        st1          {v2.d}[0],     [x0], x2  // P1
+        st1          {v2.d}[1],     [x1], x2  // P1
+        st1          {v3.d}[0],     [x0], x2  // P0
+        st1          {v3.d}[1],     [x1], x2  // P0
+        st1          {v4.d}[0],     [x0], x2  // Q0
+        st1          {v4.d}[1],     [x1], x2  // Q0
+        st1          {v5.d}[0],     [x0], x2  // Q1
+        st1          {v5.d}[1],     [x1], x2  // Q1
+        st1          {v6.d}[0],     [x0]      // Q2
+        st1          {v6.d}[1],     [x1]      // Q2
+
+        ret
+endfunc
+.endm
+
+// Instantiate the normal (macroblock edge) and inner variants.
+vp8_v_loop_filter8uv
+vp8_v_loop_filter8uv _inner, inner=1
+
+// Generates ff_vp8_h_loop_filter16<name>_neon: filters a vertical edge that is
+// 16 rows tall. Eight bytes straddling the edge are loaded from each row,
+// transposed so that v0..v7 hold P3..Q3, filtered, transposed back and stored.
+//   x0: pointer to the first pixel right of the edge (Q0) in the first row
+//   x1: row stride in bytes
+//   w2: flim_E, w3: flim_I, w4: hev_thresh (w3 and w4 are unused when simple=1)
+// transpose_8x16B is defined outside this section; it is given v30/v31,
+// presumably as scratch registers.
+.macro  vp8_h_loop_filter16 name, inner=0, simple=0
+function ff_vp8_h_loop_filter16\name\()_neon, export=1
+
+        sub             x0,  x0,  #4            // start 4 pixels left of the edge
+        // Load pixels:
+        ld1             {v0.d}[0], [x0], x1     // rows 0-7 -> low halves
+        ld1             {v1.d}[0], [x0], x1
+        ld1             {v2.d}[0], [x0], x1
+        ld1             {v3.d}[0], [x0], x1
+        ld1             {v4.d}[0], [x0], x1
+        ld1             {v5.d}[0], [x0], x1
+        ld1             {v6.d}[0], [x0], x1
+        ld1             {v7.d}[0], [x0], x1
+        ld1             {v0.d}[1], [x0], x1     // rows 8-15 -> high halves
+        ld1             {v1.d}[1], [x0], x1
+        ld1             {v2.d}[1], [x0], x1
+        ld1             {v3.d}[1], [x0], x1
+        ld1             {v4.d}[1], [x0], x1
+        ld1             {v5.d}[1], [x0], x1
+        ld1             {v6.d}[1], [x0], x1
+        ld1             {v7.d}[1], [x0], x1
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+
+        dup             v22.16b, w2                 // flim_E
+    .if !\simple
+        dup             v23.16b, w3                 // flim_I
+    .endif
+
+        vp8_loop_filter inner=\inner, simple=\simple, hev_thresh=w4
+
+        sub             x0,  x0,  x1, lsl #4    // backup 16 rows
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+
+        // Store pixels:
+        st1             {v0.d}[0], [x0], x1
+        st1             {v1.d}[0], [x0], x1
+        st1             {v2.d}[0], [x0], x1
+        st1             {v3.d}[0], [x0], x1
+        st1             {v4.d}[0], [x0], x1
+        st1             {v5.d}[0], [x0], x1
+        st1             {v6.d}[0], [x0], x1
+        st1             {v7.d}[0], [x0], x1
+        st1             {v0.d}[1], [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        st1             {v2.d}[1], [x0], x1
+        st1             {v3.d}[1], [x0], x1
+        st1             {v4.d}[1], [x0], x1
+        st1             {v5.d}[1], [x0], x1
+        st1             {v6.d}[1], [x0], x1
+        st1             {v7.d}[1], [x0]
+
+        ret
+endfunc
+.endm
+
+// Instantiate the normal (macroblock edge), inner and simple variants.
+vp8_h_loop_filter16
+vp8_h_loop_filter16 _inner,  inner=1
+vp8_h_loop_filter16 _simple, simple=1
+
+// Generates ff_vp8_h_loop_filter8uv<name>_neon: filters a vertical edge that is
+// 8 rows tall in two planes at once. Rows of the u plane go to the low halves
+// of v0..v7 and rows of the v plane to the high halves; after the transpose
+// v0..v7 hold P3..Q3 for the loop filter.
+//   x0: u pointer, x1: v pointer (first pixel right of the edge, first row)
+//   x2: row stride in bytes (shared by both planes)
+//   w3: flim_E, w4: flim_I, w5: hev_thresh
+// transpose_8x16B is defined outside this section; it is given v30/v31,
+// presumably as scratch registers.
+.macro  vp8_h_loop_filter8uv name, inner=0
+function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
+        sub             x0,  x0,  #4            // start 4 pixels left of the edge
+        sub             x1,  x1,  #4
+
+        // Load pixels:
+        ld1          {v0.d}[0],     [x0], x2 // load u
+        ld1          {v0.d}[1],     [x1], x2 // load v
+        ld1          {v1.d}[0],     [x0], x2
+        ld1          {v1.d}[1],     [x1], x2
+        ld1          {v2.d}[0],     [x0], x2
+        ld1          {v2.d}[1],     [x1], x2
+        ld1          {v3.d}[0],     [x0], x2
+        ld1          {v3.d}[1],     [x1], x2
+        ld1          {v4.d}[0],     [x0], x2
+        ld1          {v4.d}[1],     [x1], x2
+        ld1          {v5.d}[0],     [x0], x2
+        ld1          {v5.d}[1],     [x1], x2
+        ld1          {v6.d}[0],     [x0], x2
+        ld1          {v6.d}[1],     [x1], x2
+        ld1          {v7.d}[0],     [x0], x2
+        ld1          {v7.d}[1],     [x1], x2
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+
+        dup             v22.16b, w3                 // flim_E
+        dup             v23.16b, w4                 // flim_I
+
+        vp8_loop_filter inner=\inner, hev_thresh=w5
+
+        sub             x0,  x0,  x2, lsl #3    // backup u 8 rows
+        sub             x1,  x1,  x2, lsl #3    // backup v 8 rows
+
+        transpose_8x16B   v0,  v1,  v2,  v3,  v4,  v5,  v6,  v7, v30, v31
+
+        // Store pixels:
+        st1          {v0.d}[0],     [x0], x2 // store u
+        st1          {v0.d}[1],     [x1], x2 // store v
+        st1          {v1.d}[0],     [x0], x2
+        st1          {v1.d}[1],     [x1], x2
+        st1          {v2.d}[0],     [x0], x2
+        st1          {v2.d}[1],     [x1], x2
+        st1          {v3.d}[0],     [x0], x2
+        st1          {v3.d}[1],     [x1], x2
+        st1          {v4.d}[0],     [x0], x2
+        st1          {v4.d}[1],     [x1], x2
+        st1          {v5.d}[0],     [x0], x2
+        st1          {v5.d}[1],     [x1], x2
+        st1          {v6.d}[0],     [x0], x2
+        st1          {v6.d}[1],     [x1], x2
+        st1          {v7.d}[0],     [x0]
+        st1          {v7.d}[1],     [x1]
+
+        ret
+
+endfunc
+.endm
+
+// Instantiate the normal (macroblock edge) and inner variants.
+vp8_h_loop_filter8uv
+vp8_h_loop_filter8uv _inner, inner=1
+
+
+// Copy a block of 16-byte-wide rows, four rows per loop iteration.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count; the loop repeats while the remaining count is > 0, so at
+//       least 4 rows are copied and the total is always a multiple of 4
+function ff_put_vp8_pixels16_neon, export=1
+1:
+        subs            w4, w4, #4              // 4 rows handled per pass; sets flags for bgt
+        ld1             {v0.16b},     [x2], x3
+        ld1             {v1.16b},     [x2], x3
+        ld1             {v2.16b},     [x2], x3
+        ld1             {v3.16b},     [x2], x3
+        st1             {v0.16b},     [x0], x1
+        st1             {v1.16b},     [x0], x1
+        st1             {v2.16b},     [x0], x1
+        st1             {v3.16b},     [x0], x1
+        bgt             1b
+        ret
+endfunc
+
+// Copy a block of 8-byte-wide rows, four rows per loop iteration. Two rows
+// are packed into each vector register (low half, then high half).
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count; the loop repeats while the remaining count is > 0, so at
+//       least 4 rows are copied and the total is always a multiple of 4
+function ff_put_vp8_pixels8_neon, export=1
+1:
+        subs            w4, w4, #4              // 4 rows handled per pass; sets flags for bgt
+        ld1             {v0.8b},   [x2], x3
+        ld1             {v0.d}[1], [x2], x3
+        ld1             {v1.8b},   [x2], x3
+        ld1             {v1.d}[1], [x2], x3
+        st1             {v0.8b},   [x0], x1
+        st1             {v0.d}[1], [x0], x1
+        st1             {v1.8b},   [x0], x1
+        st1             {v1.d}[1], [x0], x1
+        bgt             1b
+        ret
+endfunc
+
+/* 4/6-tap 8th-pel MC */
+
+// 6-tap horizontal filter producing 8 output pixels.
+//   \s0, \s1: two consecutive 8-byte source vectors (bytes 0..12 are used)
+//   \d:       receives the 8 filtered bytes
+//   v0.h[0..5]: tap magnitudes; taps 1 and 4 are subtracted, the rest added
+// For output i, with s = \s0:\s1 viewed as one byte string:
+//   sat_u8((s[i]*f0 - s[i+1]*f1 + s[i+2]*f2 + s[i+3]*f3 - s[i+4]*f4 + s[i+5]*f5 + 64) >> 7)
+// Clobbers v18, v19 and v21-v26.
+.macro  vp8_epel8_h6    d,   s0,   s1
+        ext             v22.8b, \s0\().8b,  \s1\().8b,  #1
+        uxtl            v18.8h, \s0\().8b               // s[i+0]
+        ext             v23.8b, \s0\().8b,  \s1\().8b,  #2
+        uxtl            v19.8h, v22.8b                  // s[i+1]
+        ext             v24.8b, \s0\().8b,  \s1\().8b,  #3
+        uxtl            v21.8h, v23.8b                  // s[i+2]
+        ext             v25.8b, \s0\().8b,  \s1\().8b,  #4
+        uxtl            v22.8h, v24.8b                  // s[i+3]
+        ext             v26.8b, \s0\().8b,  \s1\().8b,  #5
+        uxtl            v25.8h, v25.8b                  // s[i+4]
+        mul             v21.8h, v21.8h, v0.h[2]
+        uxtl            v26.8h, v26.8b                  // s[i+5]
+        mul             v22.8h, v22.8h, v0.h[3]
+        mls             v21.8h, v19.8h, v0.h[1]
+        mls             v22.8h, v25.8h, v0.h[4]
+        mla             v21.8h, v18.8h, v0.h[0]
+        mla             v22.8h, v26.8h, v0.h[5]
+        sqadd           v22.8h, v21.8h, v22.8h          // combine both partial sums (saturating)
+        sqrshrun        \d\().8b, v22.8h, #7            // round, >> 7, saturate to u8
+.endm
+
+// 6-tap horizontal filter producing 16 output pixels.
+//   \v0, \v1: two consecutive 16-byte source vectors (bytes 0..20 are used)
+//   \d0:      receives the 16 filtered bytes
+//   v0.h[0..5]: tap magnitudes; taps 1 and 4 are subtracted, the rest added
+// Note that the macro arguments are named v0/v1 but only the escaped forms
+// (\v0, \v1) refer to them; a plain v0 below is the coefficient register.
+// Clobbers v1, v2, v3 and v16-v23 (the source arguments may be among these,
+// as they are for the callers in this file, which pass v1 and v2).
+.macro  vp8_epel16_h6   d0,  v0,  v1
+        ext             v22.16b, \v0\().16b, \v1\().16b, #3
+        ext             v23.16b, \v0\().16b, \v1\().16b, #4
+        uxtl            v19.8h,  v22.8b                 // s[i+3], pixels 0-7
+        uxtl2           v22.8h,  v22.16b                // s[i+3], pixels 8-15
+        ext             v3.16b,  \v0\().16b, \v1\().16b, #2
+        uxtl            v20.8h,  v23.8b                 // s[i+4]
+        uxtl2           v23.8h,  v23.16b
+        ext             v16.16b, \v0\().16b, \v1\().16b, #1
+        uxtl            v18.8h,  v3.8b                  // s[i+2]
+        uxtl2           v3.8h,   v3.16b
+        ext             v2.16b,  \v0\().16b, \v1\().16b, #5
+        uxtl            v21.8h,  v2.8b                  // s[i+5]
+        uxtl2           v2.8h,   v2.16b
+        uxtl            v17.8h,  v16.8b                 // s[i+1]
+        uxtl2           v16.8h,  v16.16b
+        mul             v19.8h,  v19.8h, v0.h[3]
+        mul             v18.8h,  v18.8h, v0.h[2]
+        mul             v3.8h,   v3.8h,  v0.h[2]
+        mul             v22.8h,  v22.8h, v0.h[3]
+        mls             v19.8h,  v20.8h, v0.h[4]
+        uxtl            v20.8h,  \v0\().8b              // s[i+0]
+        uxtl2           v1.8h,   \v0\().16b
+        mls             v18.8h,  v17.8h, v0.h[1]
+        mls             v3.8h,   v16.8h, v0.h[1]
+        mls             v22.8h,  v23.8h, v0.h[4]
+        mla             v18.8h,  v20.8h, v0.h[0]
+        mla             v19.8h,  v21.8h, v0.h[5]
+        mla             v3.8h,   v1.8h,  v0.h[0]
+        mla             v22.8h,  v2.8h,  v0.h[5]
+        sqadd           v19.8h,  v18.8h, v19.8h         // pixels 0-7: combine partial sums
+        sqadd           v22.8h,  v3.8h,  v22.8h         // pixels 8-15
+        sqrshrun        \d0\().8b,  v19.8h, #7          // round, >> 7, saturate to u8
+        sqrshrun2       \d0\().16b, v22.8h, #7
+.endm
+
+// 6-tap vertical filter producing one row of 8 pixels.
+//   \s0..\s5: six consecutive source rows (8 bytes each in the low half);
+//             all six registers are widened in place and therefore destroyed
+//   \d0:      receives the 8 filtered bytes
+//   v0.h[0..5]: tap magnitudes; taps 1 and 4 are subtracted, the rest added
+// Result: sat_u8((s0*f0 - s1*f1 + s2*f2 + s3*f3 - s4*f4 + s5*f5 + 64) >> 7)
+.macro  vp8_epel8_v6    d0,  s0,  s1,  s2, s3, s4, s5
+        uxtl            \s2\().8h, \s2\().8b
+        uxtl            \s3\().8h, \s3\().8b
+        uxtl            \s1\().8h, \s1\().8b
+        uxtl            \s4\().8h, \s4\().8b
+        uxtl            \s0\().8h, \s0\().8b
+        uxtl            \s5\().8h, \s5\().8b
+        mul             \s2\().8h, \s2\().8h, v0.h[2]
+        mul             \s3\().8h, \s3\().8h, v0.h[3]
+        mls             \s2\().8h, \s1\().8h, v0.h[1]
+        mls             \s3\().8h, \s4\().8h, v0.h[4]
+        mla             \s2\().8h, \s0\().8h, v0.h[0]
+        mla             \s3\().8h, \s5\().8h, v0.h[5]
+        sqadd           \s3\().8h, \s2\().8h, \s3\().8h // combine both partial sums (saturating)
+        sqrshrun        \d0\().8b, \s3\().8h, #7        // round, >> 7, saturate to u8
+.endm
+
+// 6-tap vertical filter producing two consecutive rows of 8 pixels.
+//   \s0..\s6: seven consecutive source rows (8 bytes each in the low half);
+//             all seven registers are widened in place and therefore destroyed
+//   \d0:      first output row, filtered from \s0..\s5
+//   \d1:      second output row, filtered from \s1..\s6
+//   v0.h[0..5]: tap magnitudes; taps 1 and 4 are subtracted, the rest added
+// Clobbers v31 in addition to the source registers.
+.macro  vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
+        uxtl            \s0\().8h, \s0\().8b
+        uxtl            \s3\().8h, \s3\().8b
+        uxtl            \s6\().8h, \s6\().8b
+        uxtl            \s1\().8h, \s1\().8b
+        uxtl            \s4\().8h, \s4\().8b
+        uxtl            \s2\().8h, \s2\().8b
+        uxtl            \s5\().8h, \s5\().8b
+        mul             \s0\().8h, \s0\().8h, v0.h[0]   // row 0, taps 0-2 accumulate in \s0
+        mul             v31.8h   , \s3\().8h, v0.h[3]   // row 0, taps 3-5 accumulate in v31
+        mul             \s3\().8h, \s3\().8h, v0.h[2]   // row 1, taps 0-2 accumulate in \s3
+        mul             \s6\().8h, \s6\().8h, v0.h[5]   // row 1, taps 3-5 accumulate in \s6
+
+        mls             \s0\().8h, \s1\().8h, v0.h[1]
+        mls             v31.8h   , \s4\().8h, v0.h[4]
+        mls             \s3\().8h, \s2\().8h, v0.h[1]
+        mls             \s6\().8h, \s5\().8h, v0.h[4]
+
+        mla             \s0\().8h, \s2\().8h, v0.h[2]
+        mla             v31.8h   , \s5\().8h, v0.h[5]
+        mla             \s3\().8h, \s1\().8h, v0.h[0]
+        mla             \s6\().8h, \s4\().8h, v0.h[3]
+        sqadd           v31.8h   , \s0\().8h, v31.8h    // row 0 total
+        sqadd           \s6\().8h, \s3\().8h, \s6\().8h // row 1 total
+        sqrshrun        \d0\().8b, v31.8h,    #7        // round, >> 7, saturate to u8
+        sqrshrun        \d1\().8b, \s6\().8h, #7
+.endm
+
+// 4-tap horizontal filter producing 8 output pixels.
+//   \v0, \v1: two consecutive 8-byte source vectors (bytes 0..10 are used)
+//   \d:       receives the 8 filtered bytes
+//   v0.h[1..4]: tap magnitudes; the outer taps (1 and 4) are subtracted
+// For output i, with s = \v0:\v1 viewed as one byte string:
+//   sat_u8((-s[i]*f1 + s[i+1]*f2 + s[i+2]*f3 - s[i+3]*f4 + 64) >> 7)
+// Note that the macro arguments are named v0/v1 but only the escaped forms
+// (\v0, \v1) refer to them; a plain v0 below is the coefficient register.
+// Clobbers v19, v20, v22, v23 and v25.
+.macro  vp8_epel8_h4    d,   v0,   v1
+        ext             v22.8b, \v0\().8b,  \v1\().8b,  #1
+        uxtl            v19.8h, \v0\().8b               // s[i+0]
+        ext             v23.8b, \v0\().8b,  \v1\().8b,  #2
+        uxtl            v20.8h, v22.8b                  // s[i+1]
+        ext             v25.8b, \v0\().8b,  \v1\().8b,  #3
+        uxtl            v22.8h, v23.8b                  // s[i+2]
+        uxtl            v25.8h, v25.8b                  // s[i+3]
+        mul             v20.8h, v20.8h, v0.h[2]
+        mul             v22.8h, v22.8h, v0.h[3]
+        mls             v20.8h, v19.8h, v0.h[1]
+        mls             v22.8h, v25.8h, v0.h[4]
+        sqadd           v22.8h, v20.8h, v22.8h          // combine both partial sums (saturating)
+        sqrshrun        \d\().8b, v22.8h, #7            // round, >> 7, saturate to u8
+.endm
+
+// 4-tap vertical filter producing two consecutive rows of 8 pixels.
+//   \s0..\s4: five consecutive source rows (8 bytes each in the low half);
+//             all five registers are widened in place and therefore destroyed
+//   \d0:      low 8 bytes  = first output row, filtered from \s0..\s3
+//             high 8 bytes = second output row, filtered from \s1..\s4
+//   v0.h[1..4]: tap magnitudes; the outer taps (1 and 4) are subtracted
+// Clobbers v21, v22 and v23 in addition to the source registers.
+.macro  vp8_epel8_v4_y2 d0, s0, s1, s2, s3, s4
+        uxtl            \s0\().8h,  \s0\().8b
+        uxtl            \s1\().8h,  \s1\().8b
+        uxtl            \s2\().8h,  \s2\().8b
+        uxtl            \s3\().8h,  \s3\().8b
+        uxtl            \s4\().8h,  \s4\().8b
+        mul             v21.8h,     \s1\().8h, v0.h[2]  // row 0, taps 1-2 accumulate in v21
+        mul             v23.8h,     \s2\().8h, v0.h[3]  // row 0, taps 3-4 accumulate in v23
+        mul             \s2\().8h,  \s2\().8h, v0.h[2]  // row 1, taps 1-2 accumulate in \s2
+        mul             v22.8h,     \s3\().8h, v0.h[3]  // row 1, taps 3-4 accumulate in v22
+        mls             v21.8h,     \s0\().8h, v0.h[1]
+        mls             v23.8h,     \s3\().8h, v0.h[4]
+        mls             \s2\().8h,  \s1\().8h, v0.h[1]
+        mls             v22.8h,     \s4\().8h, v0.h[4]
+        sqadd           v21.8h,     v21.8h,    v23.8h   // row 0 total
+        sqadd           \s2\().8h,  \s2\().8h, v22.8h   // row 1 total
+        sqrshrun        \d0\().8b,  v21.8h,    #7       // round, >> 7, saturate to u8
+        sqrshrun2       \d0\().16b, \s2\().8h, #7
+.endm
+
+
+// note: worst case sum of all 6-tap filter values * 255 is 0x7f80 so 16 bit
+// arithmetic can be used to apply filters
+//
+// Seven filters of eight 16-bit entries (16 bytes per filter); only the first
+// six entries of each row are taps. The values are stored as magnitudes: the
+// filter macros in this file subtract taps 1 and 4 (mls) and add the others.
+// Users load a row with "movrel ..., subpel_filters, -16" plus index * 16,
+// so index 1 presumably selects the first row (assuming movrel's last operand
+// is a byte offset - its definition is not part of this section).
+const   subpel_filters, align=4
+        .short     0,   6, 123,  12,   1,   0,   0,   0
+        .short     2,  11, 108,  36,   8,   1,   0,   0
+        .short     0,   9,  93,  50,   6,   0,   0,   0
+        .short     3,  16,  77,  77,  16,   3,   0,   0
+        .short     0,   6,  50,  93,   9,   0,   0,   0
+        .short     1,   8,  36, 108,  11,   2,   0,   0
+        .short     0,   1,  12, 123,   6,   0,   0,   0
+endconst
+
+// 16-pixel-wide block copy with a 6-tap vertical subpel filter.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count; two rows are produced per iteration and the loop only
+//       ends when the count reaches exactly 0, so it must be even and > 0
+//   w6: vertical filter index into subpel_filters
+function ff_put_vp8_epel16_v6_neon, export=1
+        sub             x2,  x2,  x3,  lsl #1           // the filter starts 2 rows above the block
+
+        sxtw            x4,  w4
+        sxtw            x6,  w6
+        movrel          x17,  subpel_filters, -16
+        add             x6,  x17,  x6, lsl #4  // y
+        ld1             {v0.8h},     [x6]               // v0 = vertical filter taps
+1:
+        // load 7 source rows; each row is split into a left and a right 8-byte half
+        ld1             {v1.1d - v2.1d},    [x2], x3
+        ld1             {v3.1d - v4.1d},    [x2], x3
+        ld1             {v16.1d - v17.1d},  [x2], x3
+        ld1             {v18.1d - v19.1d},  [x2], x3
+        ld1             {v20.1d - v21.1d},  [x2], x3
+        ld1             {v22.1d - v23.1d},  [x2], x3
+        ld1             {v24.1d - v25.1d},  [x2]
+        sub             x2,  x2,  x3, lsl #2            // net advance of 2 rows per iteration
+
+        vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24   // left halves
+        vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25   // right halves
+
+        st1             {v1.1d - v2.1d}, [x0], x1
+        st1             {v3.1d - v4.1d}, [x0], x1
+        subs            x4, x4, #2
+        bne             1b
+
+        ret
+endfunc
+
+// 16-pixel-wide block copy with a 6-tap horizontal subpel filter.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count; one row per iteration, the loop ends when it reaches 0
+//   w5: horizontal filter index into subpel_filters
+// Each iteration reads 32 bytes starting 2 pixels left of the row.
+function ff_put_vp8_epel16_h6_neon, export=1
+        sub             x2,  x2,  #2                    // the filter starts 2 pixels to the left
+        sxtw            x5,  w5 // x
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        add             x5,  x17,  x5, lsl #4 // x
+        ld1             {v0.8h},  [x5]                  // v0 = horizontal filter taps
+1:
+        ld1             {v1.16b, v2.16b}, [x2], x3
+        vp8_epel16_h6   v1, v1, v2
+        st1             {v1.16b}, [x0], x1
+
+        subs            w4, w4, #1
+        bne             1b
+        ret
+endfunc
+
+
+// 16-pixel-wide block copy with 6-tap horizontal and 6-tap vertical filters.
+// Pass 1 filters h+5 rows horizontally into a 16-byte-aligned scratch buffer
+// on the stack (16 bytes per row); pass 2 filters that buffer vertically.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count h; one output row per iteration in pass 2
+//   w5: horizontal filter index, w6: vertical filter index (subpel_filters)
+// The scratch area is 336 bytes = 21 rows, so this presumably expects h <= 16
+// (TODO confirm against callers).
+function ff_put_vp8_epel16_h6v6_neon, export=1
+        sub             x2,  x2,  x3,  lsl #1           // start 2 rows above ...
+        sub             x2,  x2,  #2                    // ... and 2 pixels left of the block
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5 // x
+        add             x16,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #336+16               // scratch rows + slack for alignment
+        ld1             {v0.8h},  [x16]
+        add             x7,  sp,  #15
+        sxtw            x4,  w4
+        add             x16, x4, #5   // h
+        bic             x7,  x7,  #15                   // x7 = scratch buffer, rounded up to 16 bytes
+1:
+        ld1             {v1.16b, v2.16b}, [x2], x3
+        vp8_epel16_h6   v1, v1, v2
+        st1             {v1.16b}, [x7], #16
+        subs            x16, x16, #1
+        bne             1b
+
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,  #15
+        ld1             {v0.8h},     [x6]
+        bic             x7,  x7,  #15                   // rewind x7 to the start of the scratch buffer
+2:
+        // six scratch rows; odd/even registers hold the left/right 8-byte halves
+        ld1             {v1.8b - v4.8b},    [x7], #32
+        ld1             {v16.8b - v19.8b},  [x7], #32
+        ld1             {v20.8b - v23.8b},  [x7]
+        sub             x7,  x7,  #48                   // net advance of one 16-byte row
+
+        vp8_epel8_v6    v5, v1, v3, v16, v18, v20, v22  // left half  -> v5
+        vp8_epel8_v6    v2, v2, v4, v17, v19, v21, v23  // right half -> v2
+        trn1            v2.2d, v5.2d, v2.2d             // v2 = left half : right half
+
+        st1             {v2.16b}, [x0], x1
+        subs            x4, x4, #1
+        bne             2b
+
+        add             sp,  sp,  #336+16
+        ret
+endfunc
+
+// 8-pixel-wide block copy with 6-tap horizontal and 6-tap vertical filters.
+// Pass 1 filters h+5 rows horizontally into a 16-byte-aligned scratch buffer
+// on the stack (8 bytes per row); pass 2 filters that buffer vertically.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count h; two rows are produced per iteration in pass 2 and the
+//       loop only ends when the count reaches exactly 0 (must be even, > 0)
+//   w5: horizontal filter index, w6: vertical filter index (subpel_filters)
+// The scratch area is 168 bytes = 21 rows, so this presumably expects h <= 16
+// (TODO confirm against callers).
+// NOTE(review): 168+16 is not a multiple of 16, so sp is moved by an amount
+// that does not preserve 16-byte alignment; sp itself is never used as a
+// load/store base here (only x7 is) - confirm this is acceptable.
+function ff_put_vp8_epel8_h6v6_neon, export=1
+        sub             x2,  x2,  x3,  lsl #1           // start 2 rows above ...
+        sub             x2,  x2,  #2                    // ... and 2 pixels left of the block
+        sxtw            x4,  w4
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16               // scratch rows + slack for alignment
+        ld1             {v0.8h},  [x5]
+        add             x7,  sp,  #15
+        add             x16, x4,  #5   // h
+        bic             x7,  x7,  #15                   // x7 = scratch buffer, rounded up to 16 bytes
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h6    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8
+        subs            x16, x16, #1
+        bne             1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]
+        bic             x7,  x7,   #15                  // rewind x7 to the start of the scratch buffer
+2:
+        ld1             {v1.8b - v4.8b}, [x7], #32      // scratch rows 0-3
+        ld1             {v5.8b - v7.8b}, [x7]           // scratch rows 4-6
+
+        sub             x7,  x7,  #16                   // net advance of two 8-byte rows
+
+        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7
+
+        st1             {v1.8b}, [x0], x1
+        st1             {v2.8b}, [x0], x1
+        subs            x4, x4, #2
+        bne             2b
+
+        add             sp,  sp,  #168+16
+        ret
+endfunc
+
+// 8-pixel-wide block copy with 4-tap horizontal and 6-tap vertical filters.
+// Pass 1 filters h+5 rows horizontally into a 16-byte-aligned scratch buffer
+// on the stack (8 bytes per row); pass 2 filters that buffer vertically.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count h; two rows are produced per iteration in pass 2 and the
+//       loop only ends when the count reaches exactly 0 (must be even, > 0)
+//   w5: horizontal filter index, w6: vertical filter index (subpel_filters)
+// The scratch area is 168 bytes = 21 rows, so this presumably expects h <= 16
+// (TODO confirm against callers).
+// NOTE(review): 168+16 is not a multiple of 16, so sp is moved by an amount
+// that does not preserve 16-byte alignment; sp itself is never used as a
+// load/store base here (only x7 is) - confirm this is acceptable.
+function ff_put_vp8_epel8_h4v6_neon, export=1
+        sub             x2,  x2,  x3,  lsl #1           // start 2 rows above ...
+        sub             x2,  x2,  #1                    // ... and 1 pixel left of the block
+        sxtw            x4,  w4
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16               // scratch rows + slack for alignment
+        ld1             {v0.8h},  [x5]
+        add             x7,  sp,  #15
+        add             x16, x4, #5   // h
+        bic             x7,  x7,  #15                   // x7 = scratch buffer, rounded up to 16 bytes
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h4    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8
+        subs            x16, x16, #1
+        bne             1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]
+        bic             x7,  x7,   #15                  // rewind x7 to the start of the scratch buffer
+2:
+        ld1             {v1.8b - v4.8b}, [x7], #32      // scratch rows 0-3
+        ld1             {v5.8b - v7.8b}, [x7]           // scratch rows 4-6
+
+        sub             x7,  x7,  #16                   // net advance of two 8-byte rows
+
+        vp8_epel8_v6_y2 v1, v2, v1, v2, v3, v4, v5, v6, v7
+
+        st1             {v1.8b}, [x0], x1
+        st1             {v2.8b}, [x0], x1
+        subs            x4, x4, #2
+        bne             2b
+
+        add             sp,  sp,  #168+16
+        ret
+endfunc
+
+// 8-pixel-wide block copy with 4-tap horizontal and 4-tap vertical filters.
+// Pass 1 filters h+3 rows horizontally into a 16-byte-aligned scratch buffer
+// on the stack (8 bytes per row); pass 2 filters that buffer vertically.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count h; two rows are produced per iteration in pass 2 and the
+//       loop only ends when the count reaches exactly 0 (must be even, > 0)
+//   w5: horizontal filter index, w6: vertical filter index (subpel_filters)
+// NOTE(review): 168+16 is not a multiple of 16, so sp is moved by an amount
+// that does not preserve 16-byte alignment; sp itself is never used as a
+// load/store base here (only x7 is) - confirm this is acceptable.
+function ff_put_vp8_epel8_h4v4_neon, export=1
+        sub             x2,  x2,  x3                    // start 1 row above ...
+        sub             x2,  x2,  #1                    // ... and 1 pixel left of the block
+        sxtw            x4,  w4
+
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16               // scratch rows + slack for alignment
+        ld1             {v0.8h},  [x5]
+        add             x7,  sp,  #15
+        add             x16, x4, #3   // h
+        bic             x7,  x7,  #15                   // x7 = scratch buffer, rounded up to 16 bytes
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h4    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8
+        subs            x16, x16, #1
+        bne             1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]
+        bic             x7,  x7,   #15                  // rewind x7 to the start of the scratch buffer
+2:
+        ld1             {v1.8b - v2.8b}, [x7], #16      // scratch rows 0-1; advance two rows
+        ld1             {v3.8b - v5.8b}, [x7]           // scratch rows 2-4
+
+        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5
+
+        st1             {v1.d}[0], [x0], x1             // first output row
+        st1             {v1.d}[1], [x0], x1             // second output row
+        subs            x4, x4, #2
+        bne             2b
+
+        add             sp,  sp,  #168+16
+        ret
+endfunc
+
+// 8-pixel-wide block copy with 6-tap horizontal and 4-tap vertical filters.
+// Pass 1 filters h+3 rows horizontally into a 16-byte-aligned scratch buffer
+// on the stack (8 bytes per row); pass 2 filters that buffer vertically.
+//   x0: destination, x1: destination stride
+//   x2: source,      x3: source stride
+//   w4: row count h; two rows are produced per iteration in pass 2 and the
+//       loop only ends when the count reaches exactly 0 (must be even, > 0)
+//   w5: horizontal filter index, w6: vertical filter index (subpel_filters)
+// NOTE(review): 168+16 is not a multiple of 16, so sp is moved by an amount
+// that does not preserve 16-byte alignment; sp itself is never used as a
+// load/store base here (only x7 is) - confirm this is acceptable.
+function ff_put_vp8_epel8_h6v4_neon, export=1
+        sub             x2,  x2,  x3                    // start 1 row above ...
+        sub             x2,  x2,  #2                    // ... and 2 pixels left of the block
+        sxtw            x4,  w4
+
+
+        // first pass (horizontal):
+        movrel          x17,  subpel_filters, -16
+        sxtw            x5,  w5
+        add             x5,  x17,  x5, lsl #4 // x
+        sub             sp,  sp,  #168+16               // scratch rows + slack for alignment
+        ld1             {v0.8h},  [x5]
+        add             x7,  sp,  #15
+        add             x16, x4, #3   // h
+        bic             x7,  x7,  #15                   // x7 = scratch buffer, rounded up to 16 bytes
+1:
+        ld1             {v1.8b, v2.8b}, [x2], x3
+
+        vp8_epel8_h6    v1, v1, v2
+
+        st1             {v1.8b}, [x7], #8
+        subs            x16, x16, #1
+        bne             1b
+
+        // second pass (vertical):
+        sxtw            x6,  w6
+        add             x6,  x17,  x6, lsl #4  // y
+        add             x7,  sp,   #15
+        ld1             {v0.8h},   [x6]
+        bic             x7,  x7,   #15                  // rewind x7 to the start of the scratch buffer
+2:
+        ld1             {v1.8b - v2.8b}, [x7], #16      // scratch rows 0-1; advance two rows
+        ld1             {v3.8b - v5.8b}, [x7]           // scratch rows 2-4
+
+        vp8_epel8_v4_y2 v1, v1, v2, v3, v4, v5
+
+        st1             {v1.d}[0], [x0], x1             // first output row
+        st1             {v1.d}[1], [x0], x1             // second output row
+        subs            x4, x4, #2
+        bne             2b
+
+        add             sp,  sp,  #168+16
+        ret
+endfunc
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 43b22b7654cfe..eaa327a3ee944 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -452,7 +452,7 @@ static int decode_exponents(AC3DecodeContext *s,
         prevexp += dexp[i] - 2;
         if (prevexp > 24U) {
             av_log(s->avctx, AV_LOG_ERROR, "exponent %d is out-of-range\n", prevexp);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         switch (group_size) {
         case 4: dexps[j++] = prevexp;
@@ -1467,7 +1467,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
     int buf_size, full_buf_size = avpkt->size;
     AC3DecodeContext *s = avctx->priv_data;
     int blk, ch, err, offset, ret;
-    int got_independent_frame = 0;
+    int i;
+    int skip = 0, got_independent_frame = 0;
     const uint8_t *channel_map;
     uint8_t extended_channel_map[EAC3_MAX_CHANNELS];
     const SHORTFLOAT *output[AC3_MAX_CHANNELS];
@@ -1477,6 +1478,23 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
     s->superframe_size = 0;
 
     buf_size = full_buf_size;
+    for (i = 1; i < buf_size; i += 2) {
+        if (buf[i] == 0x77 || buf[i] == 0x0B) {
+            if ((buf[i] ^ buf[i-1]) == (0x77 ^ 0x0B)) {
+                i--;
+                break;
+            } else if ((buf[i] ^ buf[i+1]) == (0x77 ^ 0x0B)) {
+                break;
+            }
+        }
+    }
+    if (i >= buf_size)
+        return AVERROR_INVALIDDATA;
+    if (i > 10)
+        return i;
+    buf += i;
+    buf_size -= i;
+
     /* copy input buffer to decoder context to avoid reading past the end
        of the buffer, which can be caused by a damaged input stream. */
     if (buf_size >= 2 && AV_RB16(buf) == 0x770B) {
@@ -1637,6 +1655,11 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
         AC3HeaderInfo hdr;
         int err;
 
+        if (buf_size - s->frame_size <= 16) {
+            skip = buf_size - s->frame_size;
+            goto skip;
+        }
+
         if ((ret = init_get_bits8(&s->gbc, buf + s->frame_size, buf_size - s->frame_size)) < 0)
             return ret;
 
@@ -1657,6 +1680,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
             }
         }
     }
+skip:
 
     frame->decode_error_flags = err ? FF_DECODE_ERROR_INVALID_BITSTREAM : 0;
 
@@ -1796,9 +1820,9 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
     *got_frame_ptr = 1;
 
     if (!s->superframe_size)
-        return FFMIN(full_buf_size, s->frame_size);
+        return FFMIN(full_buf_size, s->frame_size + skip);
 
-    return FFMIN(full_buf_size, s->superframe_size);
+    return FFMIN(full_buf_size, s->superframe_size + skip);
 }
 
 /**
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 636ca72050450..e7e18af92d5b7 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1800,7 +1800,7 @@ static int validate_float_option(float v, const float *v_list, int v_list_size)
             break;
     }
     if (i == v_list_size)
-        return -1;
+        return AVERROR(EINVAL);
 
     return i;
 }
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index c345a99c81612..a070d1b25d61d 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -118,7 +118,7 @@ int16_t ff_acelp_decode_gain_code(
                (mr_energy >> 15) - 25
            );
 #else
-    mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
+    mr_energy = gain_corr_factor * ff_exp10((double)mr_energy / (20 << 23)) /
                 sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
     return mr_energy >> 12;
 #endif
diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index cd3bbd33c2e44..aa9c7c5c4f8f5 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -1679,7 +1679,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
         break;
 
     default:
-        return -1;
+        av_assert0(0); // unsupported codec_id should not happen
     }
 
     if (avpkt->size && bytestream2_tell(&gb) == 0) {
diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index 9c6f806655e3e..dc28c83661e02 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c
@@ -208,6 +208,9 @@ static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
     int mb, idx;
     unsigned val;
 
+    if (get_bits_left(gb) < 5)
+        return AVERROR_INVALIDDATA;
+
     has_skips  = get_bits1(gb);
     coeff_type = get_bits1(gb);
     coeff_bits = get_bits(gb, 3);
diff --git a/libavcodec/alac.c b/libavcodec/alac.c
index 93cf198eeafe5..d6b87db734ff3 100644
--- a/libavcodec/alac.c
+++ b/libavcodec/alac.c
@@ -121,7 +121,7 @@ static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
         unsigned int x;
 
         if(get_bits_left(&alac->gb) <= 0)
-            return -1;
+            return AVERROR_INVALIDDATA;
 
         /* calculate rice param and decode next value */
         k = av_log2((history >> 9) + 3);
@@ -317,7 +317,7 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         if (alac->extra_bits) {
             for (i = 0; i < alac->nb_samples; i++) {
                 if(get_bits_left(&alac->gb) <= 0)
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 for (ch = 0; ch < channels; ch++)
                     alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
             }
@@ -353,7 +353,7 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         /* not compressed, easy case */
         for (i = 0; i < alac->nb_samples; i++) {
             if(get_bits_left(&alac->gb) <= 0)
-                return -1;
+                return AVERROR_INVALIDDATA;
             for (ch = 0; ch < channels; ch++) {
                 alac->output_samples_buffer[ch][i] =
                          get_sbits_long(&alac->gb, alac->sample_size);
@@ -555,9 +555,9 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
         av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
         return AVERROR_INVALIDDATA;
     }
-    if (alac_set_info(alac)) {
+    if ((ret = alac_set_info(alac)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
-        return -1;
+        return ret;
     }
 
     switch (alac->sample_size) {
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index c0b4d56d0d475..b26aeca239420 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -41,6 +41,7 @@ extern AVCodec ff_anm_decoder;
 extern AVCodec ff_ansi_decoder;
 extern AVCodec ff_apng_encoder;
 extern AVCodec ff_apng_decoder;
+extern AVCodec ff_arbc_decoder;
 extern AVCodec ff_asv1_encoder;
 extern AVCodec ff_asv1_decoder;
 extern AVCodec ff_asv2_encoder;
@@ -152,6 +153,7 @@ extern AVCodec ff_hq_hqa_decoder;
 extern AVCodec ff_hqx_decoder;
 extern AVCodec ff_huffyuv_encoder;
 extern AVCodec ff_huffyuv_decoder;
+extern AVCodec ff_hymt_decoder;
 extern AVCodec ff_idcin_decoder;
 extern AVCodec ff_iff_ilbm_decoder;
 extern AVCodec ff_imm4_decoder;
@@ -423,6 +425,7 @@ extern AVCodec ff_g723_1_decoder;
 extern AVCodec ff_g729_decoder;
 extern AVCodec ff_gsm_decoder;
 extern AVCodec ff_gsm_ms_decoder;
+extern AVCodec ff_hcom_decoder;
 extern AVCodec ff_iac_decoder;
 extern AVCodec ff_ilbc_decoder;
 extern AVCodec ff_imc_decoder;
@@ -494,6 +497,7 @@ extern AVCodec ff_xma2_decoder;
 extern AVCodec ff_pcm_alaw_encoder;
 extern AVCodec ff_pcm_alaw_decoder;
 extern AVCodec ff_pcm_bluray_decoder;
+extern AVCodec ff_pcm_dvd_encoder;
 extern AVCodec ff_pcm_dvd_decoder;
 extern AVCodec ff_pcm_f16le_decoder;
 extern AVCodec ff_pcm_f24le_decoder;
@@ -552,6 +556,8 @@ extern AVCodec ff_pcm_u32be_encoder;
 extern AVCodec ff_pcm_u32be_decoder;
 extern AVCodec ff_pcm_u32le_encoder;
 extern AVCodec ff_pcm_u32le_decoder;
+extern AVCodec ff_pcm_vidc_encoder;
+extern AVCodec ff_pcm_vidc_decoder;
 extern AVCodec ff_pcm_zork_decoder;
 
 /* DPCM codecs */
@@ -671,9 +677,11 @@ extern AVCodec ff_qdmc_at_decoder;
 extern AVCodec ff_qdm2_at_decoder;
 extern AVCodec ff_libaom_av1_decoder;
 extern AVCodec ff_libaom_av1_encoder;
+extern AVCodec ff_libaribb24_decoder;
 extern AVCodec ff_libcelt_decoder;
 extern AVCodec ff_libcodec2_encoder;
 extern AVCodec ff_libcodec2_decoder;
+extern AVCodec ff_libdav1d_decoder;
 extern AVCodec ff_libdavs2_decoder;
 extern AVCodec ff_libfdk_aac_encoder;
 extern AVCodec ff_libfdk_aac_decoder;
diff --git a/libavcodec/arbc.c b/libavcodec/arbc.c
new file mode 100644
index 0000000000000..841a9f10acce3
--- /dev/null
+++ b/libavcodec/arbc.c
@@ -0,0 +1,218 @@
+/*
+ * Gryphon's Anim Compressor decoder
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+typedef struct ARBCContext {
+    GetByteContext gb;  // byte reader over the packet, set up in decode_frame()
+
+    AVFrame *prev_frame;  // reference to the last output frame; copied into the next one before decoding
+} ARBCContext;
+
+static void fill_tile4(AVCodecContext *avctx, uint8_t *color, AVFrame *frame)
+{
+    ARBCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    int nb_tiles = bytestream2_get_le16(gb);  // tile count, 16-bit little-endian
+    int h = avctx->height - 1;  // last row index; rows are addressed as h - j, i.e. bottom-up
+
+    if ((avctx->width / 4 + 1) * (avctx->height / 4 + 1) < nb_tiles)  // more tiles than a 4x4 grid over the frame holds
+        return;  // nothing is drawn and the tile records stay unread
+
+    for (int i = 0; i < nb_tiles; i++) {
+        int y = bytestream2_get_byte(gb);  // tile row, in units of 4 pixels (read before x)
+        int x = bytestream2_get_byte(gb);  // tile column, in units of 4 pixels
+        uint16_t mask = bytestream2_get_le16(gb);  // one bit per pixel of the 4x4 tile, MSB first, row by row
+        int start_y = y * 4, start_x = x * 4;
+        int end_y = start_y + 4, end_x = start_x + 4;
+
+        for (int j = start_y; j < end_y; j++) {
+            for (int k = start_x; k < end_x; k++) {
+                if (mask & 0x8000) {  // top bit set: paint this pixel
+                    if (j >= avctx->height || k >= avctx->width) {  // pixel lies outside the frame
+                        mask = mask << 1;  // still consume its bit
+                        continue;
+                    }
+                    frame->data[0][frame->linesize[0] * (h - j) + 3 * k + 0] = color[0];  // 3 bytes per pixel
+                    frame->data[0][frame->linesize[0] * (h - j) + 3 * k + 1] = color[1];
+                    frame->data[0][frame->linesize[0] * (h - j) + 3 * k + 2] = color[2];
+                }
+                mask = mask << 1;  // advance to the next pixel's bit
+            }
+        }
+    }
+}
+
+static void fill_tileX(AVCodecContext *avctx, int tile_width, int tile_height,
+                       uint8_t *color, AVFrame *frame)
+{
+    ARBCContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    const int step_h = tile_height / 4;  // each tile is split into a 4x4 grid of cells
+    const int step_w = tile_width / 4;  // cell size is step_w x step_h pixels
+    int nb_tiles = bytestream2_get_le16(gb);  // tile count, 16-bit little-endian
+    int h = avctx->height - 1;  // last row index; rows are addressed bottom-up below
+
+    if ((avctx->width / tile_width + 1) * (avctx->height / tile_height + 1) < nb_tiles)  // more tiles than fit over the frame
+        return;  // nothing is drawn and the tile records stay unread
+
+    for (int i = 0; i < nb_tiles; i++) {
+        int y = bytestream2_get_byte(gb);  // tile row, in units of tile_height (read before x)
+        int x = bytestream2_get_byte(gb);  // tile column, in units of tile_width
+        uint16_t mask = bytestream2_get_le16(gb);  // one bit per cell, MSB first, row by row
+        int start_y = y * tile_height, start_x = x * tile_width;
+        int end_y = start_y + tile_height, end_x = start_x + tile_width;
+
+        for (int j = start_y; j < end_y; j += step_h) {
+            for (int k = start_x; k < end_x; k += step_w) {
+                if (mask & 0x8000U) {  // top bit set: paint the whole cell
+                    for (int m = 0; m < step_h; m++) {
+                        for (int n = 0; n < step_w; n++) {
+                            if (j + m >= avctx->height || k + n >= avctx->width)  // clip pixels outside the frame
+                                continue;
+                            frame->data[0][frame->linesize[0] * (h - (j + m)) + 3 * (k + n) + 0] = color[0];  // 3 bytes per pixel
+                            frame->data[0][frame->linesize[0] * (h - (j + m)) + 3 * (k + n) + 1] = color[1];
+                            frame->data[0][frame->linesize[0] * (h - (j + m)) + 3 * (k + n) + 2] = color[2];
+                        }
+                    }
+                }
+                mask = mask << 1;  // advance to the next cell's bit
+            }
+        }
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame, AVPacket *avpkt)
+{
+    ARBCContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    int ret, nb_segments, keyframe = 1;
+
+    if (avpkt->size < 10)  // need the 8 skipped bytes plus the 16-bit segment count
+        return AVERROR_INVALIDDATA;
+
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+
+    if (s->prev_frame->data[0]) {  // a previous picture exists
+        ret = av_frame_copy(frame, s->prev_frame);  // start from it; segments only overwrite pixels
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_init(&s->gb, avpkt->data, avpkt->size);
+    bytestream2_skip(&s->gb, 8);  // first 8 bytes are not interpreted here
+    nb_segments = bytestream2_get_le16(&s->gb);
+    if (nb_segments == 0)  // a packet with no segments is reported as a non-key frame
+        keyframe = 0;
+
+    for (int i = 0; i < nb_segments; i++) {
+        int resolution_flag;
+        uint8_t fill[3];  // colour written by every tile of this segment
+
+        if (bytestream2_get_bytes_left(&s->gb) <= 0)  // input ran out before all segments were read
+            return AVERROR_INVALIDDATA;
+
+        fill[0] = bytestream2_get_byte(&s->gb);
+        bytestream2_skip(&s->gb, 1);  // each colour byte is followed by one ignored byte
+        fill[1] = bytestream2_get_byte(&s->gb);
+        bytestream2_skip(&s->gb, 1);
+        fill[2] = bytestream2_get_byte(&s->gb);
+        bytestream2_skip(&s->gb, 1);
+        resolution_flag = bytestream2_get_byte(&s->gb);  // bit set: which tile sizes follow
+
+        if (resolution_flag & 0x10)  // tile lists are consumed largest size first
+            fill_tileX(avctx, 1024, 1024, fill, frame);
+        if (resolution_flag & 0x08)
+            fill_tileX(avctx, 256, 256, fill, frame);
+        if (resolution_flag & 0x04)
+            fill_tileX(avctx, 64, 64, fill, frame);
+        if (resolution_flag & 0x02)
+            fill_tileX(avctx, 16, 16, fill, frame);
+        if (resolution_flag & 0x01)
+            fill_tile4(avctx, fill, frame);
+    }
+
+    av_frame_unref(s->prev_frame);  // replace the stored picture with the one just decoded
+    if ((ret = av_frame_ref(s->prev_frame, frame)) < 0)
+        return ret;
+
+    frame->pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    frame->key_frame = keyframe;
+    *got_frame = 1;
+
+    return avpkt->size;  // the whole packet is reported as consumed
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    ARBCContext *s = avctx->priv_data;
+
+    avctx->pix_fmt = AV_PIX_FMT_RGB24;  // the tile fillers write 3 bytes per pixel
+
+    s->prev_frame = av_frame_alloc();  // empty frame; decode_frame() stores each output here
+    if (!s->prev_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void decode_flush(AVCodecContext *avctx)
+{
+    ARBCContext *s = avctx->priv_data;
+
+    av_frame_unref(s->prev_frame);  // release the stored previous picture; the AVFrame itself is kept
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    ARBCContext *s = avctx->priv_data;
+
+    av_frame_free(&s->prev_frame);  // frees the AVFrame allocated in decode_init()
+
+    return 0;
+}
+
+AVCodec ff_arbc_decoder = {
+    .name           = "arbc",
+    .long_name      = NULL_IF_CONFIG_SMALL("Gryphon's Anim Compressor"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_ARBC,
+    .priv_data_size = sizeof(ARBCContext),
+    .init           = decode_init,  // sets RGB24 output and allocates prev_frame
+    .decode         = decode_frame,  // one packet in, one picture out
+    .flush          = decode_flush,  // releases the stored previous picture
+    .close          = decode_close,  // frees prev_frame
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,  // NOTE(review): presumably makes close run after a failed init - confirm
+};
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 90144d0da2f28..89402659307d0 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -25,13 +25,13 @@
 #include "libavcodec/h264dsp.h"
 #include "libavcodec/arm/startcode.h"
 
-void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                      int beta, int8_t *tc0);
-void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                      int beta, int8_t *tc0);
-void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                        int beta, int8_t *tc0);
-void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
                                        int beta, int8_t *tc0);
 
 void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
index 872528bdfa6df..67da7c6d84c00 100644
--- a/libavcodec/ass_split.c
+++ b/libavcodec/ass_split.c
@@ -249,7 +249,7 @@ static const char *ass_split_section(ASSSplitContext *ctx, const char *buf)
     const ASSSection *section = &ass_sections[ctx->current_section];
     int *number = &ctx->field_number[ctx->current_section];
     int *order = ctx->field_order[ctx->current_section];
-    int *tmp, i, len;
+    int i, len;
 
     while (buf && *buf) {
         if (buf[0] == '[') {
@@ -280,9 +280,9 @@ static const char *ass_split_section(ASSSplitContext *ctx, const char *buf)
                 while (!is_eol(*buf)) {
                     buf = skip_space(buf);
                     len = strcspn(buf, ", \r\n");
-                    if (!(tmp = av_realloc_array(order, (*number + 1), sizeof(*order))))
+                    if (av_reallocp_array(&order, (*number + 1), sizeof(*order)) != 0)
                         return NULL;
-                    order = tmp;
+
                     order[*number] = -1;
                     for (i=0; section->fields[i].name; i++)
                         if (!strncmp(buf, section->fields[i].name, len)) {
diff --git a/libavcodec/assenc.c b/libavcodec/assenc.c
index dc4f0ffa97220..e54c1d8ec39b7 100644
--- a/libavcodec/assenc.c
+++ b/libavcodec/assenc.c
@@ -57,7 +57,7 @@ static int ass_encode_frame(AVCodecContext *avctx,
 
         if (sub->rects[i]->type != SUBTITLE_ASS) {
             av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
 
 #if FF_API_ASS_TIMING
@@ -93,7 +93,7 @@ static int ass_encode_frame(AVCodecContext *avctx,
 
         if (len > bufsize-total_len-1) {
             av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
 
         total_len += len;
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index c4eca2a13d6ae..3cc94bf91a291 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -173,10 +173,7 @@ static inline int encode_mb(ASV1Context *a, int16_t block[6][64])
 {
     int i;
 
-    if (a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb) >> 3) < MAX_MB_SIZE) {
-        av_log(a->avctx, AV_LOG_ERROR, "encoded frame too large\n");
-        return -1;
-    }
+    av_assert0(a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb) >> 3) >= MAX_MB_SIZE);
 
     if (a->avctx->codec_id == AV_CODEC_ID_ASV1) {
         for (i = 0; i < 6; i++)
diff --git a/libavcodec/av1_metadata_bsf.c b/libavcodec/av1_metadata_bsf.c
index 52d383661f1ad..2b74b697e41ad 100644
--- a/libavcodec/av1_metadata_bsf.c
+++ b/libavcodec/av1_metadata_bsf.c
@@ -170,7 +170,7 @@ static int av1_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
 
     if (err < 0)
         av_packet_unref(out);
@@ -215,13 +215,15 @@ static int av1_metadata_init(AVBSFContext *bsf)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
     return err;
 }
 
 static void av1_metadata_close(AVBSFContext *bsf)
 {
     AV1MetadataContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->cbc, &ctx->access_unit);
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/av1_parser.c b/libavcodec/av1_parser.c
index 8df66498f4d61..bb8737a393ad5 100644
--- a/libavcodec/av1_parser.c
+++ b/libavcodec/av1_parser.c
@@ -72,7 +72,7 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
             goto end;
         }
 
-        ff_cbs_fragment_uninit(s->cbc, td);
+        ff_cbs_fragment_reset(s->cbc, td);
     }
 
     ret = ff_cbs_read(s->cbc, td, data, size);
@@ -159,7 +159,7 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
     }
 
 end:
-    ff_cbs_fragment_uninit(s->cbc, td);
+    ff_cbs_fragment_reset(s->cbc, td);
 
     s->cbc->log_ctx = NULL;
 
@@ -193,6 +193,7 @@ static void av1_parser_close(AVCodecParserContext *ctx)
 {
     AV1ParseContext *s = ctx->priv_data;
 
+    ff_cbs_fragment_free(s->cbc, &s->temporal_unit);
     ff_cbs_close(&s->cbc);
 }
 
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index b2ec24f7d6df7..8ac8feadf925b 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -452,6 +452,8 @@ enum AVCodecID {
     AV_CODEC_ID_MWSC,
     AV_CODEC_ID_WCMV,
     AV_CODEC_ID_RASC,
+    AV_CODEC_ID_HYMT,
+    AV_CODEC_ID_ARBC,
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
@@ -491,6 +493,7 @@ enum AVCodecID {
     AV_CODEC_ID_PCM_S64BE,
     AV_CODEC_ID_PCM_F16LE,
     AV_CODEC_ID_PCM_F24LE,
+    AV_CODEC_ID_PCM_VIDC,
 
     /* various ADPCM codecs */
     AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
@@ -644,6 +647,7 @@ enum AVCodecID {
     AV_CODEC_ID_APTX_HD,
     AV_CODEC_ID_SBC,
     AV_CODEC_ID_ATRAC9,
+    AV_CODEC_ID_HCOM,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
@@ -673,6 +677,7 @@ enum AVCodecID {
     AV_CODEC_ID_ASS,
     AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
     AV_CODEC_ID_TTML,
+    AV_CODEC_ID_ARIB_CAPTION,
 
     /* other specific kind of codecs (generally used for attachments) */
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
@@ -1070,6 +1075,13 @@ typedef struct RcOverride{
  */
 #define AV_CODEC_CAP_HYBRID              (1 << 19)
 
+/**
+ * This codec takes the reordered_opaque field from input AVFrames
+ * and returns it in the corresponding field in AVCodecContext after
+ * encoding.
+ */
+#define AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE (1 << 20)
+
 /**
  * Pan Scan area.
  * This specifies the area which should be displayed.
@@ -1109,17 +1121,29 @@ typedef struct AVCPBProperties {
      * Maximum bitrate of the stream, in bits per second.
      * Zero if unknown or unspecified.
      */
+#if FF_API_UNSANITIZED_BITRATES
     int max_bitrate;
+#else
+    int64_t max_bitrate;
+#endif
     /**
      * Minimum bitrate of the stream, in bits per second.
      * Zero if unknown or unspecified.
      */
+#if FF_API_UNSANITIZED_BITRATES
     int min_bitrate;
+#else
+    int64_t min_bitrate;
+#endif
     /**
      * Average bitrate of the stream, in bits per second.
      * Zero if unknown or unspecified.
      */
+#if FF_API_UNSANITIZED_BITRATES
     int avg_bitrate;
+#else
+    int64_t avg_bitrate;
+#endif
 
     /**
      * The size of the buffer to which the ratecontrol is applied, in bits.
@@ -1320,7 +1344,7 @@ enum AVPacketSideDataType {
     AV_PKT_DATA_METADATA_UPDATE,
 
     /**
-     * MPEGTS stream ID, this is required to pass the stream ID
+     * MPEGTS stream ID as uint8_t, this is required to pass the stream ID
      * information from the demuxer to the corresponding muxer.
      */
     AV_PKT_DATA_MPEGTS_STREAM_ID,
@@ -2682,7 +2706,10 @@ typedef struct AVCodecContext {
     /**
      * opaque 64-bit number (generally a PTS) that will be reordered and
      * output in AVFrame.reordered_opaque
-     * - encoding: unused
+     * - encoding: Set by libavcodec to the reordered_opaque of the input
+     *             frame corresponding to the last returned packet. Only
+     *             supported by encoders with the
+     *             AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE capability.
      * - decoding: Set by user.
      */
     int64_t reordered_opaque;
@@ -2966,6 +2993,16 @@ typedef struct AVCodecContext {
 
 #define FF_PROFILE_SBC_MSBC                         1
 
+#define FF_PROFILE_PRORES_PROXY     0
+#define FF_PROFILE_PRORES_LT        1
+#define FF_PROFILE_PRORES_STANDARD  2
+#define FF_PROFILE_PRORES_HQ        3
+#define FF_PROFILE_PRORES_4444      4
+#define FF_PROFILE_PRORES_XQ        5
+
+#define FF_PROFILE_ARIB_PROFILE_A 0
+#define FF_PROFILE_ARIB_PROFILE_C 1
+
     /**
      * level
      * - encoding: Set by user.
@@ -3318,6 +3355,14 @@ typedef struct AVCodecContext {
      * used as reference pictures).
      */
     int extra_hw_frames;
+
+    /**
+     * The percentage of damaged samples to discard a frame.
+     *
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    int discard_damaged_percentage;
 } AVCodecContext;
 
 #if FF_API_CODEC_GET_SET
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index e160ad3033464..8f0603df78238 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -112,7 +112,7 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
     av_assert0((unsigned)pkt->size <= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
     if ((unsigned)grow_by >
         INT_MAX - (pkt->size + AV_INPUT_BUFFER_PADDING_SIZE))
-        return -1;
+        return AVERROR(ENOMEM);
 
     new_size = pkt->size + grow_by + AV_INPUT_BUFFER_PADDING_SIZE;
     if (pkt->buf) {
@@ -124,7 +124,7 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
         } else {
             data_offset = pkt->data - pkt->buf->data;
             if (data_offset > INT_MAX - new_size)
-                return -1;
+                return AVERROR(ENOMEM);
         }
 
         if (new_size + data_offset > pkt->buf->size) {
@@ -616,6 +616,7 @@ int av_packet_ref(AVPacket *dst, const AVPacket *src)
         ret = packet_alloc(&dst->buf, src->size);
         if (ret < 0)
             goto fail;
+        av_assert1(!src->size || src->data);
         if (src->size)
             memcpy(dst->buf->data, src->data, src->size);
 
@@ -668,6 +669,7 @@ int av_packet_make_refcounted(AVPacket *pkt)
     ret = packet_alloc(&pkt->buf, pkt->size);
     if (ret < 0)
         return ret;
+    av_assert1(!pkt->size || pkt->data);
     if (pkt->size)
         memcpy(pkt->buf->data, pkt->data, pkt->size);
 
@@ -687,6 +689,7 @@ int av_packet_make_writable(AVPacket *pkt)
     ret = packet_alloc(&buf, pkt->size);
     if (ret < 0)
         return ret;
+    av_assert1(!pkt->size || pkt->data);
     if (pkt->size)
         memcpy(buf->data, pkt->data, pkt->size);
 
diff --git a/libavcodec/bethsoftvideo.c b/libavcodec/bethsoftvideo.c
index 274516bf4d8fa..e5a73f55a1c2a 100644
--- a/libavcodec/bethsoftvideo.c
+++ b/libavcodec/bethsoftvideo.c
@@ -109,6 +109,11 @@ static int bethsoftvid_decode_frame(AVCodecContext *avctx,
             if(yoffset >= avctx->height)
                 return AVERROR_INVALIDDATA;
             dst += vid->frame->linesize[0] * yoffset;
+        case VIDEO_P_FRAME:
+        case VIDEO_I_FRAME:
+            break;
+        default:
+            return AVERROR_INVALIDDATA;
     }
 
     // main code
diff --git a/libavcodec/bfi.c b/libavcodec/bfi.c
index 233a1d27a8f1f..a4cb002053e14 100644
--- a/libavcodec/bfi.c
+++ b/libavcodec/bfi.c
@@ -71,7 +71,7 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
         frame->key_frame = 1;
         /* Setting the palette */
         if (avctx->extradata_size > 768) {
-            av_log(NULL, AV_LOG_ERROR, "Palette is too large.\n");
+            av_log(avctx, AV_LOG_ERROR, "Palette is too large.\n");
             return AVERROR_INVALIDDATA;
         }
         pal = (uint32_t *)frame->data[1];
diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index 9c17dedcba966..6673afa78949f 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -609,7 +609,7 @@ static inline int binkb_get_value(BinkContext *c, int bundle_num)
  * @param quant_matrices quantization matrices
  * @return 0 for success, negative value in other cases
  */
-static int read_dct_coeffs(GetBitContext *gb, int32_t block[64],
+static int read_dct_coeffs(BinkContext *c, GetBitContext *gb, int32_t block[64],
                            const uint8_t *scan, int *coef_count_,
                            int coef_idx[64], int q)
 {
@@ -692,7 +692,7 @@ static int read_dct_coeffs(GetBitContext *gb, int32_t block[64],
     } else {
         quant_idx = q;
         if (quant_idx > 15U) {
-            av_log(NULL, AV_LOG_ERROR, "quant_index %d out of range\n", quant_idx);
+            av_log(c->avctx, AV_LOG_ERROR, "quant_index %d out of range\n", quant_idx);
             return AVERROR_INVALIDDATA;
         }
     }
@@ -885,7 +885,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTRA_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTRA_Q);
-                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
+                if ((quant_idx = read_dct_coeffs(c, gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
                     return quant_idx;
                 unquantize_dct_coeffs(dctblock, binkb_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_put(dst, stride, dctblock);
@@ -920,7 +920,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTER_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTER_Q);
-                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
+                if ((quant_idx = read_dct_coeffs(c, gb, dctblock, bink_scan, &coef_count, coef_idx, qp)) < 0)
                     return quant_idx;
                 unquantize_dct_coeffs(dctblock, binkb_inter_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_add(dst, stride, dctblock);
@@ -1093,7 +1093,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 case INTRA_BLOCK:
                     memset(dctblock, 0, sizeof(*dctblock) * 64);
                     dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
-                    if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                    if ((quant_idx = read_dct_coeffs(c, gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
                         return quant_idx;
                     unquantize_dct_coeffs(dctblock, bink_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                     c->binkdsp.idct_put(ublock, 8, dctblock);
@@ -1168,7 +1168,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
             case INTRA_BLOCK:
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = get_value(c, BINK_SRC_INTRA_DC);
-                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                if ((quant_idx = read_dct_coeffs(c, gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
                     return quant_idx;
                 unquantize_dct_coeffs(dctblock, bink_intra_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_put(dst, stride, dctblock);
@@ -1184,7 +1184,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                     return ret;
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = get_value(c, BINK_SRC_INTER_DC);
-                if ((quant_idx = read_dct_coeffs(gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
+                if ((quant_idx = read_dct_coeffs(c, gb, dctblock, bink_scan, &coef_count, coef_idx, -1)) < 0)
                     return quant_idx;
                 unquantize_dct_coeffs(dctblock, bink_inter_quant[quant_idx], coef_count, coef_idx, bink_scan);
                 c->binkdsp.idct_add(dst, stride, dctblock);
diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index e0f3d14eef2ca..96cf968c66e6e 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -139,7 +139,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     else if (CONFIG_BINKAUDIO_DCT_DECODER)
         ff_dct_init(&s->trans.dct, frame_len_bits, DCT_III);
     else
-        return -1;
+        av_assert0(0);
 
     s->pkt = av_packet_alloc();
     if (!s->pkt)
diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index ed528fe4af7d1..8762e5f4b2fec 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -164,7 +164,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
 
     table_size = 1 << table_nb_bits;
     if (table_nb_bits > 30)
-       return -1;
+       return AVERROR(EINVAL);
     table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
     ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
     if (table_index < 0)
@@ -306,7 +306,7 @@ int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes,
             av_log(NULL, AV_LOG_ERROR, "Too long VLC (%d) in init_vlc\n", buf[j].bits);\
             if (!(flags & INIT_VLC_USE_NEW_STATIC))                         \
                 av_free(buf);                                               \
-            return -1;                                                      \
+            return AVERROR(EINVAL);                                         \
         }                                                                   \
         GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);            \
         if (buf[j].code >= (1LL<<buf[j].bits)) {                            \
@@ -314,7 +314,7 @@ int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes,
                    "init_vlc\n", buf[j].code, i);                           \
             if (!(flags & INIT_VLC_USE_NEW_STATIC))                         \
                 av_free(buf);                                               \
-            return -1;                                                      \
+            return AVERROR(EINVAL);                                         \
         }                                                                   \
         if (flags & INIT_VLC_LE)                                            \
             buf[j].code = bitswap_32(buf[j].code);                          \
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 96b1746a754fc..2c999d3c1ddaf 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -47,9 +47,11 @@ extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
 extern const AVBitStreamFilter ff_mov2textsub_bsf;
 extern const AVBitStreamFilter ff_noise_bsf;
 extern const AVBitStreamFilter ff_null_bsf;
+extern const AVBitStreamFilter ff_prores_metadata_bsf;
 extern const AVBitStreamFilter ff_remove_extradata_bsf;
 extern const AVBitStreamFilter ff_text2movsub_bsf;
 extern const AVBitStreamFilter ff_trace_headers_bsf;
+extern const AVBitStreamFilter ff_truehd_core_bsf;
 extern const AVBitStreamFilter ff_vp9_metadata_bsf;
 extern const AVBitStreamFilter ff_vp9_raw_reorder_bsf;
 extern const AVBitStreamFilter ff_vp9_superframe_bsf;
diff --git a/libavcodec/bsf.c b/libavcodec/bsf.c
index 03841da682a72..508130760335f 100644
--- a/libavcodec/bsf.c
+++ b/libavcodec/bsf.c
@@ -350,6 +350,21 @@ static int bsf_list_filter(AVBSFContext *bsf, AVPacket *out)
     return ret;
 }
 
+/**
+ * Flush callback of the list filter: calls av_bsf_flush() on every
+ * filter in the list, then sets idx and flushed_idx back to zero.
+ */
+static void bsf_list_flush(AVBSFContext *bsf)
+{
+    BSFListContext *list = bsf->priv_data;
+    int n;
+
+    for (n = 0; n < list->nb_bsfs; n++)
+        av_bsf_flush(list->bsfs[n]);
+    list->flushed_idx = 0;
+    list->idx         = 0;
+}
+
 static void bsf_list_close(AVBSFContext *bsf)
 {
     BSFListContext *lst = bsf->priv_data;
@@ -398,6 +413,7 @@ const AVBitStreamFilter ff_list_bsf = {
         .priv_class     = &bsf_list_class,
         .init           = bsf_list_init,
         .filter         = bsf_list_filter,
+        .flush          = bsf_list_flush,
         .close          = bsf_list_close,
 };
 
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index c7fff67c06c66..5f3b354518e64 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -591,14 +591,21 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
 }
 
 
-static inline void decode_residual_chroma(AVSContext *h)
+/**
+ * Decode residuals into h->cu (cbp bit 4) and h->cv (cbp bit 5).
+ * @return 0 on success, else the first negative decode_residual_block() result
+ */
+static inline int decode_residual_chroma(AVSContext *h)
 {
+    int ret = 0;
+
     if (h->cbp & (1 << 4))
-        decode_residual_block(h, &h->gb, chroma_dec, 0,
+        ret = decode_residual_block(h, &h->gb, chroma_dec, 0,
                               ff_cavs_chroma_qp[h->qp], h->cu, h->c_stride);
-    if (h->cbp & (1 << 5))
-        decode_residual_block(h, &h->gb, chroma_dec, 0,
+    if (ret >= 0 && (h->cbp & (1 << 5)))
+        ret = decode_residual_block(h, &h->gb, chroma_dec, 0,
                               ff_cavs_chroma_qp[h->qp], h->cv, h->c_stride);
+    return ret < 0 ? ret : 0;
 }
 
 static inline int decode_residual_inter(AVSContext *h)
@@ -649,6 +656,7 @@ static int decode_mb_i(AVSContext *h, int cbp_code)
     uint8_t top[18];
     uint8_t *left = NULL;
     uint8_t *d;
+    int ret;
 
     ff_cavs_init_mb(h);
 
@@ -692,8 +700,11 @@ static int decode_mb_i(AVSContext *h, int cbp_code)
         ff_cavs_load_intra_pred_luma(h, top, &left, block);
         h->intra_pred_l[h->pred_mode_Y[scan3x3[block]]]
             (d, top, left, h->l_stride);
-        if (h->cbp & (1<<block))
-            decode_residual_block(h, gb, intra_dec, 1, h->qp, d, h->l_stride);
+        if (h->cbp & (1<<block)) {
+            ret = decode_residual_block(h, gb, intra_dec, 1, h->qp, d, h->l_stride);
+            if (ret < 0)
+                return ret;
+        }
     }
 
     /* chroma intra prediction */
@@ -703,7 +714,9 @@ static int decode_mb_i(AVSContext *h, int cbp_code)
     h->intra_pred_c[pred_mode_uv](h->cv, &h->top_border_v[h->mbx * 10],
                                   h->left_border_v, h->c_stride);
 
-    decode_residual_chroma(h);
+    ret = decode_residual_chroma(h);
+    if (ret < 0)
+        return ret;
     ff_cavs_filter(h, I_8X8);
     set_mv_intra(h);
     return 0;
diff --git a/libavcodec/cbs.c b/libavcodec/cbs.c
index ecbf57c29347d..c388be896bd61 100644
--- a/libavcodec/cbs.c
+++ b/libavcodec/cbs.c
@@ -136,14 +136,13 @@ static void cbs_unit_uninit(CodedBitstreamContext *ctx,
     unit->data_bit_padding = 0;
 }
 
-void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
-                            CodedBitstreamFragment *frag)
+void ff_cbs_fragment_reset(CodedBitstreamContext *ctx,
+                           CodedBitstreamFragment *frag)
 {
     int i;
 
     for (i = 0; i < frag->nb_units; i++)
         cbs_unit_uninit(ctx, &frag->units[i]);
-    av_freep(&frag->units);
     frag->nb_units = 0;
 
     av_buffer_unref(&frag->data_ref);
@@ -152,6 +151,15 @@ void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
     frag->data_bit_padding = 0;
 }
 
+void ff_cbs_fragment_free(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag)
+{
+    ff_cbs_fragment_reset(ctx, frag);
+
+    av_freep(&frag->units);       /* ff_cbs_fragment_reset() keeps this array */
+    frag->nb_units_allocated = 0; /* nothing is allocated any more */
+}
+
 static int cbs_read_fragment_content(CodedBitstreamContext *ctx,
                                      CodedBitstreamFragment *frag)
 {
@@ -216,8 +224,6 @@ int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
 {
     int err;
 
-    memset(frag, 0, sizeof(*frag));
-
     err = cbs_fill_fragment_data(ctx, frag, par->extradata,
                                  par->extradata_size);
     if (err < 0)
@@ -236,8 +242,6 @@ int ff_cbs_read_packet(CodedBitstreamContext *ctx,
 {
     int err;
 
-    memset(frag, 0, sizeof(*frag));
-
     if (pkt->buf) {
         frag->data_ref = av_buffer_ref(pkt->buf);
         if (!frag->data_ref)
@@ -265,8 +269,6 @@ int ff_cbs_read(CodedBitstreamContext *ctx,
 {
     int err;
 
-    memset(frag, 0, sizeof(*frag));
-
     err = cbs_fill_fragment_data(ctx, frag, data, size);
     if (err < 0)
         return err;
@@ -548,20 +550,34 @@ static int cbs_insert_unit(CodedBitstreamContext *ctx,
 {
     CodedBitstreamUnit *units;
 
-    units = av_malloc_array(frag->nb_units + 1, sizeof(*units));
-    if (!units)
-        return AVERROR(ENOMEM);
+    if (frag->nb_units < frag->nb_units_allocated) {
+        units = frag->units;
+
+        if (position < frag->nb_units)
+            memmove(units + position + 1, units + position,
+                    (frag->nb_units - position) * sizeof(*units));
+    } else {
+        units = av_malloc_array(frag->nb_units + 1, sizeof(*units));
+        if (!units)
+            return AVERROR(ENOMEM);
+
+        ++frag->nb_units_allocated;
 
-    if (position > 0)
-        memcpy(units, frag->units, position * sizeof(*units));
-    if (position < frag->nb_units)
-        memcpy(units + position + 1, frag->units + position,
-               (frag->nb_units - position) * sizeof(*units));
+        if (position > 0)
+            memcpy(units, frag->units, position * sizeof(*units));
+
+        if (position < frag->nb_units)
+            memcpy(units + position + 1, frag->units + position,
+                   (frag->nb_units - position) * sizeof(*units));
+    }
 
     memset(units + position, 0, sizeof(*units));
 
-    av_freep(&frag->units);
-    frag->units = units;
+    if (units != frag->units) {
+        av_free(frag->units);
+        frag->units = units;
+    }
+
     ++frag->nb_units;
 
     return 0;
@@ -652,16 +668,10 @@ int ff_cbs_delete_unit(CodedBitstreamContext *ctx,
 
     --frag->nb_units;
 
-    if (frag->nb_units == 0) {
-        av_freep(&frag->units);
-
-    } else {
+    if (frag->nb_units > 0)
         memmove(frag->units + position,
                 frag->units + position + 1,
                 (frag->nb_units - position) * sizeof(*frag->units));
 
-        // Don't bother reallocating the unit array.
-    }
-
     return 0;
 }
diff --git a/libavcodec/cbs.h b/libavcodec/cbs.h
index 53ac360bb1dda..967dcd1468643 100644
--- a/libavcodec/cbs.h
+++ b/libavcodec/cbs.h
@@ -145,10 +145,19 @@ typedef struct CodedBitstreamFragment {
      * and has not been decomposed.
      */
     int              nb_units;
+
+    /**
+     * Number of allocated units.
+     *
+     * Must always be >= nb_units; designed for internal use by cbs.
+     */
+    int              nb_units_allocated;
+
     /**
-     * Pointer to an array of units of length nb_units.
+     * Pointer to an array of units of length nb_units_allocated.
+     * Only the first nb_units are valid.
      *
-     * Must be NULL if nb_units is zero.
+     * Must be NULL if nb_units_allocated is zero.
      */
     CodedBitstreamUnit *units;
 } CodedBitstreamFragment;
@@ -231,6 +240,9 @@ void ff_cbs_close(CodedBitstreamContext **ctx);
  * This also updates the internal state, so will need to be called for
  * codecs with extradata to read parameter sets necessary for further
  * parsing even if the fragment itself is not desired.
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
  */
 int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
                           CodedBitstreamFragment *frag,
@@ -243,6 +255,9 @@ int ff_cbs_read_extradata(CodedBitstreamContext *ctx,
  * This also updates the internal state of the coded bitstream context
  * with any persistent data from the fragment which may be required to
  * read following fragments (e.g. parameter sets).
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
  */
 int ff_cbs_read_packet(CodedBitstreamContext *ctx,
                        CodedBitstreamFragment *frag,
@@ -255,6 +270,9 @@ int ff_cbs_read_packet(CodedBitstreamContext *ctx,
  * This also updates the internal state of the coded bitstream context
  * with any persistent data from the fragment which may be required to
  * read following fragments (e.g. parameter sets).
+ *
+ * The fragment must have been zeroed or reset via ff_cbs_fragment_reset
+ * before use.
  */
 int ff_cbs_read(CodedBitstreamContext *ctx,
                 CodedBitstreamFragment *frag,
@@ -294,11 +312,18 @@ int ff_cbs_write_packet(CodedBitstreamContext *ctx,
 
 
 /**
- * Free all allocated memory in a fragment.
+ * Free the units contained in a fragment as well as the fragment's
+ * own data buffer, but not the units array itself.
  */
-void ff_cbs_fragment_uninit(CodedBitstreamContext *ctx,
-                            CodedBitstreamFragment *frag);
+void ff_cbs_fragment_reset(CodedBitstreamContext *ctx,
+                           CodedBitstreamFragment *frag);
 
+/**
+ * Free the units array of a fragment in addition to what
+ * ff_cbs_fragment_reset does.
+ */
+void ff_cbs_fragment_free(CodedBitstreamContext *ctx,
+                          CodedBitstreamFragment *frag);
 
 /**
  * Allocate a new internal content buffer of the given size in the unit.
diff --git a/libavcodec/cbs_av1.c b/libavcodec/cbs_av1.c
index 9bac9dde0912a..02f168b58d538 100644
--- a/libavcodec/cbs_av1.c
+++ b/libavcodec/cbs_av1.c
@@ -29,45 +29,67 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
                              const char *name, uint32_t *write_to,
                              uint32_t range_min, uint32_t range_max)
 {
-    uint32_t value;
-    int position, zeroes, i, j;
-    char bits[65];
+    uint32_t zeroes, bits_value, value;
+    int position;
 
     if (ctx->trace_enable)
         position = get_bits_count(gbc);
 
-    zeroes = i = 0;
+    zeroes = 0;
     while (1) {
-        if (get_bits_left(gbc) < zeroes + 1) {
+        if (get_bits_left(gbc) < 1) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
                    "%s: bitstream ended.\n", name);
             return AVERROR_INVALIDDATA;
         }
 
-        if (get_bits1(gbc)) {
-            bits[i++] = '1';
+        if (get_bits1(gbc))
             break;
-        } else {
-            bits[i++] = '0';
-            ++zeroes;
-        }
+        ++zeroes;
     }
 
     if (zeroes >= 32) {
         value = MAX_UINT_BITS(32);
     } else {
-        value = get_bits_long(gbc, zeroes);
-
-        for (j = 0; j < zeroes; j++)
-            bits[i++] = (value >> (zeroes - j - 1) & 1) ? '1' : '0';
+        if (get_bits_left(gbc) < zeroes) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
+                   "%s: bitstream ended.\n", name);
+            return AVERROR_INVALIDDATA;
+        }
 
-        value += (1 << zeroes) - 1;
+        bits_value = get_bits_long(gbc, zeroes);
+        value = bits_value + (UINT32_C(1) << zeroes) - 1;
     }
 
     if (ctx->trace_enable) {
+        char bits[65];
+        int i, j, k;
+
+        if (zeroes >= 32) {
+            while (zeroes > 32) {
+                k = FFMIN(zeroes - 32, 32);
+                for (i = 0; i < k; i++)
+                    bits[i] = '0';
+                bits[i] = 0;
+                ff_cbs_trace_syntax_element(ctx, position, name,
+                                            NULL, bits, 0);
+                zeroes -= k;
+                position += k;
+            }
+        }
+
+        for (i = 0; i < zeroes; i++)
+            bits[i] = '0';
+        bits[i++] = '1';
+
+        if (zeroes < 32) {
+            for (j = 0; j < zeroes; j++)
+                bits[i++] = (bits_value >> (zeroes - j - 1) & 1) ? '1' : '0';
+        }
+
         bits[i] = 0;
-        ff_cbs_trace_syntax_element(ctx, position, name, NULL,
-                                    bits, value);
+        ff_cbs_trace_syntax_element(ctx, position, name,
+                                    NULL, bits, value);
     }
 
     if (value < range_min || value > range_max) {
@@ -189,30 +211,26 @@ static int cbs_av1_read_su(CodedBitstreamContext *ctx, GetBitContext *gbc,
                            int width, const char *name,
                            const int *subscripts, int32_t *write_to)
 {
-    uint32_t magnitude;
-    int position, sign;
+    int position;
     int32_t value;
 
     if (ctx->trace_enable)
         position = get_bits_count(gbc);
 
-    if (get_bits_left(gbc) < width + 1) {
+    if (get_bits_left(gbc) < width) {
         av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid signed value at "
                "%s: bitstream ended.\n", name);
         return AVERROR_INVALIDDATA;
     }
 
-    magnitude = get_bits(gbc, width);
-    sign      = get_bits1(gbc);
-    value     = sign ? -(int32_t)magnitude : magnitude;
+    value = get_sbits(gbc, width);
 
     if (ctx->trace_enable) {
         char bits[33];
         int i;
         for (i = 0; i < width; i++)
-            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
-        bits[i] = sign ? '1' : '0';
-        bits[i + 1] = 0;
+            bits[i] = value & (1 << (width - i - 1)) ? '1' : '0';
+        bits[i] = 0;
 
         ff_cbs_trace_syntax_element(ctx, position,
                                     name, subscripts, bits, value);
@@ -226,29 +244,21 @@ static int cbs_av1_write_su(CodedBitstreamContext *ctx, PutBitContext *pbc,
                             int width, const char *name,
                             const int *subscripts, int32_t value)
 {
-    uint32_t magnitude;
-    int sign;
-
-    if (put_bits_left(pbc) < width + 1)
+    if (put_bits_left(pbc) < width)
         return AVERROR(ENOSPC);
 
-    sign      = value < 0;
-    magnitude = sign ? -value : value;
-
     if (ctx->trace_enable) {
         char bits[33];
         int i;
         for (i = 0; i < width; i++)
-            bits[i] = magnitude >> (width - i - 1) & 1 ? '1' : '0';
-        bits[i] = sign ? '1' : '0';
-        bits[i + 1] = 0;
+            bits[i] = value & (1 << (width - i - 1)) ? '1' : '0';
+        bits[i] = 0;
 
         ff_cbs_trace_syntax_element(ctx, put_bits_count(pbc),
                                     name, subscripts, bits, value);
     }
 
-    put_bits(pbc, width, magnitude);
-    put_bits(pbc, 1, sign);
+    put_sbits(pbc, width, value);
 
     return 0;
 }
@@ -785,7 +795,7 @@ static int cbs_av1_split_fragment(CodedBitstreamContext *ctx,
 
     if (INT_MAX / 8 < size) {
         av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid fragment: "
-               "too large (%zu bytes).\n", size);
+               "too large (%"SIZE_SPECIFIER" bytes).\n", size);
         err = AVERROR_INVALIDDATA;
         goto fail;
     }
@@ -800,23 +810,19 @@ static int cbs_av1_split_fragment(CodedBitstreamContext *ctx,
         if (err < 0)
             goto fail;
 
-        if (!header.obu_has_size_field) {
-            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU for raw "
-                   "stream: size field must be present.\n");
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-
         if (get_bits_left(&gbc) < 8) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU: fragment "
-                   "too short (%zu bytes).\n", size);
+                   "too short (%"SIZE_SPECIFIER" bytes).\n", size);
             err = AVERROR_INVALIDDATA;
             goto fail;
         }
 
-        err = cbs_av1_read_leb128(ctx, &gbc, "obu_size", &obu_size);
-        if (err < 0)
-            goto fail;
+        if (header.obu_has_size_field) {
+            err = cbs_av1_read_leb128(ctx, &gbc, "obu_size", &obu_size);
+            if (err < 0)
+                goto fail;
+        } else
+            obu_size = size - 1 - header.obu_extension_flag;
 
         pos = get_bits_count(&gbc);
         av_assert0(pos % 8 == 0 && pos / 8 <= size);
@@ -825,7 +831,7 @@ static int cbs_av1_split_fragment(CodedBitstreamContext *ctx,
 
         if (size < obu_length) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
-                   "%"PRIu64", but only %zu bytes remaining in fragment.\n",
+                   "%"PRIu64", but only %"SIZE_SPECIFIER" bytes remaining in fragment.\n",
                    obu_length, size);
             err = AVERROR_INVALIDDATA;
             goto fail;
@@ -940,7 +946,7 @@ static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
     } else {
         if (unit->data_size < 1 + obu->header.obu_extension_flag) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid OBU length: "
-                   "unit too short (%zu).\n", unit->data_size);
+                   "unit too short (%"SIZE_SPECIFIER").\n", unit->data_size);
             return AVERROR_INVALIDDATA;
         }
         obu->obu_size = unit->data_size - 1 - obu->header.obu_extension_flag;
@@ -996,7 +1002,10 @@ static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
     case AV1_OBU_REDUNDANT_FRAME_HEADER:
         {
             err = cbs_av1_read_frame_header_obu(ctx, &gbc,
-                                                &obu->obu.frame_header);
+                                                &obu->obu.frame_header,
+                                                obu->header.obu_type ==
+                                                AV1_OBU_REDUNDANT_FRAME_HEADER,
+                                                unit->data_ref);
             if (err < 0)
                 return err;
         }
@@ -1016,7 +1025,8 @@ static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
         break;
     case AV1_OBU_FRAME:
         {
-            err = cbs_av1_read_frame_obu(ctx, &gbc, &obu->obu.frame);
+            err = cbs_av1_read_frame_obu(ctx, &gbc, &obu->obu.frame,
+                                         unit->data_ref);
             if (err < 0)
                 return err;
 
@@ -1057,8 +1067,12 @@ static int cbs_av1_read_unit(CodedBitstreamContext *ctx,
     if (obu->obu_size > 0 &&
         obu->header.obu_type != AV1_OBU_TILE_GROUP &&
         obu->header.obu_type != AV1_OBU_FRAME) {
-        err = cbs_av1_read_trailing_bits(ctx, &gbc,
-                                         obu->obu_size * 8 + start_pos - end_pos);
+        int nb_bits = obu->obu_size * 8 + start_pos - end_pos;
+
+        if (nb_bits <= 0)
+            return AVERROR_INVALIDDATA;
+
+        err = cbs_av1_read_trailing_bits(ctx, &gbc, nb_bits);
         if (err < 0)
             return err;
     }
@@ -1124,7 +1138,10 @@ static int cbs_av1_write_obu(CodedBitstreamContext *ctx,
     case AV1_OBU_REDUNDANT_FRAME_HEADER:
         {
             err = cbs_av1_write_frame_header_obu(ctx, pbc,
-                                                 &obu->obu.frame_header);
+                                                 &obu->obu.frame_header,
+                                                 obu->header.obu_type ==
+                                                 AV1_OBU_REDUNDANT_FRAME_HEADER,
+                                                 NULL);
             if (err < 0)
                 return err;
         }
@@ -1141,7 +1158,7 @@ static int cbs_av1_write_obu(CodedBitstreamContext *ctx,
         break;
     case AV1_OBU_FRAME:
         {
-            err = cbs_av1_write_frame_obu(ctx, pbc, &obu->obu.frame);
+            err = cbs_av1_write_frame_obu(ctx, pbc, &obu->obu.frame, NULL);
             if (err < 0)
                 return err;
 
@@ -1179,7 +1196,7 @@ static int cbs_av1_write_obu(CodedBitstreamContext *ctx,
         if (err < 0)
             return err;
         end_pos = put_bits_count(pbc);
-        obu->obu_size = (end_pos - start_pos + 7) / 8;
+        obu->obu_size = header_size = (end_pos - start_pos + 7) / 8;
     } else {
         // Empty OBU.
         obu->obu_size = 0;
@@ -1235,7 +1252,7 @@ static int cbs_av1_write_unit(CodedBitstreamContext *ctx,
         if (err < 0) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
                    "sufficiently large write buffer (last attempt "
-                   "%zu bytes).\n", priv->write_buffer_size);
+                   "%"SIZE_SPECIFIER" bytes).\n", priv->write_buffer_size);
             return err;
         }
     }
@@ -1302,6 +1319,7 @@ static void cbs_av1_close(CodedBitstreamContext *ctx)
     CodedBitstreamAV1Context *priv = ctx->priv_data;
 
     av_buffer_unref(&priv->sequence_header_ref);
+    av_buffer_unref(&priv->frame_header_ref);
 
     av_freep(&priv->write_buffer);
 }
diff --git a/libavcodec/cbs_av1.h b/libavcodec/cbs_av1.h
index 0d7fd761f1ead..71ceff9427354 100644
--- a/libavcodec/cbs_av1.h
+++ b/libavcodec/cbs_av1.h
@@ -87,8 +87,8 @@ typedef struct AV1RawSequenceHeader {
     uint8_t  seq_level_idx[AV1_MAX_OPERATING_POINTS];
     uint8_t  seq_tier[AV1_MAX_OPERATING_POINTS];
     uint8_t  decoder_model_present_for_this_op[AV1_MAX_OPERATING_POINTS];
-    uint8_t  decoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
-    uint8_t  encoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint32_t decoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
+    uint32_t encoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
     uint8_t  low_delay_mode_flag[AV1_MAX_OPERATING_POINTS];
     uint8_t  initial_display_delay_present_for_this_op[AV1_MAX_OPERATING_POINTS];
     uint8_t  initial_display_delay_minus_1[AV1_MAX_OPERATING_POINTS];
@@ -161,7 +161,7 @@ typedef struct AV1RawFrameHeader {
     uint8_t  render_width_minus_1;
     uint8_t  render_height_minus_1;
 
-    uint8_t found_ref;
+    uint8_t found_ref[AV1_REFS_PER_FRAME];
 
     uint8_t refresh_frame_flags;
     uint8_t allow_intrabc;
@@ -170,7 +170,7 @@ typedef struct AV1RawFrameHeader {
     uint8_t last_frame_idx;
     uint8_t golden_frame_idx;
     int8_t  ref_frame_idx[AV1_REFS_PER_FRAME];
-    uint8_t delta_frame_id_minus1;
+    uint32_t delta_frame_id_minus1[AV1_REFS_PER_FRAME];
 
     uint8_t allow_high_precision_mv;
     uint8_t is_filter_switchable;
@@ -210,7 +210,7 @@ typedef struct AV1RawFrameHeader {
     uint8_t segmentation_temporal_update;
     uint8_t segmentation_update_data;
     uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
-    uint8_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
+    int16_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
 
     uint8_t delta_q_present;
     uint8_t delta_q_res;
@@ -399,7 +399,10 @@ typedef struct CodedBitstreamAV1Context {
     AV1RawSequenceHeader *sequence_header;
     AVBufferRef          *sequence_header_ref;
 
-    int seen_frame_header;
+    int     seen_frame_header;
+    AVBufferRef *frame_header_ref;
+    uint8_t     *frame_header;
+    size_t       frame_header_size;
 
     int temporal_id;
     int spatial_id;
diff --git a/libavcodec/cbs_av1_syntax_template.c b/libavcodec/cbs_av1_syntax_template.c
index 84ab2973ab072..48f4fab514a51 100644
--- a/libavcodec/cbs_av1_syntax_template.c
+++ b/libavcodec/cbs_av1_syntax_template.c
@@ -417,8 +417,8 @@ static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
     int i, err;
 
     for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
-        flag(found_ref);
-        if (current->found_ref) {
+        flags(found_ref[i], 1, i);
+        if (current->found_ref[i]) {
             AV1ReferenceFrameState *ref =
                 &priv->ref[current->ref_frame_idx[i]];
 
@@ -439,7 +439,7 @@ static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
         }
     }
 
-    if (current->found_ref == 0) {
+    if (i >= AV1_REFS_PER_FRAME) {
         CHECK(FUNC(frame_size)(ctx, rw, current));
         CHECK(FUNC(render_size)(ctx, rw, current));
     } else {
@@ -1323,8 +1323,8 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
             if (!current->frame_refs_short_signaling)
                 fbs(3, ref_frame_idx[i], 1, i);
             if (seq->frame_id_numbers_present_flag) {
-                fb(seq->delta_frame_id_length_minus_2 + 2,
-                   delta_frame_id_minus1);
+                fbs(seq->delta_frame_id_length_minus_2 + 2,
+                    delta_frame_id_minus1[i], 1, i);
             }
         }
 
@@ -1463,24 +1463,90 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
 }
 
 static int FUNC(frame_header_obu)(CodedBitstreamContext *ctx, RWContext *rw,
-                                  AV1RawFrameHeader *current)
+                                  AV1RawFrameHeader *current, int redundant,
+                                  AVBufferRef *rw_buffer_ref)
 {
     CodedBitstreamAV1Context *priv = ctx->priv_data;
-    int err;
-
-    HEADER("Frame Header");
+    int start_pos, fh_bits, fh_bytes, err;
+    uint8_t *fh_start;
 
     if (priv->seen_frame_header) {
-        // Nothing to do.
+        if (!redundant) {
+            av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid repeated "
+                   "frame header OBU.\n");
+            return AVERROR_INVALIDDATA;
+        } else {
+            GetBitContext fh;
+            size_t i, b;
+            uint32_t val;
+
+            HEADER("Redundant Frame Header");
+
+            av_assert0(priv->frame_header_ref && priv->frame_header);
+
+            init_get_bits(&fh, priv->frame_header,
+                          priv->frame_header_size);
+            for (i = 0; i < priv->frame_header_size; i += 8) {
+                b = FFMIN(priv->frame_header_size - i, 8);
+                val = get_bits(&fh, b);
+                xf(b, frame_header_copy[i],
+                   val, val, val, 1, i / 8);
+            }
+        }
     } else {
+        if (redundant)
+            HEADER("Redundant Frame Header (used as Frame Header)");
+        else
+            HEADER("Frame Header");
+
         priv->seen_frame_header = 1;
 
+#ifdef READ
+        start_pos = get_bits_count(rw);
+#else
+        start_pos = put_bits_count(rw);
+#endif
+
         CHECK(FUNC(uncompressed_header)(ctx, rw, current));
 
         if (current->show_existing_frame) {
             priv->seen_frame_header = 0;
         } else {
             priv->seen_frame_header = 1;
+
+            av_buffer_unref(&priv->frame_header_ref);
+
+#ifdef READ
+            fh_bits  = get_bits_count(rw) - start_pos;
+            fh_start = (uint8_t*)rw->buffer + start_pos / 8;
+#else
+            // Need to flush the bitwriter so that we can copy its output,
+            // but use a copy so we don't affect the caller's structure.
+            {
+                PutBitContext tmp = *rw;
+                flush_put_bits(&tmp);
+            }
+
+            fh_bits  = put_bits_count(rw) - start_pos;
+            fh_start = rw->buf + start_pos / 8;
+#endif
+            fh_bytes = (fh_bits + 7) / 8;
+
+            priv->frame_header_size = fh_bits;
+
+            if (rw_buffer_ref) {
+                priv->frame_header_ref = av_buffer_ref(rw_buffer_ref);
+                if (!priv->frame_header_ref)
+                    return AVERROR(ENOMEM);
+                priv->frame_header = fh_start;
+            } else {
+                priv->frame_header_ref =
+                    av_buffer_alloc(fh_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
+                if (!priv->frame_header_ref)
+                    return AVERROR(ENOMEM);
+                priv->frame_header = priv->frame_header_ref->data;
+                memcpy(priv->frame_header, fh_start, fh_bytes);
+            }
         }
     }
 
@@ -1524,11 +1590,13 @@ static int FUNC(tile_group_obu)(CodedBitstreamContext *ctx, RWContext *rw,
 }
 
 static int FUNC(frame_obu)(CodedBitstreamContext *ctx, RWContext *rw,
-                           AV1RawFrame *current)
+                           AV1RawFrame *current,
+                           AVBufferRef *rw_buffer_ref)
 {
     int err;
 
-    CHECK(FUNC(frame_header_obu)(ctx, rw, &current->header));
+    CHECK(FUNC(frame_header_obu)(ctx, rw, &current->header,
+                                 0, rw_buffer_ref));
 
     CHECK(FUNC(byte_alignment)(ctx, rw));
 
diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
index 4b31601c0ff53..e74f8dce810c2 100644
--- a/libavcodec/cbs_h2645.c
+++ b/libavcodec/cbs_h2645.c
@@ -319,7 +319,8 @@ static int cbs_h2645_read_more_rbsp_data(GetBitContext *gbc)
 #define byte_alignment(rw) (get_bits_count(rw) % 8)
 
 #define allocate(name, size) do { \
-        name ## _ref = av_buffer_allocz(size); \
+        name ## _ref = av_buffer_allocz(size + \
+                                        AV_INPUT_BUFFER_PADDING_SIZE); \
         if (!name ## _ref) \
             return AVERROR(ENOMEM); \
         name = name ## _ref->data; \
@@ -489,8 +490,23 @@ static void cbs_h265_free_slice(void *unit, uint8_t *content)
 static void cbs_h265_free_sei_payload(H265RawSEIPayload *payload)
 {
     switch (payload->payload_type) {
+    case HEVC_SEI_TYPE_BUFFERING_PERIOD:
+    case HEVC_SEI_TYPE_PICTURE_TIMING:
+    case HEVC_SEI_TYPE_PAN_SCAN_RECT:
+    case HEVC_SEI_TYPE_RECOVERY_POINT:
+    case HEVC_SEI_TYPE_DISPLAY_ORIENTATION:
+    case HEVC_SEI_TYPE_ACTIVE_PARAMETER_SETS:
+    case HEVC_SEI_TYPE_DECODED_PICTURE_HASH:
+    case HEVC_SEI_TYPE_TIME_CODE:
     case HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO:
     case HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO:
+    case HEVC_SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS:
+        break;
+    case HEVC_SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35:
+        av_buffer_unref(&payload->payload.user_data_registered.data_ref);
+        break;
+    case HEVC_SEI_TYPE_USER_DATA_UNREGISTERED:
+        av_buffer_unref(&payload->payload.user_data_unregistered.data_ref);
         break;
     default:
         av_buffer_unref(&payload->payload.other.data_ref);
@@ -515,26 +531,21 @@ static int cbs_h2645_fragment_add_nals(CodedBitstreamContext *ctx,
 
     for (i = 0; i < packet->nb_nals; i++) {
         const H2645NAL *nal = &packet->nals[i];
+        AVBufferRef *ref;
         size_t size = nal->size;
-        uint8_t *data;
 
         // Remove trailing zeroes.
         while (size > 0 && nal->data[size - 1] == 0)
             --size;
         av_assert0(size > 0);
 
-        data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
-        if (!data)
-            return AVERROR(ENOMEM);
-        memcpy(data, nal->data, size);
-        memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        ref = (nal->data == nal->raw_data) ? frag->data_ref
+                                           : packet->rbsp.rbsp_buffer_ref;
 
         err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
-                                      data, size, NULL);
-        if (err < 0) {
-            av_freep(&data);
+                            (uint8_t*)nal->data, size, ref);
+        if (err < 0)
             return err;
-        }
     }
 
     return 0;
@@ -590,7 +601,7 @@ static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
 
         err = ff_h2645_packet_split(&priv->read_packet,
                                     frag->data + start, end - start,
-                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1);
+                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1, 1);
         if (err < 0) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split AVCC SPS array.\n");
             return err;
@@ -614,7 +625,7 @@ static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
 
         err = ff_h2645_packet_split(&priv->read_packet,
                                     frag->data + start, end - start,
-                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1);
+                                    ctx->log_ctx, 1, 2, AV_CODEC_ID_H264, 1, 1);
         if (err < 0) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split AVCC PPS array.\n");
             return err;
@@ -668,7 +679,7 @@ static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
 
             err = ff_h2645_packet_split(&priv->read_packet,
                                         frag->data + start, end - start,
-                                        ctx->log_ctx, 1, 2, AV_CODEC_ID_HEVC, 1);
+                                        ctx->log_ctx, 1, 2, AV_CODEC_ID_HEVC, 1, 1);
             if (err < 0) {
                 av_log(ctx->log_ctx, AV_LOG_ERROR, "Failed to split "
                        "HVCC array %d (%d NAL units of type %d).\n",
@@ -687,7 +698,7 @@ static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
                                     frag->data, frag->data_size,
                                     ctx->log_ctx,
                                     priv->mp4, priv->nal_length_size,
-                                    codec_id, 1);
+                                    codec_id, 1, 1);
         if (err < 0)
             return err;
 
@@ -872,7 +883,21 @@ static int cbs_h264_read_nal_unit(CodedBitstreamContext *ctx,
         break;
 
     case H264_NAL_END_SEQUENCE:
-        return 0;
+    case H264_NAL_END_STREAM:
+        {
+            err = ff_cbs_alloc_unit_content(ctx, unit,
+                                            sizeof(H264RawNALUnitHeader),
+                                            NULL);
+            if (err < 0)
+                return err;
+
+            err = (unit->type == H264_NAL_END_SEQUENCE ?
+                   cbs_h264_read_end_of_sequence :
+                   cbs_h264_read_end_of_stream)(ctx, &gbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
 
     default:
         return AVERROR(ENOSYS);
@@ -1014,6 +1039,7 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
         break;
 
     case HEVC_NAL_SEI_PREFIX:
+    case HEVC_NAL_SEI_SUFFIX:
         {
             err = ff_cbs_alloc_unit_content(ctx, unit, sizeof(H265RawSEI),
                                             &cbs_h265_free_sei);
@@ -1021,7 +1047,8 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
             if (err < 0)
                 return err;
 
-            err = cbs_h265_read_sei(ctx, &gbc, unit->content);
+            err = cbs_h265_read_sei(ctx, &gbc, unit->content,
+                                    unit->type == HEVC_NAL_SEI_PREFIX);
 
             if (err < 0)
                 return err;
@@ -1035,6 +1062,93 @@ static int cbs_h265_read_nal_unit(CodedBitstreamContext *ctx,
     return 0;
 }
 
+/**
+ * Append slice data, starting at a given bit offset, to a bit writer.
+ *
+ * If the writer is byte-aligned once any partial first byte has been
+ * written, the remaining bytes are copied with memcpy().  Otherwise
+ * they go through put_bits(), and the last byte is written only up to
+ * and including its lowest set bit (the rbsp_stop_one_bit) before the
+ * writer is padded with zero bits to a byte boundary.  Data starting
+ * inside its own last byte always takes that last-byte path.
+ *
+ * @param ctx            CBS context (not referenced by this function).
+ * @param pbc            Bit writer to append to.
+ * @param data           Buffer holding the slice data.
+ * @param data_size      Size of data in bytes.
+ * @param data_bit_start Bit offset into data at which copying starts;
+ *                       must be non-negative and lie inside data.
+ * @return 0 on success, AVERROR(ENOSPC) if pbc lacks room for
+ *         data_size bytes plus one more byte.
+ */
+static int cbs_h2645_write_slice_data(CodedBitstreamContext *ctx,
+                                      PutBitContext *pbc, const uint8_t *data,
+                                      size_t data_size, int data_bit_start)
+{
+    size_t rest;
+    const uint8_t *pos;
+    int space;
+
+    // Compare in bytes: 8 * data_size could wrap around in size_t.
+    av_assert0(data_bit_start >= 0 &&
+               data_size > data_bit_start / 8);
+
+    // Same test as data_size * 8 + 8 > space, but without the
+    // multiplication and with a negative space kept signed.
+    space = put_bits_left(pbc);
+    if (space < 8 || data_size > (space - 8) / 8)
+        return AVERROR(ENOSPC);
+
+    rest = data_size - (data_bit_start + 7) / 8;
+    pos  = data + data_bit_start / 8;
+
+    if (!rest)
+        goto rbsp_stop_one_bit;
+
+    // First copy the remaining bits of the first byte.
+    // rest is nonzero here, so this is not the last byte and the
+    // rbsp_stop_one_bit cannot be copied by accident.
+    if (data_bit_start % 8)
+        put_bits(pbc, 8 - data_bit_start % 8,
+                 *pos++ & MAX_UINT_BITS(8 - data_bit_start % 8));
+
+    if (put_bits_count(pbc) % 8 == 0) {
+        // If the writer is aligned at this point,
+        // memcpy can be used to improve performance.
+        // This happens normally for CABAC.
+        flush_put_bits(pbc);
+        memcpy(put_bits_ptr(pbc), pos, rest);
+        skip_put_bytes(pbc, rest);
+    } else {
+        // If not, we have to copy manually.
+        // rbsp_stop_one_bit forces us to special-case
+        // the last byte.
+        uint8_t temp;
+        int i;
+
+        for (; rest > 4; rest -= 4, pos += 4)
+            put_bits32(pbc, AV_RB32(pos));
+
+        for (; rest > 1; rest--, pos++)
+            put_bits(pbc, 8, *pos);
+
+    rbsp_stop_one_bit:
+        // With rest == 0 the data starts inside the last byte,
+        // so only its bits from data_bit_start onwards count.
+        temp = rest ? *pos : *pos & MAX_UINT_BITS(8 - data_bit_start % 8);
+
+        av_assert0(temp);
+        i = ff_ctz(*pos);
+        temp = temp >> i;
+        i = rest ? (8 - i) : (8 - i - data_bit_start % 8);
+        put_bits(pbc, i, temp);
+        if (put_bits_count(pbc) % 8)
+            put_bits(pbc, 8 - put_bits_count(pbc) % 8, 0);
+    }
+
+    return 0;
+}
+
 static int cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
                                    CodedBitstreamUnit *unit,
                                    PutBitContext *pbc)
@@ -1085,37 +1170,17 @@ static int cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
     case H264_NAL_AUXILIARY_SLICE:
         {
             H264RawSlice *slice = unit->content;
-            GetBitContext gbc;
-            int bits_left, end, zeroes;
 
             err = cbs_h264_write_slice_header(ctx, pbc, &slice->header);
             if (err < 0)
                 return err;
 
             if (slice->data) {
-                if (slice->data_size * 8 + 8 > put_bits_left(pbc))
-                    return AVERROR(ENOSPC);
-
-                init_get_bits(&gbc, slice->data, slice->data_size * 8);
-                skip_bits_long(&gbc, slice->data_bit_start);
-
-                // Copy in two-byte blocks, but stop before copying the
-                // rbsp_stop_one_bit in the final byte.
-                while (get_bits_left(&gbc) > 23)
-                    put_bits(pbc, 16, get_bits(&gbc, 16));
-
-                bits_left = get_bits_left(&gbc);
-                end = get_bits(&gbc, bits_left);
-
-                // rbsp_stop_one_bit must be present here.
-                av_assert0(end);
-                zeroes = ff_ctz(end);
-                if (bits_left > zeroes + 1)
-                    put_bits(pbc, bits_left - zeroes - 1,
-                             end >> (zeroes + 1));
-                put_bits(pbc, 1, 1);
-                while (put_bits_count(pbc) % 8 != 0)
-                    put_bits(pbc, 1, 0);
+                err = cbs_h2645_write_slice_data(ctx, pbc, slice->data,
+                                                 slice->data_size,
+                                                 slice->data_bit_start);
+                if (err < 0)
+                    return err;
             } else {
                 // No slice data - that was just the header.
                 // (Bitstream may be unaligned!)
@@ -1147,6 +1212,22 @@ static int cbs_h264_write_nal_unit(CodedBitstreamContext *ctx,
         }
         break;
 
+    case H264_NAL_END_SEQUENCE:
+        {
+            err = cbs_h264_write_end_of_sequence(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
+    case H264_NAL_END_STREAM:
+        {
+            err = cbs_h264_write_end_of_stream(ctx, pbc, unit->content);
+            if (err < 0)
+                return err;
+        }
+        break;
+
     default:
         av_log(ctx->log_ctx, AV_LOG_ERROR, "Write unimplemented for "
                "NAL unit type %"PRIu32".\n", unit->type);
@@ -1223,37 +1304,17 @@ static int cbs_h265_write_nal_unit(CodedBitstreamContext *ctx,
     case HEVC_NAL_CRA_NUT:
         {
             H265RawSlice *slice = unit->content;
-            GetBitContext gbc;
-            int bits_left, end, zeroes;
 
             err = cbs_h265_write_slice_segment_header(ctx, pbc, &slice->header);
             if (err < 0)
                 return err;
 
             if (slice->data) {
-                if (slice->data_size * 8 + 8 > put_bits_left(pbc))
-                    return AVERROR(ENOSPC);
-
-                init_get_bits(&gbc, slice->data, slice->data_size * 8);
-                skip_bits_long(&gbc, slice->data_bit_start);
-
-                // Copy in two-byte blocks, but stop before copying the
-                // rbsp_stop_one_bit in the final byte.
-                while (get_bits_left(&gbc) > 23)
-                    put_bits(pbc, 16, get_bits(&gbc, 16));
-
-                bits_left = get_bits_left(&gbc);
-                end = get_bits(&gbc, bits_left);
-
-                // rbsp_stop_one_bit must be present here.
-                av_assert0(end);
-                zeroes = ff_ctz(end);
-                if (bits_left > zeroes + 1)
-                    put_bits(pbc, bits_left - zeroes - 1,
-                             end >> (zeroes + 1));
-                put_bits(pbc, 1, 1);
-                while (put_bits_count(pbc) % 8 != 0)
-                    put_bits(pbc, 1, 0);
+                err = cbs_h2645_write_slice_data(ctx, pbc, slice->data,
+                                                 slice->data_size,
+                                                 slice->data_bit_start);
+                if (err < 0)
+                    return err;
             } else {
                 // No slice data - that was just the header.
             }
@@ -1269,8 +1330,10 @@ static int cbs_h265_write_nal_unit(CodedBitstreamContext *ctx,
         break;
 
     case HEVC_NAL_SEI_PREFIX:
+    case HEVC_NAL_SEI_SUFFIX:
         {
-            err = cbs_h265_write_sei(ctx, pbc, unit->content);
+            err = cbs_h265_write_sei(ctx, pbc, unit->content,
+                                     unit->type == HEVC_NAL_SEI_PREFIX);
 
             if (err < 0)
                 return err;
diff --git a/libavcodec/cbs_h264_syntax_template.c b/libavcodec/cbs_h264_syntax_template.c
index 1c8d7d5eae2be..4da4c5da67360 100644
--- a/libavcodec/cbs_h264_syntax_template.c
+++ b/libavcodec/cbs_h264_syntax_template.c
@@ -513,6 +513,8 @@ static int FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
     const H264RawSPS *sps;
     int err, i, length;
 
+    HEADER("Buffering Period");
+
     ue(seq_parameter_set_id, 0, 31);
 
     sps = h264->sps[current->seq_parameter_set_id];
@@ -551,10 +553,9 @@ static int FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
 }
 
 static int FUNC(sei_pic_timestamp)(CodedBitstreamContext *ctx, RWContext *rw,
-                                   H264RawSEIPicTimestamp *current)
+                                   H264RawSEIPicTimestamp *current,
+                                   const H264RawSPS *sps)
 {
-    CodedBitstreamH264Context *h264 = ctx->priv_data;
-    const H264RawSPS *sps;
     uint8_t time_offset_length;
     int err;
 
@@ -583,13 +584,6 @@ static int FUNC(sei_pic_timestamp)(CodedBitstreamContext *ctx, RWContext *rw,
         }
     }
 
-    sps = h264->active_sps;
-    if (!sps) {
-        av_log(ctx->log_ctx, AV_LOG_ERROR,
-               "No active SPS for pic_timestamp.\n");
-        return AVERROR_INVALIDDATA;
-    }
-
     if (sps->vui.nal_hrd_parameters_present_flag)
         time_offset_length = sps->vui.nal_hrd_parameters.time_offset_length;
     else if (sps->vui.vcl_hrd_parameters_present_flag)
@@ -613,6 +607,8 @@ static int FUNC(sei_pic_timing)(CodedBitstreamContext *ctx, RWContext *rw,
     const H264RawSPS *sps;
     int err;
 
+    HEADER("Picture Timing");
+
     sps = h264->active_sps;
     if (!sps) {
         // If there is exactly one possible SPS but it is not yet active
@@ -669,7 +665,8 @@ static int FUNC(sei_pic_timing)(CodedBitstreamContext *ctx, RWContext *rw,
         for (i = 0; i < num_clock_ts[current->pic_struct]; i++) {
             flags(clock_timestamp_flag[i], 1, i);
             if (current->clock_timestamp_flag[i])
-                CHECK(FUNC(sei_pic_timestamp)(ctx, rw, &current->timestamp[i]));
+                CHECK(FUNC(sei_pic_timestamp)(ctx, rw,
+                                              &current->timestamp[i], sps));
         }
     }
 
@@ -681,6 +678,8 @@ static int FUNC(sei_pan_scan_rect)(CodedBitstreamContext *ctx, RWContext *rw,
 {
     int err, i;
 
+    HEADER("Pan-Scan Rectangle");
+
     ue(pan_scan_rect_id, 0, UINT32_MAX - 1);
     flag(pan_scan_rect_cancel_flag);
 
@@ -706,6 +705,8 @@ static int FUNC(sei_user_data_registered)(CodedBitstreamContext *ctx, RWContext
 {
     int err, i, j;
 
+    HEADER("User Data Registered ITU-T T.35");
+
     u(8, itu_t_t35_country_code, 0x00, 0xff);
     if (current->itu_t_t35_country_code != 0xff)
         i = 1;
@@ -725,7 +726,7 @@ static int FUNC(sei_user_data_registered)(CodedBitstreamContext *ctx, RWContext
     *payload_size = i + current->data_length;
 #endif
 
-    allocate(current->data, current->data_length + AV_INPUT_BUFFER_PADDING_SIZE);
+    allocate(current->data, current->data_length);
     for (j = 0; j < current->data_length; j++)
         xu(8, itu_t_t35_payload_byte[i], current->data[j], 0x00, 0xff, 1, i + j);
 
@@ -738,6 +739,8 @@ static int FUNC(sei_user_data_unregistered)(CodedBitstreamContext *ctx, RWContex
 {
     int err, i;
 
+    HEADER("User Data Unregistered");
+
 #ifdef READ
     if (*payload_size < 16) {
         av_log(ctx->log_ctx, AV_LOG_ERROR,
@@ -765,6 +768,8 @@ static int FUNC(sei_recovery_point)(CodedBitstreamContext *ctx, RWContext *rw,
 {
     int err;
 
+    HEADER("Recovery Point");
+
     ue(recovery_frame_cnt, 0, 65535);
     flag(exact_match_flag);
     flag(broken_link_flag);
@@ -778,6 +783,8 @@ static int FUNC(sei_display_orientation)(CodedBitstreamContext *ctx, RWContext *
 {
     int err;
 
+    HEADER("Display Orientation");
+
     flag(display_orientation_cancel_flag);
     if (!current->display_orientation_cancel_flag) {
         flag(hor_flip);
@@ -795,6 +802,8 @@ static int FUNC(sei_mastering_display_colour_volume)(CodedBitstreamContext *ctx,
 {
     int err, c;
 
+    HEADER("Mastering Display Colour Volume");
+
     for (c = 0; c < 3; c++) {
         us(16, display_primaries_x[c], 0, 50000, 1, c);
         us(16, display_primaries_y[c], 0, 50000, 1, c);
@@ -1181,11 +1190,10 @@ static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
                    "in the same access unit.\n");
             return AVERROR_INVALIDDATA;
         }
+        idr_pic_flag = h264->last_slice_nal_unit_type == H264_NAL_IDR_SLICE;
     } else {
-        h264->last_slice_nal_unit_type =
-            current->nal_unit_header.nal_unit_type;
+        idr_pic_flag = current->nal_unit_header.nal_unit_type == H264_NAL_IDR_SLICE;
     }
-    idr_pic_flag = h264->last_slice_nal_unit_type == H264_NAL_IDR_SLICE;
 
     ue(first_mb_in_slice, 0, H264_MAX_MB_PIC_SIZE - 1);
     ue(slice_type, 0, 9);
@@ -1263,6 +1271,13 @@ static int FUNC(slice_header)(CodedBitstreamContext *ctx, RWContext *rw,
 
     if (pps->redundant_pic_cnt_present_flag)
         ue(redundant_pic_cnt, 0, 127);
+    else
+        infer(redundant_pic_cnt, 0);
+
+    if (current->nal_unit_header.nal_unit_type != H264_NAL_AUXILIARY_SLICE
+        && !current->redundant_pic_cnt)
+        h264->last_slice_nal_unit_type =
+            current->nal_unit_header.nal_unit_type;
 
     if (slice_type_b)
         flag(direct_spatial_mv_pred_flag);
@@ -1375,3 +1390,33 @@ static int FUNC(filler)(CodedBitstreamContext *ctx, RWContext *rw,
 
     return 0;
 }
+
+/*
+ * End-of-sequence NAL unit: the only syntax handled here is the NAL
+ * unit header itself.  The 1 << H264_NAL_END_SEQUENCE argument looks
+ * like a mask of the nal_unit_type values nal_unit_header() should
+ * accept (TODO confirm against its definition).
+ */
+static int FUNC(end_of_sequence)(CodedBitstreamContext *ctx, RWContext *rw,
+                                 H264RawNALUnitHeader *current)
+{
+    HEADER("End of Sequence");
+
+    return FUNC(nal_unit_header)(ctx, rw, current,
+                                 1 << H264_NAL_END_SEQUENCE);
+}
+
+/*
+ * End-of-stream NAL unit: the only syntax handled here is the NAL
+ * unit header itself.  The 1 << H264_NAL_END_STREAM argument looks
+ * like a mask of the nal_unit_type values nal_unit_header() should
+ * accept (TODO confirm against its definition).
+ */
+static int FUNC(end_of_stream)(CodedBitstreamContext *ctx, RWContext *rw,
+                               H264RawNALUnitHeader *current)
+{
+    HEADER("End of Stream");
+
+    return FUNC(nal_unit_header)(ctx, rw, current,
+                                 1 << H264_NAL_END_STREAM);
+}
diff --git a/libavcodec/cbs_h265.h b/libavcodec/cbs_h265.h
index cca1d7590b099..d216caca2b114 100644
--- a/libavcodec/cbs_h265.h
+++ b/libavcodec/cbs_h265.h
@@ -71,7 +71,31 @@ typedef struct H265RawProfileTierLevel {
     uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS];
     uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
 
-    // TODO: much of that again for each sub-layer.
+    uint8_t sub_layer_profile_space[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_tier_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_profile_idc[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t sub_layer_profile_compatibility_flag[HEVC_MAX_SUB_LAYERS][32];
+
+    uint8_t sub_layer_progressive_source_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_interlaced_source_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_non_packed_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_frame_only_constraint_flag[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t sub_layer_max_12bit_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_10bit_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_8bit_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_422chroma_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_420chroma_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_monochrome_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_intra_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_one_picture_only_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_lower_bit_rate_constraint_flag[HEVC_MAX_SUB_LAYERS];
+    uint8_t sub_layer_max_14bit_constraint_flag[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t sub_layer_inbld_flag[HEVC_MAX_SUB_LAYERS];
+
+    uint8_t sub_layer_level_idc[HEVC_MAX_SUB_LAYERS];
 } H265RawProfileTierLevel;
 
 typedef struct H265RawSubLayerHRDParameters {
@@ -524,6 +548,133 @@ typedef struct H265RawSlice {
     AVBufferRef *data_ref;
 } H265RawSlice;
 
+
+// Buffering period SEI payload (union member buffering_period).
+typedef struct H265RawSEIBufferingPeriod {
+    uint8_t  bp_seq_parameter_set_id;
+    uint8_t  irap_cpb_params_present_flag;
+    uint32_t cpb_delay_offset;
+    uint32_t dpb_delay_offset;
+    uint8_t  concatenation_flag;
+    uint32_t au_cpb_removal_delay_delta_minus1;
+
+    uint32_t nal_initial_cpb_removal_delay[HEVC_MAX_CPB_CNT];
+    uint32_t nal_initial_cpb_removal_offset[HEVC_MAX_CPB_CNT];
+    uint32_t nal_initial_alt_cpb_removal_delay[HEVC_MAX_CPB_CNT];
+    uint32_t nal_initial_alt_cpb_removal_offset[HEVC_MAX_CPB_CNT];
+
+    uint32_t vcl_initial_cpb_removal_delay[HEVC_MAX_CPB_CNT];
+    uint32_t vcl_initial_cpb_removal_offset[HEVC_MAX_CPB_CNT];
+    uint32_t vcl_initial_alt_cpb_removal_delay[HEVC_MAX_CPB_CNT];
+    uint32_t vcl_initial_alt_cpb_removal_offset[HEVC_MAX_CPB_CNT];
+
+    uint8_t  use_alt_cpb_params_flag;
+} H265RawSEIBufferingPeriod;
+
+// Picture timing SEI payload (union member pic_timing).
+typedef struct H265RawSEIPicTiming {
+    uint8_t pic_struct;
+    uint8_t source_scan_type;
+    uint8_t duplicate_flag;
+
+    uint32_t au_cpb_removal_delay_minus1;
+    uint32_t pic_dpb_output_delay;
+    uint32_t pic_dpb_output_du_delay;
+
+    uint16_t num_decoding_units_minus1;
+    uint8_t  du_common_cpb_removal_delay_flag;
+    uint32_t du_common_cpb_removal_delay_increment_minus1;
+    uint16_t num_nalus_in_du_minus1[HEVC_MAX_SLICE_SEGMENTS];
+    uint32_t du_cpb_removal_delay_increment_minus1[HEVC_MAX_SLICE_SEGMENTS];
+} H265RawSEIPicTiming;
+
+// Pan-scan rectangle SEI payload (union member pan_scan_rect).
+typedef struct H265RawSEIPanScanRect {
+    uint32_t pan_scan_rect_id;
+    uint8_t  pan_scan_rect_cancel_flag;
+    uint8_t  pan_scan_cnt_minus1;
+    int32_t  pan_scan_rect_left_offset[3];
+    int32_t  pan_scan_rect_right_offset[3];
+    int32_t  pan_scan_rect_top_offset[3];
+    int32_t  pan_scan_rect_bottom_offset[3];
+    // NOTE(review): named as a flag but uint16_t, unlike the uint8_t flags here.
+    uint16_t pan_scan_rect_persistence_flag;
+} H265RawSEIPanScanRect;
+
+// Registered (ITU-T T.35) user data SEI payload (user_data_registered).
+// data_ref is unreferenced by cbs_h265_free_sei_payload().
+typedef struct H265RawSEIUserDataRegistered {
+    uint8_t itu_t_t35_country_code;
+    uint8_t itu_t_t35_country_code_extension_byte;
+    uint8_t     *data;
+    size_t       data_length;
+    AVBufferRef *data_ref;
+} H265RawSEIUserDataRegistered;
+
+// Unregistered user data SEI payload (user_data_unregistered).
+// data_ref is unreferenced by cbs_h265_free_sei_payload().
+typedef struct H265RawSEIUserDataUnregistered {
+    uint8_t uuid_iso_iec_11578[16];
+    uint8_t     *data;
+    size_t       data_length;
+    AVBufferRef *data_ref;
+} H265RawSEIUserDataUnregistered;
+
+// Recovery point SEI payload (union member recovery_point).
+typedef struct H265RawSEIRecoveryPoint {
+    int16_t recovery_poc_cnt;
+    uint8_t exact_match_flag;
+    uint8_t broken_link_flag;
+} H265RawSEIRecoveryPoint;
+
+// Display orientation SEI payload (union member display_orientation).
+typedef struct H265RawSEIDisplayOrientation {
+    uint8_t display_orientation_cancel_flag;
+    uint8_t hor_flip;
+    uint8_t ver_flip;
+    uint16_t anticlockwise_rotation;
+    uint16_t display_orientation_repetition_period;
+    uint8_t display_orientation_persistence_flag;
+} H265RawSEIDisplayOrientation;
+
+// Active parameter sets SEI payload (union member active_parameter_sets).
+typedef struct H265RawSEIActiveParameterSets {
+    uint8_t active_video_parameter_set_id;
+    uint8_t self_contained_cvs_flag;
+    uint8_t no_parameter_set_update_flag;
+    uint8_t num_sps_ids_minus1;
+    uint8_t active_seq_parameter_set_id[HEVC_MAX_SPS_COUNT];
+    uint8_t layer_sps_idx[HEVC_MAX_LAYERS];
+} H265RawSEIActiveParameterSets;
+
+// Decoded picture hash SEI payload (union member decoded_picture_hash).
+typedef struct H265RawSEIDecodedPictureHash {
+    uint8_t  hash_type;
+    uint8_t  picture_md5[3][16];
+    uint16_t picture_crc[3];
+    uint32_t picture_checksum[3];
+} H265RawSEIDecodedPictureHash;
+
+// Time code SEI payload (union member time_code); arrays hold 3 entries.
+typedef struct H265RawSEITimeCode {
+    uint8_t  num_clock_ts;
+    uint8_t  clock_timestamp_flag[3];
+    uint8_t  units_field_based_flag[3];
+    uint8_t  counting_type[3];
+    uint8_t  full_timestamp_flag[3];
+    uint8_t  discontinuity_flag[3];
+    uint8_t  cnt_dropped_flag[3];
+    uint16_t n_frames[3];
+    uint8_t  seconds_value[3];
+    uint8_t  minutes_value[3];
+    uint8_t  hours_value[3];
+    uint8_t  seconds_flag[3];
+    uint8_t  minutes_flag[3];
+    uint8_t  hours_flag[3];
+    uint8_t  time_offset_length[3];
+    uint32_t time_offset_value[3];
+} H265RawSEITimeCode;
+
 typedef struct H265RawSEIMasteringDisplayColourVolume {
     uint16_t display_primaries_x[3];
     uint16_t display_primaries_y[3];
@@ -538,12 +676,29 @@ typedef struct H265RawSEIContentLightLevelInfo {
     uint16_t max_pic_average_light_level;
 } H265RawSEIContentLightLevelInfo;
 
+// Alternative transfer characteristics SEI payload: a single 8-bit field.
+typedef struct H265RawSEIAlternativeTransferCharacteristics {
+    uint8_t preferred_transfer_characteristics;
+} H265RawSEIAlternativeTransferCharacteristics;
+
 typedef struct H265RawSEIPayload {
     uint32_t payload_type;
     uint32_t payload_size;
     union {
+        H265RawSEIBufferingPeriod buffering_period;
+        H265RawSEIPicTiming pic_timing;
+        H265RawSEIPanScanRect pan_scan_rect;
+        H265RawSEIUserDataRegistered user_data_registered;
+        H265RawSEIUserDataUnregistered user_data_unregistered;
+        H265RawSEIRecoveryPoint recovery_point;
+        H265RawSEIDisplayOrientation display_orientation;
+        H265RawSEIActiveParameterSets active_parameter_sets;
+        H265RawSEIDecodedPictureHash decoded_picture_hash;
+        H265RawSEITimeCode time_code;
         H265RawSEIMasteringDisplayColourVolume mastering_display;
         H265RawSEIContentLightLevelInfo content_light_level;
+        H265RawSEIAlternativeTransferCharacteristics
+            alternative_transfer_characteristics;
         struct {
             uint8_t *data;
             size_t data_length;
diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c
index d4e4f7b1c2459..f1e1bb0e7e60f 100644
--- a/libavcodec/cbs_h265_syntax_template.c
+++ b/libavcodec/cbs_h265_syntax_template.c
@@ -130,6 +130,11 @@ static int FUNC(profile_tier_level)(CodedBitstreamContext *ctx, RWContext *rw,
                 fixed(24, general_reserved_zero_34bits, 0);
                 fixed(10, general_reserved_zero_34bits, 0);
             }
+        } else if (profile_compatible(2)) {
+            fixed(7, general_reserved_zero_7bits, 0);
+            flag(general_one_picture_only_constraint_flag);
+            fixed(24, general_reserved_zero_35bits, 0);
+            fixed(11, general_reserved_zero_35bits, 0);
         } else {
             fixed(24, general_reserved_zero_43bits, 0);
             fixed(19, general_reserved_zero_43bits, 0);
@@ -158,10 +163,66 @@ static int FUNC(profile_tier_level)(CodedBitstreamContext *ctx, RWContext *rw,
     }
 
     for (i = 0; i < max_num_sub_layers_minus1; i++) {
-        if (current->sub_layer_profile_present_flag[i])
-            return AVERROR_PATCHWELCOME;
+        if (current->sub_layer_profile_present_flag[i]) {
+            us(2, sub_layer_profile_space[i], 0, 0, 1, i);
+            flags(sub_layer_tier_flag[i],           1, i);
+            us(5, sub_layer_profile_idc[i], 0, 31,  1, i);
+
+            for (j = 0; j < 32; j++)
+                flags(sub_layer_profile_compatibility_flag[i][j], 2, i, j);
+
+            flags(sub_layer_progressive_source_flag[i],    1, i);
+            flags(sub_layer_interlaced_source_flag[i],     1, i);
+            flags(sub_layer_non_packed_constraint_flag[i], 1, i);
+            flags(sub_layer_frame_only_constraint_flag[i], 1, i);
+
+#define profile_compatible(x) (current->sub_layer_profile_idc[i] == (x) ||   \
+                               current->sub_layer_profile_compatibility_flag[i][x])
+            if (profile_compatible(4) || profile_compatible(5) ||
+                profile_compatible(6) || profile_compatible(7) ||
+                profile_compatible(8) || profile_compatible(9) ||
+                profile_compatible(10)) {
+                flags(sub_layer_max_12bit_constraint_flag[i],        1, i);
+                flags(sub_layer_max_10bit_constraint_flag[i],        1, i);
+                flags(sub_layer_max_8bit_constraint_flag[i],         1, i);
+                flags(sub_layer_max_422chroma_constraint_flag[i],    1, i);
+                flags(sub_layer_max_420chroma_constraint_flag[i],    1, i);
+                flags(sub_layer_max_monochrome_constraint_flag[i],   1, i);
+                flags(sub_layer_intra_constraint_flag[i],            1, i);
+                flags(sub_layer_one_picture_only_constraint_flag[i], 1, i);
+                flags(sub_layer_lower_bit_rate_constraint_flag[i],   1, i);
+
+                if (profile_compatible(5)) {
+                    flags(sub_layer_max_14bit_constraint_flag[i], 1, i);
+                    fixed(24, sub_layer_reserved_zero_33bits, 0);
+                    fixed( 9, sub_layer_reserved_zero_33bits, 0);
+                } else {
+                    fixed(24, sub_layer_reserved_zero_34bits, 0);
+                    fixed(10, sub_layer_reserved_zero_34bits, 0);
+                }
+            } else if (profile_compatible(2)) {
+                // 7 + 1 + 35 bits here; the 35 reserved bits are
+                // written as 24 + 11, matching the general_ branch.
+                fixed(7, sub_layer_reserved_zero_7bits, 0);
+                flags(sub_layer_one_picture_only_constraint_flag[i], 1, i);
+                fixed(24, sub_layer_reserved_zero_35bits, 0);
+                fixed(11, sub_layer_reserved_zero_35bits, 0);
+            } else {
+                fixed(24, sub_layer_reserved_zero_43bits, 0);
+                fixed(19, sub_layer_reserved_zero_43bits, 0);
+            }
+
+            if (profile_compatible(1) || profile_compatible(2) ||
+                profile_compatible(3) || profile_compatible(4) ||
+                profile_compatible(5) || profile_compatible(9)) {
+                flags(sub_layer_inbld_flag[i], 1, i);
+            } else {
+                fixed(1, sub_layer_reserved_zero_bit, 0);
+            }
+#undef profile_compatible
+        }
         if (current->sub_layer_level_present_flag[i])
-            return AVERROR_PATCHWELCOME;
+            us(8, sub_layer_level_idc[i], 0, 255, 1, i);
     }
 
     return 0;
@@ -1505,11 +1564,443 @@ static int FUNC(slice_segment_header)(CodedBitstreamContext *ctx, RWContext *rw,
     return 0;
 }
 
+static int FUNC(sei_buffering_period)(CodedBitstreamContext *ctx, RWContext *rw,
+                                      H265RawSEIBufferingPeriod *current,
+                                      uint32_t *payload_size) // payload size in bytes; only consulted when reading
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps;
+    const H265RawHRDParameters *hrd;
+    int err, i, length;
+    // Reading only: note the start position so the unread payload bits can be counted at the end.
+#ifdef READ
+    int start_pos, end_pos, bits_left;
+    start_pos = get_bits_count(rw);
+#endif
+    // Buffering period SEI payload; field widths come from the HRD parameters of the SPS it names.
+    HEADER("Buffering Period");
+
+    ue(bp_seq_parameter_set_id, 0, HEVC_MAX_SPS_COUNT - 1);
+
+    sps = h265->sps[current->bp_seq_parameter_set_id];
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "SPS id %d not available.\n",
+               current->bp_seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h265->active_sps = sps; // the referenced SPS becomes the active SPS in the context
+
+    if (!sps->vui_parameters_present_flag ||
+        !sps->vui.vui_hrd_parameters_present_flag) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Buffering period SEI requires "
+               "HRD parameters to be present in SPS.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    hrd = &sps->vui.hrd_parameters;
+    if (!hrd->nal_hrd_parameters_present_flag &&
+        !hrd->vcl_hrd_parameters_present_flag) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "Buffering period SEI requires "
+               "NAL or VCL HRD parameters to be present.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    // irap_cpb_params_present_flag is only coded without sub-picture HRD parameters; otherwise it is 0.
+    if (!hrd->sub_pic_hrd_params_present_flag)
+        flag(irap_cpb_params_present_flag);
+    else
+        infer(irap_cpb_params_present_flag, 0);
+    if (current->irap_cpb_params_present_flag) {
+        length = hrd->au_cpb_removal_delay_length_minus1 + 1;
+        u(length, cpb_delay_offset, 0, MAX_UINT_BITS(length));
+        length = hrd->dpb_output_delay_length_minus1 + 1;
+        u(length, dpb_delay_offset, 0, MAX_UINT_BITS(length));
+    } else {
+        infer(cpb_delay_offset, 0);
+        infer(dpb_delay_offset, 0);
+    }
+
+    flag(concatenation_flag);
+
+    length = hrd->au_cpb_removal_delay_length_minus1 + 1;
+    u(length, au_cpb_removal_delay_delta_minus1, 0, MAX_UINT_BITS(length));
+    // One set of initial delays/offsets per CPB (cpb_cnt_minus1[0] + 1), for the NAL HRD and then the VCL HRD.
+    if (hrd->nal_hrd_parameters_present_flag) {
+        for (i = 0; i <= hrd->cpb_cnt_minus1[0]; i++) {
+            length = hrd->initial_cpb_removal_delay_length_minus1 + 1;
+
+            us(length, nal_initial_cpb_removal_delay[i],
+               0, MAX_UINT_BITS(length), 1, i);
+            us(length, nal_initial_cpb_removal_offset[i],
+               0, MAX_UINT_BITS(length), 1, i);
+
+            if (hrd->sub_pic_hrd_params_present_flag ||
+                current->irap_cpb_params_present_flag) {
+                us(length, nal_initial_alt_cpb_removal_delay[i],
+                   0, MAX_UINT_BITS(length), 1, i);
+                us(length, nal_initial_alt_cpb_removal_offset[i],
+                   0, MAX_UINT_BITS(length), 1, i);
+            }
+        }
+    }
+    if (hrd->vcl_hrd_parameters_present_flag) {
+        for (i = 0; i <= hrd->cpb_cnt_minus1[0]; i++) {
+            length = hrd->initial_cpb_removal_delay_length_minus1 + 1;
+
+            us(length, vcl_initial_cpb_removal_delay[i],
+               0, MAX_UINT_BITS(length), 1, i);
+            us(length, vcl_initial_cpb_removal_offset[i],
+               0, MAX_UINT_BITS(length), 1, i);
+
+            if (hrd->sub_pic_hrd_params_present_flag ||
+                current->irap_cpb_params_present_flag) {
+                us(length, vcl_initial_alt_cpb_removal_delay[i],
+                   0, MAX_UINT_BITS(length), 1, i);
+                us(length, vcl_initial_alt_cpb_removal_offset[i],
+                   0, MAX_UINT_BITS(length), 1, i);
+            }
+        }
+    }
+
+#ifdef READ
+    // payload_extension_present() - true if we are before the last 1-bit
+    // in the payload structure, which must be in the last byte.
+    end_pos = get_bits_count(rw);
+    bits_left = *payload_size * 8 - (end_pos - start_pos);
+    if (bits_left > 0 &&
+        (bits_left > 7 || ff_ctz(show_bits(rw, bits_left)) < bits_left - 1))
+        flag(use_alt_cpb_params_flag);
+    else
+        infer(use_alt_cpb_params_flag, 0);
+#else
+    if (current->use_alt_cpb_params_flag) // written only when set; a clear flag is left out entirely
+        flag(use_alt_cpb_params_flag);
+#endif
+
+    return 0;
+}
+
+static int FUNC(sei_pic_timing)(CodedBitstreamContext *ctx, RWContext *rw,
+                                H265RawSEIPicTiming *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps;
+    const H265RawHRDParameters *hrd;
+    int err, expected_source_scan_type, i, length;
+    // Picture timing SEI payload; its layout depends on the VUI/HRD parameters of the active SPS.
+    HEADER("Picture Timing");
+
+    sps = h265->active_sps;
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "No active SPS for pic_timing.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    // Evaluates to 1 (progressive flag only), 0 (interlaced flag only), 2 (neither) or -1 (both flags).
+    expected_source_scan_type = 2 -
+        2 * sps->profile_tier_level.general_interlaced_source_flag -
+        sps->profile_tier_level.general_progressive_source_flag;
+
+    if (sps->vui.frame_field_info_present_flag) {
+        u(4, pic_struct, 0, 12);
+        u(2, source_scan_type, // pinned to the expected value, or any of 0..2 when that is -1
+          expected_source_scan_type >= 0 ? expected_source_scan_type : 0,
+          expected_source_scan_type >= 0 ? expected_source_scan_type : 2);
+        flag(duplicate_flag);
+    } else {
+        infer(pic_struct, 0);
+        infer(source_scan_type,
+              expected_source_scan_type >= 0 ? expected_source_scan_type : 2);
+        infer(duplicate_flag, 0);
+    }
+    // The timing fields below are only coded if the SPS carries NAL or VCL HRD parameters.
+    if (sps->vui_parameters_present_flag &&
+        sps->vui.vui_hrd_parameters_present_flag)
+        hrd = &sps->vui.hrd_parameters;
+    else
+        hrd = NULL;
+    if (hrd && (hrd->nal_hrd_parameters_present_flag ||
+                hrd->vcl_hrd_parameters_present_flag)) {
+        length = hrd->au_cpb_removal_delay_length_minus1 + 1;
+        u(length, au_cpb_removal_delay_minus1, 0, MAX_UINT_BITS(length));
+
+        length = hrd->dpb_output_delay_length_minus1 + 1;
+        u(length, pic_dpb_output_delay, 0, MAX_UINT_BITS(length));
+
+        if (hrd->sub_pic_hrd_params_present_flag) {
+            length = hrd->dpb_output_delay_du_length_minus1 + 1;
+            u(length, pic_dpb_output_du_delay, 0, MAX_UINT_BITS(length));
+        }
+
+        if (hrd->sub_pic_hrd_params_present_flag &&
+            hrd->sub_pic_cpb_params_in_pic_timing_sei_flag) {
+            // At least one slice segment per decoding unit: at most HEVC_MAX_SLICE_SEGMENTS units.
+            ue(num_decoding_units_minus1, 0, HEVC_MAX_SLICE_SEGMENTS - 1);
+            flag(du_common_cpb_removal_delay_flag);
+
+            length = hrd->du_cpb_removal_delay_increment_length_minus1 + 1;
+            if (current->du_common_cpb_removal_delay_flag)
+                u(length, du_common_cpb_removal_delay_increment_minus1,
+                  0, MAX_UINT_BITS(length));
+            // NOTE(review): the cap above also keeps i below HEVC_MAX_SLICE_SEGMENTS - confirm the array sizes.
+            for (i = 0; i <= current->num_decoding_units_minus1; i++) {
+                ues(num_nalus_in_du_minus1[i],
+                    0, HEVC_MAX_SLICE_SEGMENTS, 1, i);
+                if (!current->du_common_cpb_removal_delay_flag &&
+                    i < current->num_decoding_units_minus1)
+                    us(length, du_cpb_removal_delay_increment_minus1[i],
+                      0, MAX_UINT_BITS(length), 1, i);
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_pan_scan_rect)(CodedBitstreamContext *ctx, RWContext *rw,
+                                   H265RawSEIPanScanRect *current)
+{
+    int err, i;
+    // Pan-scan rectangle SEI payload: an id, a cancel flag and, unless cancelled, a list of rectangles.
+    HEADER("Pan-Scan Rectangle");
+
+    ue(pan_scan_rect_id, 0, UINT32_MAX - 1);
+    flag(pan_scan_rect_cancel_flag);
+    // Everything below is only coded when pan_scan_rect_cancel_flag is 0.
+    if (!current->pan_scan_rect_cancel_flag) {
+        ue(pan_scan_cnt_minus1, 0, 2); // i.e. one to three rectangles
+
+        for (i = 0; i <= current->pan_scan_cnt_minus1; i++) {
+            ses(pan_scan_rect_left_offset[i],   INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_right_offset[i],  INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_top_offset[i],    INT32_MIN + 1, INT32_MAX, 1, i);
+            ses(pan_scan_rect_bottom_offset[i], INT32_MIN + 1, INT32_MAX, 1, i);
+        }
+
+        flag(pan_scan_rect_persistence_flag);
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_user_data_registered)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          H265RawSEIUserDataRegistered *current,
+                                          uint32_t *payload_size) // input when reading; set to the total size when writing
+{
+    int err, i, j;
+    // ITU-T T.35 registered user data: a country code (1 or 2 bytes) followed by opaque payload bytes.
+    HEADER("User Data Registered ITU-T T.35");
+    // i ends up as the number of country-code bytes: 1 normally, 2 when 0xff selects the extension byte.
+    u(8, itu_t_t35_country_code, 0x00, 0xff);
+    if (current->itu_t_t35_country_code != 0xff)
+        i = 1;
+    else {
+        u(8, itu_t_t35_country_code_extension_byte, 0x00, 0xff);
+        i = 2;
+    }
+    // Reading derives data_length from the payload size; writing derives the payload size from data_length.
+#ifdef READ
+    if (*payload_size < i) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "Invalid SEI user data registered payload.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    current->data_length = *payload_size - i;
+#else
+    *payload_size = i + current->data_length;
+#endif
+    // The remaining bytes are copied through unchanged; their subscripts continue after the code bytes (i + j).
+    allocate(current->data, current->data_length);
+    for (j = 0; j < current->data_length; j++)
+        xu(8, itu_t_t35_payload_byte[i], current->data[j], 0x00, 0xff, 1, i + j);
+
+    return 0;
+}
+
+static int FUNC(sei_user_data_unregistered)(CodedBitstreamContext *ctx, RWContext *rw,
+                                            H265RawSEIUserDataUnregistered *current,
+                                            uint32_t *payload_size) // input when reading; set to the total size when writing
+{
+    int err, i;
+    // Unregistered user data: a 16-byte UUID followed by opaque payload bytes.
+    HEADER("User Data Unregistered");
+    // The UUID always takes 16 bytes, so a shorter payload is rejected when reading.
+#ifdef READ
+    if (*payload_size < 16) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "Invalid SEI user data unregistered payload.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    current->data_length = *payload_size - 16;
+#else
+    *payload_size = 16 + current->data_length;
+#endif
+
+    for (i = 0; i < 16; i++)
+        us(8, uuid_iso_iec_11578[i], 0x00, 0xff, 1, i);
+
+    allocate(current->data, current->data_length);
+    // Whatever follows the UUID is copied through byte by byte.
+    for (i = 0; i < current->data_length; i++)
+        xu(8, user_data_payload_byte[i], current->data[i], 0x00, 0xff, 1, i);
+
+    return 0;
+}
+
+static int FUNC(sei_recovery_point)(CodedBitstreamContext *ctx, RWContext *rw,
+                                    H265RawSEIRecoveryPoint *current)
+{
+    int err;
+    // Recovery point SEI payload: a signed picture order count offset followed by two flags.
+    HEADER("Recovery Point");
+
+    se(recovery_poc_cnt, -32768, 32767); // accepted range is that of a signed 16-bit value
+
+    flag(exact_match_flag);
+    flag(broken_link_flag);
+
+    return 0;
+}
+
+static int FUNC(sei_display_orientation)(CodedBitstreamContext *ctx, RWContext *rw,
+                                         H265RawSEIDisplayOrientation *current)
+{
+    int err;
+    // Display orientation SEI payload: a cancel flag, then flip/rotation fields unless cancelled.
+    HEADER("Display Orientation");
+
+    flag(display_orientation_cancel_flag);
+    if (!current->display_orientation_cancel_flag) { // nothing else is coded when the cancel flag is set
+        flag(hor_flip);
+        flag(ver_flip);
+        u(16, anticlockwise_rotation, 0, 65535);
+        flag(display_orientation_persistence_flag);
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_active_parameter_sets)(CodedBitstreamContext *ctx, RWContext *rw,
+                                           H265RawSEIActiveParameterSets *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawVPS *vps;
+    int err, i;
+    // Active parameter sets SEI payload; updates the active VPS and active SPS stored in the context.
+    HEADER("Active Parameter Sets");
+    // The id indexes h265->vps[] directly, so its upper bound is the last valid slot (count - 1).
+    u(4, active_video_parameter_set_id, 0, HEVC_MAX_VPS_COUNT - 1);
+    vps = h265->vps[current->active_video_parameter_set_id];
+    if (!vps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR, "VPS id %d not available for active "
+               "parameter sets.\n", current->active_video_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    h265->active_vps = vps;
+
+    flag(self_contained_cvs_flag);
+    flag(no_parameter_set_update_flag);
+
+    ue(num_sps_ids_minus1, 0, HEVC_MAX_SPS_COUNT - 1);
+    for (i = 0; i <= current->num_sps_ids_minus1; i++)
+        ues(active_seq_parameter_set_id[i], 0, HEVC_MAX_SPS_COUNT - 1, 1, i);
+    // Each layer_sps_idx[i] selects an entry of the SPS id list above; i starts at 1 if the base-layer flag is set.
+    for (i = vps->vps_base_layer_internal_flag;
+         i <= FFMIN(62, vps->vps_max_layers_minus1); i++) {
+        ues(layer_sps_idx[i], 0, current->num_sps_ids_minus1, 1, i);
+        // Only layer 0 changes the context's active SPS (the looked-up slot may be empty, i.e. NULL).
+        if (i == 0)
+            h265->active_sps = h265->sps[current->active_seq_parameter_set_id[current->layer_sps_idx[0]]];
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_decoded_picture_hash)(CodedBitstreamContext *ctx, RWContext *rw,
+                                          H265RawSEIDecodedPictureHash *current)
+{
+    CodedBitstreamH265Context *h265 = ctx->priv_data;
+    const H265RawSPS *sps = h265->active_sps;
+    int err, c, i;
+    // Decoded picture hash SEI payload: one hash per colour component, in the format chosen by hash_type.
+    HEADER("Decoded Picture Hash");
+    // The active SPS is needed for chroma_format_idc, which decides how many components are hashed.
+    if (!sps) {
+        av_log(ctx->log_ctx, AV_LOG_ERROR,
+               "No active SPS for decoded picture hash.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    u(8, hash_type, 0, 2);
+    // One component when chroma_format_idc is 0, three otherwise.
+    for (c = 0; c < (sps->chroma_format_idc == 0 ? 1 : 3); c++) {
+        if (current->hash_type == 0) {
+            for (i = 0; i < 16; i++) // 16 bytes of picture_md5 per component
+                us(8, picture_md5[c][i], 0x00, 0xff, 2, c, i);
+        } else if (current->hash_type == 1) {
+            us(16, picture_crc[c], 0x0000, 0xffff, 1, c);
+        } else if (current->hash_type == 2) {
+            us(32, picture_checksum[c], 0x00000000, 0xffffffff, 1, c);
+        }
+    }
+
+    return 0;
+}
+
+static int FUNC(sei_time_code)(CodedBitstreamContext *ctx, RWContext *rw,
+                               H265RawSEITimeCode *current)
+{
+    int err, i;
+    // Time code SEI payload: between one and three clock timestamps, each present only if its flag is set.
+    HEADER("Time Code");
+
+    u(2, num_clock_ts, 1, 3);
+
+    for (i = 0; i < current->num_clock_ts; i++) {
+        flags(clock_timestamp_flag[i],   1, i);
+
+        if (current->clock_timestamp_flag[i]) {
+            flags(units_field_based_flag[i], 1, i);
+            us(5, counting_type[i], 0, 6,    1, i);
+            flags(full_timestamp_flag[i],    1, i);
+            flags(discontinuity_flag[i],     1, i);
+            flags(cnt_dropped_flag[i],       1, i);
+
+            us(9, n_frames[i], 0, MAX_UINT_BITS(9), 1, i);
+            // Either all three time fields are coded, or each is gated by its own flag and nests the next.
+            if (current->full_timestamp_flag[i]) {
+                us(6, seconds_value[i], 0, 59, 1, i);
+                us(6, minutes_value[i], 0, 59, 1, i);
+                us(5, hours_value[i],   0, 23, 1, i);
+            } else {
+                flags(seconds_flag[i], 1, i);
+                if (current->seconds_flag[i]) {
+                    us(6, seconds_value[i], 0, 59, 1, i);
+                    flags(minutes_flag[i], 1, i);
+                    if (current->minutes_flag[i]) {
+                        us(6, minutes_value[i], 0, 59, 1, i);
+                        flags(hours_flag[i], 1, i);
+                        if (current->hours_flag[i])
+                            us(5, hours_value[i], 0, 23, 1, i);
+                    }
+                }
+            }
+            // time_offset_length is the bit width of time_offset_value; a width of 0 means no value is coded.
+            us(5, time_offset_length[i], 0, 31, 1, i);
+            if (current->time_offset_length[i] > 0)
+                us(current->time_offset_length[i], time_offset_value[i],
+                   0, MAX_UINT_BITS(current->time_offset_length[i]), 1, i);
+        }
+    }
+
+    return 0;
+}
+
 static int FUNC(sei_mastering_display)(CodedBitstreamContext *ctx, RWContext *rw,
                                        H265RawSEIMasteringDisplayColourVolume *current)
 {
     int err, c;
 
+    HEADER("Mastering Display Colour Volume");
+
     for (c = 0; c < 3; c++) {
         us(16, display_primaries_x[c], 0, 50000, 1, c);
         us(16, display_primaries_y[c], 0, 50000, 1, c);
@@ -1531,14 +2022,29 @@ static int FUNC(sei_content_light_level)(CodedBitstreamContext *ctx, RWContext *
 {
     int err;
 
+    HEADER("Content Light Level");
+
     u(16, max_content_light_level, 0, MAX_UINT_BITS(16));
     u(16, max_pic_average_light_level, 0, MAX_UINT_BITS(16));
 
     return 0;
 }
 
+static int FUNC(sei_alternative_transfer_characteristics)(CodedBitstreamContext *ctx,
+                                                          RWContext *rw,
+                                                          H265RawSEIAlternativeTransferCharacteristics *current)
+{
+    int err;
+    // Alternative transfer characteristics SEI payload: a single 8-bit field, any value accepted.
+    HEADER("Alternative Transfer Characteristics");
+
+    u(8, preferred_transfer_characteristics, 0, 255);
+
+    return 0;
+}
+
 static int FUNC(sei_payload)(CodedBitstreamContext *ctx, RWContext *rw,
-                             H265RawSEIPayload *current)
+                             H265RawSEIPayload *current, int prefix)
 {
     int err, i;
     int start_position, end_position;
@@ -1550,18 +2056,47 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, RWContext *rw,
 #endif
 
     switch (current->payload_type) {
-    case HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO:
-        CHECK(FUNC(sei_mastering_display)
-              (ctx, rw, &current->payload.mastering_display));
-
-        break;
-
-    case HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO:
-        CHECK(FUNC(sei_content_light_level)
-              (ctx, rw, &current->payload.content_light_level));
-
-        break;
-
+#define SEI_TYPE_CHECK_VALID(name, prefix_valid, suffix_valid) do { \
+            if (prefix && !prefix_valid) { \
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "SEI type %s invalid " \
+                       "as prefix SEI!\n", #name); \
+                return AVERROR_INVALIDDATA; \
+            } \
+            if (!prefix && !suffix_valid) { \
+                av_log(ctx->log_ctx, AV_LOG_ERROR, "SEI type %s invalid " \
+                       "as suffix SEI!\n", #name); \
+                return AVERROR_INVALIDDATA; \
+            } \
+        } while (0)
+#define SEI_TYPE_N(type, prefix_valid, suffix_valid, name) \
+    case HEVC_SEI_TYPE_ ## type: \
+        SEI_TYPE_CHECK_VALID(name, prefix_valid, suffix_valid); \
+        CHECK(FUNC(sei_ ## name)(ctx, rw, &current->payload.name)); \
+        break
+#define SEI_TYPE_S(type, prefix_valid, suffix_valid, name) \
+    case HEVC_SEI_TYPE_ ## type: \
+        SEI_TYPE_CHECK_VALID(name, prefix_valid, suffix_valid); \
+        CHECK(FUNC(sei_ ## name)(ctx, rw, &current->payload.name, \
+                                 &current->payload_size)); \
+        break
+
+        SEI_TYPE_S(BUFFERING_PERIOD,         1, 0, buffering_period);
+        SEI_TYPE_N(PICTURE_TIMING,           1, 0, pic_timing);
+        SEI_TYPE_N(PAN_SCAN_RECT,            1, 0, pan_scan_rect);
+        SEI_TYPE_S(USER_DATA_REGISTERED_ITU_T_T35,
+                                             1, 1, user_data_registered);
+        SEI_TYPE_S(USER_DATA_UNREGISTERED,   1, 1, user_data_unregistered);
+        SEI_TYPE_N(RECOVERY_POINT,           1, 0, recovery_point);
+        SEI_TYPE_N(DISPLAY_ORIENTATION,      1, 0, display_orientation);
+        SEI_TYPE_N(ACTIVE_PARAMETER_SETS,    1, 0, active_parameter_sets);
+        SEI_TYPE_N(DECODED_PICTURE_HASH,     0, 1, decoded_picture_hash);
+        SEI_TYPE_N(TIME_CODE,                1, 0, time_code);
+        SEI_TYPE_N(MASTERING_DISPLAY_INFO,   1, 0, mastering_display);
+        SEI_TYPE_N(CONTENT_LIGHT_LEVEL_INFO, 1, 0, content_light_level);
+        SEI_TYPE_N(ALTERNATIVE_TRANSFER_CHARACTERISTICS,
+                                             1, 0, alternative_transfer_characteristics);
+
+#undef SEI_TYPE
     default:
         {
 #ifdef READ
@@ -1599,14 +2134,18 @@ static int FUNC(sei_payload)(CodedBitstreamContext *ctx, RWContext *rw,
 }
 
 static int FUNC(sei)(CodedBitstreamContext *ctx, RWContext *rw,
-                     H265RawSEI *current)
+                     H265RawSEI *current, int prefix)
 {
     int err, k;
 
-    HEADER("Supplemental Enhancement Information");
+    if (prefix)
+        HEADER("Prefix Supplemental Enhancement Information");
+    else
+        HEADER("Suffix Supplemental Enhancement Information");
 
     CHECK(FUNC(nal_unit_header)(ctx, rw, &current->nal_unit_header,
-                                HEVC_NAL_SEI_PREFIX));
+                                prefix ? HEVC_NAL_SEI_PREFIX
+                                       : HEVC_NAL_SEI_SUFFIX));
 
 #ifdef READ
     for (k = 0; k < H265_MAX_SEI_PAYLOADS; k++) {
@@ -1631,7 +2170,7 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, RWContext *rw,
         current->payload[k].payload_type = payload_type;
         current->payload[k].payload_size = payload_size;
 
-        CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+        CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k], prefix));
 
         if (!cbs_h2645_read_more_rbsp_data(rw))
             break;
@@ -1670,7 +2209,7 @@ static int FUNC(sei)(CodedBitstreamContext *ctx, RWContext *rw,
             }
             xu(8, last_payload_size_byte, tmp, 0, 254, 0);
 
-            CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k]));
+            CHECK(FUNC(sei_payload)(ctx, rw, &current->payload[k], prefix));
         }
     }
 #endif
diff --git a/libavcodec/cbs_mpeg2.c b/libavcodec/cbs_mpeg2.c
index 0df4234b1223b..8b8b2665631ed 100644
--- a/libavcodec/cbs_mpeg2.c
+++ b/libavcodec/cbs_mpeg2.c
@@ -264,8 +264,6 @@ static int cbs_mpeg2_write_slice(CodedBitstreamContext *ctx,
                                  PutBitContext *pbc)
 {
     MPEG2RawSlice *slice = unit->content;
-    GetBitContext gbc;
-    size_t bits_left;
     int err;
 
     err = cbs_mpeg2_write_slice_header(ctx, pbc, &slice->header);
@@ -273,21 +271,38 @@ static int cbs_mpeg2_write_slice(CodedBitstreamContext *ctx,
         return err;
 
     if (slice->data) {
+        size_t rest = slice->data_size - (slice->data_bit_start + 7) / 8;
+        uint8_t *pos = slice->data + slice->data_bit_start / 8;
+
+        av_assert0(slice->data_bit_start >= 0 &&
+                   8 * slice->data_size > slice->data_bit_start);
+
         if (slice->data_size * 8 + 8 > put_bits_left(pbc))
             return AVERROR(ENOSPC);
 
-        init_get_bits(&gbc, slice->data, slice->data_size * 8);
-        skip_bits_long(&gbc, slice->data_bit_start);
-
-        while (get_bits_left(&gbc) > 15)
-            put_bits(pbc, 16, get_bits(&gbc, 16));
+        // First copy the remaining bits of the first byte
+        if (slice->data_bit_start % 8)
+            put_bits(pbc, 8 - slice->data_bit_start % 8,
+                     *pos++ & MAX_UINT_BITS(8 - slice->data_bit_start % 8));
+
+        if (put_bits_count(pbc) % 8 == 0) {
+            // If the writer is aligned at this point,
+            // memcpy can be used to improve performance.
+            // This is the normal case.
+            flush_put_bits(pbc);
+            memcpy(put_bits_ptr(pbc), pos, rest);
+            skip_put_bytes(pbc, rest);
+        } else {
+            // If not, we have to copy manually:
+            for (; rest > 3; rest -= 4, pos += 4)
+                put_bits32(pbc, AV_RB32(pos));
 
-        bits_left = get_bits_left(&gbc);
-        put_bits(pbc, bits_left, get_bits(&gbc, bits_left));
+            for (; rest; rest--, pos++)
+                put_bits(pbc, 8, *pos);
 
-        // Align with zeroes.
-        while (put_bits_count(pbc) % 8 != 0)
-            put_bits(pbc, 1, 0);
+            // Align with zeros
+            put_bits(pbc, 8 - put_bits_count(pbc) % 8, 0);
+        }
     }
 
     return 0;
diff --git a/libavcodec/cbs_vp9.c b/libavcodec/cbs_vp9.c
index 7498be4b73b40..0b5f137ed866e 100644
--- a/libavcodec/cbs_vp9.c
+++ b/libavcodec/cbs_vp9.c
@@ -305,7 +305,7 @@ static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
 
 #define prob(name, subs, ...) do { \
         uint8_t prob_coded; \
-        int8_t prob; \
+        uint8_t prob; \
         xf(1, name.prob_coded, prob_coded, subs, __VA_ARGS__); \
         if (prob_coded) \
             xf(8, name.prob, prob, subs, __VA_ARGS__); \
@@ -314,6 +314,12 @@ static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
         current->name = prob; \
     } while (0)
 
+#define fixed(width, name, value) do { \
+        av_unused uint32_t fixed_value = value; \
+        CHECK(ff_cbs_read_unsigned(ctx, rw, width, #name, \
+                                   0, &fixed_value, value, value)); \
+    } while (0)
+
 #define infer(name, value) do { \
         current->name = value; \
     } while (0)
@@ -331,6 +337,7 @@ static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
 #undef fle
 #undef delta_q
 #undef prob
+#undef fixed
 #undef infer
 #undef byte_alignment
 
@@ -370,6 +377,11 @@ static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
             xf(8, name.prob, current->name, subs, __VA_ARGS__); \
     } while (0)
 
+#define fixed(width, name, value) do { \
+        CHECK(ff_cbs_write_unsigned(ctx, rw, width, #name, \
+                                    0, value, value, value)); \
+    } while (0)
+
 #define infer(name, value) do { \
         if (current->name != (value)) { \
             av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
@@ -392,6 +404,7 @@ static int cbs_vp9_write_le(CodedBitstreamContext *ctx, PutBitContext *pbc,
 #undef fle
 #undef delta_q
 #undef prob
+#undef fixed
 #undef infer
 #undef byte_alignment
 
@@ -444,7 +457,7 @@ static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
         }
         if (pos + index_size != frag->data_size) {
             av_log(ctx->log_ctx, AV_LOG_WARNING, "Extra padding at "
-                   "end of superframe: %zu bytes.\n",
+                   "end of superframe: %"SIZE_SPECIFIER" bytes.\n",
                    frag->data_size - (pos + index_size));
         }
 
@@ -525,7 +538,7 @@ static int cbs_vp9_write_unit(CodedBitstreamContext *ctx,
         if (err < 0) {
             av_log(ctx->log_ctx, AV_LOG_ERROR, "Unable to allocate a "
                    "sufficiently large write buffer (last attempt "
-                   "%zu bytes).\n", priv->write_buffer_size);
+                   "%"SIZE_SPECIFIER" bytes).\n", priv->write_buffer_size);
             return err;
         }
     }
diff --git a/libavcodec/cbs_vp9.h b/libavcodec/cbs_vp9.h
index 5b99c90c2e68e..4c9b2f880d32e 100644
--- a/libavcodec/cbs_vp9.h
+++ b/libavcodec/cbs_vp9.h
@@ -84,7 +84,6 @@ typedef struct VP9RawFrameHeader {
     uint8_t frame_marker;
     uint8_t profile_low_bit;
     uint8_t profile_high_bit;
-    uint8_t profile_reserved_zero;
 
     uint8_t show_existing_frame;
     uint8_t frame_to_show_map_idx;
@@ -99,7 +98,6 @@ typedef struct VP9RawFrameHeader {
     uint8_t color_range;
     uint8_t subsampling_x;
     uint8_t subsampling_y;
-    uint8_t color_config_reserved_zero;
 
     uint8_t refresh_frame_flags;
 
@@ -183,8 +181,17 @@ typedef struct VP9RawSuperframe {
     VP9RawSuperframeIndex index;
 } VP9RawSuperframe;
 
+typedef struct VP9ReferenceFrameState { // frame properties remembered for one reference slot
+    int frame_width;    // RefFrameWidth
+    int frame_height;   // RefFrameHeight
+    int subsampling_x;  // RefSubsamplingX
+    int subsampling_y;  // RefSubsamplingY
+    int bit_depth;      // RefBitDepth
+} VP9ReferenceFrameState; // one instance per entry of CodedBitstreamVP9Context.ref[]
 
 typedef struct CodedBitstreamVP9Context {
+    int profile;
+
     // Frame dimensions in 8x8 mode info blocks.
     uint16_t mi_cols;
     uint16_t mi_rows;
@@ -192,6 +199,15 @@ typedef struct CodedBitstreamVP9Context {
     uint16_t sb64_cols;
     uint16_t sb64_rows;
 
+    int frame_width;
+    int frame_height;
+
+    uint8_t subsampling_x;
+    uint8_t subsampling_y;
+    int bit_depth;
+
+    VP9ReferenceFrameState ref[VP9_NUM_REF_FRAMES];
+
     // Write buffer.
     uint8_t *write_buffer;
     size_t write_buffer_size;
diff --git a/libavcodec/cbs_vp9_syntax_template.c b/libavcodec/cbs_vp9_syntax_template.c
index 0db0f52a6dbaa..898cede329cfd 100644
--- a/libavcodec/cbs_vp9_syntax_template.c
+++ b/libavcodec/cbs_vp9_syntax_template.c
@@ -43,10 +43,14 @@ static int FUNC(frame_sync_code)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
                               VP9RawFrameHeader *current, int profile)
 {
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
     int err;
 
-    if (profile >= 2)
+    if (profile >= 2) {
         f(1, ten_or_twelve_bit);
+        vp9->bit_depth = current->ten_or_twelve_bit ? 12 : 10;
+    } else
+        vp9->bit_depth = 8;
 
     f(3, color_space);
 
@@ -55,7 +59,7 @@ static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
         if (profile == 1 || profile == 3) {
             f(1, subsampling_x);
             f(1, subsampling_y);
-            f(1, color_config_reserved_zero);
+            fixed(1, reserved_zero, 0);
         } else {
             infer(subsampling_x, 1);
             infer(subsampling_y, 1);
@@ -65,9 +69,13 @@ static int FUNC(color_config)(CodedBitstreamContext *ctx, RWContext *rw,
         if (profile == 1 || profile == 3) {
             infer(subsampling_x, 0);
             infer(subsampling_y, 0);
+            fixed(1, reserved_zero, 0);
         }
     }
 
+    vp9->subsampling_x = current->subsampling_x;
+    vp9->subsampling_y = current->subsampling_y;
+
     return 0;
 }
 
@@ -80,8 +88,11 @@ static int FUNC(frame_size)(CodedBitstreamContext *ctx, RWContext *rw,
     f(16, frame_width_minus_1);
     f(16, frame_height_minus_1);
 
-    vp9->mi_cols = (current->frame_width_minus_1  + 8) >> 3;
-    vp9->mi_rows = (current->frame_height_minus_1 + 8) >> 3;
+    vp9->frame_width  = current->frame_width_minus_1  + 1;
+    vp9->frame_height = current->frame_height_minus_1 + 1;
+
+    vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
+    vp9->mi_rows = (vp9->frame_height + 7) >> 3;
     vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
     vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
 
@@ -106,15 +117,33 @@ static int FUNC(render_size)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
                                       VP9RawFrameHeader *current)
 {
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
     int err, i;
 
     for (i = 0; i < VP9_REFS_PER_FRAME; i++) {
         fs(1, found_ref[i], 1, i);
-        if (current->found_ref[i])
+        if (current->found_ref[i]) {
+            VP9ReferenceFrameState *ref =
+                &vp9->ref[current->ref_frame_idx[i]];
+
+            vp9->frame_width   = ref->frame_width;
+            vp9->frame_height  = ref->frame_height;
+
+            vp9->subsampling_x = ref->subsampling_x;
+            vp9->subsampling_y = ref->subsampling_y;
+            vp9->bit_depth     = ref->bit_depth;
+
             break;
+        }
     }
     if (i >= VP9_REFS_PER_FRAME)
         CHECK(FUNC(frame_size)(ctx, rw, current));
+    else {
+        vp9->mi_cols = (vp9->frame_width  + 7) >> 3;
+        vp9->mi_rows = (vp9->frame_height + 7) >> 3;
+        vp9->sb64_cols = (vp9->mi_cols + 7) >> 3;
+        vp9->sb64_rows = (vp9->mi_rows + 7) >> 3;
+    }
     CHECK(FUNC(render_size)(ctx, rw, current));
 
     return 0;
@@ -248,16 +277,16 @@ static int FUNC(tile_info)(CodedBitstreamContext *ctx, RWContext *rw,
 static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
                                      VP9RawFrameHeader *current)
 {
-    int profile, i;
-    int err;
+    CodedBitstreamVP9Context *vp9 = ctx->priv_data;
+    int err, i;
 
     f(2, frame_marker);
 
     f(1, profile_low_bit);
     f(1, profile_high_bit);
-    profile = (current->profile_high_bit << 1) + current->profile_low_bit;
-    if (profile == 3)
-        f(1, profile_reserved_zero);
+    vp9->profile = (current->profile_high_bit << 1) + current->profile_low_bit;
+    if (vp9->profile == 3)
+        fixed(1, reserved_zero, 0);
 
     f(1, show_existing_frame);
     if (current->show_existing_frame) {
@@ -274,7 +303,7 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
 
     if (current->frame_type == VP9_KEY_FRAME) {
         CHECK(FUNC(frame_sync_code)(ctx, rw, current));
-        CHECK(FUNC(color_config)(ctx, rw, current, profile));
+        CHECK(FUNC(color_config)(ctx, rw, current, vp9->profile));
         CHECK(FUNC(frame_size)(ctx, rw, current));
         CHECK(FUNC(render_size)(ctx, rw, current));
 
@@ -294,12 +323,16 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
          if (current->intra_only == 1) {
              CHECK(FUNC(frame_sync_code)(ctx, rw, current));
 
-             if (profile > 0) {
-                 CHECK(FUNC(color_config)(ctx, rw, current, profile));
+             if (vp9->profile > 0) {
+                 CHECK(FUNC(color_config)(ctx, rw, current, vp9->profile));
              } else {
                  infer(color_space,   1);
                  infer(subsampling_x, 1);
                  infer(subsampling_y, 1);
+                 vp9->bit_depth = 8;
+
+                 vp9->subsampling_x = current->subsampling_x;
+                 vp9->subsampling_y = current->subsampling_y;
              }
 
              f(8, refresh_frame_flags);
@@ -338,6 +371,25 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
 
     f(16, header_size_in_bytes);
 
+    for (i = 0; i < VP9_NUM_REF_FRAMES; i++) {
+        if (current->refresh_frame_flags & (1 << i)) {
+            vp9->ref[i] = (VP9ReferenceFrameState) {
+                .frame_width    = vp9->frame_width,
+                .frame_height   = vp9->frame_height,
+                .subsampling_x  = vp9->subsampling_x,
+                .subsampling_y  = vp9->subsampling_y,
+                .bit_depth      = vp9->bit_depth,
+            };
+        }
+    }
+
+    av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame:  size %dx%d  "
+           "subsample %dx%d  bit_depth %d  tiles %dx%d.\n",
+           vp9->frame_width, vp9->frame_height,
+           vp9->subsampling_x, vp9->subsampling_y,
+           vp9->bit_depth, 1 << current->tile_cols_log2,
+           1 << current->tile_rows_log2);
+
     return 0;
 }
 
diff --git a/libavcodec/cdgraphics.c b/libavcodec/cdgraphics.c
index be85e54288787..cf3f01a417bb3 100644
--- a/libavcodec/cdgraphics.c
+++ b/libavcodec/cdgraphics.c
@@ -81,11 +81,8 @@ static av_cold int cdg_decode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     cc->transparency = -1;
 
-    avctx->width   = CDG_FULL_WIDTH;
-    avctx->height  = CDG_FULL_HEIGHT;
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
-
-    return 0;
+    return ff_set_dimensions(avctx, CDG_FULL_WIDTH, CDG_FULL_HEIGHT);
 }
 
 static void cdg_border_preset(CDGraphicsContext *cc, uint8_t *data)
diff --git a/libavcodec/clearvideo.c b/libavcodec/clearvideo.c
index 0e3c772123111..ad3012f7b789f 100644
--- a/libavcodec/clearvideo.c
+++ b/libavcodec/clearvideo.c
@@ -570,6 +570,8 @@ static int clv_decode_frame(AVCodecContext *avctx, void *data,
 
         for (j = 0; j < c->pmb_height; j++) {
             for (i = 0; i < c->pmb_width; i++) {
+                if (get_bits_left(&c->gb) <= 0)
+                    return AVERROR_INVALIDDATA;
                 if (get_bits1(&c->gb)) {
                     MV mv = mvi_predict(&c->mvi, i, j, zero_mv);
 
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 67a30542d1b05..a3de8e1c2bae6 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -81,6 +81,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Motion JPEG"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
         .mime_types= MT("image/jpeg"),
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
     },
     {
         .id        = AV_CODEC_ID_MJPEGB,
@@ -722,7 +723,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_GIF,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "gif",
-        .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
+        .long_name = NULL_IF_CONFIG_SMALL("CompuServe GIF (Graphics Interchange Format)"),
         .props     = AV_CODEC_PROP_LOSSLESS,
         .mime_types= MT("image/gif"),
     },
@@ -1077,6 +1078,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "prores",
         .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
     },
     {
         .id        = AV_CODEC_ID_JV,
@@ -1689,6 +1691,20 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RemotelyAnywhere Screen Capture"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_HYMT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hymt",
+        .long_name = NULL_IF_CONFIG_SMALL("HuffYUV MT"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ARBC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "arbc",
+        .long_name = NULL_IF_CONFIG_SMALL("Gryphon's Anim Compressor"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* various PCM "codecs" */
     {
@@ -1936,6 +1952,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("PCM 24.0 floating point little-endian"),
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_PCM_VIDC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_vidc",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM Archimedes VIDC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* various ADPCM codecs */
     {
@@ -2927,6 +2950,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("ATRAC9 (Adaptive TRansform Acoustic Coding 9)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_HCOM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "hcom",
+        .long_name = NULL_IF_CONFIG_SMALL("HCOM Audio"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
@@ -3103,7 +3133,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Timed Text Markup Language"),
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
-
+    {
+        .id        = AV_CODEC_ID_ARIB_CAPTION,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "arib_caption",
+        .long_name = NULL_IF_CONFIG_SMALL("ARIB STD-B24 caption"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_arib_caption_profiles),
+    },
 
     /* other kind of codecs and pseudo-codecs */
     {
diff --git a/libavcodec/cookdata.h b/libavcodec/cookdata.h
index dcdb912ddc70a..efb8a53942c8e 100644
--- a/libavcodec/cookdata.h
+++ b/libavcodec/cookdata.h
@@ -453,12 +453,13 @@ static const uint16_t ccpl_huffcodes5[31] = {
 };
 
 static const uint16_t ccpl_huffcodes6[63] = {
-    0x0004,0x0005,0x0005,0x0006,0x0006,0x0007,0x0007,0x0007,0x0007,0x0008,0x0008,0x0008,
-    0x0008,0x0009,0x0009,0x0009,0x0009,0x000a,0x000a,0x000a,0x000a,0x000a,0x000b,0x000b,
-    0x000b,0x000b,0x000c,0x000d,0x000e,0x000e,0x0010,0x0000,0x000a,0x0018,0x0019,0x0036,
-    0x0037,0x0074,0x0075,0x0076,0x0077,0x00f4,0x00f5,0x00f6,0x00f7,0x01f5,0x01f6,0x01f7,
-    0x01f8,0x03f6,0x03f7,0x03f8,0x03f9,0x03fa,0x07fa,0x07fb,0x07fc,0x07fd,0x0ffd,0x1ffd,
-    0x3ffd,0x3ffe,0xffff,
+    0xfffe, 0x7ffe, 0x3ffc, 0x1ffc, 0x0ffc, 0x07f6, 0x07f7, 0x07f8, 0x07f9,
+    0x03f2, 0x03f3, 0x03f4, 0x03f5, 0x01f0, 0x01f1, 0x01f2, 0x01f3, 0x01f4,
+    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x0070, 0x0071, 0x0072, 0x0073, 0x0034,
+    0x0035, 0x0016, 0x0017, 0x0004, 0x0000, 0x000a, 0x0018, 0x0019, 0x0036,
+    0x0037, 0x0074, 0x0075, 0x0076, 0x0077, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+    0x01f5, 0x01f6, 0x01f7, 0x01f8, 0x03f6, 0x03f7, 0x03f8, 0x03f9, 0x03fa,
+    0x07fa, 0x07fb, 0x07fc, 0x07fd, 0x0ffd, 0x1ffd, 0x3ffd, 0x3ffe, 0xffff
 };
 
 static const uint8_t ccpl_huffbits2[3] = {
diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index f21273c07e903..291bb93dbc682 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -25,6 +25,7 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/fifo.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
@@ -33,8 +34,14 @@
 #include "avcodec.h"
 #include "decode.h"
 #include "hwaccel.h"
+#include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -95,29 +102,7 @@ typedef struct CuvidParsedFrame
     int is_deinterlacing;
 } CuvidParsedFrame;
 
-static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
-{
-    CuvidContext *ctx = avctx->priv_data;
-    const char *err_name;
-    const char *err_string;
-
-    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
-
-    if (err == CUDA_SUCCESS)
-        return 0;
-
-    ctx->cudl->cuGetErrorName(err, &err_name);
-    ctx->cudl->cuGetErrorString(err, &err_string);
-
-    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
-    if (err_name && err_string)
-        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
-    av_log(avctx, AV_LOG_ERROR, "\n");
-
-    return AVERROR_EXTERNAL;
-}
-
-#define CHECK_CU(x) check_cu(avctx, (x), #x)
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 
 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 {
@@ -127,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     CUVIDDECODECAPS *caps = NULL;
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
+    int chroma_444;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -169,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 
+    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
     switch (format->bit_depth_luma_minus8) {
     case 0: // 8-bit
-        pix_fmts[1] = AV_PIX_FMT_NV12;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         caps = &ctx->caps8;
         break;
     case 2: // 10-bit
-        pix_fmts[1] = AV_PIX_FMT_P010;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         caps = &ctx->caps10;
         break;
     case 4: // 12-bit
-        pix_fmts[1] = AV_PIX_FMT_P016;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         caps = &ctx->caps12;
         break;
     default:
@@ -282,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
-    if (format->chroma_format != cudaVideoChromaFormat_420) {
-        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
-        ctx->internal_error = AVERROR(EINVAL);
-        return 0;
-    }
-
     ctx->chroma_format = format->chroma_format;
 
     cuinfo.CodecType = ctx->codec_type = format->codec;
@@ -301,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     case AV_PIX_FMT_P016:
         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
         break;
+    case AV_PIX_FMT_YUV444P:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
+        break;
+    case AV_PIX_FMT_YUV444P16:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+        break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+        av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
         ctx->internal_error = AVERROR(EINVAL);
         return 0;
     }
@@ -511,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         return ret;
 
     if (av_fifo_size(ctx->frame_queue)) {
+        const AVPixFmtDescriptor *pixdesc;
         CuvidParsedFrame parsed_frame;
         CUVIDPROCPARAMS params;
         unsigned int pitch = 0;
@@ -541,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
-            for (i = 0; i < 2; i++) {
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                 CUDA_MEMCPY2D cpy = {
                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -551,22 +544,25 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .dstPitch      = frame->linesize[i],
                     .srcY          = offset,
                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
-                    .Height        = avctx->height >> (i ? 1 : 0),
+                    .Height        = height,
                 };
 
                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
-                offset += avctx->height;
+                offset += height;
             }
 
             ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
             if (ret < 0)
                 goto error;
-        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P016) {
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P010      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P016      ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P   ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
+            unsigned int offset = 0;
             AVFrame *tmp_frame = av_frame_alloc();
             if (!tmp_frame) {
                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -574,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
             tmp_frame->format        = AV_PIX_FMT_CUDA;
             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
-            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
-            tmp_frame->linesize[0]   = pitch;
-            tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->height * pitch);
-            tmp_frame->linesize[1]   = pitch;
             tmp_frame->width         = avctx->width;
             tmp_frame->height        = avctx->height;
 
+            /*
+             * Note that the following logic would not work for three plane
+             * YUV420 because the pitch value is different for the chroma
+             * planes.
+             */
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
+                tmp_frame->linesize[i] = pitch;
+                offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
+            }
+
             ret = ff_get_buffer(avctx, frame, 0);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
index 4b4ceeff05b0a..34b3e94165ec2 100644
--- a/libavcodec/dcaenc.c
+++ b/libavcodec/dcaenc.c
@@ -136,7 +136,7 @@ static int subband_bufer_alloc(DCAEncContext *c)
                                (SUBBAND_SAMPLES + DCA_ADPCM_COEFFS),
                                sizeof(int32_t));
     if (!bufer)
-        return -1;
+        return AVERROR(ENOMEM);
 
     /* we need a place for DCA_ADPCM_COEFF samples from previous frame
      * to calc prediction coefficients for each subband */
@@ -166,8 +166,8 @@ static int encode_init(AVCodecContext *avctx)
     int i, j, k, min_frame_bits;
     int ret;
 
-    if (subband_bufer_alloc(c))
-        return AVERROR(ENOMEM);
+    if ((ret = subband_bufer_alloc(c)) < 0)
+        return ret;
 
     c->fullband_channels = c->channels = avctx->channels;
     c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index cca51eeaf8acf..52f082d0623db 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -178,6 +178,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
 {
     int n = 1 << nbits;
     int i;
+    int ret;
 
     memset(s, 0, sizeof(*s));
 
@@ -194,9 +195,9 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
         if (!s->csc2)
             return AVERROR(ENOMEM);
 
-        if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
+        if ((ret = ff_rdft_init(&s->rdft, nbits, inverse == DCT_III)) < 0) {
             av_freep(&s->csc2);
-            return -1;
+            return ret;
         }
 
         for (i = 0; i < n / 2; i++)
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 4607e9f318a59..a32ff2fcd395c 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -281,10 +281,6 @@ int ff_decode_bsfs_init(AVCodecContext *avctx)
             bsfs_str++;
     }
 
-    ret = avcodec_parameters_to_context(avctx, s->bsfs[s->nb_bsfs - 1]->par_out);
-    if (ret < 0)
-        return ret;
-
     return 0;
 fail:
     ff_decode_bsfs_uninit(avctx);
@@ -1382,6 +1378,7 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
         if (i == n) {
             av_log(avctx, AV_LOG_ERROR, "Invalid return from get_format(): "
                    "%s not in possible list.\n", desc->name);
+            ret = AV_PIX_FMT_NONE;
             break;
         }
 
@@ -1500,7 +1497,7 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
         tmpsize = av_image_fill_pointers(data, avctx->pix_fmt, h,
                                          NULL, linesize);
         if (tmpsize < 0)
-            return -1;
+            return tmpsize;
 
         for (i = 0; i < 3 && data[i + 1]; i++)
             size[i] = data[i + 1] - data[i];
diff --git a/libavcodec/dirac_arith.c b/libavcodec/dirac_arith.c
index 7eb9bd60b2f17..36142fe198e91 100644
--- a/libavcodec/dirac_arith.c
+++ b/libavcodec/dirac_arith.c
@@ -115,6 +115,8 @@ void ff_dirac_init_arith_decoder(DiracArith *c, GetBitContext *gb, int length)
 
     c->counter = -16;
     c->range   = 0xffff;
+    c->error   = 0;
+    c->overread= 0;
 
     for (i = 0; i < DIRAC_CTX_COUNT; i++)
         c->contexts[i] = 0x8000;
diff --git a/libavcodec/dirac_arith.h b/libavcodec/dirac_arith.h
index 24a7ca390e704..79526a7ca3c19 100644
--- a/libavcodec/dirac_arith.h
+++ b/libavcodec/dirac_arith.h
@@ -81,6 +81,8 @@ typedef struct {
     const uint8_t *bytestream_end;
 
     uint16_t contexts[DIRAC_CTX_COUNT];
+    int error;
+    int overread;
 } DiracArith;
 
 extern const uint8_t ff_dirac_next_ctx[DIRAC_CTX_COUNT];
@@ -118,6 +120,9 @@ static inline void refill(DiracArith *c)
                 new |= 0xff00;
 
             c->bytestream = c->bytestream_end;
+            c->overread ++;
+            if (c->overread > 4)
+                c->error = AVERROR_INVALIDDATA;
         }
 
         c->low += new << counter;
@@ -173,6 +178,7 @@ static inline int dirac_get_arith_uint(DiracArith *c, int follow_ctx, int data_c
     while (!dirac_get_arith_bit(c, follow_ctx)) {
         if (ret >= 0x40000000) {
             av_log(NULL, AV_LOG_ERROR, "dirac_get_arith_uint overflow\n");
+            c->error = AVERROR_INVALIDDATA;
             return -1;
         }
         ret <<= 1;
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index af561d1426526..30b4bfad7998f 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -537,6 +537,8 @@ static inline int codeblock(DiracContext *s, SubBand *b,
     buf = b->ibuf + top * b->stride;
     if (is_arith) {
         for (y = top; y < bottom; y++) {
+            if (c->error)
+                return c->error;
             for (x = left; x < right; x++) {
                 if (b->pshift) {
                     coeff_unpack_arith_10(c, qfactor, qoffset, b, (int32_t*)(buf)+x, x, y);
@@ -676,6 +678,11 @@ static int decode_component(DiracContext *s, int comp)
             b->length = get_interleaved_ue_golomb(&s->gb);
             if (b->length) {
                 b->quant = get_interleaved_ue_golomb(&s->gb);
+                if (b->quant > (DIRAC_MAX_QUANT_INDEX - 1)) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Unsupported quant %d\n", b->quant);
+                    b->quant = 0;
+                    return AVERROR_INVALIDDATA;
+                }
                 align_get_bits(&s->gb);
                 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
                 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
@@ -2130,7 +2137,7 @@ static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
             return ret;
         }
 
-        if (CALC_PADDING((int64_t)dsh->width, MAX_DWT_LEVELS) * CALC_PADDING((int64_t)dsh->height, MAX_DWT_LEVELS) > avctx->max_pixels)
+        if (CALC_PADDING((int64_t)dsh->width, MAX_DWT_LEVELS) * CALC_PADDING((int64_t)dsh->height, MAX_DWT_LEVELS) * 5LL > avctx->max_pixels)
             ret = AVERROR(ERANGE);
         if (ret >= 0)
             ret = ff_set_dimensions(avctx, dsh->width, dsh->height);
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index ae8b0ffafa279..1e95086696703 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -37,7 +37,7 @@
 #include "thread.h"
 
 typedef struct RowContext {
-    DECLARE_ALIGNED(16, int16_t, blocks)[12][64];
+    DECLARE_ALIGNED(32, int16_t, blocks)[12][64];
     int luma_scale[64];
     int chroma_scale[64];
     GetBitContext gb;
@@ -589,12 +589,16 @@ static int dnxhd_decode_row(AVCodecContext *avctx, void *data,
     const DNXHDContext *ctx = avctx->priv_data;
     uint32_t offset = ctx->mb_scan_index[rownb];
     RowContext *row = ctx->rows + threadnb;
-    int x;
+    int x, ret;
 
     row->last_dc[0] =
     row->last_dc[1] =
     row->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
-    init_get_bits(&row->gb, ctx->buf + offset, (ctx->buf_size - offset) << 3);
+    ret = init_get_bits8(&row->gb, ctx->buf + offset, ctx->buf_size - offset);
+    if (ret < 0) {
+        row->errors++;
+        return ret;
+    }
     for (x = 0; x < ctx->mb_width; x++) {
         //START_TIMER;
         int ret = dnxhd_decode_macroblock(ctx, row, data, x, rownb);
diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index cf23bb6ba1fc3..b1833ed9ef06a 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/imgutils.h"
@@ -50,8 +51,26 @@ static unsigned int read32(const uint8_t **ptr, int is_big)
     return temp;
 }
 
-static uint16_t read10in32(const uint8_t **ptr, uint32_t * lbuf,
-                                  int * n_datum, int is_big, int shift)
+static uint16_t read10in32_gray(const uint8_t **ptr, uint32_t *lbuf, /* returns one 10-bit sample taken from a cached 32-bit word */
+                                int *n_datum, int is_big, int shift) /* n_datum: samples still cached in *lbuf; shift: bit offset applied before masking */
+{
+    uint16_t temp;
+
+    if (*n_datum)
+        (*n_datum)--; /* a sample is still available in the cached word */
+    else {
+        *lbuf = read32(ptr, is_big); /* cache exhausted: refill it from read32() */
+        *n_datum = 2; /* two further samples remain cached after the one returned now */
+    }
+
+    temp = *lbuf >> shift & 0x3FF; /* parsed as (*lbuf >> shift) & 0x3FF: keep 10 bits */
+    *lbuf = *lbuf >> 10; /* discard the low 10 bits so the next call sees the following sample */
+
+    return temp;
+}
+
+static uint16_t read10in32(const uint8_t **ptr, uint32_t *lbuf,
+                           int *n_datum, int is_big, int shift)
 {
     if (*n_datum)
         (*n_datum)--;
@@ -65,8 +84,8 @@ static uint16_t read10in32(const uint8_t **ptr, uint32_t * lbuf,
     return *lbuf & 0x3FF;
 }
 
-static uint16_t read12in32(const uint8_t **ptr, uint32_t * lbuf,
-                                  int * n_datum, int is_big)
+static uint16_t read12in32(const uint8_t **ptr, uint32_t *lbuf,
+                           int *n_datum, int is_big)
 {
     if (*n_datum)
         (*n_datum)--;
@@ -106,6 +125,9 @@ static int decode_frame(AVCodecContext *avctx,
     int buf_size       = avpkt->size;
     AVFrame *const p = data;
     uint8_t *ptr[AV_NUM_DATA_POINTERS];
+    uint32_t header_version, version = 0;
+    char creator[101];
+    char input_device[33];
 
     unsigned int offset;
     int magic_num, endian;
@@ -141,6 +163,15 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
+    header_version = read32(&buf, 0);
+    if (header_version == MKTAG('V','1','.','0'))
+        version = 1;
+    if (header_version == MKTAG('V','2','.','0'))
+        version = 2;
+    if (!version)
+        av_log(avctx, AV_LOG_WARNING, "Unknown header format version %s.\n",
+               av_fourcc2str(header_version));
+
     // Check encryption
     buf = avpkt->data + 660;
     ret = read32(&buf, endian);
@@ -310,6 +341,10 @@ static int decode_frame(AVCodecContext *avctx,
     case 51121:
         avctx->pix_fmt = AV_PIX_FMT_GBRAP12;
         break;
+    case 6100:
+    case 6101:
+        avctx->pix_fmt = AV_PIX_FMT_GRAY10;
+        break;
     case 6161:
         avctx->pix_fmt = AV_PIX_FMT_GRAY16BE;
         break;
@@ -347,6 +382,14 @@ static int decode_frame(AVCodecContext *avctx,
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
 
+    av_strlcpy(creator, avpkt->data + 160, sizeof(creator)); /* av_strlcpy copies at most size - 1 bytes, so pass the full buffer size to keep all 100 characters */
+    creator[100] = '\0'; /* defensive: av_strlcpy already terminates within the buffer */
+    av_dict_set(&p->metadata, "Creator", creator, 0);
+
+    av_strlcpy(input_device, avpkt->data + 1556, sizeof(input_device)); /* same reasoning: keep all 32 characters of the field */
+    input_device[32] = '\0'; /* defensive: av_strlcpy already terminates within the buffer */
+    av_dict_set(&p->metadata, "Input Device", input_device, 0);
+
     // Move pointer to offset from start of file
     buf =  avpkt->data + offset;
 
@@ -360,20 +403,27 @@ static int decode_frame(AVCodecContext *avctx,
                                 (uint16_t*)ptr[1],
                                 (uint16_t*)ptr[2],
                                 (uint16_t*)ptr[3]};
-            int shift = packing == 1 ? 22 : 20;
+            int shift = elements > 1 ? packing == 1 ? 22 : 20 : packing == 1 ? 2 : 0;
             for (y = 0; y < avctx->width; y++) {
-                *dst[2]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian, shift);
-                *dst[0]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian, shift);
-                *dst[1]++ = read10in32(&buf, &rgbBuffer,
-                                       &n_datum, endian, shift);
+                if (elements >= 3)
+                    *dst[2]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
+                if (elements == 1)
+                    *dst[0]++ = read10in32_gray(&buf, &rgbBuffer,
+                                                &n_datum, endian, shift);
+                else
+                    *dst[0]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
+                if (elements >= 2)
+                    *dst[1]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
                 if (elements == 4)
                     *dst[3]++ =
                     read10in32(&buf, &rgbBuffer,
                                &n_datum, endian, shift);
             }
-            n_datum = 0;
+            if (memcmp(input_device, "Scanity", 7))
+                n_datum = 0;
             for (i = 0; i < elements; i++)
                 ptr[i] += p->linesize[i];
         }
diff --git a/libavcodec/dstdec.c b/libavcodec/dstdec.c
index 368cb649319d0..0614c99c4bb49 100644
--- a/libavcodec/dstdec.c
+++ b/libavcodec/dstdec.c
@@ -70,7 +70,7 @@ typedef struct DSTContext {
     GetBitContext gb;
     ArithCoder ac;
     Table fsets, probs;
-    DECLARE_ALIGNED(64, uint8_t, status)[DST_MAX_CHANNELS][16];
+    DECLARE_ALIGNED(16, uint8_t, status)[DST_MAX_CHANNELS][16];
     DECLARE_ALIGNED(16, int16_t, filter)[DST_MAX_ELEMENTS][16][256];
     DSDContext dsdctx[DST_MAX_CHANNELS];
 } DSTContext;
@@ -343,8 +343,8 @@ static int decode_frame(AVCodecContext *avctx, void *data,
             v = ((predict >> 15) ^ residual) & 1;
             dsd[((i >> 3) * channels + ch) << 2] |= v << (7 - (i & 0x7 ));
 
-            AV_WN64A(status + 8, (AV_RN64A(status + 8) << 1) | ((AV_RN64A(status) >> 63) & 1));
-            AV_WN64A(status, (AV_RN64A(status) << 1) | v);
+            AV_WL64A(status + 8, (AV_RL64A(status + 8) << 1) | ((AV_RL64A(status) >> 63) & 1));
+            AV_WL64A(status, (AV_RL64A(status) << 1) | v);
         }
     }
 
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index b59e8366570ee..bc4a17bde0ffa 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -1267,6 +1267,13 @@ static int dvbsub_parse_region_segment(AVCodecContext *avctx,
         display->y_pos = AV_RB16(buf) & 0xfff;
         buf += 2;
 
+        if (display->x_pos >= region->width ||
+            display->y_pos >= region->height) {
+            av_log(avctx, AV_LOG_ERROR, "Object outside region\n");
+            av_free(display);
+            return AVERROR_INVALIDDATA;
+        }
+
         if ((object->type == 1 || object->type == 2) && buf+1 < buf_end) {
             display->fgcolor = *buf++;
             display->bgcolor = *buf++;
diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 632a53adabcb8..741ea9fd1e737 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -595,6 +595,7 @@ static int dvdsub_decode(AVCodecContext *avctx,
     }
 
     if (is_menu < 0) {
+        ctx->buf_size = 0;
     no_subtitle:
         reset_rects(sub);
         *data_size = 0;
diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index 08aca73b1fd8b..aef5ec19dd6b6 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -426,7 +426,8 @@ static int fill_optable(unsigned *table0, OpcodeTable *table1, int nb_elements)
 static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op_size, int nb_elements)
 {
     OpcodeTable optable[1024];
-    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
+    int sum, x, val, lshift, rshift, ret, i, idx;
+    int64_t size_in_bits;
     unsigned endoffset, newoffset, offset;
     unsigned next;
     uint8_t *src = (uint8_t *)gb->buffer;
@@ -1192,6 +1193,12 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
     ret = decompress_tex(avctx);
     if (ret < 0)
         return ret;
+    {
+        int w_block = avctx->coded_width / ctx->texture_block_w;
+        int h_block = avctx->coded_height / ctx->texture_block_h;
+        if (w_block * h_block * ctx->tex_step > ctx->tex_size * 8LL)
+            return AVERROR_INVALIDDATA;
+    }
 
     tframe.f = data;
     ret = ff_thread_get_buffer(avctx, &tframe, 0);
diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c
index 73067ded9d2a3..3a5c7989b93be 100644
--- a/libavcodec/eac3dec.c
+++ b/libavcodec/eac3dec.c
@@ -31,12 +31,6 @@
  *     No known samples exist.  The spec also does not give clear information
  *     on how this is to be implemented.
  *
- * Dependent Streams
- *     Only the independent stream is currently decoded. Any dependent
- *     streams are skipped.  We have only come across two examples of this, and
- *     they are both just test streams, one for HD-DVD and the other for
- *     Blu-ray.
- *
  * Transient Pre-noise Processing
  *     This is side information which a decoder should use to reduce artifacts
  *     caused by transients.  There are samples which are known to have this
diff --git a/libavcodec/eacmv.c b/libavcodec/eacmv.c
index bf4404ce42635..6f39d72b88d61 100644
--- a/libavcodec/eacmv.c
+++ b/libavcodec/eacmv.c
@@ -191,12 +191,12 @@ static int cmv_decode_frame(AVCodecContext *avctx,
         if (ret < 0)
             return ret;
         if (size > buf_end - buf - EA_PREAMBLE_SIZE)
-            return -1;
+            return AVERROR_INVALIDDATA;
         buf += size;
     }
 
-    if (av_image_check_size(s->width, s->height, 0, s->avctx))
-        return -1;
+    if ((ret = av_image_check_size(s->width, s->height, 0, s->avctx)) < 0)
+        return ret;
 
     if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 1a847a35da45b..0002d454570bc 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -83,7 +83,7 @@ static int tqi_decode_mb(TqiContext *t, int16_t (*block)[64])
         if (ret < 0) {
             av_log(t->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n",
                    t->mb_x, t->mb_y);
-            return -1;
+            return ret;
         }
     }
 
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 1abae53f41d41..35d0c609e5ab9 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -437,7 +437,7 @@ static void guess_mv(ERContext *s)
     }
 
     if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
-        num_avail <= mb_width / 2) {
+        num_avail <= FFMAX(mb_width, mb_height) / 2) {
         for (mb_y = 0; mb_y < mb_height; mb_y++) {
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                 const int mb_xy = mb_x + mb_y * s->mb_stride;
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 5253cc3f136c7..0f8b0fda9f2c8 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -1389,6 +1389,7 @@ static int decode_header(EXRContext *s, AVFrame *frame)
                         if (*ch_gb.buffer == '.')
                             ch_gb.buffer++;         /* skip dot if not given */
                     } else {
+                        layer_match = 0;
                         av_log(s->avctx, AV_LOG_INFO,
                                "Channel doesn't match layer : %s.\n", ch_gb.buffer);
                     }
@@ -1463,6 +1464,11 @@ static int decode_header(EXRContext *s, AVFrame *frame)
                     }
                     s->pixel_type                     = current_pixel_type;
                     s->channel_offsets[channel_index] = s->current_channel_offset;
+                } else if (channel_index >= 0) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                            "Multiple channels with index %d.\n", channel_index);
+                    ret = AVERROR_INVALIDDATA;
+                    goto fail;
                 }
 
                 s->channels = av_realloc(s->channels,
diff --git a/libavcodec/extract_extradata_bsf.c b/libavcodec/extract_extradata_bsf.c
index f37427c7e14ec..17e5deb96b656 100644
--- a/libavcodec/extract_extradata_bsf.c
+++ b/libavcodec/extract_extradata_bsf.c
@@ -157,7 +157,7 @@ static int extract_extradata_h2645(AVBSFContext *ctx, AVPacket *pkt,
     }
 
     ret = ff_h2645_packet_split(&s->h2645_pkt, pkt->data, pkt->size,
-                                ctx, 0, 0, ctx->par_in->codec_id, 1);
+                                ctx, 0, 0, ctx->par_in->codec_id, 1, 0);
     if (ret < 0)
         return ret;
 
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 762c014bc8be7..20a62e429055d 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     if (s->fft_permutation == FF_FFT_PERM_AVX) {
         fft_perm_avx(s);
     } else {
-        for(i=0; i<n; i++) {
-            int k;
-            j = i;
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
-            if (s->revtab)
-                s->revtab[k] = j;
-            if (s->revtab32)
-                s->revtab32[k] = j;
-        }
+/* Permutation-table fillers; num selects s->revtab (empty) or s->revtab32.
+ * No ';' follows while (0): every use supplies its own, so each macro
+ * behaves as one statement even directly under an unbraced if/else. */
+#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        j = (i & ~3) | ((i >> 1) & 1) | ((i << 1) & 2);\
+        s->revtab##num[k] = j;\
+    } \
+} while (0)
+
+#define PROCESS_FFT_PERM_DEFAULT(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        j = i;\
+        s->revtab##num[k] = j;\
+    } \
+} while (0)
+
+#define SPLIT_RADIX_PERMUTATION(num) do { \
+    if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
+        PROCESS_FFT_PERM_SWAP_LSBS(num); \
+    } else {\
+        PROCESS_FFT_PERM_DEFAULT(num); \
+    }\
+} while (0)
+
+    if (s->revtab)
+        SPLIT_RADIX_PERMUTATION();
+    if (s->revtab32)
+        SPLIT_RADIX_PERMUTATION(32);
+
+#undef PROCESS_FFT_PERM_DEFAULT
+#undef PROCESS_FFT_PERM_SWAP_LSBS
+#undef SPLIT_RADIX_PERMUTATION
     }
 
     return 0;
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 653138b070219..f0bb19350a899 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -153,9 +153,7 @@ static av_always_inline int fold(int diff, int bits)
     if (bits == 8)
         diff = (int8_t)diff;
     else {
-        diff +=  1 << (bits  - 1);
-        diff  = av_mod_uintp2(diff, bits);
-        diff -=  1 << (bits  - 1);
+        diff = sign_extend(diff, bits);
     }
 
     return diff;
@@ -176,19 +174,13 @@ static inline void update_vlc_state(VlcState *const state, const int v)
     count++;
 
     if (drift <= -count) {
-        if (state->bias > -128)
-            state->bias--;
+        state->bias = FFMAX(state->bias - 1, -128);
 
-        drift += count;
-        if (drift <= -count)
-            drift = -count + 1;
+        drift = FFMAX(drift + count, -count + 1);
     } else if (drift > 0) {
-        if (state->bias < 127)
-            state->bias++;
+        state->bias = FFMIN(state->bias + 1, 127);
 
-        drift -= count;
-        if (drift > 0)
-            drift = 0;
+        drift = FFMIN(drift - count, 0);
     }
 
     state->drift = drift;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index f5eb0feb4ecf9..796d81f7c6541 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -449,7 +449,7 @@ static int write_extradata(FFV1Context *f)
         put_symbol(c, state, f->intra = (f->avctx->gop_size < 2), 0);
     }
 
-    f->avctx->extradata_size = ff_rac_terminate(c);
+    f->avctx->extradata_size = ff_rac_terminate(c, 0);
     v = av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0, f->avctx->extradata, f->avctx->extradata_size);
     AV_WL32(f->avctx->extradata + f->avctx->extradata_size, v);
     f->avctx->extradata_size += 4;
@@ -1065,9 +1065,7 @@ static int encode_slice(AVCodecContext *c, void *arg)
         encode_slice_header(f, fs);
     }
     if (fs->ac == AC_GOLOMB_RICE) {
-        if (f->version > 2)
-            put_rac(&fs->c, (uint8_t[]) { 129 }, 0);
-        fs->ac_byte_count = f->version > 2 || (!x && !y) ? ff_rac_terminate(&fs->c) : 0;
+        fs->ac_byte_count = f->version > 2 || (!x && !y) ? ff_rac_terminate(&fs->c, f->version > 2) : 0;
         init_put_bits(&fs->pb,
                       fs->c.bytestream_start + fs->ac_byte_count,
                       fs->c.bytestream_end - fs->c.bytestream_start - fs->ac_byte_count);
@@ -1232,9 +1230,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         int bytes;
 
         if (fs->ac != AC_GOLOMB_RICE) {
-            uint8_t state = 129;
-            put_rac(&fs->c, &state, 0);
-            bytes = ff_rac_terminate(&fs->c);
+            bytes = ff_rac_terminate(&fs->c, 1);
         } else {
             flush_put_bits(&fs->pb); // FIXME: nicer padding
             bytes = fs->ac_byte_count + (put_bits_count(&fs->pb) + 7) / 8;
diff --git a/libavcodec/fic.c b/libavcodec/fic.c
index dcf0777674285..65d102b86bea2 100644
--- a/libavcodec/fic.c
+++ b/libavcodec/fic.c
@@ -139,6 +139,9 @@ static int fic_decode_block(FICContext *ctx, GetBitContext *gb,
 {
     int i, num_coeff;
 
+    if (get_bits_left(gb) < 8)
+        return AVERROR_INVALIDDATA;
+
     /* Is it a skip block? */
     if (get_bits1(gb)) {
         *is_p = 1;
@@ -173,9 +176,11 @@ static int fic_decode_slice(AVCodecContext *avctx, void *tdata)
     int slice_h  = tctx->slice_h;
     int src_size = tctx->src_size;
     int y_off    = tctx->y_off;
-    int x, y, p;
+    int x, y, p, ret;
 
-    init_get_bits(&gb, src, src_size * 8);
+    ret = init_get_bits8(&gb, src, src_size);
+    if (ret < 0)
+        return ret;
 
     for (p = 0; p < 3; p++) {
         int stride   = ctx->frame->linesize[p];
@@ -380,6 +385,8 @@ static int fic_decode_frame(AVCodecContext *avctx, void *data,
             slice_h      = FFALIGN(avctx->height - ctx->slice_h * (nslices - 1), 16);
         } else {
             slice_size = AV_RB32(src + tsize + FIC_HEADER_SIZE + slice * 4 + 4);
+            if (slice_size < slice_off)
+                return AVERROR_INVALIDDATA;
         }
 
         if (slice_size < slice_off || slice_size > msize)
diff --git a/libavcodec/filter_units_bsf.c b/libavcodec/filter_units_bsf.c
index 1ee0afdf2b636..bc2ca288dd97f 100644
--- a/libavcodec/filter_units_bsf.c
+++ b/libavcodec/filter_units_bsf.c
@@ -139,7 +139,7 @@ static int filter_units_filter(AVBSFContext *bsf, AVPacket *out)
 
         // Don't return packets with nothing in them.
         av_packet_free(&in);
-        ff_cbs_fragment_uninit(ctx->cbc, frag);
+        ff_cbs_fragment_reset(ctx->cbc, frag);
     }
 
     err = ff_cbs_write_packet(ctx->cbc, out, frag);
@@ -153,7 +153,7 @@ static int filter_units_filter(AVBSFContext *bsf, AVPacket *out)
         goto fail;
 
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
     av_packet_free(&in);
 
     return err;
@@ -199,18 +199,18 @@ static int filter_units_init(AVBSFContext *bsf)
     ctx->cbc->nb_decompose_unit_types = 0;
 
     if (bsf->par_in->extradata) {
-        CodedBitstreamFragment ps;
+        CodedBitstreamFragment *frag = &ctx->fragment;
 
-        err = ff_cbs_read_extradata(ctx->cbc, &ps, bsf->par_in);
+        err = ff_cbs_read_extradata(ctx->cbc, frag, bsf->par_in);
         if (err < 0) {
             av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
         } else {
-            err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, &ps);
+            err = ff_cbs_write_extradata(ctx->cbc, bsf->par_out, frag);
             if (err < 0)
                 av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
         }
 
-        ff_cbs_fragment_uninit(ctx->cbc, &ps);
+        ff_cbs_fragment_reset(ctx->cbc, frag);
     }
 
     return err;
@@ -222,6 +222,7 @@ static void filter_units_close(AVBSFContext *bsf)
 
     av_freep(&ctx->type_list);
 
+    ff_cbs_fragment_free(ctx->cbc, &ctx->fragment);
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/g722dec.c b/libavcodec/g722dec.c
index 000b591fe6242..7c270bc33afec 100644
--- a/libavcodec/g722dec.c
+++ b/libavcodec/g722dec.c
@@ -100,7 +100,9 @@ static int g722_decode_frame(AVCodecContext *avctx, void *data,
         return ret;
     out_buf = (int16_t *)frame->data[0];
 
-    init_get_bits(&gb, avpkt->data, avpkt->size * 8);
+    ret = init_get_bits8(&gb, avpkt->data, avpkt->size);
+    if (ret < 0)
+        return ret;
 
     for (j = 0; j < avpkt->size; j++) {
         int ilow, ihigh, rlow, rhigh, dhigh;
diff --git a/libavcodec/g723_1.h b/libavcodec/g723_1.h
index f833af01c64c9..d60d481e67591 100644
--- a/libavcodec/g723_1.h
+++ b/libavcodec/g723_1.h
@@ -116,9 +116,7 @@ typedef struct FCBParam {
     int pulse_sign[PULSE_MAX];
 } FCBParam;
 
-typedef struct g723_1_context {
-    AVClass *class;
-
+typedef struct G723_1_ChannelContext {
     G723_1_Subframe subframe[4];
     enum FrameType cur_frame_type;
     enum FrameType past_frame_type;
@@ -144,8 +142,6 @@ typedef struct g723_1_context {
     int reflection_coef;
     int pf_gain;                 ///< formant postfilter
                                  ///< gain scaling unit memory
-    int postfilter;
-
     int16_t audio[FRAME_LEN + LPC_ORDER + PITCH_MAX + 4];
 
     /* encoder */
@@ -158,6 +154,13 @@ typedef struct g723_1_context {
     int16_t perf_iir_mem[LPC_ORDER];       ///< and iir memories
 
     int16_t harmonic_mem[PITCH_MAX];
+} G723_1_ChannelContext;
+
+typedef struct G723_1_Context {
+    AVClass *class;
+    int postfilter;                 ///< nonzero: decoder runs the pitch and formant postfilters
+
+    G723_1_ChannelContext ch[2];    ///< per-channel codec state; the decoder accepts 1 or 2 channels
 } G723_1_Context;
 
 
diff --git a/libavcodec/g723_1_parser.c b/libavcodec/g723_1_parser.c
new file mode 100644
index 0000000000000..0305ca329da55
--- /dev/null
+++ b/libavcodec/g723_1_parser.c
@@ -0,0 +1,81 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * G723_1 audio parser
+ */
+
+#include "parser.h"
+#include "g723_1.h"
+
+/** Parser private data: only the generic frame-reassembly state. */
+typedef struct G723_1ParseContext {
+    ParseContext pc;
+} G723_1ParseContext;
+
+/**
+ * Split the input into packets sized for one frame per channel.
+ *
+ * The packet length is frame_size[] indexed by the two low bits of the first
+ * input byte, times the channel count (clamped to at least 1).
+ *
+ * NOTE(review): the length is taken from the first byte of the current input
+ * chunk, which presumes every call starts on a frame boundary; confirm this
+ * against how ff_combine_frame() handles previously buffered data.
+ *
+ * @param s1           parser context; duration is set for each output packet
+ * @param avctx        codec context, read for the channel count
+ * @param poutbuf      receives the packet data, or NULL when none is output
+ * @param poutbuf_size receives the packet size, or 0 when none is output
+ * @param buf          input data
+ * @param buf_size     number of input bytes in buf
+ * @return the computed packet length when a packet is output, otherwise
+ *         buf_size as left by ff_combine_frame()
+ */
+static int g723_1_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    G723_1ParseContext *s = s1->priv_data;
+    ParseContext *pc = &s->pc;
+    int next = END_NOT_FOUND;
+
+    if (buf_size > 0)
+        next = frame_size[buf[0] & 3] * FFMAX(1, avctx->channels);
+
+    if (ff_combine_frame(pc, next, &buf, &buf_size) < 0 || !buf_size) {
+        *poutbuf      = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+
+    /* Same sample count the decoder sets as frame->nb_samples per packet. */
+    s1->duration = FRAME_LEN;
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser ff_g723_1_parser = {
+    .codec_ids      = { AV_CODEC_ID_G723_1 },
+    .priv_data_size = sizeof(G723_1ParseContext),
+    .parser_parse   = g723_1_parse,
+    .parser_close   = ff_parse_close,
+};
diff --git a/libavcodec/g723_1dec.c b/libavcodec/g723_1dec.c
index ab952ec66d37f..d8bc3f97acfdd 100644
--- a/libavcodec/g723_1dec.c
+++ b/libavcodec/g723_1dec.c
@@ -42,18 +42,25 @@
 
 static av_cold int g723_1_decode_init(AVCodecContext *avctx)
 {
-    G723_1_Context *p = avctx->priv_data;
+    G723_1_Context *s = avctx->priv_data;
 
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
-    avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
-    avctx->channels       = 1;
-    p->pf_gain            = 1 << 12;
+    avctx->sample_fmt     = AV_SAMPLE_FMT_S16P;
+    if (avctx->channels < 1 || avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo are supported (requested channels: %d).\n", avctx->channels);
+        return AVERROR(EINVAL);
+    }
+    avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        G723_1_ChannelContext *p = &s->ch[ch];
 
-    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-    memcpy(p->sid_lsp,  dc_lsp, LPC_ORDER * sizeof(*p->sid_lsp));
+        p->pf_gain = 1 << 12;
 
-    p->cng_random_seed = CNG_RANDOM_SEED;
-    p->past_frame_type = SID_FRAME;
+        memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
+        memcpy(p->sid_lsp,  dc_lsp, LPC_ORDER * sizeof(*p->sid_lsp));
+
+        p->cng_random_seed = CNG_RANDOM_SEED;
+        p->past_frame_type = SID_FRAME;
+    }
 
     return 0;
 }
@@ -65,14 +72,17 @@ static av_cold int g723_1_decode_init(AVCodecContext *avctx)
  * @param buf         pointer to the input buffer
  * @param buf_size    size of the input buffer
  */
-static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
+static int unpack_bitstream(G723_1_ChannelContext *p, const uint8_t *buf,
                             int buf_size)
 {
     GetBitContext gb;
     int ad_cb_len;
     int temp, info_bits, i;
+    int ret;
 
-    init_get_bits(&gb, buf, buf_size * 8);
+    ret = init_get_bits8(&gb, buf, buf_size);
+    if (ret < 0)
+        return ret;
 
     /* Extract frame type and rate info */
     info_bits = get_bits(&gb, 2);
@@ -344,7 +354,7 @@ static void comp_ppf_gains(int lag, PPFParam *ppf, enum Rate cur_rate,
  * @param ppf       pitch postfilter parameters
  * @param cur_rate  current bitrate
  */
-static void comp_ppf_coeff(G723_1_Context *p, int offset, int pitch_lag,
+static void comp_ppf_coeff(G723_1_ChannelContext *p, int offset, int pitch_lag,
                            PPFParam *ppf, enum Rate cur_rate)
 {
 
@@ -430,7 +440,7 @@ static void comp_ppf_coeff(G723_1_Context *p, int offset, int pitch_lag,
  *
  * @return residual interpolation index if voiced, 0 otherwise
  */
-static int comp_interp_index(G723_1_Context *p, int pitch_lag,
+static int comp_interp_index(G723_1_ChannelContext *p, int pitch_lag,
                              int *exc_eng, int *scale)
 {
     int offset = PITCH_MAX + 2 * SUBFRAME_LEN;
@@ -529,7 +539,7 @@ static void residual_interp(int16_t *buf, int16_t *out, int lag,
  * @param buf    postfiltered output vector
  * @param energy input energy coefficient
  */
-static void gain_scale(G723_1_Context *p, int16_t * buf, int energy)
+static void gain_scale(G723_1_ChannelContext *p, int16_t * buf, int energy)
 {
     int num, denom, gain, bits1, bits2;
     int i;
@@ -572,7 +582,7 @@ static void gain_scale(G723_1_Context *p, int16_t * buf, int energy)
  * @param buf input buffer
  * @param dst output buffer
  */
-static void formant_postfilter(G723_1_Context *p, int16_t *lpc,
+static void formant_postfilter(G723_1_ChannelContext *p, int16_t *lpc,
                                int16_t *buf, int16_t *dst)
 {
     int16_t filter_coef[2][LPC_ORDER];
@@ -655,7 +665,7 @@ static inline int cng_rand(int *state, int base)
     return (*state & 0x7FFF) * base >> 15;
 }
 
-static int estimate_sid_gain(G723_1_Context *p)
+static int estimate_sid_gain(G723_1_ChannelContext *p)
 {
     int i, shift, seg, seg2, t, val, val_add, x, y;
 
@@ -715,7 +725,7 @@ static int estimate_sid_gain(G723_1_Context *p)
     return val;
 }
 
-static void generate_noise(G723_1_Context *p)
+static void generate_noise(G723_1_ChannelContext *p)
 {
     int i, j, idx, t;
     int off[SUBFRAMES];
@@ -843,7 +853,7 @@ static void generate_noise(G723_1_Context *p)
 static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
                                int *got_frame_ptr, AVPacket *avpkt)
 {
-    G723_1_Context *p  = avctx->priv_data;
+    G723_1_Context *s  = avctx->priv_data;
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
@@ -855,9 +865,8 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
     int16_t acb_vector[SUBFRAME_LEN];
     int16_t *out;
     int bad_frame = 0, i, j, ret;
-    int16_t *audio = p->audio;
 
-    if (buf_size < frame_size[dec_mode]) {
+    if (buf_size < frame_size[dec_mode] * avctx->channels) {
         if (buf_size)
             av_log(avctx, AV_LOG_WARNING,
                    "Expected %d bytes, got %d - skipping packet\n",
@@ -866,142 +875,148 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
         return buf_size;
     }
 
-    if (unpack_bitstream(p, buf, buf_size) < 0) {
-        bad_frame = 1;
-        if (p->past_frame_type == ACTIVE_FRAME)
-            p->cur_frame_type = ACTIVE_FRAME;
-        else
-            p->cur_frame_type = UNTRANSMITTED_FRAME;
-    }
-
     frame->nb_samples = FRAME_LEN;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
 
-    out = (int16_t *)frame->data[0];
-
-    if (p->cur_frame_type == ACTIVE_FRAME) {
-        if (!bad_frame)
-            p->erased_frames = 0;
-        else if (p->erased_frames != 3)
-            p->erased_frames++;
-
-        ff_g723_1_inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, bad_frame);
-        ff_g723_1_lsp_interpolate(lpc, cur_lsp, p->prev_lsp);
-
-        /* Save the lsp_vector for the next frame */
-        memcpy(p->prev_lsp, cur_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-
-        /* Generate the excitation for the frame */
-        memcpy(p->excitation, p->prev_excitation,
-               PITCH_MAX * sizeof(*p->excitation));
-        if (!p->erased_frames) {
-            int16_t *vector_ptr = p->excitation + PITCH_MAX;
-
-            /* Update interpolation gain memory */
-            p->interp_gain = fixed_cb_gain[(p->subframe[2].amp_index +
-                                            p->subframe[3].amp_index) >> 1];
-            for (i = 0; i < SUBFRAMES; i++) {
-                gen_fcb_excitation(vector_ptr, &p->subframe[i], p->cur_rate,
-                                   p->pitch_lag[i >> 1], i);
-                ff_g723_1_gen_acb_excitation(acb_vector,
-                                             &p->excitation[SUBFRAME_LEN * i],
-                                             p->pitch_lag[i >> 1],
-                                             &p->subframe[i], p->cur_rate);
-                /* Get the total excitation */
-                for (j = 0; j < SUBFRAME_LEN; j++) {
-                    int v = av_clip_int16(vector_ptr[j] * 2);
-                    vector_ptr[j] = av_clip_int16(v + acb_vector[j]);
-                }
-                vector_ptr += SUBFRAME_LEN;
-            }
+    for (int ch = 0; ch < avctx->channels; ch++) {
+        G723_1_ChannelContext *p = &s->ch[ch];
+        int16_t *audio = p->audio;
+
+        /* Recompute per channel so one damaged channel does not mark the next as bad. */
+        bad_frame = unpack_bitstream(p, buf + ch * (buf_size / avctx->channels),
+                                     buf_size / avctx->channels) < 0;
+        if (bad_frame) {
+            /* Unusable bitstream: stay ACTIVE if the previous frame was, else untransmitted. */
+            p->cur_frame_type = p->past_frame_type == ACTIVE_FRAME ?
+                                ACTIVE_FRAME : UNTRANSMITTED_FRAME;
+        }
 
-            vector_ptr = p->excitation + PITCH_MAX;
-
-            p->interp_index = comp_interp_index(p, p->pitch_lag[1],
-                                                &p->sid_gain, &p->cur_gain);
-
-            /* Perform pitch postfiltering */
-            if (p->postfilter) {
-                i = PITCH_MAX;
-                for (j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-                    comp_ppf_coeff(p, i, p->pitch_lag[j >> 1],
-                                   ppf + j, p->cur_rate);
-
-                for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-                    ff_acelp_weighted_vector_sum(p->audio + LPC_ORDER + i,
-                                                 vector_ptr + i,
-                                                 vector_ptr + i + ppf[j].index,
-                                                 ppf[j].sc_gain,
-                                                 ppf[j].opt_gain,
-                                                 1 << 14, 15, SUBFRAME_LEN);
-            } else {
-                audio = vector_ptr - LPC_ORDER;
-            }
+        out = (int16_t *)frame->extended_data[ch];
 
-            /* Save the excitation for the next frame */
-            memcpy(p->prev_excitation, p->excitation + FRAME_LEN,
+        if (p->cur_frame_type == ACTIVE_FRAME) {
+            if (!bad_frame)
+                p->erased_frames = 0;
+            else if (p->erased_frames != 3)
+                p->erased_frames++;
+
+            ff_g723_1_inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, bad_frame);
+            ff_g723_1_lsp_interpolate(lpc, cur_lsp, p->prev_lsp);
+
+            /* Save the lsp_vector for the next frame */
+            memcpy(p->prev_lsp, cur_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
+
+            /* Generate the excitation for the frame */
+            memcpy(p->excitation, p->prev_excitation,
                    PITCH_MAX * sizeof(*p->excitation));
-        } else {
-            p->interp_gain = (p->interp_gain * 3 + 2) >> 2;
-            if (p->erased_frames == 3) {
-                /* Mute output */
-                memset(p->excitation, 0,
-                       (FRAME_LEN + PITCH_MAX) * sizeof(*p->excitation));
-                memset(p->prev_excitation, 0,
-                       PITCH_MAX * sizeof(*p->excitation));
-                memset(frame->data[0], 0,
-                       (FRAME_LEN + LPC_ORDER) * sizeof(int16_t));
-            } else {
-                int16_t *buf = p->audio + LPC_ORDER;
+            if (!p->erased_frames) {
+                int16_t *vector_ptr = p->excitation + PITCH_MAX;
+
+                /* Update interpolation gain memory */
+                p->interp_gain = fixed_cb_gain[(p->subframe[2].amp_index +
+                                                p->subframe[3].amp_index) >> 1];
+                for (i = 0; i < SUBFRAMES; i++) {
+                    gen_fcb_excitation(vector_ptr, &p->subframe[i], p->cur_rate,
+                                       p->pitch_lag[i >> 1], i);
+                    ff_g723_1_gen_acb_excitation(acb_vector,
+                                                 &p->excitation[SUBFRAME_LEN * i],
+                                                 p->pitch_lag[i >> 1],
+                                                 &p->subframe[i], p->cur_rate);
+                    /* Get the total excitation */
+                    for (j = 0; j < SUBFRAME_LEN; j++) {
+                        int v = av_clip_int16(vector_ptr[j] * 2);
+                        vector_ptr[j] = av_clip_int16(v + acb_vector[j]);
+                    }
+                    vector_ptr += SUBFRAME_LEN;
+                }
 
-                /* Regenerate frame */
-                residual_interp(p->excitation, buf, p->interp_index,
-                                p->interp_gain, &p->random_seed);
+                vector_ptr = p->excitation + PITCH_MAX;
+
+                p->interp_index = comp_interp_index(p, p->pitch_lag[1],
+                                                    &p->sid_gain, &p->cur_gain);
+
+                /* Perform pitch postfiltering */
+                if (s->postfilter) {
+                    i = PITCH_MAX;
+                    for (j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+                        comp_ppf_coeff(p, i, p->pitch_lag[j >> 1],
+                                       ppf + j, p->cur_rate);
+
+                    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+                        ff_acelp_weighted_vector_sum(p->audio + LPC_ORDER + i,
+                                                     vector_ptr + i,
+                                                     vector_ptr + i + ppf[j].index,
+                                                     ppf[j].sc_gain,
+                                                     ppf[j].opt_gain,
+                                                     1 << 14, 15, SUBFRAME_LEN);
+                } else {
+                    audio = vector_ptr - LPC_ORDER;
+                }
 
                 /* Save the excitation for the next frame */
-                memcpy(p->prev_excitation, buf + (FRAME_LEN - PITCH_MAX),
+                memcpy(p->prev_excitation, p->excitation + FRAME_LEN,
                        PITCH_MAX * sizeof(*p->excitation));
+            } else {
+                p->interp_gain = (p->interp_gain * 3 + 2) >> 2;
+                if (p->erased_frames == 3) {
+                    /* Mute output */
+                    memset(p->excitation, 0,
+                           (FRAME_LEN + PITCH_MAX) * sizeof(*p->excitation));
+                    memset(p->prev_excitation, 0,
+                           PITCH_MAX * sizeof(*p->excitation));
+                    /* Clear only this channel's plane, and only its FRAME_LEN samples. */
+                    memset(out, 0, FRAME_LEN * sizeof(*out));
+                } else {
+                    int16_t *buf = p->audio + LPC_ORDER;
+
+                    /* Regenerate frame */
+                    residual_interp(p->excitation, buf, p->interp_index,
+                                    p->interp_gain, &p->random_seed);
+
+                    /* Save the excitation for the next frame */
+                    memcpy(p->prev_excitation, buf + (FRAME_LEN - PITCH_MAX),
+                           PITCH_MAX * sizeof(*p->excitation));
+                }
+            }
+            p->cng_random_seed = CNG_RANDOM_SEED;
+        } else {
+            if (p->cur_frame_type == SID_FRAME) {
+                p->sid_gain = sid_gain_to_lsp_index(p->subframe[0].amp_index);
+                ff_g723_1_inverse_quant(p->sid_lsp, p->prev_lsp, p->lsp_index, 0);
+            } else if (p->past_frame_type == ACTIVE_FRAME) {
+                p->sid_gain = estimate_sid_gain(p);
             }
-        }
-        p->cng_random_seed = CNG_RANDOM_SEED;
-    } else {
-        if (p->cur_frame_type == SID_FRAME) {
-            p->sid_gain = sid_gain_to_lsp_index(p->subframe[0].amp_index);
-            ff_g723_1_inverse_quant(p->sid_lsp, p->prev_lsp, p->lsp_index, 0);
-        } else if (p->past_frame_type == ACTIVE_FRAME) {
-            p->sid_gain = estimate_sid_gain(p);
-        }
 
-        if (p->past_frame_type == ACTIVE_FRAME)
-            p->cur_gain = p->sid_gain;
-        else
-            p->cur_gain = (p->cur_gain * 7 + p->sid_gain) >> 3;
-        generate_noise(p);
-        ff_g723_1_lsp_interpolate(lpc, p->sid_lsp, p->prev_lsp);
-        /* Save the lsp_vector for the next frame */
-        memcpy(p->prev_lsp, p->sid_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-    }
+            if (p->past_frame_type == ACTIVE_FRAME)
+                p->cur_gain = p->sid_gain;
+            else
+                p->cur_gain = (p->cur_gain * 7 + p->sid_gain) >> 3;
+            generate_noise(p);
+            ff_g723_1_lsp_interpolate(lpc, p->sid_lsp, p->prev_lsp);
+            /* Save the lsp_vector for the next frame */
+            memcpy(p->prev_lsp, p->sid_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
+        }
 
-    p->past_frame_type = p->cur_frame_type;
+        p->past_frame_type = p->cur_frame_type;
 
-    memcpy(p->audio, p->synth_mem, LPC_ORDER * sizeof(*p->audio));
-    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-        ff_celp_lp_synthesis_filter(p->audio + i, &lpc[j * LPC_ORDER],
-                                    audio + i, SUBFRAME_LEN, LPC_ORDER,
-                                    0, 1, 1 << 12);
-    memcpy(p->synth_mem, p->audio + FRAME_LEN, LPC_ORDER * sizeof(*p->audio));
+        memcpy(p->audio, p->synth_mem, LPC_ORDER * sizeof(*p->audio));
+        for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+            ff_celp_lp_synthesis_filter(p->audio + i, &lpc[j * LPC_ORDER],
+                                        audio + i, SUBFRAME_LEN, LPC_ORDER,
+                                        0, 1, 1 << 12);
+        memcpy(p->synth_mem, p->audio + FRAME_LEN, LPC_ORDER * sizeof(*p->audio));
 
-    if (p->postfilter) {
-        formant_postfilter(p, lpc, p->audio, out);
-    } else { // if output is not postfiltered it should be scaled by 2
-        for (i = 0; i < FRAME_LEN; i++)
-            out[i] = av_clip_int16(p->audio[LPC_ORDER + i] << 1);
+        if (s->postfilter) {
+            formant_postfilter(p, lpc, p->audio, out);
+        } else { // if output is not postfiltered it should be scaled by 2
+            for (i = 0; i < FRAME_LEN; i++)
+                out[i] = av_clip_int16(p->audio[LPC_ORDER + i] << 1);
+        }
     }
 
     *got_frame_ptr = 1;
 
-    return frame_size[dec_mode];
+    return frame_size[dec_mode] * avctx->channels;
 }
 
 #define OFFSET(x) offsetof(G723_1_Context, x)
diff --git a/libavcodec/g723_1enc.c b/libavcodec/g723_1enc.c
index 4a4525eda99d3..592840566e92c 100644
--- a/libavcodec/g723_1enc.c
+++ b/libavcodec/g723_1enc.c
@@ -42,7 +42,8 @@
 
 static av_cold int g723_1_encode_init(AVCodecContext *avctx)
 {
-    G723_1_Context *p = avctx->priv_data;
+    G723_1_Context *s = avctx->priv_data;
+    G723_1_ChannelContext *p = &s->ch[0];
 
     if (avctx->sample_rate != 8000) {
         av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
@@ -386,7 +387,7 @@ static void iir_filter(int16_t *fir_coef, int16_t *iir_coef,
  * @param flt_coef filter coefficients
  * @param unq_lpc  unquantized lpc vector
  */
-static void perceptual_filter(G723_1_Context *p, int16_t *flt_coef,
+static void perceptual_filter(G723_1_ChannelContext *p, int16_t *flt_coef,
                               int16_t *unq_lpc, int16_t *buf)
 {
     int16_t vector[FRAME_LEN + LPC_ORDER];
@@ -635,7 +636,7 @@ static void synth_percept_filter(int16_t *qnt_lpc, int16_t *perf_lpc,
  * @param buf   input signal
  * @param index the current subframe index
  */
-static void acb_search(G723_1_Context *p, int16_t *residual,
+static void acb_search(G723_1_ChannelContext *p, int16_t *residual,
                        int16_t *impulse_resp, const int16_t *buf,
                        int index)
 {
@@ -963,7 +964,7 @@ static void pack_fcb_param(G723_1_Subframe *subfrm, FCBParam *optim,
  * @param buf          target vector
  * @param impulse_resp impulse response of the combined filter
  */
-static void fcb_search(G723_1_Context *p, int16_t *impulse_resp,
+static void fcb_search(G723_1_ChannelContext *p, int16_t *impulse_resp,
                        int16_t *buf, int index)
 {
     FCBParam optim;
@@ -995,7 +996,7 @@ static void fcb_search(G723_1_Context *p, int16_t *impulse_resp,
  * @param frame output buffer
  * @param size  size of the buffer
  */
-static int pack_bitstream(G723_1_Context *p, AVPacket *avpkt)
+static int pack_bitstream(G723_1_ChannelContext *p, AVPacket *avpkt)
 {
     PutBitContext pb;
     int info_bits = 0;
@@ -1056,7 +1057,8 @@ static int pack_bitstream(G723_1_Context *p, AVPacket *avpkt)
 static int g723_1_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                const AVFrame *frame, int *got_packet_ptr)
 {
-    G723_1_Context *p = avctx->priv_data;
+    G723_1_Context *s = avctx->priv_data;
+    G723_1_ChannelContext *p = &s->ch[0];
     int16_t unq_lpc[LPC_ORDER * SUBFRAMES];
     int16_t qnt_lpc[LPC_ORDER * SUBFRAMES];
     int16_t cur_lsp[LPC_ORDER];
@@ -1189,6 +1191,11 @@ static int g723_1_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     return 0;
 }
 
+static const AVCodecDefault defaults[] = {
+    { "b", "6300" },
+    { NULL },
+};
+
 AVCodec ff_g723_1_encoder = {
     .name           = "g723_1",
     .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
@@ -1197,6 +1204,7 @@ AVCodec ff_g723_1_encoder = {
     .priv_data_size = sizeof(G723_1_Context),
     .init           = g723_1_encode_init,
     .encode2        = g723_1_encode_frame,
+    .defaults       = defaults,
     .sample_fmts    = (const enum AVSampleFormat[]) {
         AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
     },
diff --git a/libavcodec/g729_parser.c b/libavcodec/g729_parser.c
index d13c990807af6..9982dbfffccf3 100644
--- a/libavcodec/g729_parser.c
+++ b/libavcodec/g729_parser.c
@@ -48,6 +48,7 @@ static int g729_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
         av_assert1(avctx->codec_id == AV_CODEC_ID_G729);
         /* FIXME: replace this heuristic block_size with more precise estimate */
         s->block_size = (avctx->bit_rate < 8000) ? G729D_6K4_BLOCK_SIZE : G729_8K_BLOCK_SIZE;
+        s->block_size *= avctx->channels;
         s->duration   = avctx->frame_size;
     }
 
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index 2e1bf18e4e49f..2e4756b805738 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -100,8 +100,6 @@ typedef struct {
 } G729FormatDescription;
 
 typedef struct {
-    AudioDSPContext adsp;
-
     /// past excitation signal buffer
     int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN];
 
@@ -152,7 +150,13 @@ typedef struct {
 
     /// high-pass filter data (past output)
     int16_t hpf_z[2];
-}  G729Context;
+}  G729ChannelContext;
+
+typedef struct {
+    AudioDSPContext adsp;
+
+    G729ChannelContext *channel_context;
+} G729Context;
 
 static const G729FormatDescription format_g729_8k = {
     .ac_index_bits     = {8,5},
@@ -268,8 +272,7 @@ static void g729d_get_new_exc(
 
     ff_celp_convolve_circ(fc_new, fc_cur, phase_filter[dstate], subframe_size);
 
-    for(i=0; i<subframe_size; i++)
-    {
+    for (i = 0; i < subframe_size; i++) {
         out[i]  = in[i];
         out[i] -= (gain_code * fc_cur[i] + 0x2000) >> 14;
         out[i] += (gain_code * fc_new[i] + 0x2000) >> 14;
@@ -285,10 +288,10 @@ static void g729d_get_new_exc(
  */
 static int g729d_onset_decision(int past_onset, const int16_t* past_gain_code)
 {
-    if((past_gain_code[0] >> 1) > past_gain_code[1])
+    if ((past_gain_code[0] >> 1) > past_gain_code[1])
         return 2;
-    else
-        return FFMAX(past_onset-1, 0);
+
+    return FFMAX(past_onset-1, 0);
 }
 
 /**
@@ -303,24 +306,25 @@ static int16_t g729d_voice_decision(int onset, int prev_voice_decision, const in
 {
     int i, low_gain_pitch_cnt, voice_decision;
 
-    if(past_gain_pitch[0] >= 14745)      // 0.9
+    if (past_gain_pitch[0] >= 14745) {       // 0.9
         voice_decision = DECISION_VOICE;
-    else if (past_gain_pitch[0] <= 9830) // 0.6
+    } else if (past_gain_pitch[0] <= 9830) { // 0.6
         voice_decision = DECISION_NOISE;
-    else
+    } else {
         voice_decision = DECISION_INTERMEDIATE;
+    }
 
-    for(i=0, low_gain_pitch_cnt=0; i<6; i++)
-        if(past_gain_pitch[i] < 9830)
+    for (i = 0, low_gain_pitch_cnt = 0; i < 6; i++)
+        if (past_gain_pitch[i] < 9830)
             low_gain_pitch_cnt++;
 
-    if(low_gain_pitch_cnt > 2 && !onset)
+    if (low_gain_pitch_cnt > 2 && !onset)
         voice_decision = DECISION_NOISE;
 
-    if(!onset && voice_decision > prev_voice_decision + 1)
+    if (!onset && voice_decision > prev_voice_decision + 1)
         voice_decision--;
 
-    if(onset && voice_decision < DECISION_VOICE)
+    if (onset && voice_decision < DECISION_VOICE)
         voice_decision++;
 
     return voice_decision;
@@ -338,43 +342,53 @@ static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int
 
 static av_cold int decoder_init(AVCodecContext * avctx)
 {
-    G729Context* ctx = avctx->priv_data;
-    int i,k;
+    G729Context *s = avctx->priv_data;
+    G729ChannelContext *ctx;
+    int c,i,k;
 
-    if (avctx->channels != 1) {
-        av_log(avctx, AV_LOG_ERROR, "Only mono sound is supported (requested channels: %d).\n", avctx->channels);
+    if (avctx->channels < 1 || avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo are supported (requested channels: %d).\n", avctx->channels);
         return AVERROR(EINVAL);
     }
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
 
     /* Both 8kbit/s and 6.4kbit/s modes uses two subframes per frame. */
     avctx->frame_size = SUBFRAME_SIZE << 1;
 
-    ctx->gain_coeff = 16384; // 1.0 in (1.14)
+    ctx =
+    s->channel_context = av_mallocz(sizeof(G729ChannelContext) * avctx->channels);
+    if (!ctx)
+        return AVERROR(ENOMEM);
 
-    for (k = 0; k < MA_NP + 1; k++) {
-        ctx->past_quantizer_outputs[k] = ctx->past_quantizer_output_buf[k];
-        for (i = 1; i < 11; i++)
-            ctx->past_quantizer_outputs[k][i - 1] = (18717 * i) >> 3;
-    }
+    for (c = 0; c < avctx->channels; c++) {
+        ctx->gain_coeff = 16384; // 1.0 in (1.14)
+
+        for (k = 0; k < MA_NP + 1; k++) {
+            ctx->past_quantizer_outputs[k] = ctx->past_quantizer_output_buf[k];
+            for (i = 1; i < 11; i++)
+                ctx->past_quantizer_outputs[k][i - 1] = (18717 * i) >> 3;
+        }
+
+        ctx->lsp[0] = ctx->lsp_buf[0];
+        ctx->lsp[1] = ctx->lsp_buf[1];
+        memcpy(ctx->lsp[0], lsp_init, 10 * sizeof(int16_t));
 
-    ctx->lsp[0] = ctx->lsp_buf[0];
-    ctx->lsp[1] = ctx->lsp_buf[1];
-    memcpy(ctx->lsp[0], lsp_init, 10 * sizeof(int16_t));
+        ctx->exc = &ctx->exc_base[PITCH_DELAY_MAX+INTERPOL_LEN];
 
-    ctx->exc = &ctx->exc_base[PITCH_DELAY_MAX+INTERPOL_LEN];
+        ctx->pitch_delay_int_prev = PITCH_DELAY_MIN;
 
-    ctx->pitch_delay_int_prev = PITCH_DELAY_MIN;
+        /* random seed initialization */
+        ctx->rand_value = 21845;
 
-    /* random seed initialization */
-    ctx->rand_value = 21845;
+        /* quantized prediction error */
+        for (i = 0; i < 4; i++)
+            ctx->quant_energy[i] = -14336; // -14 in (5.10)
 
-    /* quantized prediction error */
-    for(i=0; i<4; i++)
-        ctx->quant_energy[i] = -14336; // -14 in (5.10)
+        ctx++;
+    }
 
-    ff_audiodsp_init(&ctx->adsp);
-    ctx->adsp.scalarproduct_int16 = scalarproduct_int16_c;
+    ff_audiodsp_init(&s->adsp);
+    s->adsp.scalarproduct_int16 = scalarproduct_int16_c;
 
     return 0;
 }
@@ -387,12 +401,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
     int16_t *out_frame;
     GetBitContext gb;
     const G729FormatDescription *format;
-    int frame_erasure = 0;    ///< frame erasure detected during decoding
-    int bad_pitch = 0;        ///< parity check failed
-    int i;
+    int c, i;
     int16_t *tmp;
     G729Formats packet_type;
-    G729Context *ctx = avctx->priv_data;
+    G729Context *s = avctx->priv_data;
+    G729ChannelContext *ctx = s->channel_context;
     int16_t lp[2][11];           // (3.12)
     uint8_t ma_predictor;     ///< switched MA predictor of LSP quantizer
     uint8_t quantizer_1st;    ///< first stage vector of quantizer
@@ -405,22 +418,20 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
     int16_t synth[SUBFRAME_SIZE+10]; // fixed-codebook vector
     int j, ret;
     int gain_before, gain_after;
-    int is_periodic = 0;         // whether one of the subframes is declared as periodic or not
     AVFrame *frame = data;
 
     frame->nb_samples = SUBFRAME_SIZE<<1;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    out_frame = (int16_t*) frame->data[0];
 
-    if (buf_size % 10 == 0) {
+    if (buf_size % (G729_8K_BLOCK_SIZE * avctx->channels) == 0) {
         packet_type = FORMAT_G729_8K;
         format = &format_g729_8k;
         //Reset voice decision
         ctx->onset = 0;
         ctx->voice_decision = DECISION_VOICE;
         av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729 @ 8kbit/s");
-    } else if (buf_size == 8) {
+    } else if (buf_size == G729D_6K4_BLOCK_SIZE * avctx->channels) {
         packet_type = FORMAT_G729D_6K4;
         format = &format_g729d_6k4;
         av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729D @ 6.4kbit/s");
@@ -429,281 +440,302 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
         return AVERROR_INVALIDDATA;
     }
 
-    for (i=0; i < buf_size; i++)
-        frame_erasure |= buf[i];
-    frame_erasure = !frame_erasure;
-
-    init_get_bits(&gb, buf, 8*buf_size);
-
-    ma_predictor     = get_bits(&gb, 1);
-    quantizer_1st    = get_bits(&gb, VQ_1ST_BITS);
-    quantizer_2nd_lo = get_bits(&gb, VQ_2ND_BITS);
-    quantizer_2nd_hi = get_bits(&gb, VQ_2ND_BITS);
-
-    if(frame_erasure)
-        lsf_restore_from_previous(ctx->lsfq, ctx->past_quantizer_outputs,
-                                  ctx->ma_predictor_prev);
-    else {
-        lsf_decode(ctx->lsfq, ctx->past_quantizer_outputs,
-                   ma_predictor,
-                   quantizer_1st, quantizer_2nd_lo, quantizer_2nd_hi);
-        ctx->ma_predictor_prev = ma_predictor;
-    }
+    for (c = 0; c < avctx->channels; c++) {
+        int frame_erasure = 0; ///< frame erasure detected during decoding
+        int bad_pitch = 0;     ///< parity check failed
+        int is_periodic = 0;   ///< whether one of the subframes is declared as periodic or not
+        out_frame = (int16_t*)frame->data[c];
 
-    tmp = ctx->past_quantizer_outputs[MA_NP];
-    memmove(ctx->past_quantizer_outputs + 1, ctx->past_quantizer_outputs,
-            MA_NP * sizeof(int16_t*));
-    ctx->past_quantizer_outputs[0] = tmp;
-
-    ff_acelp_lsf2lsp(ctx->lsp[1], ctx->lsfq, 10);
-
-    ff_acelp_lp_decode(&lp[0][0], &lp[1][0], ctx->lsp[1], ctx->lsp[0], 10);
-
-    FFSWAP(int16_t*, ctx->lsp[1], ctx->lsp[0]);
-
-    for (i = 0; i < 2; i++) {
-        int gain_corr_factor;
-
-        uint8_t ac_index;      ///< adaptive codebook index
-        uint8_t pulses_signs;  ///< fixed-codebook vector pulse signs
-        int fc_indexes;        ///< fixed-codebook indexes
-        uint8_t gc_1st_index;  ///< gain codebook (first stage) index
-        uint8_t gc_2nd_index;  ///< gain codebook (second stage) index
-
-        ac_index      = get_bits(&gb, format->ac_index_bits[i]);
-        if(!i && format->parity_bit)
-            bad_pitch = av_parity(ac_index >> 2) == get_bits1(&gb);
-        fc_indexes    = get_bits(&gb, format->fc_indexes_bits);
-        pulses_signs  = get_bits(&gb, format->fc_signs_bits);
-        gc_1st_index  = get_bits(&gb, format->gc_1st_index_bits);
-        gc_2nd_index  = get_bits(&gb, format->gc_2nd_index_bits);
-
-        if (frame_erasure)
-            pitch_delay_3x   = 3 * ctx->pitch_delay_int_prev;
-        else if(!i) {
-            if (bad_pitch)
-                pitch_delay_3x   = 3 * ctx->pitch_delay_int_prev;
-            else
-                pitch_delay_3x = ff_acelp_decode_8bit_to_1st_delay3(ac_index);
-        } else {
-            int pitch_delay_min = av_clip(ctx->pitch_delay_int_prev - 5,
-                                          PITCH_DELAY_MIN, PITCH_DELAY_MAX - 9);
+        for (i = 0; i < buf_size; i++)
+            frame_erasure |= buf[i];
+        frame_erasure = !frame_erasure;
 
-            if(packet_type == FORMAT_G729D_6K4)
-                pitch_delay_3x = ff_acelp_decode_4bit_to_2nd_delay3(ac_index, pitch_delay_min);
-            else
-                pitch_delay_3x = ff_acelp_decode_5_6_bit_to_2nd_delay3(ac_index, pitch_delay_min);
-        }
+        init_get_bits(&gb, buf, 8*buf_size);
 
-        /* Round pitch delay to nearest (used everywhere except ff_acelp_interpolate). */
-        pitch_delay_int[i]  = (pitch_delay_3x + 1) / 3;
-        if (pitch_delay_int[i] > PITCH_DELAY_MAX) {
-            av_log(avctx, AV_LOG_WARNING, "pitch_delay_int %d is too large\n", pitch_delay_int[i]);
-            pitch_delay_int[i] = PITCH_DELAY_MAX;
-        }
+        ma_predictor     = get_bits(&gb, 1);
+        quantizer_1st    = get_bits(&gb, VQ_1ST_BITS);
+        quantizer_2nd_lo = get_bits(&gb, VQ_2ND_BITS);
+        quantizer_2nd_hi = get_bits(&gb, VQ_2ND_BITS);
 
         if (frame_erasure) {
-            ctx->rand_value = g729_prng(ctx->rand_value);
-            fc_indexes   = av_mod_uintp2(ctx->rand_value, format->fc_indexes_bits);
-
-            ctx->rand_value = g729_prng(ctx->rand_value);
-            pulses_signs = ctx->rand_value;
+            lsf_restore_from_previous(ctx->lsfq, ctx->past_quantizer_outputs,
+                                      ctx->ma_predictor_prev);
+        } else {
+            lsf_decode(ctx->lsfq, ctx->past_quantizer_outputs,
+                       ma_predictor,
+                       quantizer_1st, quantizer_2nd_lo, quantizer_2nd_hi);
+            ctx->ma_predictor_prev = ma_predictor;
         }
 
+        tmp = ctx->past_quantizer_outputs[MA_NP];
+        memmove(ctx->past_quantizer_outputs + 1, ctx->past_quantizer_outputs,
+                MA_NP * sizeof(int16_t*));
+        ctx->past_quantizer_outputs[0] = tmp;
+
+        ff_acelp_lsf2lsp(ctx->lsp[1], ctx->lsfq, 10);
+
+        ff_acelp_lp_decode(&lp[0][0], &lp[1][0], ctx->lsp[1], ctx->lsp[0], 10);
+
+        FFSWAP(int16_t*, ctx->lsp[1], ctx->lsp[0]);
+
+        for (i = 0; i < 2; i++) {
+            int gain_corr_factor;
+
+            uint8_t ac_index;      ///< adaptive codebook index
+            uint8_t pulses_signs;  ///< fixed-codebook vector pulse signs
+            int fc_indexes;        ///< fixed-codebook indexes
+            uint8_t gc_1st_index;  ///< gain codebook (first stage) index
+            uint8_t gc_2nd_index;  ///< gain codebook (second stage) index
+
+            ac_index      = get_bits(&gb, format->ac_index_bits[i]);
+            if (!i && format->parity_bit)
+                bad_pitch = av_parity(ac_index >> 2) == get_bits1(&gb);
+            fc_indexes    = get_bits(&gb, format->fc_indexes_bits);
+            pulses_signs  = get_bits(&gb, format->fc_signs_bits);
+            gc_1st_index  = get_bits(&gb, format->gc_1st_index_bits);
+            gc_2nd_index  = get_bits(&gb, format->gc_2nd_index_bits);
+
+            if (frame_erasure) {
+                pitch_delay_3x = 3 * ctx->pitch_delay_int_prev;
+            } else if (!i) {
+                if (bad_pitch) {
+                    pitch_delay_3x = 3 * ctx->pitch_delay_int_prev;
+                } else {
+                    pitch_delay_3x = ff_acelp_decode_8bit_to_1st_delay3(ac_index);
+                }
+            } else {
+                int pitch_delay_min = av_clip(ctx->pitch_delay_int_prev - 5,
+                                              PITCH_DELAY_MIN, PITCH_DELAY_MAX - 9);
+
+                if (packet_type == FORMAT_G729D_6K4) {
+                    pitch_delay_3x = ff_acelp_decode_4bit_to_2nd_delay3(ac_index, pitch_delay_min);
+                } else {
+                    pitch_delay_3x = ff_acelp_decode_5_6_bit_to_2nd_delay3(ac_index, pitch_delay_min);
+                }
+            }
 
-        memset(fc, 0, sizeof(int16_t) * SUBFRAME_SIZE);
-        switch (packet_type) {
-            case FORMAT_G729_8K:
-                ff_acelp_fc_pulse_per_track(fc, ff_fc_4pulses_8bits_tracks_13,
-                                            ff_fc_4pulses_8bits_track_4,
-                                            fc_indexes, pulses_signs, 3, 3);
-                break;
-            case FORMAT_G729D_6K4:
-                ff_acelp_fc_pulse_per_track(fc, ff_fc_2pulses_9bits_track1_gray,
-                                            ff_fc_2pulses_9bits_track2_gray,
-                                            fc_indexes, pulses_signs, 1, 4);
-                break;
-        }
-
-        /*
-          This filter enhances harmonic components of the fixed-codebook vector to
-          improve the quality of the reconstructed speech.
+            /* Round pitch delay to nearest (used everywhere except ff_acelp_interpolate). */
+            pitch_delay_int[i]  = (pitch_delay_3x + 1) / 3;
+            if (pitch_delay_int[i] > PITCH_DELAY_MAX) {
+                av_log(avctx, AV_LOG_WARNING, "pitch_delay_int %d is too large\n", pitch_delay_int[i]);
+                pitch_delay_int[i] = PITCH_DELAY_MAX;
+            }
 
-                     / fc_v[i],                                    i < pitch_delay
-          fc_v[i] = <
-                     \ fc_v[i] + gain_pitch * fc_v[i-pitch_delay], i >= pitch_delay
-        */
-        ff_acelp_weighted_vector_sum(fc + pitch_delay_int[i],
-                                     fc + pitch_delay_int[i],
-                                     fc, 1 << 14,
-                                     av_clip(ctx->past_gain_pitch[0], SHARP_MIN, SHARP_MAX),
-                                     0, 14,
-                                     SUBFRAME_SIZE - pitch_delay_int[i]);
+            if (frame_erasure) {
+                ctx->rand_value = g729_prng(ctx->rand_value);
+                fc_indexes   = av_mod_uintp2(ctx->rand_value, format->fc_indexes_bits);
 
-        memmove(ctx->past_gain_pitch+1, ctx->past_gain_pitch, 5 * sizeof(int16_t));
-        ctx->past_gain_code[1] = ctx->past_gain_code[0];
+                ctx->rand_value = g729_prng(ctx->rand_value);
+                pulses_signs = ctx->rand_value;
+            }
 
-        if (frame_erasure) {
-            ctx->past_gain_pitch[0] = (29491 * ctx->past_gain_pitch[0]) >> 15; // 0.90 (0.15)
-            ctx->past_gain_code[0]  = ( 2007 * ctx->past_gain_code[0] ) >> 11; // 0.98 (0.11)
 
-            gain_corr_factor = 0;
-        } else {
-            if (packet_type == FORMAT_G729D_6K4) {
-                ctx->past_gain_pitch[0]  = cb_gain_1st_6k4[gc_1st_index][0] +
-                                           cb_gain_2nd_6k4[gc_2nd_index][0];
-                gain_corr_factor = cb_gain_1st_6k4[gc_1st_index][1] +
-                                   cb_gain_2nd_6k4[gc_2nd_index][1];
-
-                /* Without check below overflow can occur in ff_acelp_update_past_gain.
-                   It is not issue for G.729, because gain_corr_factor in it's case is always
-                   greater than 1024, while in G.729D it can be even zero. */
-                gain_corr_factor = FFMAX(gain_corr_factor, 1024);
-#ifndef G729_BITEXACT
-                gain_corr_factor >>= 1;
-#endif
-            } else {
-                ctx->past_gain_pitch[0]  = cb_gain_1st_8k[gc_1st_index][0] +
-                                           cb_gain_2nd_8k[gc_2nd_index][0];
-                gain_corr_factor = cb_gain_1st_8k[gc_1st_index][1] +
-                                   cb_gain_2nd_8k[gc_2nd_index][1];
+            memset(fc, 0, sizeof(int16_t) * SUBFRAME_SIZE);
+            switch (packet_type) {
+                case FORMAT_G729_8K:
+                    ff_acelp_fc_pulse_per_track(fc, ff_fc_4pulses_8bits_tracks_13,
+                                                ff_fc_4pulses_8bits_track_4,
+                                                fc_indexes, pulses_signs, 3, 3);
+                    break;
+                case FORMAT_G729D_6K4:
+                    ff_acelp_fc_pulse_per_track(fc, ff_fc_2pulses_9bits_track1_gray,
+                                                ff_fc_2pulses_9bits_track2_gray,
+                                                fc_indexes, pulses_signs, 1, 4);
+                    break;
             }
 
-            /* Decode the fixed-codebook gain. */
-            ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->adsp, gain_corr_factor,
-                                                               fc, MR_ENERGY,
-                                                               ctx->quant_energy,
-                                                               ma_prediction_coeff,
-                                                               SUBFRAME_SIZE, 4);
-#ifdef G729_BITEXACT
             /*
-              This correction required to get bit-exact result with
-              reference code, because gain_corr_factor in G.729D is
-              two times larger than in original G.729.
+              This filter enhances harmonic components of the fixed-codebook vector to
+              improve the quality of the reconstructed speech.
 
-              If bit-exact result is not issue then gain_corr_factor
-              can be simpler divided by 2 before call to g729_get_gain_code
-              instead of using correction below.
+                         / fc_v[i],                                    i < pitch_delay
+              fc_v[i] = <
+                         \ fc_v[i] + gain_pitch * fc_v[i-pitch_delay], i >= pitch_delay
             */
+            ff_acelp_weighted_vector_sum(fc + pitch_delay_int[i],
+                                         fc + pitch_delay_int[i],
+                                         fc, 1 << 14,
+                                         av_clip(ctx->past_gain_pitch[0], SHARP_MIN, SHARP_MAX),
+                                         0, 14,
+                                         SUBFRAME_SIZE - pitch_delay_int[i]);
+
+            memmove(ctx->past_gain_pitch+1, ctx->past_gain_pitch, 5 * sizeof(int16_t));
+            ctx->past_gain_code[1] = ctx->past_gain_code[0];
+
+            if (frame_erasure) {
+                ctx->past_gain_pitch[0] = (29491 * ctx->past_gain_pitch[0]) >> 15; // 0.90 (0.15)
+                ctx->past_gain_code[0]  = ( 2007 * ctx->past_gain_code[0] ) >> 11; // 0.98 (0.11)
+
+                gain_corr_factor = 0;
+            } else {
+                if (packet_type == FORMAT_G729D_6K4) {
+                    ctx->past_gain_pitch[0]  = cb_gain_1st_6k4[gc_1st_index][0] +
+                                               cb_gain_2nd_6k4[gc_2nd_index][0];
+                    gain_corr_factor = cb_gain_1st_6k4[gc_1st_index][1] +
+                                       cb_gain_2nd_6k4[gc_2nd_index][1];
+
+                    /* Without the check below, overflow can occur in ff_acelp_update_past_gain.
+                       It is not an issue for G.729, because gain_corr_factor in its case is always
+                       greater than 1024, while in G.729D it can even be zero. */
+                    gain_corr_factor = FFMAX(gain_corr_factor, 1024);
+    #ifndef G729_BITEXACT
+                    gain_corr_factor >>= 1;
+    #endif
+                } else {
+                    ctx->past_gain_pitch[0]  = cb_gain_1st_8k[gc_1st_index][0] +
+                                               cb_gain_2nd_8k[gc_2nd_index][0];
+                    gain_corr_factor = cb_gain_1st_8k[gc_1st_index][1] +
+                                       cb_gain_2nd_8k[gc_2nd_index][1];
+                }
+
+                /* Decode the fixed-codebook gain. */
+                ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&s->adsp, gain_corr_factor,
+                                                                   fc, MR_ENERGY,
+                                                                   ctx->quant_energy,
+                                                                   ma_prediction_coeff,
+                                                                   SUBFRAME_SIZE, 4);
+    #ifdef G729_BITEXACT
+                /*
+                  This correction is required to get a bit-exact result with
+                  the reference code, because gain_corr_factor in G.729D is
+                  two times larger than in the original G.729.
+
+                  If a bit-exact result is not an issue, then gain_corr_factor
+                  can simply be divided by 2 before the call to ff_acelp_decode_gain_code
+                  instead of using the correction below.
+                */
+                if (packet_type == FORMAT_G729D_6K4) {
+                    gain_corr_factor >>= 1;
+                    ctx->past_gain_code[0] >>= 1;
+                }
+    #endif
+            }
+            ff_acelp_update_past_gain(ctx->quant_energy, gain_corr_factor, 2, frame_erasure);
+
+            /* Routine requires rounding to lowest. */
+            ff_acelp_interpolate(ctx->exc + i * SUBFRAME_SIZE,
+                                 ctx->exc + i * SUBFRAME_SIZE - pitch_delay_3x / 3,
+                                 ff_acelp_interp_filter, 6,
+                                 (pitch_delay_3x % 3) << 1,
+                                 10, SUBFRAME_SIZE);
+
+            ff_acelp_weighted_vector_sum(ctx->exc + i * SUBFRAME_SIZE,
+                                         ctx->exc + i * SUBFRAME_SIZE, fc,
+                                         (!ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_pitch[0],
+                                         ( ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_code[0],
+                                         1 << 13, 14, SUBFRAME_SIZE);
+
+            memcpy(synth, ctx->syn_filter_data, 10 * sizeof(int16_t));
+
+            if (ff_celp_lp_synthesis_filter(
+                synth+10,
+                &lp[i][1],
+                ctx->exc  + i * SUBFRAME_SIZE,
+                SUBFRAME_SIZE,
+                10,
+                1,
+                0,
+                0x800))
+                /* Overflow occurred, downscale excitation signal... */
+                for (j = 0; j < 2 * SUBFRAME_SIZE + PITCH_DELAY_MAX + INTERPOL_LEN; j++)
+                    ctx->exc_base[j] >>= 2;
+
+            /* ... and make synthesis again. */
             if (packet_type == FORMAT_G729D_6K4) {
-                gain_corr_factor >>= 1;
-                ctx->past_gain_code[0] >>= 1;
+                int16_t exc_new[SUBFRAME_SIZE];
+
+                ctx->onset = g729d_onset_decision(ctx->onset, ctx->past_gain_code);
+                ctx->voice_decision = g729d_voice_decision(ctx->onset, ctx->voice_decision, ctx->past_gain_pitch);
+
+                g729d_get_new_exc(exc_new, ctx->exc  + i * SUBFRAME_SIZE, fc, ctx->voice_decision, ctx->past_gain_code[0], SUBFRAME_SIZE);
+
+                ff_celp_lp_synthesis_filter(
+                        synth+10,
+                        &lp[i][1],
+                        exc_new,
+                        SUBFRAME_SIZE,
+                        10,
+                        0,
+                        0,
+                        0x800);
+            } else {
+                ff_celp_lp_synthesis_filter(
+                        synth+10,
+                        &lp[i][1],
+                        ctx->exc  + i * SUBFRAME_SIZE,
+                        SUBFRAME_SIZE,
+                        10,
+                        0,
+                        0,
+                        0x800);
             }
-#endif
-        }
-        ff_acelp_update_past_gain(ctx->quant_energy, gain_corr_factor, 2, frame_erasure);
-
-        /* Routine requires rounding to lowest. */
-        ff_acelp_interpolate(ctx->exc + i * SUBFRAME_SIZE,
-                             ctx->exc + i * SUBFRAME_SIZE - pitch_delay_3x / 3,
-                             ff_acelp_interp_filter, 6,
-                             (pitch_delay_3x % 3) << 1,
-                             10, SUBFRAME_SIZE);
-
-        ff_acelp_weighted_vector_sum(ctx->exc + i * SUBFRAME_SIZE,
-                                     ctx->exc + i * SUBFRAME_SIZE, fc,
-                                     (!ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_pitch[0],
-                                     ( ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_code[0],
-                                     1 << 13, 14, SUBFRAME_SIZE);
-
-        memcpy(synth, ctx->syn_filter_data, 10 * sizeof(int16_t));
-
-        if (ff_celp_lp_synthesis_filter(
-            synth+10,
-            &lp[i][1],
-            ctx->exc  + i * SUBFRAME_SIZE,
-            SUBFRAME_SIZE,
-            10,
-            1,
-            0,
-            0x800))
-            /* Overflow occurred, downscale excitation signal... */
-            for (j = 0; j < 2 * SUBFRAME_SIZE + PITCH_DELAY_MAX + INTERPOL_LEN; j++)
-                ctx->exc_base[j] >>= 2;
-
-        /* ... and make synthesis again. */
-        if (packet_type == FORMAT_G729D_6K4) {
-            int16_t exc_new[SUBFRAME_SIZE];
-
-            ctx->onset = g729d_onset_decision(ctx->onset, ctx->past_gain_code);
-            ctx->voice_decision = g729d_voice_decision(ctx->onset, ctx->voice_decision, ctx->past_gain_pitch);
-
-            g729d_get_new_exc(exc_new, ctx->exc  + i * SUBFRAME_SIZE, fc, ctx->voice_decision, ctx->past_gain_code[0], SUBFRAME_SIZE);
-
-            ff_celp_lp_synthesis_filter(
+            /* Save data (without postfilter) for use in next subframe. */
+            memcpy(ctx->syn_filter_data, synth+SUBFRAME_SIZE, 10 * sizeof(int16_t));
+
+            /* Calculate gain of unfiltered signal for use in AGC. */
+            gain_before = 0;
+            for (j = 0; j < SUBFRAME_SIZE; j++)
+                gain_before += FFABS(synth[j+10]);
+
+            /* Call postfilter and also update voicing decision for use in next frame. */
+            ff_g729_postfilter(
+                    &s->adsp,
+                    &ctx->ht_prev_data,
+                    &is_periodic,
+                    &lp[i][0],
+                    pitch_delay_int[0],
+                    ctx->residual,
+                    ctx->res_filter_data,
+                    ctx->pos_filter_data,
                     synth+10,
-                    &lp[i][1],
-                    exc_new,
-                    SUBFRAME_SIZE,
-                    10,
-                    0,
-                    0,
-                    0x800);
-        } else {
-            ff_celp_lp_synthesis_filter(
+                    SUBFRAME_SIZE);
+
+            /* Calculate gain of filtered signal for use in AGC. */
+            gain_after = 0;
+            for (j = 0; j < SUBFRAME_SIZE; j++)
+                gain_after += FFABS(synth[j+10]);
+
+            ctx->gain_coeff = ff_g729_adaptive_gain_control(
+                    gain_before,
+                    gain_after,
                     synth+10,
-                    &lp[i][1],
-                    ctx->exc  + i * SUBFRAME_SIZE,
                     SUBFRAME_SIZE,
-                    10,
-                    0,
-                    0,
-                    0x800);
-        }
-        /* Save data (without postfilter) for use in next subframe. */
-        memcpy(ctx->syn_filter_data, synth+SUBFRAME_SIZE, 10 * sizeof(int16_t));
-
-        /* Calculate gain of unfiltered signal for use in AGC. */
-        gain_before = 0;
-        for (j = 0; j < SUBFRAME_SIZE; j++)
-            gain_before += FFABS(synth[j+10]);
-
-        /* Call postfilter and also update voicing decision for use in next frame. */
-        ff_g729_postfilter(
-                &ctx->adsp,
-                &ctx->ht_prev_data,
-                &is_periodic,
-                &lp[i][0],
-                pitch_delay_int[0],
-                ctx->residual,
-                ctx->res_filter_data,
-                ctx->pos_filter_data,
-                synth+10,
-                SUBFRAME_SIZE);
+                    ctx->gain_coeff);
 
-        /* Calculate gain of filtered signal for use in AGC. */
-        gain_after = 0;
-        for(j=0; j<SUBFRAME_SIZE; j++)
-            gain_after += FFABS(synth[j+10]);
+            if (frame_erasure) {
+                ctx->pitch_delay_int_prev = FFMIN(ctx->pitch_delay_int_prev + 1, PITCH_DELAY_MAX);
+            } else {
+                ctx->pitch_delay_int_prev = pitch_delay_int[i];
+            }
 
-        ctx->gain_coeff = ff_g729_adaptive_gain_control(
-                gain_before,
-                gain_after,
-                synth+10,
-                SUBFRAME_SIZE,
-                ctx->gain_coeff);
+            memcpy(synth+8, ctx->hpf_z, 2*sizeof(int16_t));
+            ff_acelp_high_pass_filter(
+                    out_frame + i*SUBFRAME_SIZE,
+                    ctx->hpf_f,
+                    synth+10,
+                    SUBFRAME_SIZE);
+            memcpy(ctx->hpf_z, synth+8+SUBFRAME_SIZE, 2*sizeof(int16_t));
+        }
 
-        if (frame_erasure)
-            ctx->pitch_delay_int_prev = FFMIN(ctx->pitch_delay_int_prev + 1, PITCH_DELAY_MAX);
-        else
-            ctx->pitch_delay_int_prev = pitch_delay_int[i];
+        ctx->was_periodic = is_periodic;
 
-        memcpy(synth+8, ctx->hpf_z, 2*sizeof(int16_t));
-        ff_acelp_high_pass_filter(
-                out_frame + i*SUBFRAME_SIZE,
-                ctx->hpf_f,
-                synth+10,
-                SUBFRAME_SIZE);
-        memcpy(ctx->hpf_z, synth+8+SUBFRAME_SIZE, 2*sizeof(int16_t));
+        /* Save signal for use in next frame. */
+        memmove(ctx->exc_base, ctx->exc_base + 2 * SUBFRAME_SIZE, (PITCH_DELAY_MAX+INTERPOL_LEN)*sizeof(int16_t));
+
+        buf += packet_type == FORMAT_G729_8K ? G729_8K_BLOCK_SIZE : G729D_6K4_BLOCK_SIZE;
+        ctx++;
     }
 
-    ctx->was_periodic = is_periodic;
+    *got_frame_ptr = 1;
+    return packet_type == FORMAT_G729_8K ? G729_8K_BLOCK_SIZE * avctx->channels : G729D_6K4_BLOCK_SIZE * avctx->channels;
+}
 
-    /* Save signal for use in next frame. */
-    memmove(ctx->exc_base, ctx->exc_base + 2 * SUBFRAME_SIZE, (PITCH_DELAY_MAX+INTERPOL_LEN)*sizeof(int16_t));
+static av_cold int decode_close(AVCodecContext *avctx) /* installed as the codec's .close callback; frees s->channel_context and always returns 0 */
+{
+    G729Context *s = avctx->priv_data;
+    av_freep(&s->channel_context); /* av_freep() takes the pointer's address so it can also reset it to NULL */
 
-    *got_frame_ptr = 1;
-    return packet_type == FORMAT_G729_8K ? 10 : 8;
+    return 0;
 }
 
 AVCodec ff_g729_decoder = {
@@ -714,5 +746,6 @@ AVCodec ff_g729_decoder = {
     .priv_data_size = sizeof(G729Context),
     .init           = decoder_init,
     .decode         = decode_frame,
+    .close          = decode_close,
     .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/gdv.c b/libavcodec/gdv.c
index 538bc38e3eadf..183286b55f178 100644
--- a/libavcodec/gdv.c
+++ b/libavcodec/gdv.c
@@ -72,9 +72,64 @@ static av_cold int gdv_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static void scaleup(uint8_t *dst, const uint8_t *src, int w) /* 2x horizontal pixel doubling: dst[x] = src[x >> 1] for every x in [0, w) */
+{
+    int x;
+    for (x = 0; x < w - 7; x+=8) { /* unrolled main loop: each pass expands 4 source bytes into 8 output bytes */
+        dst[x + 0] =
+        dst[x + 1] = src[(x>>1) + 0];
+        dst[x + 2] =
+        dst[x + 3] = src[(x>>1) + 1];
+        dst[x + 4] =
+        dst[x + 5] = src[(x>>1) + 2];
+        dst[x + 6] =
+        dst[x + 7] = src[(x>>1) + 3];
+    }
+    for (; x < w; x++) { /* leftover pixels (fewer than 8) that did not fill a whole group */
+        dst[x] = src[(x>>1)];
+    }
+}
+
+static void scaleup_rev(uint8_t *dst, const uint8_t *src, int w) /* same mapping as scaleup(), dst[x] = src[x >> 1], but written from x = w - 1 down to 0 */
+{ /* NOTE(review): rescale() passes dst and src from the same buffer; the descending order presumably keeps unread source bytes intact - confirm */
+    int x;
+
+    for (x = w - 1; (x+1) & 7; x--) { /* peel trailing pixels until x + 1 is a multiple of 8 (runs w times when w < 8) */
+        dst[x] = src[(x>>1)];
+    }
+    for (x -= 7; x >= 0; x -= 8) { /* x now starts a group of 8 output bytes; groups and the pairs inside them go last to first */
+        dst[x + 6] =
+        dst[x + 7] = src[(x>>1) + 3];
+        dst[x + 4] =
+        dst[x + 5] = src[(x>>1) + 2];
+        dst[x + 2] =
+        dst[x + 3] = src[(x>>1) + 1];
+        dst[x + 0] =
+        dst[x + 1] = src[(x>>1) + 0];
+    }
+}
+
+static void scaledown(uint8_t *dst, const uint8_t *src, int w) /* 2x horizontal decimation: dst[x] = src[2 * x] for x in [0, w); w counts OUTPUT bytes (callers pass w>>1) */
+{
+    int x;
+    for (x = 0; x < w - 7; x+=8) { /* unrolled main loop: 8 output bytes taken from every second of 16 source bytes */
+        dst[x + 0] = src[2*x + 0];
+        dst[x + 1] = src[2*x + 2];
+        dst[x + 2] = src[2*x + 4];
+        dst[x + 3] = src[2*x + 6];
+        dst[x + 4] = src[2*x + 8];
+        dst[x + 5] = src[2*x +10];
+        dst[x + 6] = src[2*x +12];
+        dst[x + 7] = src[2*x +14];
+    }
+    for (; x < w; x++) { /* leftover output bytes (fewer than 8) */
+        dst[x] = src[2*x];
+    }
+}
+
 static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, int scale_h)
 {
-    int j, y, x;
+    int j, y;
 
     if ((gdv->scale_v == scale_v) && (gdv->scale_h == scale_h)) {
         return;
@@ -86,14 +141,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in
             uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
             uint8_t *src1 = dst + PREAMBLE_SIZE + (y>>!!gdv->scale_h) * (w>>1);
 
-            for (x = w - 1; x >= 0 && !(x&1); x--) {
-                dst1[x] = src1[(x>>1)];
-            }
-
-            for (x--; x >= 0; x-=2) {
-                dst1[x  ] =
-                dst1[x+1] = src1[(x>>1)];
-            }
+            scaleup_rev(dst1, src1, w);
         }
     } else if (gdv->scale_h) {
         for (j = 0; j < h; j++) {
@@ -108,9 +156,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in
         for (y = 0; y < (h>>1); y++) {
             uint8_t *dst1 = dst + PREAMBLE_SIZE + y * (w>>1);
             uint8_t *src1 = dst + PREAMBLE_SIZE + y*2 * w;
-            for (x = 0; x < (w>>1); x++) {
-                dst1[x] = src1[x*2];
-            }
+            scaledown(dst1, src1, w>>1);
         }
     } else if (scale_h) {
         for (y = 0; y < (h>>1); y++) {
@@ -121,9 +167,7 @@ static void rescale(GDVContext *gdv, uint8_t *dst, int w, int h, int scale_v, in
     } else if (scale_v) {
         for (y = 0; y < h; y++) {
             uint8_t *dst1 = dst + PREAMBLE_SIZE + y * w;
-            for (x = 0; x < (w>>1); x++) {
-                dst1[x] = dst1[x*2];
-            }
+            scaledown(dst1, dst1, w>>1);
         }
     }
 
@@ -250,6 +294,8 @@ static int decompress_5(AVCodecContext *avctx, unsigned skip)
 
     while (bytestream2_get_bytes_left_p(pb) > 0 && bytestream2_get_bytes_left(gb) > 0) {
         int tag = read_bits2(&bits, gb);
+        if (bytestream2_get_bytes_left(gb) < 1)
+            return AVERROR_INVALIDDATA;
         if (tag == 0) {
             bytestream2_put_byte(pb, bytestream2_get_byte(gb));
         } else if (tag == 1) {
@@ -470,30 +516,25 @@ static int gdv_decode_frame(AVCodecContext *avctx, void *data,
 
     if (!gdv->scale_v && !gdv->scale_h) {
         int sidx = PREAMBLE_SIZE, didx = 0;
-        int y, x;
+        int y;
 
         for (y = 0; y < avctx->height; y++) {
-            for (x = 0; x < avctx->width; x++) {
-                dst[x+didx] = gdv->frame[x+sidx];
-            }
+            memcpy(dst + didx, gdv->frame + sidx, avctx->width);
             sidx += avctx->width;
             didx += frame->linesize[0];
         }
     } else {
         int sidx = PREAMBLE_SIZE, didx = 0;
-        int y, x;
+        int y;
 
         for (y = 0; y < avctx->height; y++) {
             if (!gdv->scale_v) {
                 memcpy(dst + didx, gdv->frame + sidx, avctx->width);
             } else {
-                for (x = 0; x < avctx->width - 1; x+=2) {
-                    dst[didx + x    ] =
-                    dst[didx + x + 1] = gdv->frame[sidx + (x>>1)];
-                }
-                for (; x < avctx->width; x++) {
-                    dst[didx + x] = gdv->frame[sidx + (x>>1)];
-                }
+                uint8_t *dst2 = dst + didx;
+                uint8_t *src2 = gdv->frame + sidx;
+
+                scaleup(dst2, src2, avctx->width);
             }
             if (!gdv->scale_h || ((y & 1) == 1)) {
                 sidx += !gdv->scale_v ? avctx->width : avctx->width/2;
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 26a5b3e54ff85..c2f267186e3e8 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -380,7 +380,7 @@ static inline int get_sbits(GetBitContext *s, int n)
  */
 static inline unsigned int get_bits(GetBitContext *s, int n)
 {
-    register int tmp;
+    register unsigned int tmp;
 #if CACHED_BITSTREAM_READER
 
     av_assert2(n>0 && n<=32);
@@ -403,6 +403,7 @@ static inline unsigned int get_bits(GetBitContext *s, int n)
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
 #endif
+    av_assert2(tmp < UINT64_C(1) << n);
     return tmp;
 }
 
@@ -442,7 +443,7 @@ static inline unsigned int get_bits_le(GetBitContext *s, int n)
  */
 static inline unsigned int show_bits(GetBitContext *s, int n)
 {
-    register int tmp;
+    register unsigned int tmp;
 #if CACHED_BITSTREAM_READER
     if (n > s->bits_left)
         refill_32(s);
diff --git a/libavcodec/gif.c b/libavcodec/gif.c
index 8ba01c18585a3..94c8b1af499f2 100644
--- a/libavcodec/gif.c
+++ b/libavcodec/gif.c
@@ -2,6 +2,8 @@
  * Copyright (c) 2000 Fabrice Bellard
  * Copyright (c) 2002 Francois Revol
  * Copyright (c) 2006 Baptiste Coudurier
+ * Copyright (c) 2018 Bjorn Roche
+ * Copyright (c) 2018 Paul B Mahol
  *
  * first version by Francois Revol <revol@free.fr>
  *
@@ -39,6 +41,8 @@
 
 #include "put_bits.h"
 
+#define DEFAULT_TRANSPARENCY_INDEX 0x1f
+
 typedef struct GIFContext {
     const AVClass *class;
     LZWState *lzw;
@@ -46,10 +50,10 @@ typedef struct GIFContext {
     int buf_size;
     AVFrame *last_frame;
     int flags;
+    int image;
     uint32_t palette[AVPALETTE_COUNT];  ///< local reference palette for !pal8
     int palette_loaded;
     int transparent_index;
-    uint8_t *pal_exdata;
     uint8_t *tmpl;                      ///< temporary line buffer
 } GIFContext;
 
@@ -58,6 +62,45 @@ enum {
     GF_TRANSDIFF  = 1<<1,
 };
 
+static int is_image_translucent(AVCodecContext *avctx,
+                                const uint8_t *buf, const int linesize) /* returns 1 if any pixel equals s->transparent_index, otherwise 0 */
+{
+    GIFContext *s = avctx->priv_data;
+    int trans = s->transparent_index;
+
+    if (trans < 0) /* negative index: nothing to look for, report "not translucent" */
+        return 0;
+
+    for (int y = 0; y < avctx->height; y++) { /* scan avctx->width pixels on each of avctx->height rows */
+        for (int x = 0; x < avctx->width; x++) {
+            if (buf[x] == trans) {
+                return 1;
+            }
+        }
+        buf += linesize; /* advance to the next row; rows are linesize bytes apart */
+    }
+
+    return 0;
+}
+
+static int get_palette_transparency_index(const uint32_t *palette) /* index of the entry with the smallest alpha if that alpha is < 128, else -1 */
+{
+    int transparent_color_index = -1;
+    unsigned i, smallest_alpha = 0xff;
+
+    if (!palette) /* no palette supplied: report "no transparent entry" */
+        return -1;
+
+    for (i = 0; i < AVPALETTE_COUNT; i++) {
+        const uint32_t v = palette[i];
+        if (v >> 24 < smallest_alpha) { /* alpha is the top byte; strict '<' keeps the first entry on ties */
+            smallest_alpha = v >> 24;
+            transparent_color_index = i;
+        }
+    }
+    return smallest_alpha < 128 ? transparent_color_index : -1;
+}
+
 static int pick_palette_entry(const uint8_t *buf, int linesize, int w, int h)
 {
     int histogram[AVPALETTE_COUNT] = {0};
@@ -74,43 +117,91 @@ static int pick_palette_entry(const uint8_t *buf, int linesize, int w, int h)
     return -1;
 }
 
-// returns true if any of the pixels are transparent
-static int is_image_translucent(AVCodecContext *avctx,
-                                const uint32_t *palette,
-                                const uint8_t *buf, const int linesize)
+static void gif_crop_translucent(AVCodecContext *avctx,
+                                 const uint8_t *buf, const int linesize,
+                                 int *width, int *height,
+                                 int *x_start, int *y_start)
 {
     GIFContext *s = avctx->priv_data;
     int trans = s->transparent_index;
-    int p;
-    const int m = avctx->width * avctx->height ;
 
-    if (trans < 0) {
-        return 0;
-    }
+    /* Trim fully transparent border rows/columns; updates *x_start, *y_start, *width, *height (no-op without GF_OFFSETTING or with trans < 0) */
+    if ((s->flags & GF_OFFSETTING) && trans >= 0) {
+        const int w = avctx->width;
+        const int h = avctx->height;
+        int x_end = w - 1,
+            y_end = h - 1;
+
+        // crop top; rows of buf are linesize bytes apart, not w
+        while (*y_start < y_end) {
+            int is_trans = 1;
+            for (int i = 0; i < w; i++) {
+                if (buf[linesize * *y_start + i] != trans) {
+                    is_trans = 0;
+                    break;
+                }
+            }
+
+            if (!is_trans)
+                break;
+            (*y_start)++;
+        }
+
+        // crop bottom; bounded by *y_start so y_end cannot run past the first kept row
+        while (y_end > *y_start) {
+            int is_trans = 1;
+            for (int i = 0; i < w; i++) {
+                if (buf[linesize * y_end + i] != trans) {
+                    is_trans = 0;
+                    break;
+                }
+            }
+            if (!is_trans)
+                break;
+            y_end--;
+        }
 
-    for (p=0; p<m; ++p) {
-        if (buf[p] == trans) {
-            return 1;
+        // crop left; scan every kept row, *y_start..y_end inclusive
+        while (*x_start < x_end) {
+            int is_trans = 1;
+            for (int i = *y_start; i <= y_end; i++) {
+                if (buf[linesize * i + *x_start] != trans) {
+                    is_trans = 0;
+                    break;
+                }
+            }
+            if (!is_trans)
+                break;
+            (*x_start)++;
         }
+
+        // crop right; bounded by *x_start so x_end cannot run past the first kept column
+        while (x_end > *x_start) {
+            int is_trans = 1;
+            for (int i = *y_start; i <= y_end; i++) {
+                if (buf[linesize * i + x_end] != trans) {
+                    is_trans = 0;
+                    break;
+                }
+            }
+            if (!is_trans)
+                break;
+            x_end--;
+        }
+
+        *height = y_end + 1 - *y_start;
+        *width  = x_end + 1 - *x_start;
+        av_log(avctx, AV_LOG_DEBUG,"%dx%d image at pos (%d;%d) [area:%dx%d]\n",
+               *width, *height, *x_start, *y_start, avctx->width, avctx->height);
     }
-    return 0;
 }
 
-// writes an opaque image. ie an image with no transparency.
-// it also works, for a first image even with transpareny,
-// in which case is_translucent should be set.
-static int gif_image_write_opaque(AVCodecContext *avctx,
-                                  uint8_t **bytestream, uint8_t *end,
-                                  const uint32_t *palette,
-                                  const uint8_t *buf, const int linesize,
-                                  AVPacket *pkt,
-                                  const int is_translucent )
+static void gif_crop_opaque(AVCodecContext *avctx,
+                            const uint32_t *palette,
+                            const uint8_t *buf, const int linesize,
+                            int *width, int *height, int *x_start, int *y_start)
 {
     GIFContext *s = avctx->priv_data;
-    int len = 0, height = avctx->height, width = avctx->width, x, y;
-    int x_start = 0, y_start = 0, trans = s->transparent_index;
-    int honor_transparency = (s->flags & GF_TRANSDIFF) && s->last_frame && !palette;
-    const uint8_t *ptr;
 
     /* Crop image */
     if ((s->flags & GF_OFFSETTING) && s->last_frame && !palette) {
@@ -120,34 +211,34 @@ static int gif_image_write_opaque(AVCodecContext *avctx,
             y_end = avctx->height - 1;
 
         /* skip common lines */
-        while (y_start < y_end) {
-            if (memcmp(ref + y_start*ref_linesize, buf + y_start*linesize, width))
+        while (*y_start < y_end) {
+            if (memcmp(ref + *y_start*ref_linesize, buf + *y_start*linesize, *width))
                 break;
-            y_start++;
+            (*y_start)++;
         }
-        while (y_end > y_start) {
-            if (memcmp(ref + y_end*ref_linesize, buf + y_end*linesize, width))
+        while (y_end > *y_start) {
+            if (memcmp(ref + y_end*ref_linesize, buf + y_end*linesize, *width))
                 break;
             y_end--;
         }
-        height = y_end + 1 - y_start;
+        *height = y_end + 1 - *y_start;
 
         /* skip common columns */
-        while (x_start < x_end) {
+        while (*x_start < x_end) {
             int same_column = 1;
-            for (y = y_start; y <= y_end; y++) {
-                if (ref[y*ref_linesize + x_start] != buf[y*linesize + x_start]) {
+            for (int y = *y_start; y <= y_end; y++) {
+                if (ref[y*ref_linesize + *x_start] != buf[y*linesize + *x_start]) {
                     same_column = 0;
                     break;
                 }
             }
             if (!same_column)
                 break;
-            x_start++;
+            (*x_start)++;
         }
-        while (x_end > x_start) {
+        while (x_end > *x_start) {
             int same_column = 1;
-            for (y = y_start; y <= y_end; y++) {
+            for (int y = *y_start; y <= y_end; y++) {
                 if (ref[y*ref_linesize + x_end] != buf[y*linesize + x_end]) {
                     same_column = 0;
                     break;
@@ -157,40 +248,80 @@ static int gif_image_write_opaque(AVCodecContext *avctx,
                 break;
             x_end--;
         }
-        width = x_end + 1 - x_start;
+        *width = x_end + 1 - *x_start;
 
         av_log(avctx, AV_LOG_DEBUG,"%dx%d image at pos (%d;%d) [area:%dx%d]\n",
-               width, height, x_start, y_start, avctx->width, avctx->height);
+               *width, *height, *x_start, *y_start, avctx->width, avctx->height);
     }
+}
 
-    if (trans < 0) {
-       honor_transparency = 0;
+static int gif_image_write_image(AVCodecContext *avctx,
+                                 uint8_t **bytestream, uint8_t *end,
+                                 const uint32_t *palette,
+                                 const uint8_t *buf, const int linesize,
+                                 AVPacket *pkt)
+{
+    GIFContext *s = avctx->priv_data;
+    int disposal, len = 0, height = avctx->height, width = avctx->width, x, y;
+    int x_start = 0, y_start = 0, trans = s->transparent_index;
+    int bcid = -1, honor_transparency = (s->flags & GF_TRANSDIFF) && s->last_frame && !palette;
+    const uint8_t *ptr;
+
+    if (!s->image && avctx->frame_number && is_image_translucent(avctx, buf, linesize)) {
+        gif_crop_translucent(avctx, buf, linesize, &width, &height, &x_start, &y_start);
+        honor_transparency = 0;
+        disposal = GCE_DISPOSAL_BACKGROUND;
+    } else {
+        gif_crop_opaque(avctx, palette, buf, linesize, &width, &height, &x_start, &y_start);
+        disposal = GCE_DISPOSAL_INPLACE;
+    }
+
+    if (s->image || !avctx->frame_number) { /* GIF header */
+        const uint32_t *global_palette = palette ? palette : s->palette;
+        const AVRational sar = avctx->sample_aspect_ratio;
+        int64_t aspect = 0;
+
+        if (sar.num > 0 && sar.den > 0) {
+            aspect = sar.num * 64LL / sar.den - 15;
+            if (aspect < 0 || aspect > 255)
+                aspect = 0;
+        }
+
+        bytestream_put_buffer(bytestream, gif89a_sig, sizeof(gif89a_sig));
+        bytestream_put_le16(bytestream, avctx->width);
+        bytestream_put_le16(bytestream, avctx->height);
+
+        bcid = get_palette_transparency_index(global_palette);
+
+        bytestream_put_byte(bytestream, 0xf7); /* flags: global clut, 256 entries */
+        bytestream_put_byte(bytestream, bcid < 0 ? DEFAULT_TRANSPARENCY_INDEX : bcid); /* background color index */
+        bytestream_put_byte(bytestream, aspect);
+        for (int i = 0; i < 256; i++) {
+            const uint32_t v = global_palette[i] & 0xffffff;
+            bytestream_put_be24(bytestream, v);
+        }
     }
+
     if (honor_transparency && trans < 0) {
         trans = pick_palette_entry(buf + y_start*linesize + x_start,
                                    linesize, width, height);
-        if (trans < 0) { // TODO, patch welcome
+        if (trans < 0) // TODO, patch welcome
             av_log(avctx, AV_LOG_DEBUG, "No available color, can not use transparency\n");
-        } else {
-            uint8_t *pal_exdata = s->pal_exdata;
-            if (!pal_exdata)
-                pal_exdata = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
-            if (!pal_exdata)
-                return AVERROR(ENOMEM);
-            memcpy(pal_exdata, s->palette, AVPALETTE_SIZE);
-            pal_exdata[trans*4 + 3*!HAVE_BIGENDIAN] = 0x00;
-        }
     }
 
-    uint8_t *frame_disposal = av_packet_new_side_data(pkt, AV_PKT_DATA_GIF_FRAME_DISPOSAL, 1);
-    if (!frame_disposal)
-        return AVERROR(ENOMEM);
-    *frame_disposal = GCE_DISPOSAL_NONE ;
-    if( honor_transparency )
-        *frame_disposal = GCE_DISPOSAL_INPLACE;
-    if( is_translucent )
-        *frame_disposal = GCE_DISPOSAL_BACKGROUND;
+    if (trans < 0)
+        honor_transparency = 0;
 
+    bcid = honor_transparency || disposal == GCE_DISPOSAL_BACKGROUND ? trans : get_palette_transparency_index(palette);
+
+    /* graphic control extension */
+    bytestream_put_byte(bytestream, GIF_EXTENSION_INTRODUCER);
+    bytestream_put_byte(bytestream, GIF_GCE_EXT_LABEL);
+    bytestream_put_byte(bytestream, 0x04); /* block size */
+    bytestream_put_byte(bytestream, disposal<<2 | (bcid >= 0));
+    bytestream_put_le16(bytestream, 5); // default delay
+    bytestream_put_byte(bytestream, bcid < 0 ? DEFAULT_TRANSPARENCY_INDEX : bcid);
+    bytestream_put_byte(bytestream, 0x00);
 
     /* image block */
     bytestream_put_byte(bytestream, GIF_IMAGE_SEPARATOR);
@@ -216,7 +347,6 @@ static int gif_image_write_opaque(AVCodecContext *avctx,
                        12, FF_LZW_GIF, put_bits);
 
     ptr = buf + y_start*linesize + x_start;
-
     if (honor_transparency) {
         const int ref_linesize = s->last_frame->linesize[0];
         const uint8_t *ref = s->last_frame->data[0] + y_start*ref_linesize + x_start;
@@ -252,155 +382,6 @@ static int gif_image_write_opaque(AVCodecContext *avctx,
     return 0;
 }
 
-// wrtites an image that may contain transparency
-// this should work for opaque images as well, but will be less optimized.
-static int gif_image_write_translucent(AVCodecContext *avctx,
-                                       uint8_t **bytestream, uint8_t *end,
-                                       const uint32_t *palette,
-                                       const uint8_t *buf, const int linesize,
-                                       AVPacket *pkt)
-{
-    GIFContext *s = avctx->priv_data;
-    int len = 0, height = avctx->height, width = avctx->width, y;
-    int x_start = 0, y_start = 0, trans = s->transparent_index;
-    const uint8_t *ptr;
-
-    /* Crop Image */
-    if ((s->flags & GF_OFFSETTING) && trans >=0) {
-        const int w = avctx->width;
-        const int h = avctx->height;
-        int x_end = w - 1,
-            y_end = h - 1;
-
-        // crop top
-        while (y_start < y_end) {
-            int i;
-            for (i=0; i<w; ++i) {
-                if( buf[linesize*y_start+i] != trans ) {
-                    goto DONE_CROP_TOP;
-                }
-            }
-            ++y_start;
-        }
-  DONE_CROP_TOP:
-
-        // crop bottom
-        while (y_end < h) {
-            int i;
-            for (i=0; i<w; ++i) {
-                if (buf[linesize*y_end+i] != trans) {
-                    goto DONE_CROP_BOTTOM;
-                }
-            }
-            y_end--;
-        }
-  DONE_CROP_BOTTOM:
-
-        // crop left
-        while (x_start < x_end) {
-            int i;
-            for (i=y_start; i<y_end; ++i) {
-                if (buf[linesize*i+x_start] != trans) {
-                    goto DONE_CROP_LEFT;
-                }
-            }
-            x_start++;
-        }
-  DONE_CROP_LEFT:
-
-        // crop right
-        while (x_end > x_start) {
-            int i;
-            for (i=y_start; i<y_end; ++i) {
-                if (buf[linesize*i+x_end] != trans) {
-                    goto DONE_CROP_RIGHT;
-                }
-            }
-            x_end--;
-        }
-  DONE_CROP_RIGHT:
-
-        height = y_end + 1 - y_start;
-        width  = x_end + 1 - x_start;
-        av_log(avctx, AV_LOG_DEBUG,"%dx%d image at pos (%d;%d) [area:%dx%d]\n",
-               width, height, x_start, y_start, avctx->width, avctx->height);
-    }
-
-
-    uint8_t *frame_disposal = av_packet_new_side_data(pkt, AV_PKT_DATA_GIF_FRAME_DISPOSAL, 1);
-    if (!frame_disposal)
-        return AVERROR(ENOMEM);
-    *frame_disposal = GCE_DISPOSAL_BACKGROUND;
-
-    /* image block */
-    bytestream_put_byte(bytestream, GIF_IMAGE_SEPARATOR);
-    bytestream_put_le16(bytestream, x_start);
-    bytestream_put_le16(bytestream, y_start);
-    bytestream_put_le16(bytestream, width);
-    bytestream_put_le16(bytestream, height);
-
-    if (!palette) {
-        bytestream_put_byte(bytestream, 0x00); /* flags */
-    } else {
-        unsigned i;
-        bytestream_put_byte(bytestream, 1<<7 | 0x7); /* flags */
-        for (i = 0; i < AVPALETTE_COUNT; i++) {
-            const uint32_t v = palette[i];
-            bytestream_put_be24(bytestream, v);
-        }
-    }
-
-    bytestream_put_byte(bytestream, 0x08);
-
-    ff_lzw_encode_init(s->lzw, s->buf, s->buf_size,
-                       12, FF_LZW_GIF, put_bits);
-
-    ptr = buf + y_start*(linesize) + x_start;
-
-    for (y = 0; y < height; y++) {
-        len += ff_lzw_encode(s->lzw, ptr, width);
-        ptr += linesize;
-    }
-
-    len += ff_lzw_encode_flush(s->lzw, flush_put_bits);
-
-    ptr = s->buf;
-    while (len > 0) {
-        int size = FFMIN(255, len);
-        bytestream_put_byte(bytestream, size);
-        if (end - *bytestream < size)
-            return -1;
-        bytestream_put_buffer(bytestream, ptr, size);
-        ptr += size;
-        len -= size;
-    }
-    bytestream_put_byte(bytestream, 0x00); /* end of image block */
-
-    return 0;
-}
-
-static int gif_image_write_image(AVCodecContext *avctx,
-                                 uint8_t **bytestream, uint8_t *end,
-                                 const uint32_t *palette,
-                                 const uint8_t *buf, const int linesize,
-                                 AVPacket *pkt)
-{
-    GIFContext *s = avctx->priv_data;
-
-    int first_frame = s->last_frame == NULL;
-    int is_translucent = is_image_translucent(avctx, palette, buf, linesize);
-
-    if (first_frame) {
-        return gif_image_write_opaque(avctx, bytestream, end, palette, buf, linesize, pkt, is_translucent);
-    }
-
-    if (is_translucent) {
-        return gif_image_write_translucent(avctx, bytestream, end, palette, buf, linesize, pkt);
-    } else {
-        return gif_image_write_opaque(avctx, bytestream, end, palette, buf, linesize, pkt, is_translucent);
-    }
-}
-
 static av_cold int gif_encode_init(AVCodecContext *avctx)
 {
     GIFContext *s = avctx->priv_data;
@@ -409,12 +390,6 @@ static av_cold int gif_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "GIF does not support resolutions above 65535x65535\n");
         return AVERROR(EINVAL);
     }
-#if FF_API_CODED_FRAME
-FF_DISABLE_DEPRECATION_WARNINGS
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-    avctx->coded_frame->key_frame = 1;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     s->transparent_index = -1;
 
@@ -431,25 +406,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return 0;
 }
 
-/* FIXME: duplicated with lavc */
-static int get_palette_transparency_index(const uint32_t *palette)
-{
-    int transparent_color_index = -1;
-    unsigned i, smallest_alpha = 0xff;
-
-    if (!palette)
-        return -1;
-
-    for (i = 0; i < AVPALETTE_COUNT; i++) {
-        const uint32_t v = palette[i];
-        if (v >> 24 < smallest_alpha) {
-            smallest_alpha = v >> 24;
-            transparent_color_index = i;
-        }
-    }
-    return smallest_alpha < 128 ? transparent_color_index : -1;
-}
-
 static int gif_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                             const AVFrame *pict, int *got_packet)
 {
@@ -464,22 +420,12 @@ static int gif_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     end        = pkt->data + pkt->size;
 
     if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
-        uint8_t *pal_exdata = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
-        if (!pal_exdata)
-            return AVERROR(ENOMEM);
-        memcpy(pal_exdata, pict->data[1], AVPALETTE_SIZE);
         palette = (uint32_t*)pict->data[1];
 
-        s->pal_exdata = pal_exdata;
-
-        /* The first palette with PAL8 will be used as generic palette by the
-         * muxer so we don't need to write it locally in the packet. We store
-         * it as a reference here in case it changes later. */
         if (!s->palette_loaded) {
             memcpy(s->palette, palette, AVPALETTE_SIZE);
             s->transparent_index = get_palette_transparency_index(palette);
             s->palette_loaded = 1;
-            palette = NULL;
         } else if (!memcmp(s->palette, palette, AVPALETTE_SIZE)) {
             palette = NULL;
         }
@@ -487,18 +433,22 @@ static int gif_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     gif_image_write_image(avctx, &outbuf_ptr, end, palette,
                           pict->data[0], pict->linesize[0], pkt);
-    if (!s->last_frame) {
+    if (!s->last_frame && !s->image) {
         s->last_frame = av_frame_alloc();
         if (!s->last_frame)
             return AVERROR(ENOMEM);
     }
-    av_frame_unref(s->last_frame);
-    ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
-    if (ret < 0)
-        return ret;
+
+    if (!s->image) {
+        av_frame_unref(s->last_frame);
+        ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+        if (ret < 0)
+            return ret;
+    }
 
     pkt->size   = outbuf_ptr - pkt->data;
-    pkt->flags |= AV_PKT_FLAG_KEY;
+    if (s->image || !avctx->frame_number)
+        pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
     return 0;
@@ -522,6 +472,7 @@ static const AVOption gif_options[] = {
     { "gifflags", "set GIF flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = GF_OFFSETTING|GF_TRANSDIFF}, 0, INT_MAX, FLAGS, "flags" },
         { "offsetting", "enable picture offsetting", 0, AV_OPT_TYPE_CONST, {.i64=GF_OFFSETTING}, INT_MIN, INT_MAX, FLAGS, "flags" },
         { "transdiff", "enable transparency detection between frames", 0, AV_OPT_TYPE_CONST, {.i64=GF_TRANSDIFF}, INT_MIN, INT_MAX, FLAGS, "flags" },
+    { "gifimage", "enable encoding only images per frame", OFFSET(image), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS, "flags" },
     { NULL }
 };
 
@@ -546,4 +497,4 @@ AVCodec ff_gif_encoder = {
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_PAL8, AV_PIX_FMT_NONE
     },
     .priv_class     = &gif_class,
-};
+};
\ No newline at end of file
diff --git a/libavcodec/gif.h b/libavcodec/gif.h
index 9f35778857216..7fb61495bc8c8 100644
--- a/libavcodec/gif.h
+++ b/libavcodec/gif.h
@@ -43,6 +43,7 @@ static const uint8_t gif89a_sig[6] = "GIF89a";
 #define GIF_EXTENSION_INTRODUCER    0x21
 #define GIF_IMAGE_SEPARATOR         0x2c
 #define GIF_GCE_EXT_LABEL           0xf9
+#define GIF_COM_EXT_LABEL           0xfe
 #define GIF_APP_EXT_LABEL           0xff
 #define NETSCAPE_EXT_STR            "NETSCAPE2.0"
 
diff --git a/libavcodec/gif_parser.c b/libavcodec/gif_parser.c
new file mode 100644
index 0000000000000..e88338fd4729a
--- /dev/null
+++ b/libavcodec/gif_parser.c
@@ -0,0 +1,188 @@
+/*
+ * GIF parser
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * GIF parser
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+
+#include "gif.h"
+#include "parser.h"
+
+typedef enum GIFParseStates { /* parser states; state 0 (no enumerator) means "between elements" */
+    GIF_HEADER = 1,           /* signature, screen descriptor and optional global color table */
+    GIF_EXTENSION,            /* extension introducer, label and first sub-block size */
+    GIF_EXTENSION_BLOCK,      /* data sub-blocks of an extension */
+    GIF_IMAGE,                /* image descriptor and optional color table */
+    GIF_IMAGE_BLOCK,          /* image data sub-blocks */
+} gif_states;
+
+typedef struct GIFParseContext { /* private state of the GIF parser, kept across gif_parse() calls */
+    ParseContext pc;      /* handed to ff_combine_frame() */
+    unsigned found_sig;   /* incremented per signature seen; zeroed on trailer or unknown byte */
+    int found_start;      /* set when an extension introducer is seen */
+    int found_end;        /* set on a zero-length image sub-block or on the trailer */
+    int index;            /* byte position inside the element currently being walked */
+    int state;            /* one of gif_states, or 0 when no element is in progress */
+    int gct_flag;         /* bit 7 of the packed byte: a color table follows */
+    int gct_size;         /* color table size in bytes: 3 * 2^(n + 1) */
+    int block_size;       /* size byte of the current sub-block */
+    int etype;            /* label byte of the current extension */
+    int delay;            /* 16-bit little-endian delay read from a graphic control extension */
+} GIFParseContext;
+
+static int gif_find_frame_end(GIFParseContext *g, const uint8_t *buf,
+                              int buf_size, void *logctx)
+{ /* logctx is currently unused */
+    int index, next = END_NOT_FOUND;
+    /* Byte-wise state machine. Returns the offset of the extension introducer that starts the next frame, else END_NOT_FOUND. */
+    for (index = 0; index < buf_size; index++) {
+        if (!g->state) { /* between elements: classify the current byte */
+            /* NOTE(review): compares 6 bytes from index without checking buf_size; relies on input padding - confirm */
+            if (!memcmp(buf + index, gif87a_sig, 6) ||
+                !memcmp(buf + index, gif89a_sig, 6)) {
+                g->state = GIF_HEADER;
+                g->found_sig++;
+            } else if (buf[index] == GIF_EXTENSION_INTRODUCER) {
+                g->state = GIF_EXTENSION;
+                g->found_start = 1;
+            } else if (buf[index] == GIF_IMAGE_SEPARATOR) {
+                g->state = GIF_IMAGE;
+            } else if (buf[index] == GIF_TRAILER) {
+                g->state = 0;
+                g->found_end = 1;
+                g->found_sig = 0;
+            } else {
+                g->found_sig = 0;
+            }
+        }
+        /* g->index == k means "k bytes after the first byte of the current element" */
+        if (g->state == GIF_HEADER) {
+            if (g->index == 10) { /* packed fields of the logical screen descriptor */
+                g->gct_flag = !!(buf[index] & 0x80);
+                g->gct_size = 3 * (1 << ((buf[index] & 0x07) + 1));
+            }
+            if (g->index >= 12 + g->gct_flag * g->gct_size) { /* last header byte reached */
+                g->state = 0;
+                g->index = 0;
+                g->gct_flag = 0;
+                g->gct_size = 0;
+                continue;
+            }
+            g->index++;
+        } else if (g->state == GIF_EXTENSION) {
+            if (g->found_start && g->found_end && g->found_sig) { /* a completed image precedes this extension */
+                next = index;
+                g->found_start = 0;
+                g->found_end = 0;
+                g->index = 0;
+                g->gct_flag = 0;
+                g->gct_size = 0;
+                g->state = 0;
+                break;
+            }
+            if (g->index == 1) { /* extension label */
+                g->etype = buf[index];
+            }
+            if (g->index >= 2) { /* size of the first data sub-block */
+                g->block_size = buf[index];
+                g->index = 0;
+                g->state = GIF_EXTENSION_BLOCK;
+                continue;
+            }
+            g->index++;
+        } else if (g->state == GIF_IMAGE_BLOCK) {
+            if (!g->index) /* first byte of a sub-block is its size */
+                g->block_size = buf[index];
+            if (g->index >= g->block_size) {
+                g->index = 0;
+                if (!g->block_size) { /* zero-length sub-block terminates the image data */
+                    g->state = 0;
+                    g->found_end = 1;
+                }
+                continue;
+            }
+            g->index++;
+        } else if (g->state == GIF_EXTENSION_BLOCK) {
+            if (g->etype == GIF_GCE_EXT_LABEL) {
+                if (g->index == 0)
+                    g->delay = 0;
+                if (g->index >= 1 && g->index <= 2) { /* little-endian 16-bit delay */
+                    g->delay |= buf[index] << (8 * (g->index - 1));
+                }
+            }
+            if (g->index >= g->block_size) { /* this byte is the next sub-block's size */
+                g->block_size = buf[index];
+                g->index = 0;
+                if (!g->block_size)
+                    g->state = 0;
+                continue;
+            }
+            g->index++;
+        } else if (g->state == GIF_IMAGE) {
+            if (g->index == 9) { /* packed fields follow separator + four 16-bit fields; offset 8 is the height's high byte */
+                g->gct_flag = !!(buf[index] & 0x80);
+                g->gct_size = 3 * (1 << ((buf[index] & 0x07) + 1));
+            }
+            if (g->index >= 10 + g->gct_flag * g->gct_size) { /* byte after descriptor and color table */
+                g->state = GIF_IMAGE_BLOCK;
+                g->index = 0;
+                g->gct_flag = 0;
+                g->gct_size = 0;
+                continue;
+            }
+            g->index++;
+        }
+    }
+
+    return next;
+}
+
+static int gif_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                     const uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{ /* parser_parse callback of ff_gif_parser */
+    GIFParseContext *g = s->priv_data;
+    int next;
+
+    next = gif_find_frame_end(g, buf, buf_size, avctx); /* avctx is only passed as the log context */
+    if (ff_combine_frame(&g->pc, next, &buf, &buf_size) < 0) {
+        *poutbuf      = NULL; /* no frame to output on this call */
+        *poutbuf_size = 0;
+        return buf_size;      /* report the whole input as consumed */
+    }
+
+    s->duration   = g->delay; /* last graphic control extension delay seen by the scanner */
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser ff_gif_parser = { /* parser descriptor for AV_CODEC_ID_GIF */
+    .codec_ids      = { AV_CODEC_ID_GIF },
+    .priv_data_size = sizeof(GIFParseContext),
+    .parser_parse   = gif_parse,
+    .parser_close   = ff_parse_close,
+};
diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index 54f1d4c0ba7fd..2115da163f620 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c
@@ -179,12 +179,20 @@ static int gif_read_image(GifState *s, AVFrame *frame)
     }
 
     /* verify that all the image is inside the screen dimensions */
-    if (!width || width > s->screen_width || left >= s->screen_width) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid image width.\n");
+    if (!width || width > s->screen_width) {
+        av_log(s->avctx, AV_LOG_WARNING, "Invalid image width: %d, truncating.\n", width);
+        width = s->screen_width;
+    }
+    if (left >= s->screen_width) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid left position: %d.\n", left);
         return AVERROR_INVALIDDATA;
     }
-    if (!height || height > s->screen_height || top >= s->screen_height) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid image height.\n");
+    if (!height || height > s->screen_height) {
+        av_log(s->avctx, AV_LOG_WARNING, "Invalid image height: %d, truncating.\n", height);
+        height = s->screen_height;
+    }
+    if (top >= s->screen_height) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid top position: %d.\n", top);
         return AVERROR_INVALIDDATA;
     }
     if (left + width > s->screen_width) {
diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index 5c25883626356..fcc78f44c14b3 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -476,15 +476,19 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
         return buf;
     } else {
         int i;
-        for (i = 0; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) {
+        for (i = 0; i + MIN_CACHE_BITS <= limit && SHOW_UBITS(re, gb, MIN_CACHE_BITS) == 0; i += MIN_CACHE_BITS) {
             if (gb->size_in_bits <= re_index) {
                 CLOSE_READER(re, gb);
                 return -1;
             }
-            LAST_SKIP_BITS(re, gb, 1);
+            LAST_SKIP_BITS(re, gb, MIN_CACHE_BITS);
             UPDATE_CACHE(re, gb);
         }
-        SKIP_BITS(re, gb, 1);
+        for (; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) {
+            SKIP_BITS(re, gb, 1);
+        }
+        LAST_SKIP_BITS(re, gb, 1);
+        UPDATE_CACHE(re, gb);
 
         if (i < limit - 1) {
             if (k) {
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 2cf01e3d985aa..8385ddfe2e215 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -500,9 +500,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             GetBitContext gb;
 
             if (init_get_bits8(&gb, s->avctx->extradata, s->avctx->extradata_size) >= 0 )
-                ff_mpeg4_decode_picture_header(avctx->priv_data, &gb);
+                ff_mpeg4_decode_picture_header(avctx->priv_data, &gb, 1);
         }
-        ret = ff_mpeg4_decode_picture_header(avctx->priv_data, &s->gb);
+        ret = ff_mpeg4_decode_picture_header(avctx->priv_data, &s->gb, 0);
     } else if (CONFIG_H263I_DECODER && s->codec_id == AV_CODEC_ID_H263I) {
         ret = ff_intel_h263_decode_picture_header(s);
     } else if (CONFIG_FLV_DECODER && s->h263_flv) {
diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
index aaa4b8f443be6..942f2c5d7124a 100644
--- a/libavcodec/h2645_parse.c
+++ b/libavcodec/h2645_parse.c
@@ -343,9 +343,51 @@ static int find_next_start_code(const uint8_t *buf, const uint8_t *next_avc)
     return i + 3;
 }
 
+static void alloc_rbsp_buffer(H2645RBSP *rbsp, unsigned int size, int use_ref)
+{ /* Make rbsp->rbsp_buffer hold size bytes plus padding; on failure it is left NULL with alloc size 0. */
+    if (size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        goto fail;
+    size += AV_INPUT_BUFFER_PADDING_SIZE;
+    /* Keep the current buffer if it is large enough and its reference, if any, is writable. */
+    if (rbsp->rbsp_buffer_alloc_size >= size &&
+        (!rbsp->rbsp_buffer_ref || av_buffer_is_writable(rbsp->rbsp_buffer_ref)))
+        return;
+    /* Over-allocate by size/16 + 32 bytes, capped at INT_MAX (presumably to reallocate less often). */
+    size = FFMIN(size + size / 16 + 32, INT_MAX);
+
+    if (rbsp->rbsp_buffer_ref) /* release the old storage through whichever handle is in use */
+        av_buffer_unref(&rbsp->rbsp_buffer_ref);
+    else
+        av_free(rbsp->rbsp_buffer);
+
+    rbsp->rbsp_buffer = av_malloc(size);
+    if (!rbsp->rbsp_buffer)
+        goto fail;
+    rbsp->rbsp_buffer_alloc_size = size;
+
+    if (use_ref) { /* wrap the new buffer in an AVBufferRef */
+        rbsp->rbsp_buffer_ref = av_buffer_create(rbsp->rbsp_buffer, size,
+                                                 NULL, NULL, 0);
+        if (!rbsp->rbsp_buffer_ref)
+            goto fail;
+    }
+
+    return;
+
+fail:
+    rbsp->rbsp_buffer_alloc_size = 0;
+    if (rbsp->rbsp_buffer_ref) {
+        av_buffer_unref(&rbsp->rbsp_buffer_ref);
+        rbsp->rbsp_buffer = NULL; /* never av_free()d here when a reference exists */
+    } else
+        av_freep(&rbsp->rbsp_buffer);
+
+    return;
+}
+
 int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
                           void *logctx, int is_nalff, int nal_length_size,
-                          enum AVCodecID codec_id, int small_padding)
+                          enum AVCodecID codec_id, int small_padding, int use_ref)
 {
     GetByteContext bc;
     int consumed, ret = 0;
@@ -353,7 +395,8 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
     int64_t padding = small_padding ? 0 : MAX_MBPAIR_SIZE;
 
     bytestream2_init(&bc, buf, length);
-    av_fast_padded_malloc(&pkt->rbsp.rbsp_buffer, &pkt->rbsp.rbsp_buffer_alloc_size, length + padding);
+    alloc_rbsp_buffer(&pkt->rbsp, length + padding, use_ref);
+
     if (!pkt->rbsp.rbsp_buffer)
         return AVERROR(ENOMEM);
 
@@ -474,6 +517,10 @@ void ff_h2645_packet_uninit(H2645Packet *pkt)
     }
     av_freep(&pkt->nals);
     pkt->nals_allocated = 0;
-    av_freep(&pkt->rbsp.rbsp_buffer);
+    if (pkt->rbsp.rbsp_buffer_ref) {
+        av_buffer_unref(&pkt->rbsp.rbsp_buffer_ref);
+        pkt->rbsp.rbsp_buffer = NULL;
+    } else
+        av_freep(&pkt->rbsp.rbsp_buffer);
     pkt->rbsp.rbsp_buffer_alloc_size = pkt->rbsp.rbsp_buffer_size = 0;
 }
diff --git a/libavcodec/h2645_parse.h b/libavcodec/h2645_parse.h
index 2e29ad26cbca5..2c29ca517c8b3 100644
--- a/libavcodec/h2645_parse.h
+++ b/libavcodec/h2645_parse.h
@@ -23,6 +23,7 @@
 
 #include <stdint.h>
 
+#include "libavutil/buffer.h"
 #include "avcodec.h"
 #include "get_bits.h"
 
@@ -66,6 +67,7 @@ typedef struct H2645NAL {
 
 typedef struct H2645RBSP {
     uint8_t *rbsp_buffer;
+    AVBufferRef *rbsp_buffer_ref;
     int rbsp_buffer_alloc_size;
     int rbsp_buffer_size;
 } H2645RBSP;
@@ -86,10 +88,21 @@ int ff_h2645_extract_rbsp(const uint8_t *src, int length, H2645RBSP *rbsp,
 
 /**
  * Split an input packet into NAL units.
+ *
+ * If data == raw_data holds true for a NAL unit of the returned pkt, then
+ * said NAL unit does not contain any emulation_prevention_three_byte and
+ * the data is contained in the input buffer pointed to by buf.
+ * Otherwise, the unescaped data is part of the rbsp_buffer described by the
+ * packet's H2645RBSP.
+ *
+ * If the packet's rbsp_buffer_ref is not NULL, the underlying AVBuffer must
+ * own rbsp_buffer. If not and rbsp_buffer is not NULL, use_ref must be 0.
+ * If use_ref is set, rbsp_buffer will be reference-counted and owned by
+ * the underlying AVBuffer of rbsp_buffer_ref.
  */
 int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
                           void *logctx, int is_nalff, int nal_length_size,
-                          enum AVCodecID codec_id, int small_padding);
+                          enum AVCodecID codec_id, int small_padding, int use_ref);
 
 /**
  * Free all the allocated memory in the packet.
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index ec9fca0350d4d..a01d823e7ad62 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -156,8 +156,8 @@ void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *
             av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
             sl->col_parity = 1;
         } else
-        sl->col_parity = (FFABS(col_poc[0] - cur_poc) >=
-                          FFABS(col_poc[1] - cur_poc));
+            sl->col_parity = (FFABS(col_poc[0] - (int64_t)cur_poc) >=
+                              FFABS(col_poc[1] - (int64_t)cur_poc));
         ref1sidx =
         sidx     = sl->col_parity;
     // FL -> FL & differ parity
diff --git a/libavcodec/h264_levels.c b/libavcodec/h264_levels.c
index 737b7dcf06c43..7a5511677391d 100644
--- a/libavcodec/h264_levels.c
+++ b/libavcodec/h264_levels.c
@@ -25,7 +25,7 @@ static const H264LevelDescriptor h264_levels[] = {
     //  | level_idc     |       MaxFS            |    MaxCPB        | MaxMvsPer2Mb
     //  |     | cs3f    |         |  MaxDpbMbs   |       |  MaxVmvR |   |
     { "1",   10, 0,     1485,     99,    396,     64,    175,   64, 2,  0 },
-    { "1b",  10, 1,     1485,     99,    396,    128,    350,   64, 2,  0 },
+    { "1b",  11, 1,     1485,     99,    396,    128,    350,   64, 2,  0 },
     { "1b",   9, 0,     1485,     99,    396,    128,    350,   64, 2,  0 },
     { "1.1", 11, 0,     3000,    396,    900,    192,    500,  128, 2,  0 },
     { "1.2", 12, 0,     6000,    396,   2376,    384,   1000,  128, 2,  0 },
diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
index bf37528234c7a..a17987a06cc4d 100644
--- a/libavcodec/h264_metadata_bsf.c
+++ b/libavcodec/h264_metadata_bsf.c
@@ -222,7 +222,7 @@ static int h264_metadata_update_sps(AVBSFContext *bsf,
         if (ctx->level == LEVEL_AUTO) {
             const H264LevelDescriptor *desc;
             int64_t bit_rate;
-            int width, height;
+            int width, height, dpb_frames;
 
             if (sps->vui.nal_hrd_parameters_present_flag) {
                 bit_rate = (sps->vui.nal_hrd_parameters.bit_rate_value_minus1[0] + 1) *
@@ -236,13 +236,16 @@ static int h264_metadata_update_sps(AVBSFContext *bsf,
                 bit_rate = 0;
             }
 
+            // Don't use max_dec_frame_buffering if it is only inferred.
+            dpb_frames = sps->vui.bitstream_restriction_flag ?
+                sps->vui.max_dec_frame_buffering : H264_MAX_DPB_FRAMES;
+
             width  = 16 * (sps->pic_width_in_mbs_minus1 + 1);
             height = 16 * (sps->pic_height_in_map_units_minus1 + 1) *
                 (2 - sps->frame_mbs_only_flag);
 
             desc = ff_h264_guess_level(sps->profile_idc, bit_rate,
-                                       width, height,
-                                       sps->vui.max_dec_frame_buffering);
+                                       width, height, dpb_frames);
             if (desc) {
                 level_idc = desc->level_idc;
             } else {
@@ -258,7 +261,7 @@ static int h264_metadata_update_sps(AVBSFContext *bsf,
             if (sps->profile_idc == 66 ||
                 sps->profile_idc == 77 ||
                 sps->profile_idc == 88) {
-                sps->level_idc = 10;
+                sps->level_idc = 11;
                 sps->constraint_set3_flag = 1;
             } else {
                 sps->level_idc = 9;
@@ -601,7 +604,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, au);
+    ff_cbs_fragment_reset(ctx->cbc, au);
     av_freep(&displaymatrix_side_data);
 
     if (err < 0)
@@ -645,13 +648,15 @@ static int h264_metadata_init(AVBSFContext *bsf)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, au);
+    ff_cbs_fragment_reset(ctx->cbc, au);
     return err;
 }
 
 static void h264_metadata_close(AVBSFContext *bsf)
 {
     H264MetadataContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->cbc, &ctx->access_unit); /* free the context's fragment before closing the CBS context */
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/h264_parse.c b/libavcodec/h264_parse.c
index 34ffe3b1fea2a..a075443d173ad 100644
--- a/libavcodec/h264_parse.c
+++ b/libavcodec/h264_parse.c
@@ -242,18 +242,23 @@ int ff_h264_parse_ref_count(int *plist_count, int ref_count[2],
                 ref_count[1] = 1;
         }
 
-        if (ref_count[0] - 1 > max[0] || ref_count[1] - 1 > max[1]) {
+        if (slice_type_nos == AV_PICTURE_TYPE_B)
+            list_count = 2;
+        else
+            list_count = 1;
+
+        if (ref_count[0] - 1 > max[0] || (list_count == 2 && (ref_count[1] - 1 > max[1]))) {
             av_log(logctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n",
                    ref_count[0] - 1, max[0], ref_count[1] - 1, max[1]);
             ref_count[0] = ref_count[1] = 0;
             *plist_count = 0;
             goto fail;
+        } else if (ref_count[1] - 1 > max[1]) {
+            av_log(logctx, AV_LOG_DEBUG, "reference overflow %u > %u \n",
+                   ref_count[1] - 1, max[1]);
+            ref_count[1] = 0;
         }
 
-        if (slice_type_nos == AV_PICTURE_TYPE_B)
-            list_count = 2;
-        else
-            list_count = 1;
     } else {
         list_count   = 0;
         ref_count[0] = ref_count[1] = 0;
@@ -359,7 +364,7 @@ static int decode_extradata_ps(const uint8_t *data, int size, H264ParamSets *ps,
     H2645Packet pkt = { 0 };
     int i, ret = 0;
 
-    ret = ff_h2645_packet_split(&pkt, data, size, logctx, is_avc, 2, AV_CODEC_ID_H264, 1);
+    ret = ff_h2645_packet_split(&pkt, data, size, logctx, is_avc, 2, AV_CODEC_ID_H264, 1, 0);
     if (ret < 0) {
         ret = 0;
         goto fail;
diff --git a/libavcodec/h264_redundant_pps_bsf.c b/libavcodec/h264_redundant_pps_bsf.c
index cc5a3060f5611..db8717d69a061 100644
--- a/libavcodec/h264_redundant_pps_bsf.c
+++ b/libavcodec/h264_redundant_pps_bsf.c
@@ -80,7 +80,7 @@ static int h264_redundant_pps_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = ff_cbs_read_packet(ctx->input, au, in);
     if (err < 0)
-        return err;
+        goto fail;
 
     au_has_sps = 0;
     for (i = 0; i < au->nb_units; i++) {
@@ -89,11 +89,15 @@ static int h264_redundant_pps_filter(AVBSFContext *bsf, AVPacket *out)
         if (nal->type == H264_NAL_SPS)
             au_has_sps = 1;
         if (nal->type == H264_NAL_PPS) {
-            h264_redundant_pps_fixup_pps(ctx, nal->content);
+            err = h264_redundant_pps_fixup_pps(ctx, nal->content);
+            if (err < 0)
+                goto fail;
             if (!au_has_sps) {
-                av_log(ctx, AV_LOG_VERBOSE, "Deleting redundant PPS "
+                av_log(bsf, AV_LOG_VERBOSE, "Deleting redundant PPS "
                        "at %"PRId64".\n", in->pts);
-                ff_cbs_delete_unit(ctx->input, au, i);
+                err = ff_cbs_delete_unit(ctx->input, au, i);
+                if (err < 0)
+                    goto fail;
             }
         }
         if (nal->type == H264_NAL_SLICE ||
@@ -105,17 +109,21 @@ static int h264_redundant_pps_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = ff_cbs_write_packet(ctx->output, out, au);
     if (err < 0)
-        return err;
+        goto fail;
 
-    ff_cbs_fragment_uninit(ctx->output, au);
 
     err = av_packet_copy_props(out, in);
     if (err < 0)
-        return err;
+        goto fail;
 
+    err = 0;
+fail:
+    ff_cbs_fragment_reset(ctx->output, au);
     av_packet_free(&in);
+    if (err < 0)
+        av_packet_unref(out);
 
-    return 0;
+    return err;
 }
 
 static int h264_redundant_pps_init(AVBSFContext *bsf)
@@ -138,25 +146,29 @@ static int h264_redundant_pps_init(AVBSFContext *bsf)
         err = ff_cbs_read_extradata(ctx->input, au, bsf->par_in);
         if (err < 0) {
             av_log(bsf, AV_LOG_ERROR, "Failed to read extradata.\n");
-            return err;
+            goto fail;
         }
 
         for (i = 0; i < au->nb_units; i++) {
-            if (au->units[i].type == H264_NAL_PPS)
-                h264_redundant_pps_fixup_pps(ctx, au->units[i].content);
+            if (au->units[i].type == H264_NAL_PPS) {
+                err = h264_redundant_pps_fixup_pps(ctx, au->units[i].content);
+                if (err < 0)
+                    goto fail;
+            }
         }
 
         ctx->extradata_pic_init_qp = ctx->current_pic_init_qp;
         err = ff_cbs_write_extradata(ctx->output, bsf->par_out, au);
         if (err < 0) {
             av_log(bsf, AV_LOG_ERROR, "Failed to write extradata.\n");
-            return err;
+            goto fail;
         }
-
-        ff_cbs_fragment_uninit(ctx->output, au);
     }
 
-    return 0;
+    err = 0;
+fail:
+    ff_cbs_fragment_reset(ctx->output, au);
+    return err;
 }
 
 static void h264_redundant_pps_flush(AVBSFContext *bsf)
@@ -168,6 +180,8 @@ static void h264_redundant_pps_flush(AVBSFContext *bsf)
 static void h264_redundant_pps_close(AVBSFContext *bsf)
 {
     H264RedundantPPSContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->input, &ctx->access_unit); /* free the context's fragment before closing both CBS contexts */
     ff_cbs_close(&ctx->input);
     ff_cbs_close(&ctx->output);
 }
diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
index 43593d34d2d34..d4eb9c0dab67c 100644
--- a/libavcodec/h264_sei.c
+++ b/libavcodec/h264_sei.c
@@ -84,32 +84,38 @@ static int decode_picture_timing(H264SEIPictureTiming *h, GetBitContext *gb,
             return AVERROR_INVALIDDATA;
 
         num_clock_ts = sei_num_clock_ts_table[h->pic_struct];
-
+        h->timecode_cnt = 0;
         for (i = 0; i < num_clock_ts; i++) {
-            if (get_bits(gb, 1)) {                /* clock_timestamp_flag */
+            if (get_bits(gb, 1)) {                      /* clock_timestamp_flag */
+                H264SEITimeCode *tc = &h->timecode[h->timecode_cnt++];
                 unsigned int full_timestamp_flag;
-
+                unsigned int counting_type, cnt_dropped_flag;
                 h->ct_type |= 1 << get_bits(gb, 2);
-                skip_bits(gb, 1);                 /* nuit_field_based_flag */
-                skip_bits(gb, 5);                 /* counting_type */
+                skip_bits(gb, 1);                       /* nuit_field_based_flag */
+                counting_type = get_bits(gb, 5);        /* counting_type */
                 full_timestamp_flag = get_bits(gb, 1);
-                skip_bits(gb, 1);                 /* discontinuity_flag */
-                skip_bits(gb, 1);                 /* cnt_dropped_flag */
-                skip_bits(gb, 8);                 /* n_frames */
+                skip_bits(gb, 1);                       /* discontinuity_flag */
+                cnt_dropped_flag = get_bits(gb, 1);      /* cnt_dropped_flag */
+                if (cnt_dropped_flag && counting_type > 1 && counting_type < 7)
+                    tc->dropframe = 1;
+                tc->frame = get_bits(gb, 8);         /* n_frames */
                 if (full_timestamp_flag) {
-                    skip_bits(gb, 6);             /* seconds_value 0..59 */
-                    skip_bits(gb, 6);             /* minutes_value 0..59 */
-                    skip_bits(gb, 5);             /* hours_value 0..23 */
+                    tc->full = 1;
+                    tc->seconds = get_bits(gb, 6); /* seconds_value 0..59 */
+                    tc->minutes = get_bits(gb, 6); /* minutes_value 0..59 */
+                    tc->hours = get_bits(gb, 5);   /* hours_value 0..23 */
                 } else {
-                    if (get_bits(gb, 1)) {        /* seconds_flag */
-                        skip_bits(gb, 6);         /* seconds_value range 0..59 */
-                        if (get_bits(gb, 1)) {    /* minutes_flag */
-                            skip_bits(gb, 6);     /* minutes_value 0..59 */
-                            if (get_bits(gb, 1))  /* hours_flag */
-                                skip_bits(gb, 5); /* hours_value 0..23 */
+                    tc->seconds = tc->minutes = tc->hours = tc->full = 0;
+                    if (get_bits(gb, 1)) {             /* seconds_flag */
+                        tc->seconds = get_bits(gb, 6);
+                        if (get_bits(gb, 1)) {         /* minutes_flag */
+                            tc->minutes = get_bits(gb, 6);
+                            if (get_bits(gb, 1))       /* hours_flag */
+                                tc->hours = get_bits(gb, 5);
                         }
                     }
                 }
+
                 if (sps->time_offset_length > 0)
                     skip_bits(gb,
                               sps->time_offset_length); /* time_offset */
diff --git a/libavcodec/h264_sei.h b/libavcodec/h264_sei.h
index 5b7c8ef9d8aa8..a75c3aa1753e9 100644
--- a/libavcodec/h264_sei.h
+++ b/libavcodec/h264_sei.h
@@ -67,6 +67,17 @@ typedef enum {
     H264_SEI_FPA_TYPE_2D                  = 6,
 } H264_SEI_FpaType;
 
+typedef struct H264SEITimeCode {
+    /* When not continuously receiving full timecodes, we have to reference
+       the previous timecode received */
+    int full;      /* 1 when full_timestamp_flag was set for this timecode, 0 otherwise */
+    int frame;     /* n_frames value (8 bits) */
+    int seconds;   /* seconds_value, 0 when not coded */
+    int minutes;   /* minutes_value, 0 when not coded */
+    int hours;     /* hours_value, 0 when not coded */
+    int dropframe; /* set to 1 when cnt_dropped_flag is set and counting_type is 2..6 */
+} H264SEITimeCode;
+
 typedef struct H264SEIPictureTiming {
     int present;
     H264_SEI_PicStructType pic_struct;
@@ -87,6 +98,16 @@ typedef struct H264SEIPictureTiming {
      * cpb_removal_delay in picture timing SEI message, see H.264 C.1.2
      */
     int cpb_removal_delay;
+
+    /**
+     * Maximum three timecodes in a pic_timing SEI.
+     */
+    H264SEITimeCode timecode[3];
+
+    /**
+     * Number of timecode in use
+     */
+    int timecode_cnt;
 } H264SEIPictureTiming;
 
 typedef struct H264SEIAFD {
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index d09cee4b13fbb..1c9a270fb6cb2 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -678,7 +678,7 @@ static void implicit_weight_table(const H264Context *h, H264SliceContext *sl, in
             cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1];
         }
         if (sl->ref_count[0] == 1 && sl->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
-            sl->ref_list[0][0].poc + (int64_t)sl->ref_list[1][0].poc == 2 * cur_poc) {
+            sl->ref_list[0][0].poc + (int64_t)sl->ref_list[1][0].poc == 2LL * cur_poc) {
             sl->pwt.use_weight        = 0;
             sl->pwt.use_weight_chroma = 0;
             return;
@@ -1287,6 +1287,51 @@ static int h264_export_frame_props(H264Context *h)
         h->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
     }
 
+    if (h->sei.picture_timing.timecode_cnt > 0) {
+        /* Side data layout: tc_sd[0] = timecode count, tc_sd[1..] = one packed word per timecode. */
+        uint32_t *tc_sd;
+
+        AVFrameSideData *tcside = av_frame_new_side_data(cur->f,
+                                                         AV_FRAME_DATA_S12M_TIMECODE,
+                                                         sizeof(uint32_t)*4);
+        if (!tcside)
+            return AVERROR(ENOMEM);
+
+        tc_sd = (uint32_t*)tcside->data;
+        tc_sd[0] = h->sei.picture_timing.timecode_cnt;
+
+        for (int i = 0; i < tc_sd[0]; i++) {
+            uint32_t frames;
+            uint32_t tc = 0; /* rebuilt per timecode so bits never leak from the previous entry */
+            /* For SMPTE 12-M timecodes, frame count is a special case if > 30 FPS.
+               See SMPTE ST 12-1:2014 Sec 12.1 for more info. */
+            if (av_cmp_q(h->avctx->framerate, (AVRational) {30, 1}) == 1) {
+                frames = h->sei.picture_timing.timecode[i].frame / 2;
+                if (h->sei.picture_timing.timecode[i].frame % 2 == 1) {
+                    if (av_cmp_q(h->avctx->framerate, (AVRational) {50, 1}) == 0)
+                        tc |= (1 << 7);
+                    else
+                        tc |= (1 << 23);
+                }
+            } else {
+                frames = h->sei.picture_timing.timecode[i].frame;
+            }
+
+            tc |= h->sei.picture_timing.timecode[i].dropframe << 30;
+            tc |= (frames / 10) << 28;
+            tc |= (frames % 10) << 24;
+            tc |= (h->sei.picture_timing.timecode[i].seconds / 10) << 20;
+            tc |= (h->sei.picture_timing.timecode[i].seconds % 10) << 16;
+            tc |= (h->sei.picture_timing.timecode[i].minutes / 10) << 12;
+            tc |= (h->sei.picture_timing.timecode[i].minutes % 10) << 8;
+            tc |= (h->sei.picture_timing.timecode[i].hours / 10) << 4;
+            tc |= (h->sei.picture_timing.timecode[i].hours % 10);
+
+            tc_sd[i + 1] = tc;
+        }
+        h->sei.picture_timing.timecode_cnt = 0; /* consumed; the next picture timing SEI refills it */
+    }
+
     if (h->sei.alternative_transfer.present &&
         av_color_transfer_name(h->sei.alternative_transfer.preferred_transfer_characteristics) &&
         h->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 7b4c5c76eaab7..837c3b75381c9 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -622,8 +622,8 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
             h->is_avc = 1;
     }
 
-    ret = ff_h2645_packet_split(&h->pkt, buf, buf_size, avctx, h->is_avc,
-                                h->nal_length_size, avctx->codec_id, avctx->flags2 & AV_CODEC_FLAG2_FAST);
+    ret = ff_h2645_packet_split(&h->pkt, buf, buf_size, avctx, h->is_avc, h->nal_length_size,
+                                avctx->codec_id, avctx->flags2 & AV_CODEC_FLAG2_FAST, 0);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR,
                "Error splitting the input into NAL units.\n");
@@ -657,11 +657,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
                 goto end;
             }
             if(!idr_cleared) {
-                if (h->current_slice && (avctx->active_thread_type & FF_THREAD_SLICE)) {
-                    av_log(h, AV_LOG_ERROR, "invalid mixed IDR / non IDR frames cannot be decoded in slice multithreading mode\n");
-                    ret = AVERROR_INVALIDDATA;
-                    goto end;
-                }
                 idr(h); // FIXME ensure we don't lose some frames if there is reordering
             }
             idr_cleared = 1;
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index bcd76abcc1657..cbea3173c6a30 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -45,32 +45,32 @@ typedef struct H264DSPContext {
     h264_biweight_func biweight_h264_pixels_tab[4];
 
     /* loop filter */
-    void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride,
+    void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
                                     int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride,
+    void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
                                     int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride,
+    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
                                           int alpha, int beta, int8_t *tc0);
     /* v/h_loop_filter_luma_intra: align 16 */
-    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride,
+    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
                                           int alpha, int beta);
-    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride,
+    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride,
                                           int alpha, int beta);
     void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
-                                                int stride, int alpha, int beta);
-    void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride,
+                                                ptrdiff_t stride, int alpha, int beta);
+    void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride,
                                       int alpha, int beta, int8_t *tc0);
-    void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride,
+    void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride,
                                       int alpha, int beta, int8_t *tc0);
     void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
-                                            int stride, int alpha, int beta,
+                                            ptrdiff_t stride, int alpha, int beta,
                                             int8_t *tc0);
     void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
-                                            int stride, int alpha, int beta);
+                                            ptrdiff_t stride, int alpha, int beta);
     void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
-                                            int stride, int alpha, int beta);
+                                            ptrdiff_t stride, int alpha, int beta);
     void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
-                                                  int stride, int alpha, int beta);
+                                                  ptrdiff_t stride, int alpha, int beta);
     // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
     void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
                                       int8_t ref[2][40], int16_t mv[2][40][2],
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 0288cc761e10e..fe23a2cff1f1a 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -101,7 +101,7 @@ H264_WEIGHT(2)
 #undef op_scale2
 #undef H264_WEIGHT
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
     pixel *pix = (pixel*)p_pix;
     int i, d;
@@ -149,20 +149,20 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_
         }
     }
 }
-static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta)
 {
     pixel *pix = (pixel*)p_pix;
     int d;
@@ -215,20 +215,20 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8
         pix += ystride;
     }
 }
-static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
     pixel *pix = (pixel*)p_pix;
     int i, d;
@@ -261,28 +261,28 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *
         }
     }
 }
-static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, ptrdiff_t xstride, ptrdiff_t ystride, int inner_iters, int alpha, int beta)
 {
     pixel *pix = (pixel*)p_pix;
     int d;
@@ -306,23 +306,23 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uin
         pix += ystride;
     }
 }
-static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta);
 }
-static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta)
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
 {
     FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
diff --git a/libavcodec/h265_metadata_bsf.c b/libavcodec/h265_metadata_bsf.c
index 26eb2d05d071b..0683cc2f9d520 100644
--- a/libavcodec/h265_metadata_bsf.c
+++ b/libavcodec/h265_metadata_bsf.c
@@ -322,7 +322,7 @@ static int h265_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, au);
+    ff_cbs_fragment_reset(ctx->cbc, au);
 
     if (err < 0)
         av_packet_unref(out);
@@ -370,13 +370,15 @@ static int h265_metadata_init(AVBSFContext *bsf)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, au);
+    ff_cbs_fragment_reset(ctx->cbc, au);
     return err;
 }
 
 static void h265_metadata_close(AVBSFContext *bsf)
 {
     H265MetadataContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->cbc, &ctx->access_unit);
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/hcom.c b/libavcodec/hcom.c
new file mode 100644
index 0000000000000..bce9e80aa52b3
--- /dev/null
+++ b/libavcodec/hcom.c
@@ -0,0 +1,143 @@
+/*
+ * HCOM audio decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "internal.h"
+
+typedef struct HEntry {
+    int16_t l, r; /* l < 0: leaf, r is its datum; else next index for bit 0 (l) / bit 1 (r) */
+} HEntry;
+
+typedef struct HCOMContext {
+    AVCodecContext *avctx;
+
+    uint8_t first_sample;  /* last byte of the extradata */
+    uint8_t sample;        /* last output sample; base for the next delta */
+    int dict_entries;      /* number of elements in dict */
+    int dict_entry;        /* current tree position, kept across packets */
+    int delta_compression; /* non-zero: leaf data are added to sample */
+
+    HEntry *dict;          /* Huffman tree read from the extradata */
+} HCOMContext;
+
+/* Load the Huffman dictionary from extradata, rejecting trees that would make
+ * hcom_decode() index outside of it; the table is freed again on failure. */
+static av_cold int hcom_init(AVCodecContext *avctx)
+{
+    HCOMContext *s = avctx->priv_data;
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (avctx->extradata_size <= 7)
+        return AVERROR_INVALIDDATA;
+    s->dict_entries = AV_RB16(avctx->extradata);
+    if (!s->dict_entries || avctx->extradata_size < s->dict_entries * 4 + 7)
+        return AVERROR_INVALIDDATA;
+    s->delta_compression = AV_RB32(avctx->extradata + 2);
+    s->sample = s->first_sample = avctx->extradata[avctx->extradata_size - 1];
+    s->dict = av_calloc(s->dict_entries, sizeof(*s->dict));
+    if (!s->dict)
+        return AVERROR(ENOMEM);
+    for (int i = 0; i < s->dict_entries; i++) {
+        HEntry *e = &s->dict[i];
+        e->l = AV_RB16(avctx->extradata + 6 + 4 * i);
+        e->r = AV_RB16(avctx->extradata + 6 + 4 * i + 2);
+        /* l < 0 marks a leaf; the root (i == 0) has to be an inner node */
+        if ((!i && e->l < 0) || (e->l >= 0 && (e->l >= s->dict_entries ||
+             e->r < 0 || e->r >= s->dict_entries))) {
+            av_freep(&s->dict);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    avctx->sample_fmt = AV_SAMPLE_FMT_U8;
+    s->dict_entry = 0;
+    return 0;
+}
+
+/*
+ * Decode one packet: walk the Huffman tree one bit at a time and emit an
+ * unsigned 8-bit sample each time a leaf (l < 0) is reached.
+ */
+static int hcom_decode(AVCodecContext *avctx, void *data,
+                       int *got_frame, AVPacket *pkt)
+{
+    HCOMContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    GetBitContext gb;
+    int ret, n = 0;
+
+    if (pkt->size > INT16_MAX)
+        return AVERROR_INVALIDDATA;
+
+    /* upper bound: at most one sample per input bit */
+    frame->nb_samples = pkt->size * 8;
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+    ret = init_get_bits8(&gb, pkt->data, pkt->size);
+    if (ret < 0)
+        return ret;
+
+    while (get_bits_left(&gb) > 0) {
+        const HEntry *node = &s->dict[s->dict_entry];
+
+        s->dict_entry = get_bits1(&gb) ? node->r : node->l;
+        node = &s->dict[s->dict_entry];
+        if (node->l >= 0)
+            continue;
+
+        /* leaf: r holds the sample value, or the delta to the previous one */
+        if (!s->delta_compression)
+            s->sample = 0;
+        s->sample = (s->sample + node->r) & 0xFF;
+        frame->data[0][n++] = s->sample;
+        s->dict_entry = 0;
+    }
+
+    frame->nb_samples = n;
+    *got_frame = 1;
+
+    return pkt->size;
+}
+
+/* Release the Huffman dictionary allocated by hcom_init(). */
+static av_cold int hcom_close(AVCodecContext *avctx)
+{
+    HCOMContext *ctx = avctx->priv_data;
+
+    av_freep(&ctx->dict);
+    return 0;
+}
+
+AVCodec ff_hcom_decoder = { /* codec descriptor wiring up the HCOM callbacks */
+    .name           = "hcom",
+    .long_name      = NULL_IF_CONFIG_SMALL("HCOM Audio"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_HCOM,
+    .priv_data_size = sizeof(HCOMContext),
+    .init           = hcom_init,   /* parses extradata into the dictionary */
+    .close          = hcom_close,  /* frees the dictionary */
+    .decode         = hcom_decode, /* one packet in, one U8 frame out */
+    .capabilities   = AV_CODEC_CAP_DR1,
+};
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 670168eb52b3b..56b5541d90a56 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -143,6 +143,9 @@ enum {
     // A.4.1: table A.6 allows at most 20 tile columns for any level.
     HEVC_MAX_TILE_COLUMNS = 20,
 
+    // A.4.2: table A.6 allows at most 600 slice segments for any level.
+    HEVC_MAX_SLICE_SEGMENTS = 600,
+
     // 7.4.7.1: in the worst case (tiles_enabled_flag and
     // entropy_coding_sync_enabled_flag are both set), entry points can be
     // placed at the beginning of every Ctb row in every tile, giving an
diff --git a/libavcodec/hevc_parse.c b/libavcodec/hevc_parse.c
index b1b27eef09cd2..dddb293df642e 100644
--- a/libavcodec/hevc_parse.c
+++ b/libavcodec/hevc_parse.c
@@ -29,7 +29,8 @@ static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCParamSets
     int ret = 0;
     H2645Packet pkt = { 0 };
 
-    ret = ff_h2645_packet_split(&pkt, buf, buf_size, logctx, is_nalff, nal_length_size, AV_CODEC_ID_HEVC, 1);
+    ret = ff_h2645_packet_split(&pkt, buf, buf_size, logctx, is_nalff,
+                                nal_length_size, AV_CODEC_ID_HEVC, 1, 0);
     if (ret < 0) {
         goto done;
     }
diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
index 369d1338d0ba8..b444b99955070 100644
--- a/libavcodec/hevc_parser.c
+++ b/libavcodec/hevc_parser.c
@@ -194,7 +194,7 @@ static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
     ff_hevc_reset_sei(sei);
 
     ret = ff_h2645_packet_split(&ctx->pkt, buf, buf_size, avctx, ctx->is_avc,
-                                ctx->nal_length_size, AV_CODEC_ID_HEVC, 1);
+                                ctx->nal_length_size, AV_CODEC_ID_HEVC, 1, 0);
     if (ret < 0)
         return ret;
 
@@ -239,7 +239,7 @@ static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
         }
     }
     /* didn't find a picture! */
-    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit\n");
+    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
     return -1;
 }
 
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index ea984af0a18cc..80df417e4f798 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1102,20 +1102,17 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         decode_vui(gb, avctx, apply_defdispwin, sps);
 
     if (get_bits1(gb)) { // sps_extension_flag
-        int sps_range_extension_flag = get_bits1(gb);
+        sps->sps_range_extension_flag = get_bits1(gb);
         skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
-        if (sps_range_extension_flag) {
-            int extended_precision_processing_flag;
-            int cabac_bypass_alignment_enabled_flag;
-
+        if (sps->sps_range_extension_flag) {
             sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
             sps->transform_skip_context_enabled_flag  = get_bits1(gb);
             sps->implicit_rdpcm_enabled_flag = get_bits1(gb);
 
             sps->explicit_rdpcm_enabled_flag = get_bits1(gb);
 
-            extended_precision_processing_flag = get_bits1(gb);
-            if (extended_precision_processing_flag)
+            sps->extended_precision_processing_flag = get_bits1(gb);
+            if (sps->extended_precision_processing_flag)
                 av_log(avctx, AV_LOG_WARNING,
                    "extended_precision_processing_flag not yet implemented\n");
 
@@ -1127,8 +1124,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
 
             sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb);
 
-            cabac_bypass_alignment_enabled_flag  = get_bits1(gb);
-            if (cabac_bypass_alignment_enabled_flag)
+            sps->cabac_bypass_alignment_enabled_flag  = get_bits1(gb);
+            if (sps->cabac_bypass_alignment_enabled_flag)
                 av_log(avctx, AV_LOG_WARNING,
                    "cabac_bypass_alignment_enabled_flag not yet implemented\n");
         }
@@ -1686,9 +1683,9 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
     pps->slice_header_extension_present_flag = get_bits1(gb);
 
     if (get_bits1(gb)) { // pps_extension_present_flag
-        int pps_range_extensions_flag = get_bits1(gb);
+        pps->pps_range_extensions_flag = get_bits1(gb);
         skip_bits(gb, 7); // pps_extension_7bits
-        if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) {
+        if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
             if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
                 goto err;
         }
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 1fbda199e3fe8..bbaa9205ef815 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -284,13 +284,16 @@ typedef struct HEVCSPS {
     int max_transform_hierarchy_depth_inter;
     int max_transform_hierarchy_depth_intra;
 
+    int sps_range_extension_flag;
     int transform_skip_rotation_enabled_flag;
     int transform_skip_context_enabled_flag;
     int implicit_rdpcm_enabled_flag;
     int explicit_rdpcm_enabled_flag;
+    int extended_precision_processing_flag;
     int intra_smoothing_disabled_flag;
     int high_precision_offsets_enabled_flag;
     int persistent_rice_adaptation_enabled_flag;
+    int cabac_bypass_alignment_enabled_flag;
 
     ///< coded frame dimension in various units
     int width;
@@ -365,6 +368,7 @@ typedef struct HEVCPPS {
     int num_extra_slice_header_bits;
     uint8_t slice_header_extension_present_flag;
     uint8_t log2_max_transform_skip_block_size;
+    uint8_t pps_range_extensions_flag;
     uint8_t cross_component_prediction_enabled_flag;
     uint8_t chroma_qp_offset_list_enabled_flag;
     uint8_t diff_cu_chroma_qp_offset_depth;
diff --git a/libavcodec/hevc_sei.h b/libavcodec/hevc_sei.h
index e92da25bbf76c..2fec00ace07bb 100644
--- a/libavcodec/hevc_sei.h
+++ b/libavcodec/hevc_sei.h
@@ -52,6 +52,7 @@ typedef enum {
     HEVC_SEI_TYPE_DECODED_PICTURE_HASH                 = 132,
     HEVC_SEI_TYPE_SCALABLE_NESTING                     = 133,
     HEVC_SEI_TYPE_REGION_REFRESH_INFO                  = 134,
+    HEVC_SEI_TYPE_TIME_CODE                            = 136,
     HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO               = 137,
     HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO             = 144,
     HEVC_SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147,
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index a3b5c8cb71e5d..967f8f1def560 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
         break;
     case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
@@ -2942,6 +2945,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
                     s->max_ra = INT_MIN;
             }
 
+            s->overlap ++;
             ret = hevc_frame_start(s);
             if (ret < 0)
                 return ret;
@@ -3020,11 +3024,12 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
     s->ref = NULL;
     s->last_eos = s->eos;
     s->eos = 0;
+    s->overlap = 0;
 
     /* split the input packet into NAL units, so we know the upper bound on the
      * number of slices in the frame */
     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
-                                s->nal_length_size, s->avctx->codec_id, 1);
+                                s->nal_length_size, s->avctx->codec_id, 1, 0);
     if (ret < 0) {
         av_log(s->avctx, AV_LOG_ERROR,
                "Error splitting the input into NAL units.\n");
@@ -3054,6 +3059,8 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
             continue;
 
         ret = decode_nal_unit(s, nal);
+        if (ret >= 0 && s->overlap > 2)
+            ret = AVERROR_INVALIDDATA;
         if (ret < 0) {
             av_log(s->avctx, AV_LOG_WARNING,
                    "Error parsing NAL unit #%d.\n", i);
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index f0f588f2b826a..b45969b7e2337 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -430,6 +430,7 @@ typedef struct HEVCContext {
     int max_ra;
     int bs_width;
     int bs_height;
+    int overlap;
 
     int is_decoded;
     int no_rasl_output_flag;
@@ -559,8 +560,6 @@ static av_always_inline int ff_hevc_nal_is_nonref(enum HEVCNALUnitType type)
     case HEVC_NAL_VCL_N10:
     case HEVC_NAL_VCL_N12:
     case HEVC_NAL_VCL_N14:
-    case HEVC_NAL_BLA_N_LP:
-    case HEVC_NAL_IDR_N_LP:
         return 1;
         break;
     default: break;
diff --git a/libavcodec/htmlsubtitles.c b/libavcodec/htmlsubtitles.c
index fb9f90042228d..d9221ba16b0fb 100644
--- a/libavcodec/htmlsubtitles.c
+++ b/libavcodec/htmlsubtitles.c
@@ -24,6 +24,7 @@
 #include "libavutil/common.h"
 #include "libavutil/parseutils.h"
 #include "htmlsubtitles.h"
+#include <ctype.h>
 
 static int html_color_parse(void *log_ctx, const char *str)
 {
@@ -44,14 +45,32 @@ static void rstrip_spaces_buf(AVBPrint *buf)
             buf->str[--buf->len] = 0;
 }
 
+/*
+ * Fast code for scanning text enclosed in braces. Stands in for this
+ * sscanf call:
+ *
+ * sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0
+ *
+ * Returns 1 if in starts with "{\an", one decimal digit and "}", else 0.
+ */
+static int scanbraces(const char* in) {
+    if (strncmp(in, "{\\an", 4) != 0) {
+        return 0;
+    }
+    /* cast: isdigit() is undefined for negative char values (bytes >= 0x80) */
+    if (!isdigit((unsigned char)in[4])) {
+        return 0;
+    }
+    return in[5] == '}';
+}
+
 /* skip all {\xxx} substrings except for {\an%d}
    and all microdvd like styles such as {Y:xxx} */
 static void handle_open_brace(AVBPrint *dst, const char **inp, int *an, int *closing_brace_missing)
 {
-    int len = 0;
     const char *in = *inp;
 
-    *an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0;
+    *an += scanbraces(in);
 
     if (!*closing_brace_missing) {
         if (   (*an != 1 && in[1] == '\\')
@@ -74,6 +93,34 @@ struct font_tag {
     uint32_t color;
 };
 
+/*
+ * Fast scanner for the remainder of a tag, standing in for
+ *
+ * sscanf(in, "%127[^<>]>%n", buffer, lenp) >= 1
+ *
+ * Copies up to 127 characters preceding '>' into buffer (NUL-terminated) and
+ * stores the consumed length, including '>', in *lenp. Returns 1 on success,
+ * 0 if NUL, '<' or 128 characters are met first. Unlike the sscanf() pattern,
+ * an empty tag body (">" right away) is accepted.
+ */
+static int scantag(const char* in, char* buffer, int* lenp) {
+    int pos = 0;
+
+    while (pos < 128) {
+        const char ch = in[pos];
+
+        if (ch == '\0' || ch == '<')
+            return 0;
+        if (ch == '>') {
+            buffer[pos] = '\0';
+            *lenp = pos + 1;
+            return 1;
+        }
+        buffer[pos++] = ch;
+    }
+    return 0;
+}
+
 /*
  * The general politic of the convert is to mask unsupported tags or formatting
  * errors (but still alert the user/subtitles writer with an error/warning)
@@ -155,7 +202,7 @@ int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
 
             len = 0;
 
-            if (sscanf(in+tag_close+1, "%127[^<>]>%n", buffer, &len) >= 1 && len > 0) {
+            if (scantag(in+tag_close+1, buffer, &len) && len > 0) {
                 const int skip = len + tag_close;
                 const char *tagname = buffer;
                 while (*tagname == ' ') {
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 66357bfb40353..27f650d7bf963 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -905,54 +905,23 @@ static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
         s->hdsp.add_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src, (const uint16_t *)diff, s->n-1, w, left, left_top);
     }
 }
-static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
-                        AVPacket *avpkt)
+
+static int decode_slice(AVCodecContext *avctx, AVFrame *p, int height,
+                        int buf_size, int y_offset, int table_size)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
     HYuvContext *s = avctx->priv_data;
+    int fake_ystride, fake_ustride, fake_vstride;
     const int width  = s->width;
     const int width2 = s->width >> 1;
-    const int height = s->height;
-    int fake_ystride, fake_ustride, fake_vstride;
-    ThreadFrame frame = { .f = data };
-    AVFrame *const p = data;
-    int table_size = 0, ret;
-
-    if (buf_size < (width * height + 7)/8)
-        return AVERROR_INVALIDDATA;
-
-    av_fast_padded_malloc(&s->bitstream_buffer,
-                   &s->bitstream_buffer_size,
-                   buf_size);
-    if (!s->bitstream_buffer)
-        return AVERROR(ENOMEM);
-
-    s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer,
-                      (const uint32_t *) buf, buf_size / 4);
-
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
-        return ret;
-
-    if (s->context) {
-        table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
-        if (table_size < 0)
-            return table_size;
-    }
-
-    if ((unsigned) (buf_size - table_size) >= INT_MAX / 8)
-        return AVERROR_INVALIDDATA;
+    int ret;
 
-    if ((ret = init_get_bits(&s->gb, s->bitstream_buffer + table_size,
-                             (buf_size - table_size) * 8)) < 0)
+    if ((ret = init_get_bits8(&s->gb, s->bitstream_buffer + table_size, buf_size - table_size)) < 0)
         return ret;
 
     fake_ystride = s->interlaced ? p->linesize[0] * 2 : p->linesize[0];
     fake_ustride = s->interlaced ? p->linesize[1] * 2 : p->linesize[1];
     fake_vstride = s->interlaced ? p->linesize[2] * 2 : p->linesize[2];
 
-    s->last_slice_end = 0;
-
     if (s->version > 2) {
         int plane;
         for(plane = 0; plane < 1 + 2*s->chroma + s->alpha; plane++) {
@@ -1034,31 +1003,31 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR_PATCHWELCOME;
         } else {
             leftv         =
-            p->data[2][0] = get_bits(&s->gb, 8);
+            p->data[2][0 + y_offset * p->linesize[2]] = get_bits(&s->gb, 8);
             lefty         =
-            p->data[0][1] = get_bits(&s->gb, 8);
+            p->data[0][1 + y_offset * p->linesize[0]] = get_bits(&s->gb, 8);
             leftu         =
-            p->data[1][0] = get_bits(&s->gb, 8);
-            p->data[0][0] = get_bits(&s->gb, 8);
+            p->data[1][0 + y_offset * p->linesize[1]] = get_bits(&s->gb, 8);
+            p->data[0][0 + y_offset * p->linesize[0]] = get_bits(&s->gb, 8);
 
             switch (s->predictor) {
             case LEFT:
             case PLANE:
                 decode_422_bitstream(s, width - 2);
-                lefty = s->llviddsp.add_left_pred(p->data[0] + 2, s->temp[0],
+                lefty = s->llviddsp.add_left_pred(p->data[0] + p->linesize[0] * y_offset + 2, s->temp[0],
                                                    width - 2, lefty);
                 if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
-                    leftu = s->llviddsp.add_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu);
-                    leftv = s->llviddsp.add_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv);
+                    leftu = s->llviddsp.add_left_pred(p->data[1] + p->linesize[1] * y_offset + 1, s->temp[1], width2 - 1, leftu);
+                    leftv = s->llviddsp.add_left_pred(p->data[2] + p->linesize[2] * y_offset + 1, s->temp[2], width2 - 1, leftv);
                 }
 
-                for (cy = y = 1; y < s->height; y++, cy++) {
+                for (cy = y = 1; y < height; y++, cy++) {
                     uint8_t *ydst, *udst, *vdst;
 
                     if (s->bitstream_bpp == 12) {
                         decode_gray_bitstream(s, width);
 
-                        ydst = p->data[0] + p->linesize[0] * y;
+                        ydst = p->data[0] + p->linesize[0] * (y + y_offset);
 
                         lefty = s->llviddsp.add_left_pred(ydst, s->temp[0],
                                                            width, lefty);
@@ -1067,15 +1036,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                                 s->llviddsp.add_bytes(ydst, ydst - fake_ystride, width);
                         }
                         y++;
-                        if (y >= s->height)
+                        if (y >= height)
                             break;
                     }
 
                     draw_slice(s, p, y);
 
-                    ydst = p->data[0] + p->linesize[0] * y;
-                    udst = p->data[1] + p->linesize[1] * cy;
-                    vdst = p->data[2] + p->linesize[2] * cy;
+                    ydst = p->data[0] + p->linesize[0] * (y  + y_offset);
+                    udst = p->data[1] + p->linesize[1] * (cy + y_offset);
+                    vdst = p->data[2] + p->linesize[2] * (cy + y_offset);
 
                     decode_422_bitstream(s, width);
                     lefty = s->llviddsp.add_left_pred(ydst, s->temp[0],
@@ -1185,7 +1154,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     } else {
         int y;
         uint8_t left[4];
-        const int last_line = (height - 1) * p->linesize[0];
+        const int last_line = (y_offset + height - 1) * p->linesize[0];
 
         if (s->bitstream_bpp == 32) {
             left[A] = p->data[0][last_line + A] = get_bits(&s->gb, 8);
@@ -1208,17 +1177,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + last_line + 4,
                                                  s->temp[0], width - 1, left);
 
-                for (y = s->height - 2; y >= 0; y--) { // Yes it is stored upside down.
+                for (y = height - 2; y >= 0; y--) { // Yes it is stored upside down.
                     decode_bgr_bitstream(s, width);
 
-                    s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + p->linesize[0] * y,
+                    s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + p->linesize[0] * (y + y_offset),
                                                      s->temp[0], width, left);
                     if (s->predictor == PLANE) {
                         if (s->bitstream_bpp != 32)
                             left[A] = 0;
-                        if (y < s->height - 1 - s->interlaced) {
-                            s->llviddsp.add_bytes(p->data[0] + p->linesize[0] * y,
-                                              p->data[0] + p->linesize[0] * y +
+                        if (y < height - 1 - s->interlaced) {
+                            s->llviddsp.add_bytes(p->data[0] + p->linesize[0] * (y + y_offset),
+                                              p->data[0] + p->linesize[0] * (y + y_offset) +
                                               fake_ystride, 4 * width);
                         }
                     }
@@ -1236,7 +1205,88 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR_PATCHWELCOME;
         }
     }
-    emms_c();
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    HYuvContext *s = avctx->priv_data;
+    const int width  = s->width;
+    const int height = s->height;
+    ThreadFrame frame = { .f = data };
+    AVFrame *const p = data;
+    int slice, table_size = 0, ret, nb_slices;
+    unsigned slices_info_offset;
+    int slice_height;
+
+    if (buf_size < (width * height + 7)/8)
+        return AVERROR_INVALIDDATA;
+
+    av_fast_padded_malloc(&s->bitstream_buffer,
+                   &s->bitstream_buffer_size,
+                   buf_size);
+    if (!s->bitstream_buffer)
+        return AVERROR(ENOMEM);
+
+    s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer,
+                      (const uint32_t *) buf, buf_size / 4);
+
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+        return ret;
+
+    if (s->context) {
+        table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
+        if (table_size < 0)
+            return table_size;
+    }
+
+    if ((unsigned) (buf_size - table_size) >= INT_MAX / 8)
+        return AVERROR_INVALIDDATA;
+
+    s->last_slice_end = 0;
+
+    if (avctx->codec_id == AV_CODEC_ID_HYMT &&
+        (buf_size > 32 && AV_RL32(avpkt->data + buf_size - 16) == 0)) {
+        slices_info_offset = AV_RL32(avpkt->data + buf_size - 4);
+        slice_height = AV_RL32(avpkt->data + buf_size - 8);
+        nb_slices = AV_RL32(avpkt->data + buf_size - 12);
+        if (nb_slices * 8LL + slices_info_offset > buf_size - 16 ||
+            slice_height <= 0 || nb_slices * (uint64_t)slice_height > height)
+            return AVERROR_INVALIDDATA;
+    } else {
+        slice_height = height;
+        nb_slices = 1;
+    }
+
+    for (slice = 0; slice < nb_slices; slice++) {
+        int y_offset, slice_offset, slice_size;
+
+        if (nb_slices > 1) {
+            slice_offset = AV_RL32(avpkt->data + slices_info_offset + slice * 8);
+            slice_size = AV_RL32(avpkt->data + slices_info_offset + slice * 8 + 4);
+
+            if (slice_offset < 0 || slice_size <= 0 || (slice_offset&3) ||
+                slice_offset + (int64_t)slice_size > buf_size)
+                return AVERROR_INVALIDDATA;
+
+            y_offset = height - (slice + 1) * slice_height;
+            s->bdsp.bswap_buf((uint32_t *)s->bitstream_buffer,
+                              (const uint32_t *)(buf + slice_offset), slice_size / 4);
+        } else {
+            y_offset = 0;
+            slice_offset = 0;
+            slice_size = buf_size;
+        }
+
+        ret = decode_slice(avctx, p, slice_height, slice_size, y_offset, table_size);
+        emms_c();
+        if (ret < 0)
+            return ret;
+    }
 
     *got_frame = 1;
 
@@ -1272,3 +1322,19 @@ AVCodec ff_ffvhuff_decoder = {
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
 };
 #endif /* CONFIG_FFVHUFF_DECODER */
+
+#if CONFIG_HYMT_DECODER
+AVCodec ff_hymt_decoder = { /* codec table entry for the HuffYUV MT ("hymt") video decoder */
+    .name             = "hymt",
+    .long_name        = NULL_IF_CONFIG_SMALL("HuffYUV MT"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_HYMT, /* decode_frame() tests for this id before reading a slice table */
+    .priv_data_size   = sizeof(HYuvContext), /* private decoder state is a HYuvContext */
+    .init             = decode_init,
+    .close            = decode_end,
+    .decode           = decode_frame, /* called per packet; loops over the slices it finds */
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                        AV_CODEC_CAP_FRAME_THREADS,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
+};
+#endif /* CONFIG_HYMT_DECODER */
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 8be752844b029..3662c173ec49b 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -1118,7 +1118,7 @@ AVCodec ff_ffvhuff_encoder = {
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
         AV_PIX_FMT_GBRP,
-        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
         AV_PIX_FMT_GBRAP,
diff --git a/libavcodec/ilbcdec.c b/libavcodec/ilbcdec.c
index 8f234b98e1817..bba83a589630f 100644
--- a/libavcodec/ilbcdec.c
+++ b/libavcodec/ilbcdec.c
@@ -408,11 +408,11 @@ static void lsf2poly(int16_t *a, int16_t *lsf)
 
     a[0] = 4096;
     for (i = 5; i > 0; i--) {
-        tmp = f[0][6 - i] + (unsigned)f[1][6 - i];
-        a[6 - i] = (tmp + 4096) >> 13;
+        tmp = f[0][6 - i] + (unsigned)f[1][6 - i] + 4096;
+        a[6 - i] = tmp >> 13;
 
-        tmp = f[0][6 - i] - (unsigned)f[1][6 - i];
-        a[5 + i] = (tmp + 4096) >> 13;
+        tmp = f[0][6 - i] - (unsigned)f[1][6 - i] + 4096;
+        a[5 + i] = tmp >> 13;
     }
 }
 
@@ -745,7 +745,7 @@ static void construct_vector (
     for (j = 0; j < veclen; j++) {
         a32 = SPL_MUL_16_16(*gainPtr++, cbvec0[j]);
         a32 += SPL_MUL_16_16(*gainPtr++, cbvec1[j]);
-        a32 += SPL_MUL_16_16(*gainPtr, cbvec2[j]);
+        a32 += (unsigned)SPL_MUL_16_16(*gainPtr, cbvec2[j]);
         gainPtr -= 2;
         decvector[j] = (a32 + 8192) >> 14;
     }
@@ -1303,7 +1303,8 @@ static int xcorr_coeff(int16_t *target, int16_t *regressor,
         pos += step;
 
         /* Do a +/- to get the next energy */
-        energy += step * ((*rp_end * *rp_end - *rp_beg * *rp_beg) >> shifts);
+        energy += (unsigned)step * ((*rp_end * *rp_end - *rp_beg * *rp_beg) >> shifts);
+
         rp_beg += step;
         rp_end += step;
     }
@@ -1372,7 +1373,7 @@ static int ilbc_decode_frame(AVCodecContext *avctx, void *data,
 
     if (unpack_frame(s))
         mode = 0;
-    if (s->frame.start < 1)
+    if (s->frame.start < 1 || s->frame.start > 5)
         mode = 0;
 
     if (mode) {
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 7cd6db930b3e2..82a908160a4ea 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -104,6 +104,8 @@ typedef struct IMCContext {
 
     int8_t cyclTab[32], cyclTab2[32];
     float  weights1[31], weights2[31];
+
+    AVCodecContext *avctx;
 } IMCContext;
 
 static VLC huffman_vlc[4][4];
@@ -466,7 +468,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
 
     for (i = 0; i < BANDS - 1; i++) {
         if (chctx->flcoeffs5[i] <= 0) {
-            av_log(NULL, AV_LOG_ERROR, "flcoeffs5 %f invalid\n", chctx->flcoeffs5[i]);
+            av_log(q->avctx, AV_LOG_ERROR, "flcoeffs5 %f invalid\n", chctx->flcoeffs5[i]);
             return AVERROR_INVALIDDATA;
         }
         chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]);
@@ -1022,6 +1024,8 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data,
 
     LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / 2]);
 
+    q->avctx = avctx;
+
     if (buf_size < IMC_BLOCK_SIZE * avctx->channels) {
         av_log(avctx, AV_LOG_ERROR, "frame too small!\n");
         return AVERROR_INVALIDDATA;
diff --git a/libavcodec/imm4.c b/libavcodec/imm4.c
index a4e9b5d4d2a4c..1a4d0decd7f94 100644
--- a/libavcodec/imm4.c
+++ b/libavcodec/imm4.c
@@ -41,7 +41,6 @@ typedef struct IMM4Context {
     uint8_t *bitstream;
     int bitstream_size;
 
-    int changed_size;
     int factor;
     unsigned lo;
     unsigned hi;
@@ -139,7 +138,7 @@ static int get_cbphi(GetBitContext *gb, int x)
 }
 
 static int decode_block(AVCodecContext *avctx, GetBitContext *gb,
-                        int block, int factor, int flag, int offset)
+                        int block, int factor, int flag, int offset, int flag2)
 {
     IMM4Context *s = avctx->priv_data;
     const uint8_t *scantable = s->intra_scantable.permutated;
@@ -170,11 +169,19 @@ static int decode_block(AVCodecContext *avctx, GetBitContext *gb,
             break;
     }
 
+    if (s->hi == 2 && flag2 && block < 4) {
+        if (flag)
+            s->block[block][scantable[0]]  *= 2;
+        s->block[block][scantable[1]]  *= 2;
+        s->block[block][scantable[8]]  *= 2;
+        s->block[block][scantable[16]] *= 2;
+    }
+
     return 0;
 }
 
 static int decode_blocks(AVCodecContext *avctx, GetBitContext *gb,
-                         unsigned cbp, int flag, int offset)
+                         unsigned cbp, int flag, int offset, unsigned flag2)
 {
     IMM4Context *s = avctx->priv_data;
     const uint8_t *scantable = s->intra_scantable.permutated;
@@ -194,7 +201,7 @@ static int decode_blocks(AVCodecContext *avctx, GetBitContext *gb,
         }
 
         if (cbp & (1 << (5 - i))) {
-            ret = decode_block(avctx, gb, i, s->factor, flag, offset);
+            ret = decode_block(avctx, gb, i, s->factor, flag, offset, flag2);
             if (ret < 0)
                 return ret;
         }
@@ -213,11 +220,7 @@ static int decode_intra(AVCodecContext *avctx, GetBitContext *gb, AVFrame *frame
             return AVERROR_INVALIDDATA;
         s->factor = intra_cb[s->lo];
     } else {
-        if (s->hi == 1) {
-            s->factor = s->lo * 2;
-        } else {
-            s->factor = s->lo * 2;
-        }
+        s->factor = s->lo * 2;
     }
 
     if (s->hi) {
@@ -229,14 +232,14 @@ static int decode_intra(AVCodecContext *avctx, GetBitContext *gb, AVFrame *frame
 
     for (y = 0; y < avctx->height; y += 16) {
         for (x = 0; x < avctx->width; x += 16) {
-            unsigned cbphi, cbplo;
+            unsigned flag, cbphi, cbplo;
 
             cbplo = get_vlc2(gb, cbplo_tab.table, cbplo_tab.bits, 1) >> 4;
-            skip_bits1(gb);
+            flag = get_bits1(gb);
 
             cbphi = get_cbphi(gb, 1);
 
-            ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset);
+            ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset, flag);
             if (ret < 0)
                 return ret;
 
@@ -269,11 +272,7 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
             return AVERROR_INVALIDDATA;
         s->factor = inter_cb[s->lo];
     } else {
-        if (s->hi == 1) {
-            s->factor = s->lo * 2;
-        } else {
-            s->factor = s->lo * 2;
-        }
+        s->factor = s->lo * 2;
     }
 
     if (s->hi) {
@@ -286,7 +285,7 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
     for (y = 0; y < avctx->height; y += 16) {
         for (x = 0; x < avctx->width; x += 16) {
             int reverse, intra_block, value;
-            unsigned cbphi, cbplo;
+            unsigned cbphi, cbplo, flag2 = 0;
 
             if (get_bits1(gb)) {
                 copy_block16(frame->data[0] + y * frame->linesize[0] + x,
@@ -308,12 +307,12 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
             intra_block = value & 0x07;
             reverse = intra_block == 3;
             if (reverse)
-                skip_bits1(gb);
+                flag2 = get_bits1(gb);
 
             cbplo = value >> 4;
             cbphi = get_cbphi(gb, reverse);
             if (intra_block) {
-                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset);
+                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset, flag2);
                 if (ret < 0)
                     return ret;
 
@@ -330,8 +329,9 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
                 s->idsp.idct_put(frame->data[2] + (y >> 1) * frame->linesize[2] + (x >> 1),
                                  frame->linesize[2], s->block[5]);
             } else {
-                skip_bits(gb, 2);
-                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 1, offset);
+                flag2 = get_bits1(gb);
+                skip_bits1(gb);
+                ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 1, offset, flag2);
                 if (ret < 0)
                     return ret;
 
@@ -370,6 +370,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     IMM4Context *s = avctx->priv_data;
     GetBitContext *gb = &s->gb;
     AVFrame *frame = data;
+    int width, height;
     unsigned type;
     int ret, scaled;
 
@@ -391,9 +392,11 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->color_range = AVCOL_RANGE_JPEG;
 
+    width = avctx->width;
+    height = avctx->height;
+
     scaled = avpkt->data[8];
     if (scaled < 2) {
-        int width, height;
         int mode = avpkt->data[10];
 
         switch (mode) {
@@ -422,17 +425,8 @@ static int decode_frame(AVCodecContext *avctx, void *data,
             height = 576;
             break;
         }
-
-        if (s->changed_size == 1 &&
-            (avctx->width != width || avctx->height != height)) {
-            av_log(avctx, AV_LOG_ERROR, "Frame size change is unsupported.\n");
-            return AVERROR_INVALIDDATA;
-        }
-        avctx->width = width;
-        avctx->height = height;
     }
 
-    s->changed_size = 1;
     skip_bits_long(gb, 24 * 8);
     type = get_bits_long(gb, 32);
     s->hi = get_bits(gb, 16);
@@ -452,6 +446,19 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_PATCHWELCOME;
     }
 
+    if (avctx->width  != width ||
+        avctx->height != height) {
+        if (!frame->key_frame) {
+            av_log(avctx, AV_LOG_ERROR, "Frame size change is unsupported.\n");
+            return AVERROR_INVALIDDATA;
+        }
+        av_frame_unref(s->prev_frame);
+    }
+
+    ret = ff_set_dimensions(avctx, width, height);
+    if (ret < 0)
+        return ret;
+
     if ((ret = ff_get_buffer(avctx, frame, frame->key_frame ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
         return ret;
 
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 0c2133f0925b3..f2e6f00ace644 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -404,6 +404,18 @@ int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len,
  */
 int64_t ff_guess_coded_bitrate(AVCodecContext *avctx);
 
+/**
+ * Check if val is in the list of valid values. If not, return the default value.
+ *
+ * @param ctx                Context for the log msg
+ * @param val_name           Name of the checked value (val), for the log msg
+ * @param array_valid_values Array of valid ints, terminated by INT_MAX
+ * @param default_value      Value returned if val is not in the array
+ * @return                   val if it is in the array, default_value otherwise.
+ */
+int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
+                                const int * array_valid_values, int default_value);
+
 #if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avcodec)
 #    define av_export_avcodec __declspec(dllimport)
 #else
diff --git a/libavcodec/ivi.c b/libavcodec/ivi.c
index b23d4af27e4fd..71bf0e6e1c889 100644
--- a/libavcodec/ivi.c
+++ b/libavcodec/ivi.c
@@ -437,6 +437,14 @@ av_cold int ff_ivi_init_tiles(IVIPlaneDesc *planes,
 
         for (b = 0; b < planes[p].num_bands; b++) {
             band = &planes[p].bands[b];
+
+            if (band->tiles) {
+                int t;
+                for (t = 0; t < band->num_tiles; t++) {
+                    av_freep(&band->tiles[t].mbs);
+                }
+            }
+
             x_tiles = IVI_NUM_TILES(band->width, t_width);
             y_tiles = IVI_NUM_TILES(band->height, t_height);
             band->num_tiles = x_tiles * y_tiles;
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 96dab8e1766c1..a4291bc06b830 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1162,7 +1162,7 @@ static int jpeg2000_decode_packets_po_iteration(Jpeg2000DecoderContext *s, Jpeg2
             step_x = 32;
             step_y = 32;
 
-            if (RSpoc > FFMIN(codsty->nreslevels, REpoc))
+            if (RSpoc >= FFMIN(codsty->nreslevels, REpoc))
                 continue;
 
             for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
@@ -1597,7 +1597,7 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty,
                cblk->data + cblk->length - 2*(term_cnt < cblk->nb_terminations) - t1->mqc.bp);
     }
 
-    return 0;
+    return 1;
 }
 
 /* TODO: Verify dequantization for lossless case
@@ -1694,6 +1694,7 @@ static inline void tile_codeblocks(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp     = tile->comp + compno;
         Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+        int coded = 0;
 
         t1.stride = (1<<codsty->log2_cblk_width) + 2;
 
@@ -1723,11 +1724,12 @@ static inline void tile_codeblocks(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
                          cblkno++) {
                         int x, y;
                         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-                        decode_cblk(s, codsty, &t1, cblk,
+                        int ret = decode_cblk(s, codsty, &t1, cblk,
                                     cblk->coord[0][1] - cblk->coord[0][0],
                                     cblk->coord[1][1] - cblk->coord[1][0],
                                     bandpos);
-
+                        if (ret)
+                            coded = 1;
                         x = cblk->coord[0][0] - band->coord[0][0];
                         y = cblk->coord[1][0] - band->coord[1][0];
 
@@ -1743,7 +1745,9 @@ static inline void tile_codeblocks(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
         } /* end reslevel */
 
         /* inverse DWT */
-        ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+        if (coded)
+            ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+
     } /*end comp */
 }
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index ce1678a3d74ed..badf0f8cd0586 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -531,7 +531,7 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
     }
 
     for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c
index 3b15a526770ca..a1a0f571ad057 100644
--- a/libavcodec/jrevdct.c
+++ b/libavcodec/jrevdct.c
@@ -63,6 +63,7 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 
 #include "dct.h"
 #include "idctdsp.h"
@@ -234,7 +235,7 @@ void ff_j_rev_dct(DCTBLOCK data)
      * row DCT calculations can be simplified this way.
      */
 
-    register int *idataptr = (int*)dataptr;
+    register uint8_t *idataptr = (uint8_t*)dataptr;
 
     /* WARNING: we do the same permutation as MMX idct to simplify the
        video core */
@@ -254,10 +255,10 @@ void ff_j_rev_dct(DCTBLOCK data)
           int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
           register int v = (dcval & 0xffff) | ((dcval * (1 << 16)) & 0xffff0000);
 
-          idataptr[0] = v;
-          idataptr[1] = v;
-          idataptr[2] = v;
-          idataptr[3] = v;
+          AV_WN32A(&idataptr[ 0], v);
+          AV_WN32A(&idataptr[ 4], v);
+          AV_WN32A(&idataptr[ 8], v);
+          AV_WN32A(&idataptr[12], v);
       }
 
       dataptr += DCTSIZE;       /* advance pointer to next row */
@@ -974,7 +975,7 @@ void ff_j_rev_dct4(DCTBLOCK data)
      * row DCT calculations can be simplified this way.
      */
 
-    register int *idataptr = (int*)dataptr;
+    register uint8_t *idataptr = (uint8_t*)dataptr;
 
     d0 = dataptr[0];
     d2 = dataptr[1];
@@ -988,8 +989,8 @@ void ff_j_rev_dct4(DCTBLOCK data)
           int16_t dcval = (int16_t) (d0 << PASS1_BITS);
           register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
 
-          idataptr[0] = v;
-          idataptr[1] = v;
+          AV_WN32A(&idataptr[0], v);
+          AV_WN32A(&idataptr[4], v);
       }
 
       dataptr += DCTSTRIDE;     /* advance pointer to next row */
diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c
index cbe83d3c108d1..4337d5681ec94 100644
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -170,6 +170,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             GetBitContext gb;
             init_get_bits(&gb, buf, 8 * video_size);
 
+            if (avctx->height/8 * (avctx->width/8) > 4 * video_size) {
+                av_log(avctx, AV_LOG_ERROR, "Insufficient input data for dimensions\n");
+                return AVERROR_INVALIDDATA;
+            }
+
             for (j = 0; j < avctx->height; j += 8)
                 for (i = 0; i < avctx->width; i += 8)
                     decode8x8(&gb,
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 5763504b713fb..59169be5dea52 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -669,9 +669,6 @@ static int lag_decode_frame(AVCodecContext *avctx,
 
         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
-        if (buf_size <= offset_ry || buf_size <= offset_gu || buf_size <= offset_bv) {
-            return AVERROR_INVALIDDATA;
-        }
 
         if (offset_ry >= buf_size ||
             offset_gu >= buf_size ||
diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
index 045c519f7236e..faec61cacdf46 100644
--- a/libavcodec/libaomenc.c
+++ b/libavcodec/libaomenc.c
@@ -34,6 +34,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
+#include "av1.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "profiles.h"
@@ -70,10 +71,14 @@ typedef struct AOMEncoderContext {
     int crf;
     int static_thresh;
     int drop_threshold;
-    int noise_sensitivity;
     uint64_t sse[4];
     int have_sse; /**< true if we have pending sse[] */
     uint64_t frame_number;
+    int tile_cols, tile_rows;
+    int tile_cols_log2, tile_rows_log2;
+    aom_superblock_size_t superblock_size;
+    int uniform_tiles;
+    int row_mt;
 } AOMContext;
 
 static const char *const ctlidstr[] = {
@@ -85,6 +90,12 @@ static const char *const ctlidstr[] = {
     [AV1E_SET_COLOR_PRIMARIES]  = "AV1E_SET_COLOR_PRIMARIES",
     [AV1E_SET_MATRIX_COEFFICIENTS] = "AV1E_SET_MATRIX_COEFFICIENTS",
     [AV1E_SET_TRANSFER_CHARACTERISTICS] = "AV1E_SET_TRANSFER_CHARACTERISTICS",
+    [AV1E_SET_SUPERBLOCK_SIZE]  = "AV1E_SET_SUPERBLOCK_SIZE",
+    [AV1E_SET_TILE_COLUMNS]     = "AV1E_SET_TILE_COLUMNS",
+    [AV1E_SET_TILE_ROWS]        = "AV1E_SET_TILE_ROWS",
+#ifdef AOM_CTRL_AV1E_SET_ROW_MT
+    [AV1E_SET_ROW_MT]           = "AV1E_SET_ROW_MT",
+#endif
 };
 
 static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
@@ -149,6 +160,10 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            width, "kf_mode:",     cfg->kf_mode,
            width, "kf_min_dist:", cfg->kf_min_dist,
            width, "kf_max_dist:", cfg->kf_max_dist);
+    av_log(avctx, level, "tile settings\n"
+                         "  %*s%d\n  %*s%d\n",
+           width, "tile_width_count:",  cfg->tile_width_count,
+           width, "tile_height_count:", cfg->tile_height_count);
     av_log(avctx, level, "\n");
 }
 
@@ -180,7 +195,12 @@ static av_cold void free_frame_list(struct FrameListData *list)
 }
 
 static av_cold int codecctl_int(AVCodecContext *avctx,
-                                enum aome_enc_control_id id, int val)
+#ifdef UENUM1BYTE
+                                aome_enc_control_id id,
+#else
+                                enum aome_enc_control_id id,
+#endif
+                                int val)
 {
     AOMContext *ctx = avctx->priv_data;
     char buf[80];
@@ -276,7 +296,7 @@ static int set_pix_fmt(AVCodecContext *avctx, aom_codec_caps_t codec_caps,
 
 static void set_color_range(AVCodecContext *avctx)
 {
-    enum aom_color_range aom_cr;
+    aom_color_range_t aom_cr;
     switch (avctx->color_range) {
     case AVCOL_RANGE_UNSPECIFIED:
     case AVCOL_RANGE_MPEG:       aom_cr = AOM_CR_STUDIO_RANGE; break;
@@ -290,6 +310,169 @@ static void set_color_range(AVCodecContext *avctx)
     codecctl_int(avctx, AV1E_SET_COLOR_RANGE, aom_cr);
 }
 
+static int count_uniform_tiling(int dim, int sb_size, int tiles_log2)
+{   /* Count the tiles obtained along one axis of dim pixels for a requested 2^tiles_log2 split. */
+    int sb_dim   = (dim + sb_size - 1) / sb_size;                  /* dim in superblocks, rounded up */
+    int tile_dim = (sb_dim + (1 << tiles_log2) - 1) >> tiles_log2; /* superblocks per tile, rounded up */
+    av_assert0(tile_dim > 0);                                      /* it is the divisor on the next line */
+    return (sb_dim + tile_dim - 1) / tile_dim;                     /* tiles of tile_dim needed to cover sb_dim */
+}
+
+static int choose_tiling(AVCodecContext *avctx,
+                         struct aom_codec_enc_cfg *enccfg)
+{   /* Pick tile counts, superblock size and uniform/explicit tile layout; returns 0 or AVERROR(EINVAL). */
+    AOMContext *ctx = avctx->priv_data;
+    int sb_128x128_possible, sb_size, sb_width, sb_height;
+    int uniform_rows, uniform_cols;
+    int uniform_64x64_possible, uniform_128x128_possible;
+    int tile_size, rounding, i;
+    /* A log2 setting >= 0 overrides the plain tile count with the matching power of two. */
+    if (ctx->tile_cols_log2 >= 0)
+        ctx->tile_cols = 1 << ctx->tile_cols_log2;
+    if (ctx->tile_rows_log2 >= 0)
+        ctx->tile_rows = 1 << ctx->tile_rows_log2;
+    /* A count of 0 means the value is chosen automatically below. */
+    if (ctx->tile_cols == 0) {
+        ctx->tile_cols = (avctx->width + AV1_MAX_TILE_WIDTH - 1) / /* ceil(width / AV1_MAX_TILE_WIDTH) */
+            AV1_MAX_TILE_WIDTH;
+        if (ctx->tile_cols > 1) {
+            av_log(avctx, AV_LOG_DEBUG, "Automatically using %d tile "
+                   "columns to fill width.\n", ctx->tile_cols);
+        }
+    }
+    av_assert0(ctx->tile_cols > 0);
+    if (ctx->tile_rows == 0) { /* rows = ceil(widest tile width * aligned height / AV1_MAX_TILE_AREA) */
+        int max_tile_width =
+            FFALIGN((FFALIGN(avctx->width, 128) +
+                     ctx->tile_cols - 1) / ctx->tile_cols, 128);
+        ctx->tile_rows =
+            (max_tile_width * FFALIGN(avctx->height, 128) +
+             AV1_MAX_TILE_AREA - 1) / AV1_MAX_TILE_AREA;
+        if (ctx->tile_rows > 1) {
+            av_log(avctx, AV_LOG_DEBUG, "Automatically using %d tile "
+                   "rows to fill area.\n", ctx->tile_rows);
+        }
+    }
+    av_assert0(ctx->tile_rows > 0);
+    /* Reject layouts with more tiles than 64-pixel units, or outside the AV1_MAX_TILE_* limits. */
+    if ((avctx->width  + 63) / 64 < ctx->tile_cols ||
+        (avctx->height + 63) / 64 < ctx->tile_rows) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: frame not "
+               "large enough to fit specified tile arrangement.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->tile_cols > AV1_MAX_TILE_COLS ||
+        ctx->tile_rows > AV1_MAX_TILE_ROWS) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: AV1 does "
+               "not allow more than %dx%d tiles.\n",
+               AV1_MAX_TILE_COLS, AV1_MAX_TILE_ROWS);
+        return AVERROR(EINVAL);
+    }
+    if (avctx->width / ctx->tile_cols > AV1_MAX_TILE_WIDTH) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: AV1 does "
+               "not allow tiles of width greater than %d.\n",
+               AV1_MAX_TILE_WIDTH);
+        return AVERROR(EINVAL);
+    }
+
+    ctx->superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC; /* kept unless a branch below picks a fixed size */
+
+    if (ctx->tile_cols == 1 && ctx->tile_rows == 1) {
+        av_log(avctx, AV_LOG_DEBUG, "Using a single tile.\n");
+        return 0;
+    }
+    /* 128x128 superblocks need at least one superblock per tile in each direction. */
+    sb_128x128_possible =
+        (avctx->width  + 127) / 128 >= ctx->tile_cols &&
+        (avctx->height + 127) / 128 >= ctx->tile_rows;
+    /* log2 of the tile counts, rounded up (assuming av_log2() is floor(log2) - TODO confirm). */
+    ctx->tile_cols_log2 = ctx->tile_cols == 1 ? 0 :
+        av_log2(ctx->tile_cols - 1) + 1;
+    ctx->tile_rows_log2 = ctx->tile_rows == 1 ? 0 :
+        av_log2(ctx->tile_rows - 1) + 1;
+    /* Uniform tiling is usable only if it yields exactly the requested tile counts. */
+    uniform_cols = count_uniform_tiling(avctx->width,
+                                        64, ctx->tile_cols_log2);
+    uniform_rows = count_uniform_tiling(avctx->height,
+                                        64, ctx->tile_rows_log2);
+    av_log(avctx, AV_LOG_DEBUG, "Uniform with 64x64 superblocks "
+           "-> %dx%d tiles.\n", uniform_cols, uniform_rows);
+    uniform_64x64_possible = uniform_cols == ctx->tile_cols &&
+                             uniform_rows == ctx->tile_rows;
+
+    if (sb_128x128_possible) {
+        uniform_cols = count_uniform_tiling(avctx->width,
+                                            128, ctx->tile_cols_log2);
+        uniform_rows = count_uniform_tiling(avctx->height,
+                                            128, ctx->tile_rows_log2);
+        av_log(avctx, AV_LOG_DEBUG, "Uniform with 128x128 superblocks "
+               "-> %dx%d tiles.\n", uniform_cols, uniform_rows);
+        uniform_128x128_possible = uniform_cols == ctx->tile_cols &&
+                                   uniform_rows == ctx->tile_rows;
+    } else {
+        av_log(avctx, AV_LOG_DEBUG, "128x128 superblocks not possible.\n");
+        uniform_128x128_possible = 0;
+    }
+    /* Try the uniform layouts first; the flag is cleared again if none of them applies. */
+    ctx->uniform_tiles = 1;
+    if (uniform_64x64_possible && uniform_128x128_possible) {
+        av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with dynamic "
+               "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
+               ctx->tile_cols_log2, ctx->tile_rows_log2);
+        return 0;
+    }
+    if (uniform_64x64_possible && !sb_128x128_possible) {
+        av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with 64x64 "
+               "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
+               ctx->tile_cols_log2, ctx->tile_rows_log2);
+        ctx->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
+        return 0;
+    }
+    if (uniform_128x128_possible) {
+        av_log(avctx, AV_LOG_DEBUG, "Using uniform tiling with 128x128 "
+               "superblocks (tile_cols_log2 = %d, tile_rows_log2 = %d).\n",
+               ctx->tile_cols_log2, ctx->tile_rows_log2);
+        ctx->superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
+        return 0;
+    }
+    ctx->uniform_tiles = 0;
+    /* Fall back to explicitly sized tiles, using 128x128 superblocks whenever they are possible. */
+    if (sb_128x128_possible) {
+        sb_size = 128;
+        ctx->superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
+    } else {
+        sb_size = 64;
+        ctx->superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Using fixed tiling with %dx%d "
+           "superblocks (tile_cols = %d, tile_rows = %d).\n",
+           sb_size, sb_size, ctx->tile_cols, ctx->tile_rows);
+
+    enccfg->tile_width_count  = ctx->tile_cols;
+    enccfg->tile_height_count = ctx->tile_rows;
+    /* Frame size in superblocks, rounded up; the tile sizes computed below use the same unit. */
+    sb_width  = (avctx->width  + sb_size - 1) / sb_size;
+    sb_height = (avctx->height + sb_size - 1) / sb_size;
+    /* Spread the remainder: the first rounding/2 and the last (rounding+1)/2 tiles get one extra. */
+    tile_size = sb_width / ctx->tile_cols;
+    rounding  = sb_width % ctx->tile_cols;
+    for (i = 0; i < ctx->tile_cols; i++) {
+        enccfg->tile_widths[i] = tile_size +
+            (i < rounding / 2 ||
+             i > ctx->tile_cols - 1 - (rounding + 1) / 2);
+    }
+    /* Same remainder distribution for the tile heights. */
+    tile_size = sb_height / ctx->tile_rows;
+    rounding  = sb_height % ctx->tile_rows;
+    for (i = 0; i < ctx->tile_rows; i++) {
+        enccfg->tile_heights[i] = tile_size +
+            (i < rounding / 2 ||
+             i > ctx->tile_rows - 1 - (rounding + 1) / 2);
+    }
+
+    return 0;
+}
+
 static av_cold int aom_init(AVCodecContext *avctx,
                             const struct aom_codec_iface *iface)
 {
@@ -330,7 +513,8 @@ static av_cold int aom_init(AVCodecContext *avctx,
     enccfg.g_h            = avctx->height;
     enccfg.g_timebase.num = avctx->time_base.num;
     enccfg.g_timebase.den = avctx->time_base.den;
-    enccfg.g_threads      = avctx->thread_count ? avctx->thread_count : av_cpu_count();
+    enccfg.g_threads      =
+        FFMIN(avctx->thread_count ? avctx->thread_count : av_cpu_count(), 64);
 
     if (ctx->lag_in_frames >= 0)
         enccfg.g_lag_in_frames = ctx->lag_in_frames;
@@ -442,6 +626,10 @@ static av_cold int aom_init(AVCodecContext *avctx,
 
     enccfg.g_error_resilient = ctx->error_resilient;
 
+    res = choose_tiling(avctx, &enccfg);
+    if (res < 0)
+        return res;
+
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
     res = aom_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
@@ -465,6 +653,16 @@ static av_cold int aom_init(AVCodecContext *avctx,
     codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, avctx->color_trc);
     set_color_range(avctx);
 
+    codecctl_int(avctx, AV1E_SET_SUPERBLOCK_SIZE, ctx->superblock_size);
+    if (ctx->uniform_tiles) {
+        codecctl_int(avctx, AV1E_SET_TILE_COLUMNS, ctx->tile_cols_log2);
+        codecctl_int(avctx, AV1E_SET_TILE_ROWS,    ctx->tile_rows_log2);
+    }
+
+#ifdef AOM_CTRL_AV1E_SET_ROW_MT
+    codecctl_int(avctx, AV1E_SET_ROW_MT, ctx->row_mt);
+#endif
+
     // provide dummy value to initialize wrapper, values will be updated each _encode()
     aom_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
                  (unsigned char*)1);
@@ -795,11 +993,15 @@ static const AVOption options[] = {
     { "crf",              "Select the quality for constant quality mode", offsetof(AOMContext, crf), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 63, VE },
     { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
     { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
-    { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
+    { "tiles",            "Tile columns x rows", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
+    { "tile-columns",     "Log2 of number of tile columns to use", OFFSET(tile_cols_log2), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
+    { "tile-rows",        "Log2 of number of tile rows to use",    OFFSET(tile_rows_log2), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
+    { "row-mt",           "Enable row based multi-threading",      OFFSET(row_mt),         AV_OPT_TYPE_BOOL, {.i64 = 0},  0, 1, VE},
     { NULL }
 };
 
 static const AVCodecDefault defaults[] = {
+    { "b",          "256*1000" },
     { "qmin",             "-1" },
     { "qmax",             "-1" },
     { "g",                "-1" },
diff --git a/libavcodec/libaribb24.c b/libavcodec/libaribb24.c
new file mode 100644
index 0000000000000..3a59938451fa0
--- /dev/null
+++ b/libavcodec/libaribb24.c
@@ -0,0 +1,395 @@
+/*
+ * ARIB STD-B24 caption decoder using the libaribb24 library
+ * Copyright (c) 2019 Jan Ekström
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "libavcodec/ass.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+
+#include <aribb24/aribb24.h>
+#include <aribb24/parser.h>
+#include <aribb24/decoder.h>
+
+typedef struct Libaribb24Context { /* private context of the libaribb24 decoder */
+    AVClass *class; /* first member; the codec's .priv_class points at aribb24_class */
+
+    arib_instance_t *lib_instance; /* obtained from arib_instance_new() */
+    arib_parser_t *parser;         /* obtained from arib_get_parser() */
+    arib_decoder_t *decoder;       /* obtained from arib_get_decoder() */
+
+    int read_order; /* passed to ff_ass_add_rect() and incremented per call */
+
+    char        *aribb24_base_path; /* "aribb24-base-path" option */
+    unsigned int aribb24_skip_ruby; /* "aribb24-skip-ruby-text" option */
+} Libaribb24Context;
+
+/* Font size used for an ARIB caption profile: 36 for profile A, 18 for
+ * profile C, and 0 for any profile this wrapper does not know. */
+static unsigned int get_profile_font_size(int profile)
+{
+    if (profile == FF_PROFILE_ARIB_PROFILE_A)
+        return 36;
+    if (profile == FF_PROFILE_ARIB_PROFILE_C)
+        return 18;
+
+    return 0;
+}
+
+static void libaribb24_log(void *p, const char *msg) /* message callback registered with libaribb24 */
+{
+    av_log((AVCodecContext *)p, AV_LOG_INFO, "%s\n", msg); /* p: presumably the opaque given to arib_instance_new() */
+}
+
+/* Build the ASS script header (PlayRes plus the "Default" style) for the
+ * stream's ARIB profile and store it in avctx->subtitle_header{,_size}. */
+static int libaribb24_generate_ass_header(AVCodecContext *avctx)
+{
+    unsigned int font_size    = get_profile_font_size(avctx->profile);
+    unsigned int plane_width  = 0;
+    unsigned int plane_height = 0;
+
+    switch (avctx->profile) {
+    case FF_PROFILE_ARIB_PROFILE_A:
+        plane_width  = 960;
+        plane_height = 540;
+        break;
+    case FF_PROFILE_ARIB_PROFILE_C:
+        plane_width  = 320;
+        plane_height = 180;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unknown or unsupported profile set!\n");
+        return AVERROR(EINVAL);
+    }
+
+    avctx->subtitle_header = av_asprintf(
+             "[Script Info]\r\n"
+             "; Script generated by FFmpeg/Lavc%s\r\n"
+             "ScriptType: v4.00+\r\n"
+             "PlayResX: %d\r\n"
+             "PlayResY: %d\r\n"
+             "\r\n"
+             "[V4+ Styles]\r\n"
+
+             /* ASSv4 header */
+             "Format: Name, "
+             "Fontname, Fontsize, "
+             "PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
+             "Bold, Italic, Underline, StrikeOut, "
+             "ScaleX, ScaleY, "
+             "Spacing, Angle, "
+             "BorderStyle, Outline, Shadow, "
+             "Alignment, MarginL, MarginR, MarginV, "
+             "Encoding\r\n"
+
+             "Style: "
+             "Default,"             /* Name */
+             "%s,%d,"               /* Font{name,size} */
+             "&H%x,&H%x,&H%x,&H%x," /* {Primary,Secondary,Outline,Back}Colour */
+             "%d,%d,%d,0,"          /* Bold, Italic, Underline, StrikeOut */
+             "100,100,"             /* Scale{X,Y} */
+             "0,0,"                 /* Spacing, Angle */
+             "%d,1,0,"              /* BorderStyle, Outline, Shadow */
+             "%d,10,10,10,"         /* Alignment, Margin[LRV] */
+             "0\r\n"                /* Encoding */
+
+             "\r\n"
+             "[Events]\r\n"
+             "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n",
+             !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
+             plane_width, plane_height,
+             ASS_DEFAULT_FONT, font_size, ASS_DEFAULT_COLOR,
+             ASS_DEFAULT_COLOR, ASS_DEFAULT_BACK_COLOR, ASS_DEFAULT_BACK_COLOR,
+             -ASS_DEFAULT_BOLD, -ASS_DEFAULT_ITALIC, -ASS_DEFAULT_UNDERLINE,
+             ASS_DEFAULT_BORDERSTYLE, ASS_DEFAULT_ALIGNMENT);
+
+    if (!avctx->subtitle_header)
+        return AVERROR(ENOMEM);
+
+    avctx->subtitle_header_size = strlen(avctx->subtitle_header);
+
+    return 0;
+}
+
+/* Set up the libaribb24 instance, parser and profile-specific decoder and
+ * generate the ASS header; every failure path returns its own error code. */
+static int libaribb24_init(AVCodecContext *avctx)
+{
+    Libaribb24Context *b24 = avctx->priv_data;
+    void(* arib_dec_init)(arib_decoder_t* decoder) = NULL;
+    int ret_code = AVERROR_EXTERNAL;
+
+    if (!(b24->lib_instance = arib_instance_new(avctx))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize libaribb24!\n");
+        goto init_fail;
+    }
+
+    if (b24->aribb24_base_path) {
+        av_log(avctx, AV_LOG_INFO, "Setting the libaribb24 base path to '%s'\n",
+               b24->aribb24_base_path);
+        arib_set_base_path(b24->lib_instance, b24->aribb24_base_path);
+    }
+
+    arib_register_messages_callback(b24->lib_instance, libaribb24_log);
+
+    if (!(b24->parser = arib_get_parser(b24->lib_instance))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize libaribb24 PES parser!\n");
+        goto init_fail;
+    }
+    if (!(b24->decoder = arib_get_decoder(b24->lib_instance))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialize libaribb24 decoder!\n");
+        goto init_fail;
+    }
+
+    switch (avctx->profile) {
+    case FF_PROFILE_ARIB_PROFILE_A:
+        arib_dec_init = arib_initialize_decoder_a_profile;
+        break;
+    case FF_PROFILE_ARIB_PROFILE_C:
+        arib_dec_init = arib_initialize_decoder_c_profile;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unknown or unsupported profile set!\n");
+        ret_code = AVERROR(EINVAL);
+        goto init_fail;
+    }
+
+    arib_dec_init(b24->decoder);
+
+    if ((ret_code = libaribb24_generate_ass_header(avctx)) < 0)
+        goto init_fail; /* keep the callee's error (EINVAL vs. ENOMEM) */
+
+    return 0;
+
+init_fail:
+    if (b24->decoder)
+        arib_finalize_decoder(b24->decoder);
+
+    if (b24->lib_instance)
+        arib_instance_destroy(b24->lib_instance);
+
+    return ret_code;
+}
+
+/* Codec close callback: finalize the decoder, then destroy the library
+ * instance; each step is skipped when its pointer is NULL. */
+static int libaribb24_close(AVCodecContext *avctx)
+{
+    Libaribb24Context *ctx = avctx->priv_data;
+
+    if (ctx->decoder)
+        arib_finalize_decoder(ctx->decoder);
+    if (ctx->lib_instance)
+        arib_instance_destroy(ctx->lib_instance);
+    return 0;
+}
+
+#define RGB_TO_BGR(c) (((c) & 0xff) << 16 | ((c) & 0xff00) | (((c) >> 16) & 0xff)) /* swaps bytes 0 and 2, keeps byte 1 */
+
+/* Walk the decoder's current region list, turn it into a single ASS line
+ * with per-region override tags, and add that line to sub as a rectangle.
+ * Returns the ff_ass_add_rect() result or a negative AVERROR code. */
+static int libaribb24_handle_regions(AVCodecContext *avctx, AVSubtitle *sub)
+{
+    Libaribb24Context *b24 = avctx->priv_data;
+    const arib_buf_region_t *region = arib_decoder_get_regions(b24->decoder);
+    unsigned int profile_font_size = get_profile_font_size(avctx->profile);
+    AVBPrint buf = { 0 };
+    int ret = 0;
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (; region; region = region->p_next) {
+        ptrdiff_t len = region->p_end - region->p_start;
+        unsigned int is_ruby =
+            region->i_fontheight == (profile_font_size / 2);
+
+        // ASS wants BGR ordered colors, the library hands us RGB
+        int fg_bgr = RGB_TO_BGR(region->i_foreground_color);
+        int bg_bgr = RGB_TO_BGR(region->i_background_color);
+
+        if (len < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid negative region length!\n");
+            ret = AVERROR_INVALIDDATA;
+            break;
+        }
+
+        // empty regions, and ruby text when skipping is enabled, emit nothing
+        if (len == 0 || (is_ruby && b24->aribb24_skip_ruby))
+            continue;
+
+        // color and alpha overrides, only where they differ from the defaults
+        if (fg_bgr != ASS_DEFAULT_COLOR)
+            av_bprintf(&buf, "{\\1c&H%06x&}", fg_bgr);
+
+        if (region->i_foreground_alpha != 0)
+            av_bprintf(&buf, "{\\1a&H%02x&}", region->i_foreground_alpha);
+
+        if (bg_bgr != ASS_DEFAULT_BACK_COLOR)
+            av_bprintf(&buf, "{\\3c&H%06x&}", bg_bgr);
+
+        if (region->i_background_alpha != 0)
+            av_bprintf(&buf, "{\\3a&H%02x&}", region->i_background_alpha);
+
+        // font scale, as a percentage of the profile's font size
+        if (region->i_fontwidth  != profile_font_size ||
+            region->i_fontheight != profile_font_size) {
+            av_bprintf(&buf, "{\\fscx%"PRId64"\\fscy%"PRId64"}",
+                       av_rescale(region->i_fontwidth, 100,
+                                  profile_font_size),
+                       av_rescale(region->i_fontheight, 100,
+                                  profile_font_size));
+        }
+
+        // TODO: positioning
+
+        av_bprint_append_data(&buf, region->p_start, len);
+
+        av_bprintf(&buf, "{\\r}");
+    }
+
+    if (!av_bprint_is_complete(&buf))
+        ret = AVERROR(ENOMEM);
+
+    if (ret == 0) {
+        av_log(avctx, AV_LOG_DEBUG, "Styled ASS line: %s\n",
+               buf.str);
+
+        ret = ff_ass_add_rect(sub, buf.str, b24->read_order++,
+                              0, NULL, NULL);
+    }
+
+    av_bprint_finalize(&buf, NULL);
+
+    return ret;
+}
+
+static int libaribb24_decode(AVCodecContext *avctx, void *data, int *got_sub_ptr, AVPacket *pkt)
+{ /* decode callback: parse one packet and, if it yields regions, add them to the AVSubtitle in data */
+    Libaribb24Context *b24 = avctx->priv_data;
+    AVSubtitle *sub = data;
+    size_t parsed_data_size = 0;
+    size_t decoded_subtitle_size = 0;
+    const unsigned char *parsed_data = NULL;
+    char *decoded_subtitle = NULL;
+    time_t subtitle_duration = 0;
+    int ret = 0;
+
+    if (pkt->size <= 0)
+        return pkt->size; /* nothing to parse */
+
+    arib_parse_pes(b24->parser, pkt->data, pkt->size);
+
+    parsed_data = arib_parser_get_data(b24->parser,
+                                       &parsed_data_size);
+    if (!parsed_data || !parsed_data_size) {
+        av_log(avctx, AV_LOG_DEBUG, "No decode'able data was received from "
+                                    "packet (dts: %"PRId64", pts: %"PRId64").\n",
+               pkt->dts, pkt->pts);
+        return pkt->size; /* packet consumed, *got_sub_ptr left untouched */
+    }
+
+    decoded_subtitle_size = parsed_data_size * 4; /* output capacity: 4 bytes per parsed input byte */
+    if (!(decoded_subtitle = av_mallocz(decoded_subtitle_size + 1))) { /* one extra zeroed byte, presumably as NUL terminator */
+        av_log(avctx, AV_LOG_ERROR,
+               "Failed to allocate buffer for decoded subtitle!\n");
+        return AVERROR(ENOMEM);
+    }
+
+    decoded_subtitle_size = arib_decode_buffer(b24->decoder,
+                                               parsed_data,
+                                               parsed_data_size,
+                                               decoded_subtitle,
+                                               decoded_subtitle_size);
+
+    subtitle_duration = arib_decoder_get_time(b24->decoder); /* treated as AV_TIME_BASE units by the rescale below */
+
+    if (avctx->pkt_timebase.num && pkt->pts != AV_NOPTS_VALUE)
+        sub->pts = av_rescale_q(pkt->pts,
+                                avctx->pkt_timebase, AV_TIME_BASE_Q);
+
+    sub->end_display_time = subtitle_duration ?
+                            av_rescale_q(subtitle_duration,
+                                         AV_TIME_BASE_Q,
+                                         (AVRational){1, 1000}) :
+                            UINT32_MAX; /* zero duration reported: use the maximum display time */
+
+    av_log(avctx, AV_LOG_DEBUG,
+           "Result: '%s' (size: %zu, pkt_pts: %"PRId64", sub_pts: %"PRId64" "
+           "duration: %"PRIu32", pkt_timebase: %d/%d, time_base: %d/%d')\n",
+           decoded_subtitle ? decoded_subtitle : "<no subtitle>",
+           decoded_subtitle_size,
+           pkt->pts, sub->pts,
+           sub->end_display_time,
+           avctx->pkt_timebase.num, avctx->pkt_timebase.den,
+           avctx->time_base.num, avctx->time_base.den);
+
+    if (decoded_subtitle) /* NOTE(review): always true here, allocation failure already returned above */
+        ret = libaribb24_handle_regions(avctx, sub);
+
+    *got_sub_ptr = sub->num_rects > 0; /* set even when ret is an error */
+
+    av_free(decoded_subtitle);
+
+    // flush the region buffers, otherwise the linked list keeps getting
+    // longer and longer...
+    arib_finalize_decoder(b24->decoder);
+
+    return ret < 0 ? ret : pkt->size;
+}
+
+/* Flush callback: restart the read_order counter unless RO_FLUSH_NOOP is set. */
+static void libaribb24_flush(AVCodecContext *avctx)
+{
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
+        ((Libaribb24Context *)avctx->priv_data)->read_order = 0;
+}
+
+#define OFFSET(x) offsetof(Libaribb24Context, x)
+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = { /* private options, stored in Libaribb24Context */
+    { "aribb24-base-path", "set the base path for the libaribb24 library",
+      OFFSET(aribb24_base_path), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, SD }, /* default: unset */
+    { "aribb24-skip-ruby-text", "skip ruby text blocks during decoding",
+      OFFSET(aribb24_skip_ruby), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, SD }, /* default: enabled */
+    { NULL }
+};
+
+static const AVClass aribb24_class = { /* AVClass tying the option table to the decoder context */
+    .class_name = "libaribb24 decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_libaribb24_decoder = { /* codec entry: subtitle decoder for AV_CODEC_ID_ARIB_CAPTION */
+    .name      = "libaribb24",
+    .long_name = NULL_IF_CONFIG_SMALL("libaribb24 ARIB STD-B24 caption decoder"),
+    .type      = AVMEDIA_TYPE_SUBTITLE,
+    .id        = AV_CODEC_ID_ARIB_CAPTION,
+    .priv_data_size = sizeof(Libaribb24Context),
+    .init      = libaribb24_init,
+    .close     = libaribb24_close,
+    .decode    = libaribb24_decode,
+    .flush     = libaribb24_flush, /* resets read_order unless RO_FLUSH_NOOP is set */
+    .priv_class= &aribb24_class,
+    .wrapper_name = "libaribb24",
+};
diff --git a/libavcodec/libdav1d.c b/libavcodec/libdav1d.c
new file mode 100644
index 0000000000000..8c8584f4e804e
--- /dev/null
+++ b/libavcodec/libdav1d.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2018 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (c) 2018 James Almer <jamrial gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <dav1d/dav1d.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "decode.h"
+#include "internal.h"
+
+typedef struct Libdav1dContext { // private context of the libdav1d decoder
+    AVClass *class;
+    Dav1dContext *c;    // decoder handle filled in by dav1d_open()
+    AVBufferPool *pool; // buffer pool the picture allocator draws from
+    int pool_size;      // image size the current pool was created for
+
+    Dav1dData data;     // wrapped packet data; fetched anew only once data.sz is 0
+    int tile_threads;   // "tilethreads" option
+    int apply_grain;    // "filmgrain" option
+} Libdav1dContext;
+
+static const enum AVPixelFormat pix_fmt[][3] = { // looked up as [p->p.layout][p->seq_hdr->hbd]: 8, 10, 12 bit
+    [DAV1D_PIXEL_LAYOUT_I400] = { AV_PIX_FMT_GRAY8,   AV_PIX_FMT_GRAY10,    AV_PIX_FMT_GRAY12 },
+    [DAV1D_PIXEL_LAYOUT_I420] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12 },
+    [DAV1D_PIXEL_LAYOUT_I422] = { AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12 },
+    [DAV1D_PIXEL_LAYOUT_I444] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12 },
+};
+
+// dav1d logger hook; opaque is the AVCodecContext installed as logger cookie.
+static void libdav1d_log_callback(void *opaque, const char *fmt, va_list vl)
+{
+    // every message coming from dav1d is forwarded at error level
+    av_vlog((AVCodecContext *)opaque, AV_LOG_ERROR, fmt, vl);
+}
+
+// dav1d picture allocator hook: carves the planes of a 128-aligned picture
+// out of a buffer from an AVBufferPool that is rebuilt when the size changes.
+static int libdav1d_picture_allocator(Dav1dPicture *p, void *cookie)
+{
+    Libdav1dContext *dav1d = cookie;
+    enum AVPixelFormat format = pix_fmt[p->p.layout][p->seq_hdr->hbd];
+    int ret, linesize[4], h = FFALIGN(p->p.h, 128);
+    uint8_t *aligned_ptr, *data[4];
+    AVBufferRef *buf;
+
+    ret = av_image_fill_arrays(data, linesize, NULL, format, FFALIGN(p->p.w, 128),
+                               h, DAV1D_PICTURE_ALIGNMENT);
+    if (ret < 0)
+        return ret;
+
+    if (ret != dav1d->pool_size) {
+        av_buffer_pool_uninit(&dav1d->pool);
+        // Use twice the amount of required padding bytes for aligned_ptr below.
+        dav1d->pool = av_buffer_pool_init(ret + DAV1D_PICTURE_ALIGNMENT * 2, NULL);
+        dav1d->pool_size = dav1d->pool ? ret : 0; // 0 forces a retry after a failed init
+        if (!dav1d->pool)
+            return AVERROR(ENOMEM);
+    }
+    if (!(buf = av_buffer_pool_get(dav1d->pool)))
+        return AVERROR(ENOMEM);
+
+    // libdav1d requires DAV1D_PICTURE_ALIGNMENT aligned buffers, which av_malloc()
+    // doesn't guarantee for example when AVX is disabled at configure time.
+    // Use the extra DAV1D_PICTURE_ALIGNMENT padding bytes in the buffer to align it
+    // if required.
+    aligned_ptr = (uint8_t *)FFALIGN((uintptr_t)buf->data, DAV1D_PICTURE_ALIGNMENT);
+    ret = av_image_fill_pointers(data, format, h, aligned_ptr, linesize);
+    if (ret < 0) {
+        av_buffer_unref(&buf);
+        return ret;
+    }
+
+    p->data[0] = data[0];
+    p->data[1] = data[1];
+    p->data[2] = data[2];
+    p->stride[0] = linesize[0];
+    p->stride[1] = linesize[1];
+    p->allocator_data = buf; // handed back to libdav1d_picture_release()
+    return 0;
+}
+
+// Allocator release hook: drop the buffer reference kept in allocator_data.
+static void libdav1d_picture_release(Dav1dPicture *p, void *cookie)
+{
+    AVBufferRef *ref = p->allocator_data;
+    av_buffer_unref(&ref);
+}
+
+// Codec init: open a dav1d instance wired to our logger and picture allocator,
+// with thread counts and film grain taken from the context and its options.
+static av_cold int libdav1d_init(AVCodecContext *c)
+{
+    Libdav1dContext *ctx = c->priv_data;
+    Dav1dSettings s;
+
+    av_log(c, AV_LOG_INFO, "libdav1d %s\n", dav1d_version());
+
+    dav1d_default_settings(&s);
+    s.logger.cookie                      = c;
+    s.logger.callback                    = libdav1d_log_callback;
+    s.allocator.cookie                   = ctx;
+    s.allocator.alloc_picture_callback   = libdav1d_picture_allocator;
+    s.allocator.release_picture_callback = libdav1d_picture_release;
+    s.n_tile_threads                     = ctx->tile_threads;
+    s.apply_grain                        = ctx->apply_grain;
+    s.n_frame_threads = FFMIN(c->thread_count ? c->thread_count : av_cpu_count(), DAV1D_MAX_FRAME_THREADS);
+
+    // any dav1d_open() failure is reported to the caller as ENOMEM
+    if (dav1d_open(&ctx->c, &s) < 0)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+// Flush callback: release the pending input data, then call dav1d_flush().
+static void libdav1d_flush(AVCodecContext *c)
+{
+    Libdav1dContext *ctx = c->priv_data;
+    dav1d_data_unref(&ctx->data);
+    dav1d_flush(ctx->c);
+}
+
+// Free hook given to dav1d_data_wrap(): opaque is the packet's AVBufferRef.
+static void libdav1d_data_free(const uint8_t *data, void *opaque) {
+    AVBufferRef *ref = opaque;
+    av_buffer_unref(&ref);
+}
+
+// AVBuffer free hook: opaque is the heap-allocated Dav1dPicture of a frame.
+static void libdav1d_frame_free(void *opaque, uint8_t *data) {
+    Dav1dPicture *pic = opaque;
+    dav1d_picture_unref(pic);
+    av_free(pic);
+}
+
+// receive_frame callback: feed pending packet data to dav1d, then try to pull
+// one decoded picture and export it, with its metadata, through frame.
+static int libdav1d_receive_frame(AVCodecContext *c, AVFrame *frame)
+{
+    Libdav1dContext *dav1d = c->priv_data;
+    Dav1dData *data = &dav1d->data;
+    Dav1dPicture *p;
+    int res;
+
+    if (!data->sz) {
+        AVPacket pkt = { 0 };
+
+        res = ff_decode_get_packet(c, &pkt);
+        if (res < 0 && res != AVERROR_EOF)
+            return res;
+
+        if (pkt.size) {
+            res = dav1d_data_wrap(data, pkt.data, pkt.size, libdav1d_data_free, pkt.buf);
+            if (res < 0) {
+                av_packet_unref(&pkt);
+                return res;
+            }
+
+            data->m.timestamp = pkt.pts;
+            data->m.offset = pkt.pos;
+            data->m.duration = pkt.duration;
+
+            pkt.buf = NULL; // ownership moved to data; freed by libdav1d_data_free()
+            av_packet_unref(&pkt);
+        }
+    }
+
+    res = dav1d_send_data(dav1d->c, data);
+    if (res < 0 && res != AVERROR(EAGAIN)) {
+        // Drop the rejected buffer, else it is re-sent on every later call.
+        dav1d_data_unref(data);
+        return res == AVERROR(EINVAL) ? AVERROR_INVALIDDATA : res;
+    }
+
+    if (!(p = av_mallocz(sizeof(*p))))
+        return AVERROR(ENOMEM);
+
+    res = dav1d_get_picture(dav1d->c, p);
+    if (res < 0) {
+        if (res == AVERROR(EINVAL))
+            res = AVERROR_INVALIDDATA;
+        else if (res == AVERROR(EAGAIN) && c->internal->draining)
+            res = AVERROR_EOF;
+
+        av_free(p);
+        return res;
+    }
+
+    av_assert0(p->data[0] != NULL);
+
+    frame->buf[0] = av_buffer_create(NULL, 0, libdav1d_frame_free,
+                                     p, AV_BUFFER_FLAG_READONLY);
+    if (!frame->buf[0]) {
+        dav1d_picture_unref(p);
+        av_free(p);
+        return AVERROR(ENOMEM);
+    }
+
+    frame->data[0] = p->data[0];
+    frame->data[1] = p->data[1];
+    frame->data[2] = p->data[2];
+    frame->linesize[0] = p->stride[0];
+    frame->linesize[1] = p->stride[1];
+    frame->linesize[2] = p->stride[1]; // both chroma planes use stride[1]
+
+    c->profile = p->seq_hdr->profile;
+    frame->format = c->pix_fmt = pix_fmt[p->p.layout][p->seq_hdr->hbd];
+    frame->width = p->p.w;
+    frame->height = p->p.h;
+    if (c->width != p->p.w || c->height != p->p.h) {
+        res = ff_set_dimensions(c, p->p.w, p->p.h);
+        if (res < 0)
+            goto fail;
+    }
+
+    switch (p->seq_hdr->chr) {
+    case DAV1D_CHR_VERTICAL:
+        frame->chroma_location = c->chroma_sample_location = AVCHROMA_LOC_LEFT;
+        break;
+    case DAV1D_CHR_COLOCATED:
+        frame->chroma_location = c->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
+        break;
+    }
+    frame->colorspace = c->colorspace = (enum AVColorSpace) p->seq_hdr->mtrx;
+    frame->color_primaries = c->color_primaries = (enum AVColorPrimaries) p->seq_hdr->pri;
+    frame->color_trc = c->color_trc = (enum AVColorTransferCharacteristic) p->seq_hdr->trc;
+    frame->color_range = c->color_range = p->seq_hdr->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+
+    // match timestamps and packet size
+    frame->pts = frame->best_effort_timestamp = p->m.timestamp;
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
+    frame->pkt_pts = p->m.timestamp;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    frame->pkt_dts = p->m.timestamp;
+    frame->pkt_pos = p->m.offset;
+    frame->pkt_size = p->m.size;
+    frame->pkt_duration = p->m.duration;
+    frame->key_frame = p->frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY;
+
+    switch (p->frame_hdr->frame_type) {
+    case DAV1D_FRAME_TYPE_KEY:
+    case DAV1D_FRAME_TYPE_INTRA:
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case DAV1D_FRAME_TYPE_INTER:
+        frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case DAV1D_FRAME_TYPE_SWITCH:
+        frame->pict_type = AV_PICTURE_TYPE_SP;
+        break;
+    default:
+        res = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (p->mastering_display) {
+        AVMasteringDisplayMetadata *mastering = av_mastering_display_metadata_create_side_data(frame);
+        if (!mastering) {
+            res = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int i = 0; i < 3; i++) {
+            mastering->display_primaries[i][0] = av_make_q(p->mastering_display->primaries[i][0], 1 << 16);
+            mastering->display_primaries[i][1] = av_make_q(p->mastering_display->primaries[i][1], 1 << 16);
+        }
+        mastering->white_point[0] = av_make_q(p->mastering_display->white_point[0], 1 << 16);
+        mastering->white_point[1] = av_make_q(p->mastering_display->white_point[1], 1 << 16);
+
+        mastering->max_luminance = av_make_q(p->mastering_display->max_luminance, 1 << 8);
+        mastering->min_luminance = av_make_q(p->mastering_display->min_luminance, 1 << 14);
+
+        mastering->has_primaries = 1;
+        mastering->has_luminance = 1;
+    }
+    if (p->content_light) {
+        AVContentLightMetadata *light = av_content_light_metadata_create_side_data(frame);
+        if (!light) {
+            res = AVERROR(ENOMEM);
+            goto fail;
+        }
+        light->MaxCLL = p->content_light->max_content_light_level;
+        light->MaxFALL = p->content_light->max_frame_average_light_level;
+    }
+
+    res = 0;
+fail:
+    if (res < 0)
+        av_frame_unref(frame); // also releases p through libdav1d_frame_free()
+    return res;
+}
+
+// Close callback: release the buffer pool, pending input and dav1d instance.
+static av_cold int libdav1d_close(AVCodecContext *c)
+{
+    Libdav1dContext *ctx = c->priv_data;
+
+    av_buffer_pool_uninit(&ctx->pool);
+    dav1d_data_unref(&ctx->data);
+    dav1d_close(&ctx->c);
+    return 0;
+}
+
+#define OFFSET(x) offsetof(Libdav1dContext, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption libdav1d_options[] = { // private options, stored in Libdav1dContext
+    { "tilethreads", "Tile threads", OFFSET(tile_threads), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, DAV1D_MAX_TILE_THREADS, VD }, // copied to s.n_tile_threads
+    { "filmgrain", "Apply Film Grain", OFFSET(apply_grain), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VD }, // copied to s.apply_grain
+    { NULL }
+};
+
+static const AVClass libdav1d_class = { // AVClass tying libdav1d_options to the decoder context
+    .class_name = "libdav1d decoder",
+    .item_name  = av_default_item_name,
+    .option     = libdav1d_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_libdav1d_decoder = { // codec entry: AV1 video decoder using the receive_frame API
+    .name           = "libdav1d",
+    .long_name      = NULL_IF_CONFIG_SMALL("dav1d AV1 decoder by VideoLAN"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AV1,
+    .priv_data_size = sizeof(Libdav1dContext),
+    .init           = libdav1d_init,
+    .close          = libdav1d_close,
+    .flush          = libdav1d_flush,
+    .receive_frame  = libdav1d_receive_frame,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_SETS_PKT_DTS, // pkt_dts is written in receive_frame
+    .priv_class     = &libdav1d_class,
+    .wrapper_name   = "libdav1d",
+};
diff --git a/libavcodec/libdavs2.c b/libavcodec/libdavs2.c
index aa1478290abfd..080872195a0cc 100644
--- a/libavcodec/libdavs2.c
+++ b/libavcodec/libdavs2.c
@@ -32,8 +32,6 @@ typedef struct DAVS2Context {
     davs2_param_t    param;      // decoding parameters
     davs2_packet_t   packet;     // input bitstream
 
-    int decoded_frames;
-
     davs2_picture_t  out_frame;  // output data, frame data
     davs2_seq_info_t headerset;  // output data, sequence header
 
@@ -42,10 +40,13 @@ typedef struct DAVS2Context {
 static av_cold int davs2_init(AVCodecContext *avctx)
 {
     DAVS2Context *cad = avctx->priv_data;
+    int cpu_flags = av_get_cpu_flags();
 
     /* init the decoder */
     cad->param.threads      = avctx->thread_count;
     cad->param.info_level   = 0;
+    cad->param.disable_avx  = !(cpu_flags & AV_CPU_FLAG_AVX &&
+                                cpu_flags & AV_CPU_FLAG_AVX2);
     cad->decoder            = davs2_decoder_open(&cad->param);
 
     if (!cad->decoder) {
@@ -57,7 +58,7 @@ static av_cold int davs2_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
+static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic, int *got_frame,
                              davs2_seq_info_t *headerset, int ret_type, AVFrame *frame)
 {
     DAVS2Context *cad    = avctx->priv_data;
@@ -65,8 +66,10 @@ static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
     int plane = 0;
     int line  = 0;
 
-    if (!headerset)
+    if (!headerset) {
+        *got_frame = 0;
         return 0;
+    }
 
     if (!pic || ret_type == DAVS2_GOT_HEADER) {
         avctx->width     = headerset->width;
@@ -75,9 +78,30 @@ static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
                            AV_PIX_FMT_YUV420P10 : AV_PIX_FMT_YUV420P;
 
         avctx->framerate = av_d2q(headerset->frame_rate,4096);
+        *got_frame = 0;
         return 0;
     }
 
+    switch (pic->type) {
+    case DAVS2_PIC_I:
+    case DAVS2_PIC_G:
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case DAVS2_PIC_P:
+    case DAVS2_PIC_S:
+        frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case DAVS2_PIC_B:
+        frame->pict_type = AV_PICTURE_TYPE_B;
+        break;
+    case DAVS2_PIC_F:
+        frame->pict_type = AV_PICTURE_TYPE_S;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Decoder error: unknown frame type\n");
+        return AVERROR_EXTERNAL;
+    }
+
     for (plane = 0; plane < 3; ++plane) {
         int size_line = pic->widths[plane] * bytes_per_sample;
         frame->buf[plane]  = av_buffer_alloc(size_line * pic->lines[plane]);
@@ -88,7 +112,7 @@ static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
         }
 
         frame->data[plane]     = frame->buf[plane]->data;
-        frame->linesize[plane] = pic->widths[plane];
+        frame->linesize[plane] = size_line;
 
         for (line = 0; line < pic->lines[plane]; ++line)
             memcpy(frame->data[plane] + line * size_line,
@@ -99,11 +123,27 @@ static int davs2_dump_frames(AVCodecContext *avctx, davs2_picture_t *pic,
     frame->width     = cad->headerset.width;
     frame->height    = cad->headerset.height;
     frame->pts       = cad->out_frame.pts;
-    frame->pict_type = pic->type;
     frame->format    = avctx->pix_fmt;
 
-    cad->decoded_frames++;
-    return 1;
+    *got_frame = 1;
+    return 0;
+}
+
+static int send_delayed_frame(AVCodecContext *avctx, AVFrame *frame, int *got_frame)
+{   /* Flush libdavs2 and, if it yields a picture, dump it into frame. */
+    DAVS2Context *cad      = avctx->priv_data;
+    int           ret      = DAVS2_DEFAULT;
+
+    ret = davs2_decoder_flush(cad->decoder, &cad->headerset, &cad->out_frame);
+    if (ret == DAVS2_ERROR) {
+        av_log(avctx, AV_LOG_ERROR, "Decoder error: can't flush delayed frame\n");
+        return AVERROR_EXTERNAL;
+    }
+    if (ret == DAVS2_GOT_FRAME) { /* fill frame from the picture, then unref the picture */
+        ret = davs2_dump_frames(avctx, &cad->out_frame, got_frame, &cad->headerset, ret, frame);
+        davs2_decoder_frame_unref(cad->decoder, &cad->out_frame);
+    }
+    return ret; /* davs2_dump_frames() result, or the unchanged flush status when no frame came out */
 }
 
 static av_cold int davs2_end(AVCodecContext *avctx)
@@ -128,8 +168,9 @@ static int davs2_decode_frame(AVCodecContext *avctx, void *data,
     AVFrame      *frame    = data;
     int           ret      = DAVS2_DEFAULT;
 
+    /* end of stream, output what is still in the buffers */
     if (!buf_size) {
-        return 0;
+        return send_delayed_frame(avctx, frame, got_frame);
     }
 
     cad->packet.data = buf_ptr;
@@ -148,11 +189,11 @@ static int davs2_decode_frame(AVCodecContext *avctx, void *data,
     ret = davs2_decoder_recv_frame(cad->decoder, &cad->headerset, &cad->out_frame);
 
     if (ret != DAVS2_DEFAULT) {
-        *got_frame = davs2_dump_frames(avctx, &cad->out_frame, &cad->headerset, ret, frame);
+        ret = davs2_dump_frames(avctx, &cad->out_frame, got_frame, &cad->headerset, ret, frame);
         davs2_decoder_frame_unref(cad->decoder, &cad->out_frame);
     }
 
-    return buf_size;
+    return ret == 0 ? buf_size : ret;
 }
 
 AVCodec ff_libdavs2_decoder = {
@@ -164,8 +205,8 @@ AVCodec ff_libdavs2_decoder = {
     .init           = davs2_init,
     .close          = davs2_end,
     .decode         = davs2_decode_frame,
-    .capabilities   =  AV_CODEC_CAP_DELAY,//AV_CODEC_CAP_DR1 |
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10,
+    .capabilities   =  AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
     .wrapper_name   = "libdavs2",
 };
diff --git a/libavcodec/libgsmenc.c b/libavcodec/libgsmenc.c
index c9e7ba056e1e4..fdb11c705eecc 100644
--- a/libavcodec/libgsmenc.c
+++ b/libavcodec/libgsmenc.c
@@ -114,6 +114,10 @@ static int libgsm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     return 0;
 }
 
+static const AVCodecDefault libgsm_defaults[] = {
+    { "b",                "13000" }, /* "b" default used by ff_libgsm_encoder and ff_libgsm_ms_encoder */
+    { NULL },
+};
 
 #if CONFIG_LIBGSM_ENCODER
 AVCodec ff_libgsm_encoder = {
@@ -124,6 +128,8 @@ AVCodec ff_libgsm_encoder = {
     .init           = libgsm_encode_init,
     .encode2        = libgsm_encode_frame,
     .close          = libgsm_encode_close,
+    .defaults       = libgsm_defaults,
+    .channel_layouts= (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0 },
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .wrapper_name   = "libgsm",
@@ -138,6 +144,8 @@ AVCodec ff_libgsm_ms_encoder = {
     .init           = libgsm_encode_init,
     .encode2        = libgsm_encode_frame,
     .close          = libgsm_encode_close,
+    .defaults       = libgsm_defaults,
+    .channel_layouts= (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0 },
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .wrapper_name   = "libgsm",
diff --git a/libavcodec/libkvazaar.c b/libavcodec/libkvazaar.c
index 5bc5b4ebf16b6..a89ca7f74951c 100644
--- a/libavcodec/libkvazaar.c
+++ b/libavcodec/libkvazaar.c
@@ -79,13 +79,23 @@ static av_cold int libkvazaar_init(AVCodecContext *avctx)
     cfg->width  = avctx->width;
     cfg->height = avctx->height;
 
-    if (avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Could not set framerate for kvazaar: integer overflow\n");
-        return AVERROR(EINVAL);
+    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+        /* avctx->framerate is already a frame rate (frames per second), so it
+         * maps directly onto kvazaar's numerator/denominator pair. */
+        cfg->framerate_num   = avctx->framerate.num;
+        cfg->framerate_denom = avctx->framerate.den;
+    } else {
+        /* No usable framerate: derive it from the time base, where one frame
+         * lasts ticks_per_frame ticks. The product computed below has to fit
+         * in an int, hence the overflow check. */
+        if (avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Could not set framerate for kvazaar: integer overflow\n");
+            return AVERROR(EINVAL);
+        }
+        cfg->framerate_num   = avctx->time_base.den;
+        cfg->framerate_denom = avctx->time_base.num * avctx->ticks_per_frame;
     }
-    cfg->framerate_num   = avctx->time_base.den;
-    cfg->framerate_denom = avctx->time_base.num * avctx->ticks_per_frame;
     cfg->target_bitrate = avctx->bit_rate;
     cfg->vui.sar_width  = avctx->sample_aspect_ratio.num;
     cfg->vui.sar_height = avctx->sample_aspect_ratio.den;
@@ -143,8 +153,8 @@ static av_cold int libkvazaar_close(AVCodecContext *avctx)
     LibkvazaarContext *ctx = avctx->priv_data;
 
     if (ctx->api) {
-      ctx->api->encoder_close(ctx->encoder);
-      ctx->api->config_destroy(ctx->config);
+        ctx->api->encoder_close(ctx->encoder);
+        ctx->api->config_destroy(ctx->config);
     }
 
     if (avctx->extradata)
@@ -170,7 +180,7 @@ static int libkvazaar_encode(AVCodecContext *avctx,
 
     if (frame) {
         if (frame->width != ctx->config->width ||
-                frame->height != ctx->config->height) {
+            frame->height != ctx->config->height) {
             av_log(avctx, AV_LOG_ERROR,
                    "Changing video dimensions during encoding is not supported. "
                    "(changed from %dx%d to %dx%d)\n",
@@ -223,8 +233,7 @@ static int libkvazaar_encode(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "Failed to encode frame.\n");
         retval = AVERROR_INVALIDDATA;
         goto done;
-    }
-    else
+    } else
         retval = 0; /* kvazaar returns 1 on success */
 
     if (data_out) {
@@ -249,7 +258,7 @@ static int libkvazaar_encode(AVCodecContext *avctx,
         // IRAP VCL NAL unit types span the range
         // [BLA_W_LP (16), RSV_IRAP_VCL23 (23)].
         if (frame_info.nal_unit_type >= KVZ_NAL_BLA_W_LP &&
-                frame_info.nal_unit_type <= KVZ_NAL_RSV_IRAP_VCL23) {
+            frame_info.nal_unit_type <= KVZ_NAL_RSV_IRAP_VCL23) {
             avpkt->flags |= AV_PKT_FLAG_KEY;
         }
 
@@ -293,7 +302,7 @@ AVCodec ff_libkvazaar_encoder = {
     .long_name        = NULL_IF_CONFIG_SMALL("libkvazaar H.265 / HEVC"),
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_HEVC,
-    .capabilities     = AV_CODEC_CAP_DELAY,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
     .pix_fmts         = pix_fmts,
 
     .priv_class       = &class,
diff --git a/libavcodec/libopenh264dec.c b/libavcodec/libopenh264dec.c
index f9b91ce3c05a6..c7aa7fa19ccec 100644
--- a/libavcodec/libopenh264dec.c
+++ b/libavcodec/libopenh264dec.c
@@ -109,10 +109,18 @@ static int svc_decode_frame(AVCodecContext *avctx, void *data,
 #endif
     } else {
         info.uiInBsTimeStamp = avpkt->pts;
+#if OPENH264_VER_AT_LEAST(1, 4)
+        // Contrary to the name, DecodeFrameNoDelay actually does buffering
+        // and reordering of frames, and is the recommended decoding entry
+        // point since 1.4. This is essential for successfully decoding
+        // B-frames.
+        state = (*s->decoder)->DecodeFrameNoDelay(s->decoder, avpkt->data, avpkt->size, ptrs, &info);
+#else
         state = (*s->decoder)->DecodeFrame2(s->decoder, avpkt->data, avpkt->size, ptrs, &info);
+#endif
     }
     if (state != dsErrorFree) {
-        av_log(avctx, AV_LOG_ERROR, "DecodeFrame2 failed\n");
+        av_log(avctx, AV_LOG_ERROR, "DecodeFrame failed\n");
         return AVERROR_UNKNOWN;
     }
     if (info.iBufferStatus != 1) {
diff --git a/libavcodec/libopenh264enc.c b/libavcodec/libopenh264enc.c
index 83c3f0ce208cb..ae6d17c6d2c19 100644
--- a/libavcodec/libopenh264enc.c
+++ b/libavcodec/libopenh264enc.c
@@ -75,7 +75,7 @@ static const AVOption options[] = {
 };
 
 static const AVClass class = {
-    .class_name = "libvo_amrwbenc",
+    .class_name = "libopenh264enc",
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,
@@ -164,6 +164,47 @@ FF_ENABLE_DEPRECATION_WARNINGS
     param.sSpatialLayers[0].iSpatialBitrate     = param.iTargetBitrate;
     param.sSpatialLayers[0].iMaxSpatialBitrate  = param.iMaxBitrate;
 
+#if OPENH264_VER_AT_LEAST(1, 7)
+    if (avctx->sample_aspect_ratio.num && avctx->sample_aspect_ratio.den) {
+        // Table E-1.
+        static const AVRational sar_idc[] = {
+            {   0,  0 }, // Unspecified (never written here).
+            {   1,  1 }, {  12, 11 }, {  10, 11 }, {  16, 11 },
+            {  40, 33 }, {  24, 11 }, {  20, 11 }, {  32, 11 },
+            {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
+            { 160, 99 }, // Last 3 are unknown to openh264: {   4,  3 }, {   3,  2 }, {   2,  1 },
+        };
+        static const ESampleAspectRatio asp_idc[] = {
+            ASP_UNSPECIFIED,
+            ASP_1x1,      ASP_12x11,   ASP_10x11,   ASP_16x11,
+            ASP_40x33,    ASP_24x11,   ASP_20x11,   ASP_32x11,
+            ASP_80x33,    ASP_18x11,   ASP_15x11,   ASP_64x33,
+            ASP_160x99,
+        };
+        int num, den, i;
+
+        av_reduce(&num, &den, avctx->sample_aspect_ratio.num,
+                  avctx->sample_aspect_ratio.den, 65535);
+
+        for (i = 1; i < FF_ARRAY_ELEMS(sar_idc); i++) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den)
+                break;
+        }
+        if (i == FF_ARRAY_ELEMS(sar_idc)) {
+            param.sSpatialLayers[0].eAspectRatio = ASP_EXT_SAR;
+            param.sSpatialLayers[0].sAspectRatioExtWidth = num;
+            param.sSpatialLayers[0].sAspectRatioExtHeight = den;
+        } else {
+            param.sSpatialLayers[0].eAspectRatio = asp_idc[i];
+        }
+        param.sSpatialLayers[0].bAspectRatioPresent = true;
+    }
+    else {
+        param.sSpatialLayers[0].bAspectRatioPresent = false;
+    }
+#endif
+
     if ((avctx->slices > 1) && (s->max_nal_size)) {
         av_log(avctx, AV_LOG_ERROR,
                "Invalid combination -slices %d and -max_nal_size %d.\n",
diff --git a/libavcodec/libopusdec.c b/libavcodec/libopusdec.c
index 2a97811d187cf..1724a49906f64 100644
--- a/libavcodec/libopusdec.c
+++ b/libavcodec/libopusdec.c
@@ -63,6 +63,8 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
     avc->sample_rate    = 48000;
     avc->sample_fmt     = avc->request_sample_fmt == AV_SAMPLE_FMT_FLT ?
                           AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_S16;
+    avc->channel_layout = avc->channels > 8 ? 0 :
+                          ff_vorbis_channel_layouts[avc->channels - 1];
 
     if (avc->extradata_size >= OPUS_HEAD_SIZE) {
         opus->pre_skip = AV_RL16(avc->extradata + 10);
@@ -86,35 +88,14 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
         mapping    = mapping_arr;
     }
 
-    if (channel_map == 1) {
-        avc->channel_layout = avc->channels > 8 ? 0 :
-                              ff_vorbis_channel_layouts[avc->channels - 1];
-        if (avc->channels > 2 && avc->channels <= 8) {
-            const uint8_t *vorbis_offset = ff_vorbis_channel_layout_offsets[avc->channels - 1];
-            int ch;
-
-            /* Remap channels from Vorbis order to ffmpeg order */
-            for (ch = 0; ch < avc->channels; ch++)
-                mapping_arr[ch] = mapping[vorbis_offset[ch]];
-            mapping = mapping_arr;
-        }
-    } else if (channel_map == 2) {
-        int ambisonic_order = ff_sqrt(avc->channels) - 1;
-        if (avc->channels != (ambisonic_order + 1) * (ambisonic_order + 1) &&
-            avc->channels != (ambisonic_order + 1) * (ambisonic_order + 1) + 2) {
-            av_log(avc, AV_LOG_ERROR,
-                   "Channel mapping 2 is only specified for channel counts"
-                   " which can be written as (n + 1)^2 or (n + 2)^2 + 2"
-                   " for nonnegative integer n\n");
-            return AVERROR_INVALIDDATA;
-        }
-        if (avc->channels > 227) {
-            av_log(avc, AV_LOG_ERROR, "Too many channels\n");
-            return AVERROR_INVALIDDATA;
-        }
-        avc->channel_layout = 0;
-    } else {
-        avc->channel_layout = 0;
+    if (avc->channels > 2 && avc->channels <= 8) {
+        const uint8_t *vorbis_offset = ff_vorbis_channel_layout_offsets[avc->channels - 1];
+        int ch;
+
+        /* Remap channels from Vorbis order to ffmpeg order */
+        for (ch = 0; ch < avc->channels; ch++)
+            mapping_arr[ch] = mapping[vorbis_offset[ch]];
+        mapping = mapping_arr;
     }
 
     opus->dec = opus_multistream_decoder_create(avc->sample_rate, avc->channels,
diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c
index 04f27d3396107..164dbda49b08d 100644
--- a/libavcodec/libvpxdec.c
+++ b/libavcodec/libvpxdec.c
@@ -47,8 +47,7 @@ static av_cold int vpx_init(AVCodecContext *avctx,
 {
     VPxContext *ctx = avctx->priv_data;
     struct vpx_codec_dec_cfg deccfg = {
-        /* token partitions+1 would be a decent choice */
-        .threads = FFMIN(avctx->thread_count, 16)
+        .threads = FFMIN(avctx->thread_count ? avctx->thread_count : av_cpu_count(), 16)
     };
 
     av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 09f7a88452873..c823b8ad8bf75 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -33,6 +33,7 @@
 #include "libavutil/avassert.h"
 #include "libvpx.h"
 #include "profiles.h"
+#include "libavutil/avstring.h"
 #include "libavutil/base64.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
@@ -75,6 +76,7 @@ typedef struct VPxEncoderContext {
     struct FrameListData *coded_frame_list;
 
     int cpu_used;
+    int sharpness;
     /**
      * VP8 specific flags, see VP8F_* below.
      */
@@ -98,6 +100,8 @@ typedef struct VPxEncoderContext {
     int rc_undershoot_pct;
     int rc_overshoot_pct;
 
+    char *vp8_ts_parameters;
+
     // VP9-only
     int lossless;
     int tile_columns;
@@ -111,6 +115,7 @@ typedef struct VPxEncoderContext {
     int row_mt;
     int tune_content;
     int corpus_complexity;
+    int tpl_model;
 } VPxContext;
 
 /** String mappings for enum vp8e_enc_control_id */
@@ -126,6 +131,7 @@ static const char *const ctlidstr[] = {
     [VP8E_SET_TUNING]            = "VP8E_SET_TUNING",
     [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
     [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
+    [VP8E_SET_SHARPNESS]               = "VP8E_SET_SHARPNESS",
 #if CONFIG_LIBVPX_VP9_ENCODER
     [VP9E_SET_LOSSLESS]                = "VP9E_SET_LOSSLESS",
     [VP9E_SET_TILE_COLUMNS]            = "VP9E_SET_TILE_COLUMNS",
@@ -146,6 +152,9 @@ static const char *const ctlidstr[] = {
 #ifdef VPX_CTRL_VP9E_SET_TUNE_CONTENT
     [VP9E_SET_TUNE_CONTENT]            = "VP9E_SET_TUNE_CONTENT",
 #endif
+#ifdef VPX_CTRL_VP9E_SET_TPL
+    [VP9E_SET_TPL]                     = "VP9E_SET_TPL",
+#endif
 #endif
 };
 
@@ -165,6 +174,7 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
 {
     int width = -30;
     int level = AV_LOG_DEBUG;
+    int i;
 
     av_log(avctx, level, "vpx_codec_enc_cfg\n");
     av_log(avctx, level, "generic settings\n"
@@ -204,6 +214,25 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            "  %*s%u\n  %*s%u\n",
            width, "rc_undershoot_pct:", cfg->rc_undershoot_pct,
            width, "rc_overshoot_pct:",  cfg->rc_overshoot_pct);
+    av_log(avctx, level, "temporal layering settings\n"
+           "  %*s%u\n", width, "ts_number_layers:", cfg->ts_number_layers);
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_target_bitrate:");
+    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+        av_log(avctx, level, "%u ", cfg->ts_target_bitrate[i]);
+    av_log(avctx, level, "\n");
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_rate_decimator:");
+    for (i = 0; i < VPX_TS_MAX_LAYERS; i++)
+        av_log(avctx, level, "%u ", cfg->ts_rate_decimator[i]);
+    av_log(avctx, level, "\n");
+    av_log(avctx, level,
+           "\n  %*s%u\n", width, "ts_periodicity:", cfg->ts_periodicity);
+    av_log(avctx, level,
+           "\n  %*s", width, "ts_layer_id:");
+    for (i = 0; i < VPX_TS_MAX_PERIODICITY; i++)
+        av_log(avctx, level, "%u ", cfg->ts_layer_id[i]);
+    av_log(avctx, level, "\n");
     av_log(avctx, level, "decoder buffer model\n"
             "  %*s%u\n  %*s%u\n  %*s%u\n",
             width, "rc_buf_sz:",         cfg->rc_buf_sz,
@@ -321,6 +350,62 @@ static av_cold int vpx_free(AVCodecContext *avctx)
     return 0;
 }
 
+/**
+ * Parse a comma-separated list of decimal numbers into an int array.
+ *
+ * Parsing stops after max_entries tokens; entries of dest beyond the last
+ * token are left untouched.
+ *
+ * @param dest        array receiving the parsed values
+ * @param value       list to parse, e.g. "1,2,4"; tokenized with av_strtok()
+ * @param value_len   length of value (not used by the parser itself)
+ * @param max_entries number of entries dest can hold
+ */
+static void vp8_ts_parse_int_array(int *dest, char *value, size_t value_len, int max_entries)
+{
+    int dest_idx = 0;
+    char *saveptr = NULL;
+    char *token = av_strtok(value, ",", &saveptr);
+
+    while (token && dest_idx < max_entries) {
+        dest[dest_idx++] = strtoul(token, NULL, 10);
+        token = av_strtok(NULL, ",", &saveptr);
+    }
+}
+
+/**
+ * Apply one key=value pair of the "ts-parameters" option to the encoder
+ * configuration.
+ *
+ * @param enccfg configuration whose ts_* fields are updated
+ * @param key    parameter name
+ * @param value  parameter value; list values are comma-separated
+ * @return 0 on success, -1 if value is empty or key is not one of the
+ *         temporal layering parameters handled here
+ */
+static int vp8_ts_param_parse(struct vpx_codec_enc_cfg *enccfg, char *key, char *value)
+{
+    size_t value_len = strlen(value);
+
+    if (!value_len)
+        return -1;
+
+    if (!strcmp(key, "ts_number_layers"))
+        enccfg->ts_number_layers = strtoul(value, &value, 10);
+    else if (!strcmp(key, "ts_target_bitrate"))
+        vp8_ts_parse_int_array(enccfg->ts_target_bitrate, value, value_len, VPX_TS_MAX_LAYERS);
+    else if (!strcmp(key, "ts_rate_decimator"))
+        vp8_ts_parse_int_array(enccfg->ts_rate_decimator, value, value_len, VPX_TS_MAX_LAYERS);
+    else if (!strcmp(key, "ts_periodicity"))
+        enccfg->ts_periodicity = strtoul(value, &value, 10);
+    else if (!strcmp(key, "ts_layer_id"))
+        vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
+    else
+        return -1; /* unknown key: report it so the caller can warn about typos */
+
+    return 0;
+}
+
 #if CONFIG_LIBVPX_VP9_ENCODER
 static int set_pix_fmt(AVCodecContext *avctx, vpx_codec_caps_t codec_caps,
                        struct vpx_codec_enc_cfg *enccfg, vpx_codec_flags_t *flags,
@@ -493,7 +555,8 @@ static av_cold int vpx_init(AVCodecContext *avctx,
     enccfg.g_h            = avctx->height;
     enccfg.g_timebase.num = avctx->time_base.num;
     enccfg.g_timebase.den = avctx->time_base.den;
-    enccfg.g_threads      = avctx->thread_count ? avctx->thread_count : av_cpu_count();
+    enccfg.g_threads      =
+        FFMIN(avctx->thread_count ? avctx->thread_count : av_cpu_count(), 16);
     enccfg.g_lag_in_frames= ctx->lag_in_frames;
 
     if (avctx->flags & AV_CODEC_FLAG_PASS1)
@@ -635,6 +698,22 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     enccfg.g_error_resilient = ctx->error_resilient || ctx->flags & VP8F_ERROR_RESILIENT;
 
+    if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8 && ctx->vp8_ts_parameters) {
+        AVDictionary *dict    = NULL;
+        AVDictionaryEntry* en = NULL;
+
+        if (!av_dict_parse_string(&dict, ctx->vp8_ts_parameters, "=", ":", 0)) {
+            while ((en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX))) {
+                if (vp8_ts_param_parse(&enccfg, en->key, en->value) < 0)
+                    av_log(avctx, AV_LOG_WARNING,
+                           "Error parsing option '%s = %s'.\n",
+                           en->key, en->value);
+            }
+
+            av_dict_free(&dict);
+        }
+    }
+
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
     res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
@@ -674,6 +753,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
         return AVERROR(EINVAL);
     }
 
+    if (ctx->sharpness >= 0)
+        codecctl_int(avctx, VP8E_SET_SHARPNESS, ctx->sharpness);
+
     if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
@@ -716,6 +798,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #ifdef VPX_CTRL_VP9E_SET_TUNE_CONTENT
         if (ctx->tune_content >= 0)
             codecctl_int(avctx, VP9E_SET_TUNE_CONTENT, ctx->tune_content);
+#endif
+#ifdef VPX_CTRL_VP9E_SET_TPL
+        if (ctx->tpl_model >= 0)
+            codecctl_int(avctx, VP9E_SET_TPL, ctx->tpl_model);
 #endif
     }
 #endif
@@ -1021,6 +1107,12 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
 #endif
         if (frame->pict_type == AV_PICTURE_TYPE_I)
             flags |= VPX_EFLAG_FORCE_KF;
+        if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8 && frame->metadata) {
+            AVDictionaryEntry* en = av_dict_get(frame->metadata, "vp8-flags", NULL, 0);
+            if (en) {
+                flags |= strtoul(en->value, NULL, 10);
+            }
+        }
     }
 
     res = vpx_codec_encode(&ctx->encoder, rawimg, timestamp,
@@ -1067,8 +1159,6 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 #define COMMON_OPTIONS \
-    { "auto-alt-ref",    "Enable use of alternate reference " \
-                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      2,       VE}, \
     { "lag-in-frames",   "Number of frames to look ahead for " \
                          "alternate reference frame selection",    OFFSET(lag_in_frames),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
     { "arnr-maxframes",  "altref noise reduction max frame count", OFFSET(arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
@@ -1108,11 +1198,16 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     {"arnr_strength", "altref noise reduction filter strength", offsetof(VPxContext, arnr_strength), AV_OPT_TYPE_INT, {.i64 = 3}, 0, 6, VE}, \
     {"arnr_type", "altref noise reduction filter type", offsetof(VPxContext, arnr_type), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 3, VE}, \
     {"rc_lookahead", "Number of frames to look ahead for alternate reference frame selection", offsetof(VPxContext, lag_in_frames), AV_OPT_TYPE_INT, {.i64 = 25}, 0, 25, VE}, \
+    {"sharpness", "Increase sharpness at the expense of lower PSNR", offsetof(VPxContext, sharpness), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 7, VE},
 
 #if CONFIG_LIBVPX_VP8_ENCODER
 static const AVOption vp8_options[] = {
     COMMON_OPTIONS
+    { "auto-alt-ref",    "Enable use of alternate reference "
+                         "frames (2-pass only)",                        OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1}, -1,  2, VE},
     { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
+    { "ts-parameters",   "Temporal scaling configuration using a "
+                         ":-separated list of key=value parameters",    OFFSET(vp8_ts_parameters), AV_OPT_TYPE_STRING, {.str=NULL},  0,  0, VE},
     LEGACY_OPTIONS
     { NULL }
 };
@@ -1121,6 +1216,8 @@ static const AVOption vp8_options[] = {
 #if CONFIG_LIBVPX_VP9_ENCODER
 static const AVOption vp9_options[] = {
     COMMON_OPTIONS
+    { "auto-alt-ref",    "Enable use of alternate reference "
+                         "frames (2-pass only)",                        OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
     { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1},  -8, 8, VE},
     { "lossless",        "Lossless mode",                               OFFSET(lossless),        AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE},
     { "tile-columns",    "Number of tile columns to use, log2",         OFFSET(tile_columns),    AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
@@ -1156,6 +1253,9 @@ static const AVOption vp9_options[] = {
 #endif
 #if VPX_ENCODER_ABI_VERSION >= 14
     { "corpus-complexity", "corpus vbr complexity midpoint", OFFSET(corpus_complexity), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 10000, VE },
+#endif
+#ifdef VPX_CTRL_VP9E_SET_TPL
+    { "enable-tpl",      "Enable temporal dependency model", OFFSET(tpl_model), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE },
 #endif
     LEGACY_OPTIONS
     { NULL }
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 54e6703d739b6..a3493f393da2a 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -40,6 +40,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+// from x264.h, for quant_offsets, Macroblocks are 16x16
+// blocks of pixels (with respect to the luma plane)
+#define MB_SIZE 16
+
 typedef struct X264Context {
     AVClass        *class;
     x264_param_t    params;
@@ -92,6 +96,9 @@ typedef struct X264Context {
     int noise_reduction;
 
     char *x264_params;
+
+    int nb_reordered_opaque, next_reordered_opaque;
+    int64_t *reordered_opaque;
 } X264Context;
 
 static void X264_log(void *p, int level, const char *fmt, va_list args)
@@ -278,6 +285,8 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
     int nnal, i, ret;
     x264_picture_t pic_out = {0};
     int pict_type;
+    int64_t *out_opaque;
+    AVFrameSideData *sd;
 
     x264_picture_init( &x4->pic );
     x4->pic.img.i_csp   = x4->params.i_csp;
@@ -297,6 +306,11 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
 
         x4->pic.i_pts  = frame->pts;
 
+        x4->reordered_opaque[x4->next_reordered_opaque] = frame->reordered_opaque;
+        x4->pic.opaque = &x4->reordered_opaque[x4->next_reordered_opaque];
+        x4->next_reordered_opaque++;
+        x4->next_reordered_opaque %= x4->nb_reordered_opaque;
+
         switch (frame->pict_type) {
         case AV_PICTURE_TYPE_I:
             x4->pic.i_type = x4->forced_idr > 0 ? X264_TYPE_IDR
@@ -336,6 +350,63 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 }
             }
         }
+
+        sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
+        if (sd) {
+            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+            } else {
+                if (frame->interlaced_frame == 0) {
+                    int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
+                    int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
+                    int nb_rois;
+                    AVRegionOfInterest* roi;
+                    float* qoffsets;
+                    qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));
+                    if (!qoffsets)
+                        return AVERROR(ENOMEM);
+
+                    nb_rois = sd->size / sizeof(AVRegionOfInterest);
+                    roi = (AVRegionOfInterest*)sd->data;
+                    for (int count = 0; count < nb_rois; count++) {
+                        int starty = FFMIN(mby, roi->top / MB_SIZE);
+                        int endy   = FFMIN(mby, (roi->bottom + MB_SIZE - 1)/ MB_SIZE);
+                        int startx = FFMIN(mbx, roi->left / MB_SIZE);
+                        int endx   = FFMIN(mbx, (roi->right + MB_SIZE - 1)/ MB_SIZE);
+                        float qoffset;
+
+                        if (roi->qoffset.den == 0) {
+                            av_free(qoffsets);
+                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den should not be zero.\n");
+                            return AVERROR(EINVAL);
+                        }
+                        qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;
+                        qoffset = av_clipf(qoffset, -1.0f, 1.0f);
+
+                        // 25 is a number that I think it is a possible proper scale value.
+                        qoffset = qoffset * 25;
+
+                        for (int y = starty; y < endy; y++) {
+                            for (int x = startx; x < endx; x++) {
+                                qoffsets[x + y*mbx] = qoffset;
+                            }
+                        }
+
+                        if (roi->self_size == 0) {
+                            av_free(qoffsets);
+                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.self_size should be set to sizeof(AVRegionOfInterest).\n");
+                            return AVERROR(EINVAL);
+                        }
+                        roi = (AVRegionOfInterest*)((char*)roi + roi->self_size);
+                    }
+
+                    x4->pic.prop.quant_offsets = qoffsets;
+                    x4->pic.prop.quant_offsets_free = av_free;
+                } else {
+                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+                }
+            }
+        }
     }
 
     do {
@@ -350,6 +421,14 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
     pkt->pts = pic_out.i_pts;
     pkt->dts = pic_out.i_dts;
 
+    out_opaque = pic_out.opaque;
+    if (out_opaque >= x4->reordered_opaque &&
+        out_opaque < &x4->reordered_opaque[x4->nb_reordered_opaque]) {
+        ctx->reordered_opaque = *out_opaque;
+    } else {
+        // Unexpected opaque pointer on picture output
+        ctx->reordered_opaque = 0;
+    }
 
     switch (pic_out.i_type) {
     case X264_TYPE_IDR:
@@ -393,6 +472,7 @@ static av_cold int X264_close(AVCodecContext *avctx)
 
     av_freep(&avctx->extradata);
     av_freep(&x4->sei);
+    av_freep(&x4->reordered_opaque);
 
     if (x4->enc) {
         x264_encoder_close(x4->enc);
@@ -846,6 +926,14 @@ FF_ENABLE_DEPRECATION_WARNINGS
     cpb_props->max_bitrate = x4->params.rc.i_vbv_max_bitrate * 1000;
     cpb_props->avg_bitrate = x4->params.rc.i_bitrate         * 1000;
 
+    // Overestimate the reordered opaque buffer size, in case a runtime
+    // reconfigure would increase the delay (which it shouldn't).
+    x4->nb_reordered_opaque = x264_encoder_maximum_delayed_frames(x4->enc) + 17;
+    x4->reordered_opaque    = av_malloc_array(x4->nb_reordered_opaque,
+                                              sizeof(*x4->reordered_opaque));
+    if (!x4->reordered_opaque)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -1059,12 +1147,12 @@ AVCodec ff_libx264_encoder = {
     .init             = X264_init,
     .encode2          = X264_frame,
     .close            = X264_close,
-    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS |
+                        AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
     .priv_class       = &x264_class,
     .defaults         = x264_defaults,
     .init_static_data = X264_init_static,
-    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
-                        FF_CODEC_CAP_INIT_CLEANUP,
+    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
     .wrapper_name     = "libx264",
 };
 #endif
@@ -1086,7 +1174,8 @@ AVCodec ff_libx264rgb_encoder = {
     .init           = X264_init,
     .encode2        = X264_frame,
     .close          = X264_close,
-    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS |
+                      AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
     .priv_class     = &rgbclass,
     .defaults       = x264_defaults,
     .pix_fmts       = pix_fmts_8bit_rgb,
@@ -1111,12 +1200,12 @@ AVCodec ff_libx262_encoder = {
     .init             = X264_init,
     .encode2          = X264_frame,
     .close            = X264_close,
-    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS |
+                        AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
     .priv_class       = &X262_class,
     .defaults         = x264_defaults,
     .pix_fmts         = pix_fmts_8bit,
-    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
-                        FF_CODEC_CAP_INIT_CLEANUP,
+    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
     .wrapper_name     = "libx264",
 };
 #endif
diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index 27c90b323feaa..fe39f45241726 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c
@@ -79,6 +79,7 @@ static av_cold int libx265_encode_close(AVCodecContext *avctx)
 static av_cold int libx265_encode_init(AVCodecContext *avctx)
 {
     libx265Context *ctx = avctx->priv_data;
+    AVCPBProperties *cpb_props = NULL;
 
     ctx->api = x265_api_get(av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth);
     if (!ctx->api)
@@ -208,6 +209,13 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
     ctx->params->rc.vbvBufferSize = avctx->rc_buffer_size / 1000;
     ctx->params->rc.vbvMaxBitrate = avctx->rc_max_rate    / 1000;
 
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+    cpb_props->buffer_size = ctx->params->rc.vbvBufferSize * 1000;
+    cpb_props->max_bitrate = ctx->params->rc.vbvMaxBitrate * 1000;
+    cpb_props->avg_bitrate = ctx->params->rc.bitrate       * 1000;
+
     if (!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER))
         ctx->params->bRepeatHeaders = 1;
 
@@ -285,6 +293,65 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
     return 0;
 }
 
+/* Build pic->quantOffsets from the frame's ROI side data; the map is freed by the caller after encoding. */
+static int libx265_encode_set_roi(libx265Context *ctx, const AVFrame *frame, x265_picture* pic)
+{
+    AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
+    const AVRegionOfInterest *roi;
+    if (sd && sd->size >= sizeof(roi->self_size)) {
+        if (ctx->params->rc.aqMode == X265_AQ_NONE) {
+            av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+        } else {
+            /* 8x8 block when qg-size is 8, 16*16 block otherwise. */
+            int mb_size = (ctx->params->rc.qgSize == 8) ? 8 : 16;
+            int mbx = (frame->width + mb_size - 1) / mb_size;
+            int mby = (frame->height + mb_size - 1) / mb_size;
+            int nb_rois;
+            uint32_t roi_size;
+            float *qoffsets;         /* will be freed after encode is called. */
+
+            /* Entries are self_size bytes apart, so validate it before deriving the count. */
+            roi      = (const AVRegionOfInterest*)sd->data;
+            roi_size = roi->self_size;
+            if (!roi_size || sd->size % roi_size != 0) {
+                av_log(ctx, AV_LOG_ERROR, "Invalid AVRegionOfInterest.self_size.\n");
+                return AVERROR(EINVAL);
+            }
+            nb_rois = sd->size / roi_size;
+
+            qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));
+            if (!qoffsets)
+                return AVERROR(ENOMEM);
+
+            /* Walk the list backwards: on overlap the first region must win, so it is written last. */
+            for (int i = nb_rois - 1; i >= 0; i--) {
+                int starty, endy, startx, endx;
+                float qoffset;
+                roi = (const AVRegionOfInterest*)(sd->data + roi_size * i);
+                /* Clamp to [0, block count] so out-of-frame rectangles cannot index outside qoffsets. */
+                starty = av_clip(roi->top / mb_size, 0, mby);
+                endy   = av_clip((roi->bottom + mb_size - 1) / mb_size, 0, mby);
+                startx = av_clip(roi->left / mb_size, 0, mbx);
+                endx   = av_clip((roi->right + mb_size - 1) / mb_size, 0, mbx);
+                if (roi->qoffset.den == 0) {
+                    av_free(qoffsets);
+                    av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
+                    return AVERROR(EINVAL);
+                }
+                /* x265 QP spans 0..51; the clipped [-1, 1] ratio is scaled by 25 to give [-25.0, 25.0]. */
+                qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;
+                qoffset = av_clipf(qoffset, -1.0f, 1.0f) * 25;
+
+                for (int y = starty; y < endy; y++)
+                    for (int x = startx; x < endx; x++)
+                        qoffsets[x + y*mbx] = qoffset;
+            }
+            pic->quantOffsets = qoffsets;
+        }
+    }
+    return 0;
+}
+
 static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                 const AVFrame *pic, int *got_packet)
 {
@@ -314,10 +381,17 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                             pic->pict_type == AV_PICTURE_TYPE_P ? X265_TYPE_P :
                             pic->pict_type == AV_PICTURE_TYPE_B ? X265_TYPE_B :
                             X265_TYPE_AUTO;
+
+        ret = libx265_encode_set_roi(ctx, pic, &x265pic);
+        if (ret < 0)
+            return ret;
     }
 
     ret = ctx->api->encoder_encode(ctx->encoder, &nal, &nnal,
                                    pic ? &x265pic : NULL, &x265pic_out);
+
+    av_freep(&x265pic.quantOffsets);
+
     if (ret < 0)
         return AVERROR_EXTERNAL;
 
diff --git a/libavcodec/libxavs2.c b/libavcodec/libxavs2.c
index 2b47d0c5d28e8..d5c455797d9ac 100644
--- a/libavcodec/libxavs2.c
+++ b/libavcodec/libxavs2.c
@@ -46,7 +46,6 @@ typedef struct XAVS2EContext {
     int min_qp;
     int preset_level;
     int log_level;
-    int hierarchical_reference;
 
     void *encoder;
     char *xavs2_opts;
@@ -78,20 +77,20 @@ static av_cold int xavs2_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     }
 
-    xavs2_opt_set2("width",     "%d", avctx->width);
-    xavs2_opt_set2("height",    "%d", avctx->height);
-    xavs2_opt_set2("bframes",   "%d", avctx->max_b_frames);
-    xavs2_opt_set2("bitdepth",  "%d", bit_depth);
-    xavs2_opt_set2("log",       "%d", cae->log_level);
-    xavs2_opt_set2("preset",    "%d", cae->preset_level);
+    xavs2_opt_set2("Width",     "%d", avctx->width);
+    xavs2_opt_set2("Height",    "%d", avctx->height);
+    xavs2_opt_set2("BFrames",   "%d", avctx->max_b_frames);
+    xavs2_opt_set2("BitDepth",  "%d", bit_depth);
+    xavs2_opt_set2("Log",       "%d", cae->log_level);
+    xavs2_opt_set2("Preset",    "%d", cae->preset_level);
 
-    /* not the same parameter as the IntraPeriod in xavs2 log */
-    xavs2_opt_set2("intraperiod",       "%d", avctx->gop_size);
+    xavs2_opt_set2("IntraPeriodMax",    "%d", avctx->gop_size);
+    xavs2_opt_set2("IntraPeriodMin",    "%d", avctx->gop_size);
 
-    xavs2_opt_set2("thread_frames",     "%d", avctx->thread_count);
-    xavs2_opt_set2("thread_rows",       "%d", cae->lcu_row_threads);
+    xavs2_opt_set2("ThreadFrames",      "%d", avctx->thread_count);
+    xavs2_opt_set2("ThreadRows",        "%d", cae->lcu_row_threads);
 
-    xavs2_opt_set2("OpenGOP",  "%d", 1);
+    xavs2_opt_set2("OpenGOP",  "%d", !(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP));
 
     if (cae->xavs2_opts) {
         AVDictionary *dict    = NULL;
@@ -109,11 +108,11 @@ static av_cold int xavs2_init(AVCodecContext *avctx)
     if (avctx->bit_rate > 0) {
         xavs2_opt_set2("RateControl",   "%d", 1);
         xavs2_opt_set2("TargetBitRate", "%"PRId64"", avctx->bit_rate);
-        xavs2_opt_set2("initial_qp",    "%d", cae->initial_qp);
-        xavs2_opt_set2("max_qp",        "%d", cae->max_qp);
-        xavs2_opt_set2("min_qp",        "%d", cae->min_qp);
+        xavs2_opt_set2("InitialQP",     "%d", cae->initial_qp);
+        xavs2_opt_set2("MaxQP",         "%d", avctx->qmax >= 0 ? avctx->qmax : cae->max_qp);
+        xavs2_opt_set2("MinQP",         "%d", avctx->qmin >= 0 ? avctx->qmin : cae->min_qp);
     } else {
-        xavs2_opt_set2("initial_qp",    "%d", cae->qp);
+        xavs2_opt_set2("InitialQP",     "%d", cae->qp);
     }
 
 
@@ -161,7 +160,7 @@ static void xavs2_copy_frame(xavs2_picture_t *pic, const AVFrame *frame)
 }
 
 static int xavs2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                      const AVFrame *frame, int *got_packet)
+                              const AVFrame *frame, int *got_packet)
 {
     XAVS2EContext *cae = avctx->priv_data;
     xavs2_picture_t pic;
@@ -175,22 +174,22 @@ static int xavs2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
     if (frame) {
         switch (frame->format) {
-            case AV_PIX_FMT_YUV420P:
-                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
-                    xavs2_copy_frame(&pic, frame);
-                } else {
-                    const int shift_in = atoi(cae->api->opt_get(cae->param, "SampleShift"));
-                    xavs2_copy_frame_with_shift(&pic, frame, shift_in);
-                }
+        case AV_PIX_FMT_YUV420P:
+            if (pic.img.in_sample_size == pic.img.enc_sample_size) {
+                xavs2_copy_frame(&pic, frame);
+            } else {
+                const int shift_in = atoi(cae->api->opt_get(cae->param, "SampleShift"));
+                xavs2_copy_frame_with_shift(&pic, frame, shift_in);
+            }
             break;
-            case AV_PIX_FMT_YUV420P10:
-                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
-                    xavs2_copy_frame(&pic, frame);
-                    break;
-                }
-            default:
-                av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format\n");
-                return AVERROR(EINVAL);
+        case AV_PIX_FMT_YUV420P10:
+            if (pic.img.in_sample_size == pic.img.enc_sample_size) {
+                xavs2_copy_frame(&pic, frame);
+                break;
+            }
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format\n");
+            return AVERROR(EINVAL);
             break;
         }
 
@@ -271,7 +270,7 @@ static const AVClass libxavs2 = {
 
 static const AVCodecDefault xavs2_defaults[] = {
     { "b",                "0" },
-    { "g",                "48" },
+    { "g",                "48"},
     { "bf",               "7" },
     { NULL },
 };
@@ -286,7 +285,8 @@ AVCodec ff_libxavs2_encoder = {
     .encode2        = xavs2_encode_frame,
     .close          = xavs2_close,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
+                                                     AV_PIX_FMT_NONE },
     .priv_class     = &libxavs2,
     .defaults       = xavs2_defaults,
     .wrapper_name   = "libxavs2",
diff --git a/libavcodec/lzw.c b/libavcodec/lzw.c
index b0b9a343585e6..e26e4829eedf8 100644
--- a/libavcodec/lzw.c
+++ b/libavcodec/lzw.c
@@ -71,6 +71,9 @@ static int lzw_get_code(struct LZWState * s)
 {
     int c;
 
+    if (s->bbits < s->cursize && bytestream2_get_bytes_left(&s->gb) <= 0)
+        return s->end_code;
+
     if(s->mode == FF_LZW_GIF) {
         while (s->bbits < s->cursize) {
             if (!s->bs) {
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 1f659a0037ef7..c5b54d55c0853 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -22,6 +22,7 @@ OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o      \
                                              mips/hevcpred_init_mips.o
 OBJS-$(CONFIG_VP9_DECODER)                += mips/vp9dsp_init_mips.o
 OBJS-$(CONFIG_VP8_DECODER)                += mips/vp8dsp_init_mips.o
+OBJS-$(CONFIG_VP3DSP)                     += mips/vp3dsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)                    += mips/h264dsp_init_mips.o
 OBJS-$(CONFIG_H264QPEL)                   += mips/h264qpel_init_mips.o
 OBJS-$(CONFIG_H264CHROMA)                 += mips/h264chroma_init_mips.o
@@ -54,6 +55,7 @@ MSA-OBJS-$(CONFIG_VP9_DECODER)            += mips/vp9_mc_msa.o             \
 MSA-OBJS-$(CONFIG_VP8_DECODER)            += mips/vp8_mc_msa.o             \
                                              mips/vp8_idct_msa.o           \
                                              mips/vp8_lpf_msa.o
+MSA-OBJS-$(CONFIG_VP3DSP)                 += mips/vp3dsp_idct_msa.o
 MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o            \
                                              mips/h264idct_msa.o
 MSA-OBJS-$(CONFIG_H264QPEL)               += mips/h264qpel_msa.o
@@ -84,3 +86,6 @@ MMI-OBJS-$(CONFIG_VP8_DECODER)            += mips/vp8dsp_mmi.o
 MMI-OBJS-$(CONFIG_HPELDSP)                += mips/hpeldsp_mmi.o
 MMI-OBJS-$(CONFIG_VC1_DECODER)            += mips/vc1dsp_mmi.o
 MMI-OBJS-$(CONFIG_WMV2DSP)                += mips/wmv2dsp_mmi.o
+MMI-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_mmi.o
+MMI-OBJS-$(CONFIG_VP3DSP)                 += mips/vp3dsp_idct_mmi.o
+MMI-OBJS-$(CONFIG_VP9_DECODER)            += mips/vp9_mc_mmi.o
diff --git a/libavcodec/mips/blockdsp_init_mips.c b/libavcodec/mips/blockdsp_init_mips.c
index 30ae95fa10a1d..55ac1c3e99497 100644
--- a/libavcodec/mips/blockdsp_init_mips.c
+++ b/libavcodec/mips/blockdsp_init_mips.c
@@ -45,10 +45,10 @@ static av_cold void blockdsp_init_mmi(BlockDSPContext *c)
 
 void ff_blockdsp_init_mips(BlockDSPContext *c)
 {
-#if HAVE_MSA
-    blockdsp_init_msa(c);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     blockdsp_init_mmi(c);
 #endif /* HAVE_MMI */
+#if HAVE_MSA
+    blockdsp_init_msa(c); /* now called after the MMI init; presumably so MSA versions take precedence when both are built */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/h264chroma_init_mips.c b/libavcodec/mips/h264chroma_init_mips.c
index 122148dc78c98..ae817e47ae658 100644
--- a/libavcodec/mips/h264chroma_init_mips.c
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -54,10 +54,10 @@ static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
 
 av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
 {
-#if HAVE_MSA
-    h264chroma_init_msa(c, bit_depth);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     h264chroma_init_mmi(c, bit_depth);
 #endif /* HAVE_MMI */
+#if HAVE_MSA
+    h264chroma_init_msa(c, bit_depth); /* now called after the MMI init; presumably so MSA versions take precedence when both are built */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/h264chroma_mmi.c b/libavcodec/mips/h264chroma_mmi.c
index 91b2cc470d133..739dd7d4d6955 100644
--- a/libavcodec/mips/h264chroma_mmi.c
+++ b/libavcodec/mips/h264chroma_mmi.c
@@ -30,74 +30,177 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         int h, int x, int y)
 {
     int A = 64, B, C, D, E;
-    double ftmp[10];
+    double ftmp[12];
     uint64_t tmp[1];
 
     if (!(x || y)) {
         /* x=0, y=0, A=64 */
         __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
-            "dli        %[tmp0],    0x06                               \n\t"
-            "mtc1       %[tmp0],    %[ftmp4]                           \n\t"
-
             "1:                                                        \n\t"
+            MMI_ULDC1(%[ftmp0], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
             MMI_ULDC1(%[ftmp1], %[src], 0x00)
-            "addi       %[h],       %[h],           -0x04              \n\t"
             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-            MMI_ULDC1(%[ftmp5], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-            MMI_ULDC1(%[ftmp6], %[src], 0x00)
+            MMI_ULDC1(%[ftmp3], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-            MMI_ULDC1(%[ftmp7], %[src], 0x00)
 
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]           \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
-            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
-            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            "addi       %[h],       %[h],           -0x04              \n\t"
 
-            "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]           \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]           \n\t"
-            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
-            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
+            MMI_SDC1(%[ftmp0], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp2], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp3], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            "bnez       %[h],       1b                                 \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride)
+            : "memory"
+        );
+    } else if (x && y) {
+        /* x!=0, y!=0 */
+        D = x * y;
+        B = (x << 3) - D;
+        C = (y << 3) - D;
+        A = 64 - D - B - C;
 
-            "punpcklbh  %[ftmp2],   %[ftmp6],       %[ftmp0]           \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp6],       %[ftmp0]           \n\t"
-            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
-            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+            "dli        %[tmp0],    0x06                               \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+            "pshufh     %[B],       %[B],           %[ftmp0]           \n\t"
+            "mtc1       %[tmp0],    %[ftmp9]                           \n\t"
+            "pshufh     %[C],       %[C],           %[ftmp0]           \n\t"
+            "pshufh     %[D],       %[D],           %[ftmp0]           \n\t"
+
+            "1:                                                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp3], %[src], 0x00)
+            MMI_ULDC1(%[ftmp4], %[src], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp10], %[src], 0x00)
+            MMI_ULDC1(%[ftmp11], %[src], 0x01)
+            "addi       %[h],       %[h],           -0x02              \n\t"
+
+            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
+            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
+            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
+            "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
+            "paddh      %[ftmp6],   %[ftmp6],       %[ftmp8]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp5]           \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]           \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+
+            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
+            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
+            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp10],      %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp10],      %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp11],      %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp11],      %[ftmp0]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
+            "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
+            "paddh      %[ftmp6],   %[ftmp6],       %[ftmp8]           \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp9]           \n\t"
+            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp9]           \n\t"
+            "packushb   %[ftmp3],   %[ftmp3],       %[ftmp4]           \n\t"
+
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp3], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            "bnez       %[h],       1b                                 \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
+              [ftmp10]"=&f"(ftmp[10]),      [ftmp11]"=&f"(ftmp[11]),
+              [tmp0]"=&r"(tmp[0]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
+              [A]"f"(A),                    [B]"f"(B),
+              [C]"f"(C),                    [D]"f"(D)
+            : "memory"
+        );
+    } else if (x) {
+        /* x!=0, y==0 */
+        E = x << 3;
+        A = 64 - E;
+
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+            "dli        %[tmp0],    0x06                               \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+            "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
+            "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
+
+            "1:                                                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            "addi       %[h],       %[h],           -0x01              \n\t"
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+
+            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
+            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
+            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
+            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
 
-            "punpcklbh  %[ftmp2],   %[ftmp7],       %[ftmp0]           \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp7],       %[ftmp0]           \n\t"
-            "psllh      %[ftmp1],   %[ftmp2],       %[ftmp4]           \n\t"
-            "psllh      %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]           \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
-
-            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
             "bnez       %[h],       1b                                 \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
@@ -107,220 +210,80 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
               [tmp0]"=&r"(tmp[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32)
+            : [stride]"r"((mips_reg)stride),
+              [ff_pw_32]"f"(ff_pw_32),
+              [A]"f"(A),                    [E]"f"(E)
             : "memory"
         );
     } else {
-        if (x && y) {
-            /* x!=0, y!=0 */
-            D = x * y;
-            B = (x << 3) - D;
-            C = (y << 3) - D;
-            A = 64 - D - B - C;
-
-            __asm__ volatile (
-                "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
-                "dli        %[tmp0],    0x06                               \n\t"
-                "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
-                "pshufh     %[B],       %[B],           %[ftmp0]           \n\t"
-                "mtc1       %[tmp0],    %[ftmp9]                           \n\t"
-                "pshufh     %[C],       %[C],           %[ftmp0]           \n\t"
-                "pshufh     %[D],       %[D],           %[ftmp0]           \n\t"
-
-                "1:                                                        \n\t"
-                MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                MMI_ULDC1(%[ftmp2], %[src], 0x01)
-                PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-                MMI_ULDC1(%[ftmp3], %[src], 0x00)
-                MMI_ULDC1(%[ftmp4], %[src], 0x01)
-                "addi       %[h],       %[h],           -0x02              \n\t"
-
-                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
-                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
-                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
-
-                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
-                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
-                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
-
-                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
-                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
-                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
-                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-                MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
-
-                MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                MMI_ULDC1(%[ftmp2], %[src], 0x01)
-                PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-                MMI_ULDC1(%[ftmp3], %[src], 0x00)
-                MMI_ULDC1(%[ftmp4], %[src], 0x01)
-
-                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]           \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]           \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[A]               \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[B]               \n\t"
-                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]           \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[A]               \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[B]               \n\t"
-                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]           \n\t"
-
-                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]           \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]           \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]           \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[C]               \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[D]               \n\t"
-                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]           \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[C]               \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[D]               \n\t"
-                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]           \n\t"
-
-                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]           \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]           \n\t"
-                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]           \n\t"
-                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]           \n\t"
-                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-                MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
-
-                "bnez       %[h],       1b                                 \n\t"
-                : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                  [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                  [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                  [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                  [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-                  [tmp0]"=&r"(tmp[0]),
-                  [dst]"+&r"(dst),              [src]"+&r"(src),
-                  [h]"+&r"(h)
-                : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-                  [A]"f"(A),                    [B]"f"(B),
-                  [C]"f"(C),                    [D]"f"(D)
-                : "memory"
-            );
-        } else {
-            if (x) {
-                /* x!=0, y==0 */
-                E = x << 3;
-                A = 64 - E;
-
-                __asm__ volatile (
-                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
-                    "dli        %[tmp0],    0x06                               \n\t"
-                    "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
-                    "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
-                    "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
-
-                    "1:                                                        \n\t"
-                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                    MMI_ULDC1(%[ftmp2], %[src], 0x01)
-                    "addi       %[h],       %[h],           -0x01              \n\t"
-                    PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-
-                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
-                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
-                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
-                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
-                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
-                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
-                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
-                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
-                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
-                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
-
-                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
-                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
-                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                    PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
-                    "bnez       %[h],       1b                                 \n\t"
-                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                      [tmp0]"=&r"(tmp[0]),
-                      [dst]"+&r"(dst),              [src]"+&r"(src),
-                      [h]"+&r"(h)
-                    : [stride]"r"((mips_reg)stride),
-                      [ff_pw_32]"f"(ff_pw_32),
-                      [A]"f"(A),                    [E]"f"(E)
-                    : "memory"
-                );
-            } else {
-                /* x==0, y!=0 */
-                E = y << 3;
-                A = 64 - E;
-
-                __asm__ volatile (
-                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
-                    "dli        %[tmp0],    0x06                               \n\t"
-                    "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
-                    "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
-                    "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
-
-                    "1:                                                        \n\t"
-                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                    PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
-                    MMI_ULDC1(%[ftmp2], %[src], 0x00)
-                    "addi       %[h],       %[h],           -0x01              \n\t"
-
-                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
-                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
-                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
-                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
-                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
-                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
-                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]           \n\t"
-                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
-                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
-                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]           \n\t"
-
-                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]        \n\t"
-                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]        \n\t"
-                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]           \n\t"
-                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]           \n\t"
-                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]           \n\t"
-                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
-
-                    PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
-                    "bnez       %[h],       1b                                 \n\t"
-                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                      [tmp0]"=&r"(tmp[0]),
-                      [dst]"+&r"(dst),              [src]"+&r"(src),
-                      [h]"+&r"(h)
-                    : [stride]"r"((mips_reg)stride),
-                      [ff_pw_32]"f"(ff_pw_32),
-                      [A]"f"(A),                    [E]"f"(E)
-                    : "memory"
-                );
-            }
-        }
+        /* x==0, y!=0 */
+        E = y << 3;
+        A = 64 - E;
+
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]           \n\t"
+            "dli        %[tmp0],    0x06                               \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]           \n\t"
+            "pshufh     %[E],       %[E],           %[ftmp0]           \n\t"
+            "mtc1       %[tmp0],    %[ftmp7]                           \n\t"
+
+            "1:                                                        \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp2], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]          \n\t"
+            MMI_ULDC1(%[ftmp8], %[src], 0x00)
+            "addi       %[h],       %[h],           -0x02              \n\t"
+
+            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
+            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp7]           \n\t"
+            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp7]           \n\t"
+            "packushb   %[ftmp1],   %[ftmp3],       %[ftmp4]           \n\t"
+
+            "punpcklbh  %[ftmp3],   %[ftmp2],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp4],   %[ftmp2],       %[ftmp0]           \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp8],       %[ftmp0]           \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp8],       %[ftmp0]           \n\t"
+            "pmullh     %[ftmp3],   %[ftmp3],       %[A]               \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[E]               \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]           \n\t"
+            "pmullh     %[ftmp4],   %[ftmp4],       %[A]               \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[E]               \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]           \n\t"
+            "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_32]        \n\t"
+            "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_32]        \n\t"
+            "psrlh      %[ftmp3],   %[ftmp3],       %[ftmp7]           \n\t"
+            "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp7]           \n\t"
+            "packushb   %[ftmp2],   %[ftmp3],       %[ftmp4]           \n\t"
+
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            MMI_SDC1(%[ftmp2], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]          \n\t"
+            "bnez       %[h],       1b                                 \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+              [ftmp8]"=&f"(ftmp[8]),        [tmp0]"=&r"(tmp[0]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride),
+              [ff_pw_32]"f"(ff_pw_32),
+              [A]"f"(A),                    [E]"f"(E)
+            : "memory"
+        );
     }
 }
 
@@ -334,231 +297,200 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
     if(!(x || y)){
         /* x=0, y=0, A=64 */
         __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
-
             "1:                                                         \n\t"
-            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp0], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            MMI_ULDC1(%[ftmp5], %[src], 0x00)
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             MMI_LDC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-
-            "punpcklbh  %[ftmp2],   %[ftmp5],       %[ftmp0]            \n\t"
-            "punpckhbh  %[ftmp3],   %[ftmp5],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "pmullh     %[ftmp2],   %[ftmp3],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp4]            \n\t"
-            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            MMI_LDC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
+            MMI_LDC1(%[ftmp3], %[dst], 0x00)
+            PTR_SUBU   "%[dst],     %[dst],         %[stride]           \n\t"
+            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
+            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
+            MMI_SDC1(%[ftmp0], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             MMI_SDC1(%[ftmp1], %[dst], 0x00)
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-
             "addi       %[h],       %[h],           -0x02               \n\t"
             "bnez       %[h],       1b                                  \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride)
+            : "memory"
+        );
+    } else if (x && y) {
+        /* x!=0, y!=0 */
+        D = x * y;
+        B = (x << 3) - D;
+        C = (y << 3) - D;
+        A = 64 - D - B - C;
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+            "dli        %[tmp0],    0x06                           \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+            "pshufh     %[B],       %[B],           %[ftmp0]       \n\t"
+            "mtc1       %[tmp0],    %[ftmp9]                       \n\t"
+            "pshufh     %[C],       %[C],           %[ftmp0]       \n\t"
+            "pshufh     %[D],       %[D],           %[ftmp0]       \n\t"
+
+            "1:                                                    \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+            MMI_ULDC1(%[ftmp3], %[src], 0x00)
+            MMI_ULDC1(%[ftmp4], %[src], 0x01)
+            "addi       %[h],       %[h],           -0x01          \n\t"
+
+            "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]       \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[A]           \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[B]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]       \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[A]           \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[B]           \n\t"
+            "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]       \n\t"
+
+            "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]       \n\t"
+            "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]       \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[C]           \n\t"
+            "pmullh     %[ftmp7],   %[ftmp7],       %[D]           \n\t"
+            "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]       \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[C]           \n\t"
+            "pmullh     %[ftmp8],   %[ftmp8],       %[D]           \n\t"
+            "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]       \n\t"
+
+            "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]       \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]       \n\t"
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]       \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]       \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
+            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+            "bnez       %[h],       1b                             \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+              [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
               [tmp0]"=&r"(tmp[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A)
+              [A]"f"(A),                    [B]"f"(B),
+              [C]"f"(C),                    [D]"f"(D)
+            : "memory"
+        );
+    } else if (x) {
+        /* x!=0, y==0 */
+        E = x << 3;
+        A = 64 - E;
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+            "dli        %[tmp0],    0x06                           \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+            "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
+            "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
+
+            "1:                                                    \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            MMI_ULDC1(%[ftmp2], %[src], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+            "addi       %[h],       %[h],           -0x01          \n\t"
+
+            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
+            "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
+            "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
+            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
+
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
+            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+            "bnez       %[h],       1b                             \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+              [tmp0]"=&r"(tmp[0]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride),
+              [ff_pw_32]"f"(ff_pw_32),
+              [A]"f"(A),                    [E]"f"(E)
             : "memory"
         );
     } else {
-        if(x && y) {
-            /* x!=0, y!=0 */
-            D = x * y;
-            B = (x << 3) - D;
-            C = (y << 3) - D;
-            A = 64 - D - B - C;
-            __asm__ volatile (
-                "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
-                "dli        %[tmp0],    0x06                           \n\t"
-                "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
-                "pshufh     %[B],       %[B],           %[ftmp0]       \n\t"
-                "mtc1       %[tmp0],    %[ftmp9]                       \n\t"
-                "pshufh     %[C],       %[C],           %[ftmp0]       \n\t"
-                "pshufh     %[D],       %[D],           %[ftmp0]       \n\t"
-
-                "1:                                                    \n\t"
-                MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                MMI_ULDC1(%[ftmp2], %[src], 0x01)
-                PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
-                MMI_ULDC1(%[ftmp3], %[src], 0x00)
-                MMI_ULDC1(%[ftmp4], %[src], 0x01)
-                "addi       %[h],       %[h],           -0x01          \n\t"
-
-                "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]       \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp1],       %[ftmp0]       \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]       \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]       \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[A]           \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[B]           \n\t"
-                "paddh      %[ftmp1],   %[ftmp5],       %[ftmp7]       \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[A]           \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[B]           \n\t"
-                "paddh      %[ftmp2],   %[ftmp6],       %[ftmp8]       \n\t"
-
-                "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]       \n\t"
-                "punpckhbh  %[ftmp6],   %[ftmp3],       %[ftmp0]       \n\t"
-                "punpcklbh  %[ftmp7],   %[ftmp4],       %[ftmp0]       \n\t"
-                "punpckhbh  %[ftmp8],   %[ftmp4],       %[ftmp0]       \n\t"
-                "pmullh     %[ftmp5],   %[ftmp5],       %[C]           \n\t"
-                "pmullh     %[ftmp7],   %[ftmp7],       %[D]           \n\t"
-                "paddh      %[ftmp3],   %[ftmp5],       %[ftmp7]       \n\t"
-                "pmullh     %[ftmp6],   %[ftmp6],       %[C]           \n\t"
-                "pmullh     %[ftmp8],   %[ftmp8],       %[D]           \n\t"
-                "paddh      %[ftmp4],   %[ftmp6],       %[ftmp8]       \n\t"
-
-                "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]       \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]       \n\t"
-                "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
-                "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
-                "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]       \n\t"
-                "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp9]       \n\t"
-                "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                MMI_LDC1(%[ftmp2], %[dst], 0x00)
-                "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
-                "bnez       %[h],       1b                             \n\t"
-                : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                  [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                  [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                  [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                  [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
-                  [tmp0]"=&r"(tmp[0]),
-                  [dst]"+&r"(dst),              [src]"+&r"(src),
-                  [h]"+&r"(h)
-                : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-                  [A]"f"(A),                    [B]"f"(B),
-                  [C]"f"(C),                    [D]"f"(D)
-                : "memory"
-            );
-        } else {
-            if(x) {
-                /* x!=0, y==0 */
-                E = x << 3;
-                A = 64 - E;
-                __asm__ volatile (
-                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
-                    "dli        %[tmp0],    0x06                           \n\t"
-                    "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
-                    "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
-                    "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
-
-                    "1:                                                    \n\t"
-                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                    MMI_ULDC1(%[ftmp2], %[src], 0x01)
-                    PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
-                    "addi       %[h],       %[h],           -0x01          \n\t"
-
-                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
-                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
-                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
-                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
-                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
-                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
-                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
-                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
-                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
-                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
-
-                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
-                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
-                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
-                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
-                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                    MMI_LDC1(%[ftmp2], %[dst], 0x00)
-                    "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                    PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
-                    "bnez       %[h],       1b                             \n\t"
-                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                      [tmp0]"=&r"(tmp[0]),
-                      [dst]"+&r"(dst),              [src]"+&r"(src),
-                      [h]"+&r"(h)
-                    : [stride]"r"((mips_reg)stride),
-                      [ff_pw_32]"f"(ff_pw_32),
-                      [A]"f"(A),                    [E]"f"(E)
-                    : "memory"
-                );
-            } else {
-                /* x==0, y!=0 */
-                E = y << 3;
-                A = 64 - E;
-                __asm__ volatile (
-                    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
-                    "dli        %[tmp0],    0x06                           \n\t"
-                    "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
-                    "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
-                    "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
-
-                    "1:                                                    \n\t"
-                    MMI_ULDC1(%[ftmp1], %[src], 0x00)
-                    PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
-                    MMI_ULDC1(%[ftmp2], %[src], 0x00)
-                    "addi       %[h],       %[h],           -0x01          \n\t"
-
-                    "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
-                    "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
-                    "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
-                    "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
-                    "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
-                    "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
-                    "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
-                    "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
-                    "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
-                    "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
-
-                    "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
-                    "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
-                    "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
-                    "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
-                    "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                    MMI_LDC1(%[ftmp2], %[dst], 0x00)
-                    "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
-                    MMI_SDC1(%[ftmp1], %[dst], 0x00)
-                    PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
-                    "bnez       %[h],       1b                             \n\t"
-                    : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-                      [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-                      [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
-                      [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
-                      [tmp0]"=&r"(tmp[0]),
-                      [dst]"+&r"(dst),              [src]"+&r"(src),
-                      [h]"+&r"(h)
-                    : [stride]"r"((mips_reg)stride),
-                      [ff_pw_32]"f"(ff_pw_32),
-                      [A]"f"(A),                    [E]"f"(E)
-                    : "memory"
-                );
-            }
-        }
+        /* x==0, y!=0 */
+        E = y << 3;
+        A = 64 - E;
+        __asm__ volatile (
+            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]       \n\t"
+            "dli        %[tmp0],    0x06                           \n\t"
+            "pshufh     %[A],       %[A],           %[ftmp0]       \n\t"
+            "pshufh     %[E],       %[E],           %[ftmp0]       \n\t"
+            "mtc1       %[tmp0],    %[ftmp7]                       \n\t"
+
+            "1:                                                    \n\t"
+            MMI_ULDC1(%[ftmp1], %[src], 0x00)
+            PTR_ADDU   "%[src],     %[src],         %[stride]      \n\t"
+            MMI_ULDC1(%[ftmp2], %[src], 0x00)
+            "addi       %[h],       %[h],           -0x01          \n\t"
+
+            "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]       \n\t"
+            "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]       \n\t"
+            "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]       \n\t"
+            "pmullh     %[ftmp3],   %[ftmp3],       %[A]           \n\t"
+            "pmullh     %[ftmp5],   %[ftmp5],       %[E]           \n\t"
+            "paddh      %[ftmp1],   %[ftmp3],       %[ftmp5]       \n\t"
+            "pmullh     %[ftmp4],   %[ftmp4],       %[A]           \n\t"
+            "pmullh     %[ftmp6],   %[ftmp6],       %[E]           \n\t"
+            "paddh      %[ftmp2],   %[ftmp4],       %[ftmp6]       \n\t"
+
+            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]    \n\t"
+            "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_32]    \n\t"
+            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]       \n\t"
+            "psrlh      %[ftmp2],   %[ftmp2],       %[ftmp7]       \n\t"
+            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_LDC1(%[ftmp2], %[dst], 0x00)
+            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]       \n\t"
+            MMI_SDC1(%[ftmp1], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]      \n\t"
+            "bnez       %[h],       1b                             \n\t"
+            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
+              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
+              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
+              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
+              [tmp0]"=&r"(tmp[0]),
+              [dst]"+&r"(dst),              [src]"+&r"(src),
+              [h]"+&r"(h)
+            : [stride]"r"((mips_reg)stride),
+              [ff_pw_32]"f"(ff_pw_32),
+              [A]"f"(A),                    [E]"f"(E)
+            : "memory"
+        );
     }
 }
 
@@ -567,8 +499,8 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 {
     const int A = (8 - x) * (8 - y);
     const int B = x * (8 - y);
-    const int C = (8 - x) *  y;
-    const int D = x *  y;
+    const int C = (8 - x) * y;
+    const int D = x * y;
     const int E = B + C;
     double ftmp[8];
     uint64_t tmp[1];
@@ -586,31 +518,29 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
 
             "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
             MMI_ULWC1(%[ftmp2], %[src], 0x01)
-            MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
-            MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+            MMI_ULWC1(%[ftmp3], %[src], 0x00)
+            MMI_ULWC1(%[ftmp4], %[src], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
-
             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
-
             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
+
             "addi       %[h],       %[h],           -0x01               \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
@@ -619,7 +549,6 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
               RESTRICT_ASM_LOW32
-              [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -629,7 +558,6 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         );
     } else if (E) {
         const int step = C ? stride : 1;
-
         __asm__ volatile (
             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
             "dli        %[tmp0],    0x06                                \n\t"
@@ -638,22 +566,20 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
 
             "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
-
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+            "addi       %[h],       %[h],           -0x01               \n\t"
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
-
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
@@ -671,42 +597,22 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         );
     } else {
         __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
-
             "1:                                                         \n\t"
-            MMI_ULWC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
+            MMI_ULWC1(%[ftmp0], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
+            MMI_SWC1(%[ftmp0], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
-              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_LOW32
               [dst]"+&r"(dst),              [src]"+&r"(src),
+              RESTRICT_ASM_LOW32
               [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A)
+            : [stride]"r"((mips_reg)stride)
             : "memory"
         );
     }
@@ -736,33 +642,31 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
             "pshufh     %[D],       %[D],           %[ftmp0]            \n\t"
 
             "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[stride]           \n\t"
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
             MMI_ULWC1(%[ftmp2], %[src], 0x01)
-            MMI_ULWC1(%[ftmp3], %[addr0], 0x00)
-            MMI_ULWC1(%[ftmp4], %[addr0], 0x01)
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+            MMI_ULWC1(%[ftmp3], %[src], 0x00)
+            MMI_ULWC1(%[ftmp4], %[src], 0x01)
 
             "punpcklbh  %[ftmp5],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp2],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp5],   %[ftmp5],       %[A]                \n\t"
             "pmullh     %[ftmp6],   %[ftmp6],       %[B]                \n\t"
             "paddh      %[ftmp1],   %[ftmp5],       %[ftmp6]            \n\t"
-
             "punpcklbh  %[ftmp5],   %[ftmp3],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp6],   %[ftmp4],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp5],   %[ftmp5],       %[C]                \n\t"
             "pmullh     %[ftmp6],   %[ftmp6],       %[D]                \n\t"
             "paddh      %[ftmp2],   %[ftmp5],       %[ftmp6]            \n\t"
-
             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
+
             "addi       %[h],       %[h],           -0x01               \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
@@ -771,7 +675,6 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
               [tmp0]"=&r"(tmp[0]),
               RESTRICT_ASM_LOW32
-              [addr0]"=&r"(addr[0]),
               [dst]"+&r"(dst),              [src]"+&r"(src),
               [h]"+&r"(h)
             : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
@@ -781,32 +684,30 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         );
     } else if (E) {
         const int step = C ? stride : 1;
-
         __asm__ volatile (
             "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
             "dli        %[tmp0],    0x06                                \n\t"
             "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
             "pshufh     %[E],       %[E],           %[ftmp0]            \n\t"
             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
+
             "1:                                                         \n\t"
-            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
+            PTR_ADDU   "%[addr0],   %[src],         %[step]             \n\t"
             MMI_ULWC1(%[ftmp2], %[addr0], 0x00)
-
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
+            "addi       %[h],       %[h],           -0x01               \n\t"
             "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]            \n\t"
             "punpcklbh  %[ftmp4],   %[ftmp2],       %[ftmp0]            \n\t"
             "pmullh     %[ftmp3],   %[ftmp3],       %[A]                \n\t"
             "pmullh     %[ftmp4],   %[ftmp4],       %[E]                \n\t"
             "paddh      %[ftmp1],   %[ftmp3],       %[ftmp4]            \n\t"
-
             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
             "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp5]            \n\t"
             "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
             MMI_LWC1(%[ftmp2], %[dst], 0x00)
             "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
-            "addi       %[h],       %[h],           -0x01               \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
@@ -824,46 +725,27 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
         );
     } else {
         __asm__ volatile (
-            "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]            \n\t"
-            "dli        %[tmp0],    0x06                                \n\t"
-            "pshufh     %[A],       %[A],           %[ftmp0]            \n\t"
-            "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
-
             "1:                                                         \n\t"
-            MMI_ULWC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            MMI_LWC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
+            MMI_ULWC1(%[ftmp0], %[src], 0x00)
             PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
-            MMI_SWC1(%[ftmp1], %[dst], 0x00)
-            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
-
             MMI_ULWC1(%[ftmp1], %[src], 0x00)
-            "punpcklbh  %[ftmp2],   %[ftmp1],       %[ftmp0]            \n\t"
-            "pmullh     %[ftmp1],   %[ftmp2],       %[A]                \n\t"
-            "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_32]         \n\t"
-            "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
-            "packushb   %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
-            MMI_LWC1(%[ftmp2], %[dst], 0x00)
-            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t"
+            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             "addi       %[h],       %[h],           -0x02               \n\t"
+            MMI_LWC1(%[ftmp2], %[dst], 0x00)
+            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
+            MMI_SWC1(%[ftmp0], %[dst], 0x00)
+            PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
+            MMI_LWC1(%[ftmp3], %[dst], 0x00)
+            "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
             MMI_SWC1(%[ftmp1], %[dst], 0x00)
-
-            PTR_ADDU   "%[src],     %[src],         %[stride]           \n\t"
             PTR_ADDU   "%[dst],     %[dst],         %[stride]           \n\t"
             "bnez       %[h],       1b                                  \n\t"
             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
-              [tmp0]"=&r"(tmp[0]),
-              RESTRICT_ASM_LOW32
               [dst]"+&r"(dst),              [src]"+&r"(src),
+              RESTRICT_ASM_LOW32
               [h]"+&r"(h)
-            : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32),
-              [A]"f"(A)
+            : [stride]"r"((mips_reg)stride)
             : "memory"
         );
     }
diff --git a/libavcodec/mips/h264dsp_init_mips.c b/libavcodec/mips/h264dsp_init_mips.c
index 1fe7f8468c590..dc08a25800418 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -138,10 +138,10 @@ static av_cold void h264dsp_init_mmi(H264DSPContext * c, const int bit_depth,
 av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
                                   const int chroma_format_idc)
 {
-#if HAVE_MSA
-    h264dsp_init_msa(c, bit_depth, chroma_format_idc);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     h264dsp_init_mmi(c, bit_depth, chroma_format_idc);
 #endif /* HAVE_MMI */
+#if HAVE_MSA /* NOTE(review): now runs after the MMI init on the same context -- presumably so MSA wins on shared entries; confirm */
+    h264dsp_init_msa(c, bit_depth, chroma_format_idc);
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/h264pred_init_mips.c b/libavcodec/mips/h264pred_init_mips.c
index c33d8f7cdb63b..63637b8732716 100644
--- a/libavcodec/mips/h264pred_init_mips.c
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -146,10 +146,10 @@ av_cold void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
                                     int bit_depth,
                                     const int chroma_format_idc)
 {
-#if HAVE_MSA
-    h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     h264_pred_init_mmi(h, codec_id, bit_depth, chroma_format_idc);
 #endif /* HAVE_MMI */
+#if HAVE_MSA /* NOTE(review): now runs after the MMI init on the same context -- presumably so MSA wins on shared entries; confirm */
+    h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/h264qpel_init_mips.c b/libavcodec/mips/h264qpel_init_mips.c
index 92219f8877fd6..33bae3093af21 100644
--- a/libavcodec/mips/h264qpel_init_mips.c
+++ b/libavcodec/mips/h264qpel_init_mips.c
@@ -240,10 +240,10 @@ static av_cold void h264qpel_init_mmi(H264QpelContext *c, int bit_depth)
 
 av_cold void ff_h264qpel_init_mips(H264QpelContext *c, int bit_depth)
 {
-#if HAVE_MSA
-    h264qpel_init_msa(c, bit_depth);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     h264qpel_init_mmi(c, bit_depth);
 #endif /* HAVE_MMI */
+#if HAVE_MSA /* NOTE(review): now runs after the MMI init on the same context -- presumably so MSA wins on shared entries; confirm */
+    h264qpel_init_msa(c, bit_depth);
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c b/libavcodec/mips/hevc_lpf_sao_msa.c
index 5b5537a264503..adcafde621167 100644
--- a/libavcodec/mips/hevc_lpf_sao_msa.c
+++ b/libavcodec/mips/hevc_lpf_sao_msa.c
@@ -2630,7 +2630,7 @@ void ff_hevc_sao_edge_filter_8_msa(uint8_t *dst, uint8_t *src,
                                    int16_t *sao_offset_val,
                                    int eo, int width, int height)
 {
-    ptrdiff_t stride_src = (2 * 64 + 32) / sizeof(uint8_t);
+    ptrdiff_t stride_src = (2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(uint8_t);
 
     switch (eo) {
     case 0:
diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 776d13e4d3513..88337f462e039 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -20,6 +20,78 @@
 
 #include "libavcodec/mips/hevcdsp_mips.h"
 
+#if HAVE_MMI /* hevc_dsp_init_mmi(): stores the *_8_mmi routines into the put_hevc_* tables of c */
+static av_cold void hevc_dsp_init_mmi(HEVCDSPContext *c,
+                                      const int bit_depth)
+{
+    if (8 == bit_depth) { /* first index vs. routine width below: 1:4 3:8 4:12 5:16 6:24 7:32 8:48 9:64 */
+        c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_mmi; /* qpel "h" routines use the [..][0][1] slots */
+        c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_mmi;
+        c->put_hevc_qpel[4][0][1] = ff_hevc_put_hevc_qpel_h12_8_mmi;
+        c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_mmi;
+        c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_8_mmi;
+        c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_mmi;
+        c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_mmi;
+        c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_mmi;
+        /* qpel "hv" routines use the [..][1][1] slots */
+        c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_mmi;
+        c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_mmi;
+        c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_mmi;
+        c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_mmi;
+        c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_mmi;
+        c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_mmi;
+        c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_mmi;
+        c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_mmi;
+        /* qpel_bi "h" routines: [..][0][1] slots */
+        c->put_hevc_qpel_bi[1][0][1] = ff_hevc_put_hevc_qpel_bi_h4_8_mmi;
+        c->put_hevc_qpel_bi[3][0][1] = ff_hevc_put_hevc_qpel_bi_h8_8_mmi;
+        c->put_hevc_qpel_bi[4][0][1] = ff_hevc_put_hevc_qpel_bi_h12_8_mmi;
+        c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_mmi;
+        c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_qpel_bi_h24_8_mmi;
+        c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_qpel_bi_h32_8_mmi;
+        c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_qpel_bi_h48_8_mmi;
+        c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h64_8_mmi;
+        /* qpel_bi "hv" routines: [..][1][1] slots */
+        c->put_hevc_qpel_bi[1][1][1] = ff_hevc_put_hevc_qpel_bi_hv4_8_mmi;
+        c->put_hevc_qpel_bi[3][1][1] = ff_hevc_put_hevc_qpel_bi_hv8_8_mmi;
+        c->put_hevc_qpel_bi[4][1][1] = ff_hevc_put_hevc_qpel_bi_hv12_8_mmi;
+        c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_qpel_bi_hv16_8_mmi;
+        c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_qpel_bi_hv24_8_mmi;
+        c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_qpel_bi_hv32_8_mmi;
+        c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_qpel_bi_hv48_8_mmi;
+        c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_qpel_bi_hv64_8_mmi;
+        /* qpel_bi [..][0][0] slots take the pel_bi_pixels routines; indices 1 and 4 are not set here */
+        c->put_hevc_qpel_bi[3][0][0] = ff_hevc_put_hevc_pel_bi_pixels8_8_mmi;
+        c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_pel_bi_pixels16_8_mmi;
+        c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_pel_bi_pixels24_8_mmi;
+        c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_pel_bi_pixels32_8_mmi;
+        c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_pel_bi_pixels48_8_mmi;
+        c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_pel_bi_pixels64_8_mmi;
+        /* epel_bi [..][0][0] slots reuse the same pel_bi_pixels routines, indices 3, 5, 6 and 7 only */
+        c->put_hevc_epel_bi[3][0][0] = ff_hevc_put_hevc_pel_bi_pixels8_8_mmi;
+        c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_pel_bi_pixels16_8_mmi;
+        c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_pel_bi_pixels24_8_mmi;
+        c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_pel_bi_pixels32_8_mmi;
+        /* epel_bi "hv" routines: [..][1][1] slots, up to index 7 */
+        c->put_hevc_epel_bi[1][1][1] = ff_hevc_put_hevc_epel_bi_hv4_8_mmi;
+        c->put_hevc_epel_bi[3][1][1] = ff_hevc_put_hevc_epel_bi_hv8_8_mmi;
+        c->put_hevc_epel_bi[4][1][1] = ff_hevc_put_hevc_epel_bi_hv12_8_mmi;
+        c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_epel_bi_hv16_8_mmi;
+        c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_epel_bi_hv24_8_mmi;
+        c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_epel_bi_hv32_8_mmi;
+        /* qpel_uni: only the "hv" ([..][1][1]) slots are set here */
+        c->put_hevc_qpel_uni[1][1][1] = ff_hevc_put_hevc_qpel_uni_hv4_8_mmi;
+        c->put_hevc_qpel_uni[3][1][1] = ff_hevc_put_hevc_qpel_uni_hv8_8_mmi;
+        c->put_hevc_qpel_uni[4][1][1] = ff_hevc_put_hevc_qpel_uni_hv12_8_mmi;
+        c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_qpel_uni_hv16_8_mmi;
+        c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_qpel_uni_hv24_8_mmi;
+        c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_qpel_uni_hv32_8_mmi;
+        c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_qpel_uni_hv48_8_mmi;
+        c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_qpel_uni_hv64_8_mmi;
+    }
+}
+#endif // #if HAVE_MMI
+
 #if HAVE_MSA
 static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
                                       const int bit_depth)
@@ -448,6 +520,9 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
 
 void ff_hevc_dsp_init_mips(HEVCDSPContext *c, const int bit_depth)
 {
+#if HAVE_MMI
+    hevc_dsp_init_mmi(c, bit_depth);
+#endif  // #if HAVE_MMI
 #if HAVE_MSA
     hevc_dsp_init_msa(c, bit_depth);
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index 1573d1cc9d981..c84e08d160ae6 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -479,4 +479,95 @@ void ff_hevc_addblk_32x32_msa(uint8_t *dst, int16_t *pi16Coeffs,
                               ptrdiff_t stride);
 void ff_hevc_idct_luma_4x4_msa(int16_t *pi16Coeffs);
 
+/* Loongson MMI: 8-bit qpel MC prototypes (int16_t dst), see hevcdsp_mmi.c */
+#define L_MC(PEL, DIR, WIDTH, TYPE)                                          \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_##TYPE(int16_t *dst,          \
+                                                      uint8_t *src,          \
+                                                      ptrdiff_t src_stride,  \
+                                                      int height,            \
+                                                      intptr_t mx,           \
+                                                      intptr_t my,           \
+                                                      int width)
+L_MC(qpel, h, 4, mmi);
+L_MC(qpel, h, 8, mmi);
+L_MC(qpel, h, 12, mmi);
+L_MC(qpel, h, 16, mmi);
+L_MC(qpel, h, 24, mmi);
+L_MC(qpel, h, 32, mmi);
+L_MC(qpel, h, 48, mmi);
+L_MC(qpel, h, 64, mmi);
+
+L_MC(qpel, hv, 4, mmi);
+L_MC(qpel, hv, 8, mmi);
+L_MC(qpel, hv, 12, mmi);
+L_MC(qpel, hv, 16, mmi);
+L_MC(qpel, hv, 24, mmi);
+L_MC(qpel, hv, 32, mmi);
+L_MC(qpel, hv, 48, mmi);
+L_MC(qpel, hv, 64, mmi);
+#undef L_MC
+/*
+ * Prototype generator for the 8-bit `_bi_` MC routines (uint8_t dst, extra
+ * int16_t src2): ff_hevc_put_hevc_<FILT>_bi_<MODE><W>_8_<ISA>(). Declares only.
+ */
+#define L_BI_MC(FILT, MODE, W, ISA)                                        \
+void ff_hevc_put_hevc_##FILT##_bi_##MODE##W##_8_##ISA(                     \
+        uint8_t *dst, ptrdiff_t dst_stride,                                \
+        uint8_t *src, ptrdiff_t src_stride,                                \
+        int16_t *src2, int height,                                         \
+        intptr_t mx, intptr_t my, int width)
+
+L_BI_MC(pel, pixels, 8, mmi);
+L_BI_MC(pel, pixels, 16, mmi);
+L_BI_MC(pel, pixels, 24, mmi);
+L_BI_MC(pel, pixels, 32, mmi);
+L_BI_MC(pel, pixels, 48, mmi);
+L_BI_MC(pel, pixels, 64, mmi);
+
+L_BI_MC(qpel, hv, 4, mmi);
+L_BI_MC(qpel, hv, 8, mmi);
+L_BI_MC(qpel, hv, 12, mmi);
+L_BI_MC(qpel, hv, 16, mmi);
+L_BI_MC(qpel, hv, 24, mmi);
+L_BI_MC(qpel, hv, 32, mmi);
+L_BI_MC(qpel, hv, 48, mmi);
+L_BI_MC(qpel, hv, 64, mmi);
+
+L_BI_MC(qpel, h, 4, mmi);
+L_BI_MC(qpel, h, 8, mmi);
+L_BI_MC(qpel, h, 12, mmi);
+L_BI_MC(qpel, h, 16, mmi);
+L_BI_MC(qpel, h, 24, mmi);
+L_BI_MC(qpel, h, 32, mmi);
+L_BI_MC(qpel, h, 48, mmi);
+L_BI_MC(qpel, h, 64, mmi);
+
+L_BI_MC(epel, hv, 4, mmi);
+L_BI_MC(epel, hv, 8, mmi);
+L_BI_MC(epel, hv, 12, mmi);
+L_BI_MC(epel, hv, 16, mmi);
+L_BI_MC(epel, hv, 24, mmi);
+L_BI_MC(epel, hv, 32, mmi);
+#undef L_BI_MC
+
+/*
+ * Prototype generator for the 8-bit `_uni_` MC routines (uint8_t dst, no
+ * src2): ff_hevc_put_hevc_<FILT>_uni_<MODE><W>_8_<ISA>(). Declares only.
+ */
+#define L_UNI_MC(FILT, MODE, W, ISA)                                       \
+void ff_hevc_put_hevc_##FILT##_uni_##MODE##W##_8_##ISA(                    \
+        uint8_t *dst, ptrdiff_t dst_stride,                                \
+        uint8_t *src, ptrdiff_t src_stride,                                \
+        int height, intptr_t mx, intptr_t my, int width)
+
+L_UNI_MC(qpel, hv, 4, mmi);
+L_UNI_MC(qpel, hv, 8, mmi);
+L_UNI_MC(qpel, hv, 12, mmi);
+L_UNI_MC(qpel, hv, 16, mmi);
+L_UNI_MC(qpel, hv, 24, mmi);
+L_UNI_MC(qpel, hv, 32, mmi);
+L_UNI_MC(qpel, hv, 48, mmi);
+L_UNI_MC(qpel, hv, 64, mmi);
+#undef L_UNI_MC
+
 #endif  // #ifndef AVCODEC_MIPS_HEVCDSP_MIPS_H
diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
new file mode 100644
index 0000000000000..aa83e1f9add00
--- /dev/null
+++ b/libavcodec/mips/hevcdsp_mmi.c
@@ -0,0 +1,1183 @@
+/*
+ * Copyright (c) 2019 Shiyou Yin (yinshiyou-hf@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hevcdec.h"
+#include "libavcodec/bit_depth_template.c"
+#include "libavcodec/mips/hevcdsp_mips.h"
+#include "libavutil/mips/mmiutils.h"
+
+/* Horizontal 8-tap qpel filter: 8-bit pixels in, int16_t out (dst rows are
+ * 0x80 bytes apart). Each inner iteration yields 4 outputs and runs x_step
+ * times per row; src_step/dst_step (bytes) rewind src/dst at the row end. */
+#define PUT_HEVC_QPEL_H(w, x_step, src_step, dst_step)                   \
+void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src,     \
+                                        ptrdiff_t _srcstride,            \
+                                        int height, intptr_t mx,         \
+                                        intptr_t my, int width)          \
+{                                                                        \
+    int x = x_step, y = height;                                          \
+    pixel *src = (pixel*)_src - 3;                                       \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
+    double ftmp[15]; /* FPR scratch: f constraint needs an FP type */    \
+    uint64_t rtmp[1];                                                    \
+    const int8_t *filter = ff_hevc_qpel_filters[mx - 1];                 \
+    __asm__ volatile(                                                    \
+        /* Sign-extend taps to int16: ftmp1 = taps 0-3, ftmp2 = 4-7 */   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
+        "li           %[rtmp0],      0x08                       \n\t"    \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"    \
+        /* 1: rows, 2: 4 outputs each; ftmp3..6 = 8 bytes at src+0..3 */ \
+        "1:                                                     \n\t"    \
+        "2:                                                     \n\t"    \
+        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])            \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
+        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
+        "gssdlc1      %[ftmp3],      0x07(%[dst])               \n\t"    \
+        "gssdrc1      %[ftmp3],      0x00(%[dst])               \n\t"    \
+        /* next group of 4 pixels */                                     \
+        "daddi        %[x],          %[x],         -0x01        \n\t"    \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"    \
+        "bnez         %[x],          2b                         \n\t"    \
+        /* next row: rewind by the row width, then add the strides */    \
+        "daddi        %[y],          %[y],         -0x01        \n\t"    \
+        "li           %[x],        " #x_step "                  \n\t"    \
+        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"    \
+        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
+        "bnez         %[y],          1b                         \n\t"    \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
+          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),                \
+          [src]"+&r"(src), [dst]"+&r"(dst), [y]"+&r"(y), [x]"+&r"(x)     \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
+        : "memory"                                                       \
+    );                                                                   \
+}
+
+PUT_HEVC_QPEL_H(4, 1, -4, -8);
+PUT_HEVC_QPEL_H(8, 2, -8, -16);
+PUT_HEVC_QPEL_H(12, 3, -12, -24);
+PUT_HEVC_QPEL_H(16, 4, -16, -32);
+PUT_HEVC_QPEL_H(24, 6, -24, -48);
+PUT_HEVC_QPEL_H(32, 8, -32, -64);
+PUT_HEVC_QPEL_H(48, 12, -48, -96);
+PUT_HEVC_QPEL_H(64, 16, -64, -128);
+
+/* Separable 8-tap qpel filter (horizontal, then vertical) for 8-bit pixels.
+ * Pass 1 filters height + QPEL_EXTRA rows with the mx taps into tmp_array;
+ * pass 2 filters tmp with the my taps, shifts right by 6, packs to int16_t. */
+#define PUT_HEVC_QPEL_HV(w, x_step, src_step, dst_step)                  \
+void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src,    \
+                                     ptrdiff_t _srcstride,               \
+                                     int height, intptr_t mx,            \
+                                     intptr_t my, int width)             \
+{                                                                        \
+    int x = x_step, y = height + QPEL_EXTRA;                             \
+    const int8_t *filter = ff_hevc_qpel_filters[mx - 1];                 \
+    pixel *src = (pixel*)_src;                                           \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                    \
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];         \
+    int16_t *tmp = tmp_array;                                            \
+    double ftmp[15]; /* FPR scratch: f constraint needs an FP type */    \
+    uint64_t rtmp[1];                                                    \
+    /* Back up QPEL_EXTRA_BEFORE rows and 3 pixels before filtering */   \
+    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                        \
+    __asm__ volatile(                                                    \
+        /* Pass 1 (horizontal): mx taps as int16 in ftmp1/ftmp2 */       \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
+        "li           %[rtmp0],      0x08                       \n\t"    \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"    \
+        /* 1: rows, 2: 4 outputs each; ftmp3..6 = 8 bytes at src+0..3 */ \
+        "1:                                                     \n\t"    \
+        "2:                                                     \n\t"    \
+        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"    \
+        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"    \
+        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"    \
+        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"    \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"    \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"    \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])            \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
+        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
+        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"    \
+        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"    \
+        /* next group of 4 pixels */                                     \
+        "daddi        %[x],          %[x],         -0x01        \n\t"    \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"    \
+        "bnez         %[x],          2b                         \n\t"    \
+        /* next row: rewind by the row width, then step src and tmp */   \
+        "daddi        %[y],          %[y],         -0x01        \n\t"    \
+        "li           %[x],        " #x_step "                  \n\t"    \
+        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #dst_step "  \n\t"    \
+        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "bnez         %[y],          1b                         \n\t"    \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
+          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),                \
+          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"+&r"(x)     \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
+        : "memory"                                                       \
+    );                                                                   \
+    /* Pass 2, vertical (offset is 0 if QPEL_EXTRA_BEFORE == 3) */       \
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * 4 -12;                      \
+    filter = ff_hevc_qpel_filters[my - 1];                               \
+    x = x_step;                                                          \
+    y = height;                                                          \
+    __asm__ volatile(                                                    \
+        /* my taps as int16 in ftmp1/ftmp2; ftmp0 = shift count 6 */     \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                              \
+        "li           %[rtmp0],      0x08                       \n\t"    \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"    \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"    \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"    \
+        "li           %[rtmp0],      0x06                       \n\t"    \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"    \
+        /* 2: load 8 tmp rows (0x80 bytes apart), then rewind tmp */     \
+        "1:                                                     \n\t"    \
+        "2:                                                     \n\t"    \
+        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"    \
+        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"    \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],             \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])         \
+        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],            \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])         \
+        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"    \
+        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"    \
+        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"    \
+        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"    \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"    \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"    \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])           \
+        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"    \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"    \
+        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"    \
+        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"    \
+        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"    \
+        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"    \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"    \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"    \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])           \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"    \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"    \
+        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"    \
+        "gssdlc1      %[ftmp3],      0x07(%[dst])               \n\t"    \
+        "gssdrc1      %[ftmp3],      0x00(%[dst])               \n\t"    \
+        /* next group of 4 columns */                                    \
+        "daddi        %[x],          %[x],         -0x01        \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"    \
+        "bnez         %[x],          2b                         \n\t"    \
+        /* next row of dst and tmp (both advance 0x80 bytes per row) */  \
+        "daddi        %[y],          %[y],         -0x01        \n\t"    \
+        "li           %[x],        " #x_step "                  \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #dst_step "  \n\t"    \
+        PTR_ADDIU    "%[dst],        %[dst],        0x80        \n\t"    \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"    \
+        "bnez         %[y],          1b                         \n\t"    \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                  \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                  \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                  \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                  \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                  \
+          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),              \
+          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),              \
+          [ftmp14]"=&f"(ftmp[14]), [rtmp0]"=&r"(rtmp[0]),                \
+          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"+&r"(x)     \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                    \
+        : "memory"                                                       \
+    );                                                                   \
+}
+
+PUT_HEVC_QPEL_HV(4, 1, -4, -8);
+PUT_HEVC_QPEL_HV(8, 2, -8, -16);
+PUT_HEVC_QPEL_HV(12, 3, -12, -24);
+PUT_HEVC_QPEL_HV(16, 4, -16, -32);
+PUT_HEVC_QPEL_HV(24, 6, -24, -48);
+PUT_HEVC_QPEL_HV(32, 8, -32, -64);
+PUT_HEVC_QPEL_HV(48, 12, -48, -96);
+PUT_HEVC_QPEL_HV(64, 16, -64, -128);
+
+#define PUT_HEVC_QPEL_BI_H(w, x_step, src_step, src2_step, dst_step)    \
+void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst,               \
+                                           ptrdiff_t _dststride,        \
+                                           uint8_t *_src,               \
+                                           ptrdiff_t _srcstride,        \
+                                           int16_t *src2, int height,   \
+                                           intptr_t mx, intptr_t my,    \
+                                           int width)                   \
+{                                                                       \
+    int x, y;                                                           \
+    pixel        *src       = (pixel*)_src - 3;                         \
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);               \
+    pixel *dst          = (pixel *)_dst;                                \
+    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];             \
+    uint64_t ftmp[20];                                                  \
+    uint64_t rtmp[1];                                                   \
+    int shift = 7;                                                      \
+    int offset = 64;                                                    \
+                                                                        \
+    x = width >> 2;                                                     \
+    y = height;                                                         \
+    __asm__ volatile(                                                   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
+        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"   \
+        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
+                                                                        \
+        "1:                                                     \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
+        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp3],      %[offset]   \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
+        "li           %[rtmp0],      0x10                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
+        "punpcklhw    %[ftmp5],      %[ftmp0],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp6],      %[ftmp0],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp3],      %[ftmp0],      %[ftmp4]    \n\t"   \
+        "punpcklhw    %[ftmp4],      %[ftmp0],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
+        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
+        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "and          %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
+        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
+        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
+        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
+                                                                        \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+                                                                        \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
+        PTR_ADDU     "%[src],        %[src],    %[src_stride]   \n\t"   \
+        PTR_ADDU     "%[dst],        %[dst],    %[dst_stride]   \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
+          [ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2),                   \
+          [dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x),   \
+          [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0])                  \
+        : [src_stride]"r"(srcstride), [dst_stride]"r"(dststride),       \
+          [filter]"r"(filter), [shift]"f"(shift)                        \
+        : "memory"                                                      \
+    );                                                                  \
+}
+
+PUT_HEVC_QPEL_BI_H(4, 1, -4, -8, -4);   /* one instantiation per block width */
+PUT_HEVC_QPEL_BI_H(8, 2, -8, -16, -8);  /* args follow (w, w/4, -w, -2*w, -w) */
+PUT_HEVC_QPEL_BI_H(12, 3, -12, -24, -12);
+PUT_HEVC_QPEL_BI_H(16, 4, -16, -32, -16);
+PUT_HEVC_QPEL_BI_H(24, 6, -24, -48, -24);
+PUT_HEVC_QPEL_BI_H(32, 8, -32, -64, -32);
+PUT_HEVC_QPEL_BI_H(48, 12, -48, -96, -48);
+PUT_HEVC_QPEL_BI_H(64, 16, -64, -128, -64);
+
+#define PUT_HEVC_QPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)   \
+void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
+                                            ptrdiff_t _dststride,       \
+                                            uint8_t *_src,              \
+                                            ptrdiff_t _srcstride,       \
+                                            int16_t *src2, int height,  \
+                                            intptr_t mx, intptr_t my,   \
+                                            int width)                  \
+{ /* Two passes: horizontal filter into tmp, then vertical + src2. */   \
+    int x, y;  /* loop counters: 4-pixel groups per row, rows */        \
+    const int8_t *filter;                                               \
+    pixel *src = (pixel*)_src;                                          \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
+    pixel *dst          = (pixel *)_dst;                                \
+    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];        \
+    int16_t *tmp = tmp_array;                                           \
+    uint64_t ftmp[20];                                                  \
+    uint64_t rtmp[1];                                                   \
+    int shift = 7;   /* final psraw count, passed in an FP reg */       \
+    int offset = 64; /* added before that shift (1 << 6) */             \
+    /* Start QPEL_EXTRA_BEFORE rows and 3 pixels before _src. */        \
+    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
+    filter = ff_hevc_qpel_filters[mx - 1];                              \
+    x = width >> 2;                                                     \
+    y = height + QPEL_EXTRA;                                            \
+    __asm__ volatile(                                                   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
+        /* Pass 1: mx filter along each row, 4 outputs per loop. */     \
+        "1:                                                     \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
+        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
+        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
+        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
+        /* Next group of 4: src += 4 bytes, tmp += 8 bytes. */          \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+        /* Next row: rewind, then src += stride, tmp += 0x80. */        \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
+        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),               \
+          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
+          [x]"+&r"(x)                                                   \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
+        : "memory"                                                      \
+    );                                                                  \
+    /* Pass 2: my filter down tmp columns, then add src2. */            \
+    tmp    = tmp_array;                                                 \
+    filter = ff_hevc_qpel_filters[my - 1];                              \
+    x = width >> 2;                                                     \
+    y = height;                                                         \
+    __asm__ volatile(                                                   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "li           %[rtmp0],      0x06                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
+        /* ftmp0 = 6 (first shift); offset copied into both words. */   \
+        "1:                                                     \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
+        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],           \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
+        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"   \
+        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])          \
+        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"   \
+        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"   \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])          \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
+        "xor          %[ftmp7],      %[ftmp7],      %[ftmp7]    \n\t"   \
+        "li           %[rtmp0],      0x10                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
+        "punpcklhw    %[ftmp5],      %[ftmp7],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp6],      %[ftmp7],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp3],      %[ftmp7],      %[ftmp4]    \n\t"   \
+        "punpcklhw    %[ftmp4],      %[ftmp7],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
+        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
+        "paddw        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"   \
+        "paddw        %[ftmp6],      %[ftmp6],      %[offset]   \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
+        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp7]    \n\t"   \
+        "and          %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
+        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
+        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
+        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
+        /* Next group of 4: src2/tmp += 8 bytes, dst += 4 bytes. */     \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+        /* Next row: rewind, then src2/tmp += 0x80, dst += stride. */   \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
+        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
+          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),             \
+          [ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2),                   \
+          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
+          [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0])                  \
+        : [filter]"r"(filter), [stride]"r"(dststride),                  \
+          [shift]"f"(shift)                                             \
+        : "memory"                                                      \
+    );                                                                  \
+}
+
+PUT_HEVC_QPEL_BI_HV(4, 1, -4, -8, -4);  /* (w, x_step, src_step, src2_step, dst_step) */
+PUT_HEVC_QPEL_BI_HV(8, 2, -8, -16, -8); /* x_step = w / 4; steps are byte rewinds per row */
+PUT_HEVC_QPEL_BI_HV(12, 3, -12, -24, -12);
+PUT_HEVC_QPEL_BI_HV(16, 4, -16, -32, -16);
+PUT_HEVC_QPEL_BI_HV(24, 6, -24, -48, -24);
+PUT_HEVC_QPEL_BI_HV(32, 8, -32, -64, -32);
+PUT_HEVC_QPEL_BI_HV(48, 12, -48, -96, -48);
+PUT_HEVC_QPEL_BI_HV(64, 16, -64, -128, -64);
+
+#define PUT_HEVC_EPEL_BI_HV(w, x_step, src_step, src2_step, dst_step)   \
+void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst,              \
+                                            ptrdiff_t _dststride,       \
+                                            uint8_t *_src,              \
+                                            ptrdiff_t _srcstride,       \
+                                            int16_t *src2, int height,  \
+                                            intptr_t mx, intptr_t my,   \
+                                            int width)                  \
+{                                                                       \
+    int x, y;                                                           \
+    pixel *src = (pixel *)_src;                                         \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
+    pixel *dst          = (pixel *)_dst;                                \
+    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];                \
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];        \
+    int16_t *tmp = tmp_array;                                           \
+    uint64_t ftmp[12];                                                  \
+    uint64_t rtmp[1];                                                   \
+    int shift = 7;                                                      \
+    int offset = 64;                                                    \
+                                                                        \
+    src -= (EPEL_EXTRA_BEFORE * srcstride + 1);                         \
+    x = width >> 2;                                                     \
+    y = height + EPEL_EXTRA;                                            \
+    __asm__ volatile(                                                   \
+        MMI_LWC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
+                                                                        \
+        "1:                                                     \n\t"   \
+        "2:                                                     \n\t"   \
+        "gslwlc1      %[ftmp2],      0x03(%[src])               \n\t"   \
+        "gslwrc1      %[ftmp2],      0x00(%[src])               \n\t"   \
+        "gslwlc1      %[ftmp3],      0x04(%[src])               \n\t"   \
+        "gslwrc1      %[ftmp3],      0x01(%[src])               \n\t"   \
+        "gslwlc1      %[ftmp4],      0x05(%[src])               \n\t"   \
+        "gslwrc1      %[ftmp4],      0x02(%[src])               \n\t"   \
+        "gslwlc1      %[ftmp5],      0x06(%[src])               \n\t"   \
+        "gslwrc1      %[ftmp5],      0x03(%[src])               \n\t"   \
+        "punpcklbh    %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp2],      %[ftmp2],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp3],      %[ftmp3],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp4],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp4],      %[ftmp4],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp5],      %[ftmp5],      %[ftmp1]    \n\t"   \
+        TRANSPOSE_4H(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5],            \
+                     %[ftmp6], %[ftmp7], %[ftmp8], %[ftmp9])            \
+        "paddh        %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"   \
+        "paddh        %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"   \
+        "paddh        %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"   \
+        "gssdlc1      %[ftmp2],      0x07(%[tmp])               \n\t"   \
+        "gssdrc1      %[ftmp2],      0x00(%[tmp])               \n\t"   \
+                                                                        \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+                                                                        \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
+        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [rtmp0]"=&r"(rtmp[0]),                                        \
+          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
+          [x]"+&r"(x)                                                   \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
+        : "memory"                                                      \
+    );                                                                  \
+                                                                        \
+    tmp      = tmp_array;                                               \
+    filter = ff_hevc_epel_filters[my - 1];                              \
+    x = width >> 2;                                                     \
+    y = height;                                                         \
+    __asm__ volatile(                                                   \
+        MMI_LWC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "li           %[rtmp0],      0x06                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
+        "xor          %[ftmp2],      %[ftmp2],      %[ftmp2]    \n\t"   \
+                                                                        \
+        "1:                                                     \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],       -0x180       \n\t"   \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           \
+        "pmaddhw      %[ftmp7],      %[ftmp3],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp8],      %[ftmp4],      %[ftmp1]    \n\t"   \
+        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp3], %[ftmp4])            \
+        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmaddhw      %[ftmp7],      %[ftmp5],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp8],      %[ftmp6],      %[ftmp1]    \n\t"   \
+        TRANSPOSE_2W(%[ftmp7], %[ftmp8], %[ftmp5], %[ftmp6])            \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[src2])              \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[src2])              \n\t"   \
+        "li           %[rtmp0],      0x10                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp8]                   \n\t"   \
+        "punpcklhw    %[ftmp5],      %[ftmp2],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp6],      %[ftmp2],      %[ftmp3]    \n\t"   \
+        "punpckhhw    %[ftmp3],      %[ftmp2],      %[ftmp4]    \n\t"   \
+        "punpcklhw    %[ftmp4],      %[ftmp2],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp8]    \n\t"   \
+        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"   \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp4]    \n\t"   \
+        "paddw        %[ftmp6],      %[ftmp6],      %[ftmp3]    \n\t"   \
+        "paddw        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"   \
+        "paddw        %[ftmp6],      %[ftmp6],      %[offset]   \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"   \
+        "psraw        %[ftmp6],      %[ftmp6],      %[shift]    \n\t"   \
+        "packsswh     %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "pcmpgth      %[ftmp7],      %[ftmp5],      %[ftmp2]    \n\t"   \
+        "and          %[ftmp3],      %[ftmp5],      %[ftmp7]    \n\t"   \
+        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   \
+        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   \
+        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
+                                                                        \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x08        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+                                                                        \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #src2_step " \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
+        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"   \
+        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2),                   \
+          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
+          [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0])                  \
+        : [filter]"r"(filter), [stride]"r"(dststride),                  \
+          [shift]"f"(shift)                                             \
+        : "memory"                                                      \
+    );                                                                  \
+}
+
+PUT_HEVC_EPEL_BI_HV(4, 1, -4, -8, -4);
+PUT_HEVC_EPEL_BI_HV(8, 2, -8, -16, -8);
+PUT_HEVC_EPEL_BI_HV(12, 3, -12, -24, -12);
+PUT_HEVC_EPEL_BI_HV(16, 4, -16, -32, -16);
+PUT_HEVC_EPEL_BI_HV(24, 6, -24, -48, -24);
+PUT_HEVC_EPEL_BI_HV(32, 8, -32, -64, -32);
+
+#define PUT_HEVC_PEL_BI_PIXELS(w, x_step, src_step, dst_step, src2_step)  \
+void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst,             \
+                                               ptrdiff_t _dststride,      \
+                                               uint8_t *_src,             \
+                                               ptrdiff_t _srcstride,      \
+                                               int16_t *src2, int height, \
+                                               intptr_t mx, intptr_t my,  /* unused */ \
+                                               int width)                 \
+{   /* Per pixel: ((src << 6) + src2 + 64) >> 7, negatives zeroed, as bytes. */ \
+    int x, y;           /* x: 8-pixel chunks left in the row, y: rows left */ \
+    pixel *src          = (pixel *)_src;                                  \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                     \
+    pixel *dst          = (pixel *)_dst;                                  \
+    ptrdiff_t dststride = _dststride / sizeof(pixel);                     \
+    uint64_t ftmp[12];  /* lvalues for the "=&f" outputs; [11] is offset */ \
+    uint64_t rtmp[1];   /* lvalue for the "=&r" scratch register */       \
+    int shift = 7;      /* final shift; NOTE(review): int bound to "f" */ \
+    /* x_step reloads x per row; *_step are byte deltas applied per row. */ \
+    y = height;                                                           \
+    x = width >> 3;     /* 8 pixels per inner iteration */                \
+    __asm__ volatile(                                                     \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"     \
+        "li           %[rtmp0],      0x06                       \n\t"     \
+        "dmtc1        %[rtmp0],      %[ftmp1]                   \n\t"     \
+        "li           %[rtmp0],      0x10                       \n\t"     \
+        "dmtc1        %[rtmp0],      %[ftmp10]                  \n\t"     \
+        "li           %[rtmp0],      0x40                       \n\t"     \
+        "dmtc1        %[rtmp0],      %[offset]                  \n\t"     \
+        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"     \
+        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"     \
+        /* ftmp0 = 0, ftmp1 = 6, ftmp10 = 16, offset = 0x40 in 4 halfwords */ \
+        "1:                                                     \n\t"     \
+        "2:                                                     \n\t"     \
+        "gsldlc1      %[ftmp5],      0x07(%[src])               \n\t"     /* ftmp5 = src[0..7] */ \
+        "gsldrc1      %[ftmp5],      0x00(%[src])               \n\t"     \
+        "gsldlc1      %[ftmp2],      0x07(%[src2])              \n\t"     /* ftmp2 = src2[0..3] */ \
+        "gsldrc1      %[ftmp2],      0x00(%[src2])              \n\t"     \
+        "gsldlc1      %[ftmp3],      0x0f(%[src2])              \n\t"     /* ftmp3 = src2[4..7] */ \
+        "gsldrc1      %[ftmp3],      0x08(%[src2])              \n\t"     \
+        "punpcklbh    %[ftmp4],      %[ftmp5],      %[ftmp0]    \n\t"     /* src bytes -> halfwords */ \
+        "punpckhbh    %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"     \
+        "psllh        %[ftmp4],      %[ftmp4],      %[ftmp1]    \n\t"     /* src << 6 */ \
+        "psllh        %[ftmp5],      %[ftmp5],      %[ftmp1]    \n\t"     \
+        "paddh        %[ftmp4],      %[ftmp4],      %[offset]   \n\t"     /* + 0x40 */ \
+        "paddh        %[ftmp5],      %[ftmp5],      %[offset]   \n\t"     \
+        "punpcklhw    %[ftmp6],      %[ftmp4],      %[ftmp0]    \n\t"     /* zero-extend src terms to words */ \
+        "punpckhhw    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"     \
+        "punpcklhw    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"     \
+        "punpckhhw    %[ftmp9],      %[ftmp5],      %[ftmp0]    \n\t"     \
+        "punpcklhw    %[ftmp4],      %[ftmp0],      %[ftmp3]    \n\t"     /* src2 into the high halfwords ... */ \
+        "punpckhhw    %[ftmp5],      %[ftmp0],      %[ftmp3]    \n\t"     \
+        "punpckhhw    %[ftmp3],      %[ftmp0],      %[ftmp2]    \n\t"     \
+        "punpcklhw    %[ftmp2],      %[ftmp0],      %[ftmp2]    \n\t"     \
+        "psraw        %[ftmp2],      %[ftmp2],      %[ftmp10]   \n\t"     /* ... then >> 16 sign-extends to words */ \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp10]   \n\t"     \
+        "psraw        %[ftmp4],      %[ftmp4],      %[ftmp10]   \n\t"     \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp10]   \n\t"     \
+        "paddw        %[ftmp2],      %[ftmp2],      %[ftmp6]    \n\t"     /* src2 + src term */ \
+        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp7]    \n\t"     \
+        "paddw        %[ftmp4],      %[ftmp4],      %[ftmp8]    \n\t"     \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp9]    \n\t"     \
+        "psraw        %[ftmp2],      %[ftmp2],      %[shift]    \n\t"     /* >> shift */ \
+        "psraw        %[ftmp3],      %[ftmp3],      %[shift]    \n\t"     \
+        "psraw        %[ftmp4],      %[ftmp4],      %[shift]    \n\t"     \
+        "psraw        %[ftmp5],      %[ftmp5],      %[shift]    \n\t"     \
+        "packsswh     %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"     /* words -> halfwords */ \
+        "packsswh     %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"     \
+        "pcmpgth      %[ftmp3],      %[ftmp2],      %[ftmp0]    \n\t"     /* mask of lanes > 0 */ \
+        "pcmpgth      %[ftmp5],      %[ftmp4],      %[ftmp0]    \n\t"     \
+        "and          %[ftmp2],      %[ftmp2],      %[ftmp3]    \n\t"     /* zero the other lanes */ \
+        "and          %[ftmp4],      %[ftmp4],      %[ftmp5]    \n\t"     \
+        "packushb     %[ftmp2],      %[ftmp2],      %[ftmp4]    \n\t"     /* halfwords -> 8 unsigned bytes */ \
+        "gssdlc1      %[ftmp2],      0x07(%[dst])               \n\t"     /* store 8 pixels */ \
+        "gssdrc1      %[ftmp2],      0x00(%[dst])               \n\t"     \
+        /* Next chunk: src/dst += 8 bytes, src2 += 8 int16_t (16 bytes). */ \
+        "daddi        %[x],          %[x],         -0x01        \n\t"     \
+        PTR_ADDIU    "%[src],        %[src],        0x08        \n\t"     \
+        PTR_ADDIU    "%[dst],        %[dst],        0x08        \n\t"     \
+        PTR_ADDIU    "%[src2],       %[src2],       0x10        \n\t"     \
+        "bnez         %[x],          2b                         \n\t"     \
+        /* Next row: apply *_step, reload x, add strides; src2 += 0x80. */ \
+        PTR_ADDIU    "%[src],        %[src],     " #src_step "  \n\t"     \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"     \
+        PTR_ADDIU    "%[src2],       %[src2],    " #src2_step " \n\t"     \
+        "li           %[x],        " #x_step "                  \n\t"     \
+        "daddi        %[y],          %[y],         -0x01        \n\t"     \
+        PTR_ADDU     "%[src],        %[src],       %[srcstride] \n\t"     \
+        PTR_ADDU     "%[dst],        %[dst],       %[dststride] \n\t"     \
+        PTR_ADDIU    "%[src2],       %[src2],       0x80        \n\t"     \
+        "bnez         %[y],          1b                         \n\t"     \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                   \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                   \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                   \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                   \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                   \
+          [ftmp10]"=&f"(ftmp[10]), [offset]"=&f"(ftmp[11]),               \
+          [src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src),            \
+          [x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0])                 \
+        : [dststride]"r"(dststride), [shift]"f"(shift),                   \
+          [srcstride]"r"(srcstride)                                       \
+        : "memory"                                                        \
+    );                                                                    \
+}   /* NOTE(review): this trailing backslash splices the next line in. */ \
+
+PUT_HEVC_PEL_BI_PIXELS(8, 1, -8, -8, -16);      /* w = 8:  1 chunk of 8 pixels per row; steps undo one row's advance */
+PUT_HEVC_PEL_BI_PIXELS(16, 2, -16, -16, -32);   /* w = 16: 2 chunks per row */
+PUT_HEVC_PEL_BI_PIXELS(24, 3, -24, -24, -48);   /* w = 24: 3 chunks per row */
+PUT_HEVC_PEL_BI_PIXELS(32, 4, -32, -32, -64);   /* w = 32: 4 chunks per row */
+PUT_HEVC_PEL_BI_PIXELS(48, 6, -48, -48, -96);   /* w = 48: 6 chunks per row */
+PUT_HEVC_PEL_BI_PIXELS(64, 8, -64, -64, -128);  /* w = 64: 8 chunks per row */
+
+#define PUT_HEVC_QPEL_UNI_HV(w, x_step, src_step, dst_step, tmp_step)   \
+void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst,             \
+                                             ptrdiff_t _dststride,      \
+                                             uint8_t *_src,             \
+                                             ptrdiff_t _srcstride,      \
+                                             int height,                \
+                                             intptr_t mx, intptr_t my,  \
+                                             int width)                 \
+{   /* Two passes: rows filtered (mx) into tmp_array, columns (my) to dst. */ \
+    int x, y;           /* x: 4-output chunks left in the row, y: rows left */ \
+    const int8_t *filter;                                               \
+    pixel *src = (pixel*)_src;                                          \
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                   \
+    pixel *dst          = (pixel *)_dst;                                \
+    ptrdiff_t dststride = _dststride / sizeof(pixel);                   \
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];        /* pass-1 output */ \
+    int16_t *tmp = tmp_array;                                           \
+    uint64_t ftmp[20];  /* lvalues for the "=&f" asm outputs */         \
+    uint64_t rtmp[1];   /* lvalue for the "=&r" scratch register */     \
+    int shift = 6;      /* final right shift in pass 2 */               \
+    int offset = 32;    /* added before that shift */                   \
+    /* Pass 1 starts QPEL_EXTRA_BEFORE rows above and 3 pixels left. */ \
+    src   -= (QPEL_EXTRA_BEFORE * srcstride + 3);                       \
+    filter = ff_hevc_qpel_filters[mx - 1];                              \
+    x = width >> 2;     /* 4 outputs per inner iteration */             \
+    y = height + QPEL_EXTRA;  /* pass 1 covers height + QPEL_EXTRA rows */ \
+    __asm__ volatile(                                                   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             /* load the filter bytes into ftmp1 */ \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   /* taps into the high byte of each halfword ... */ \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   /* ... then >> 8 sign-extends them */ \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "xor          %[ftmp0],      %[ftmp0],      %[ftmp0]    \n\t"   \
+        /* ftmp1 = taps 0-3, ftmp2 = taps 4-7 (halfwords), ftmp0 = 0. */ \
+        "1:                                                     \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[src])               \n\t"   /* ftmp3..ftmp6 = 8 bytes at src+0 .. src+3 */ \
+        "gsldrc1      %[ftmp3],      0x00(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp4],      0x08(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x01(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp5],      0x09(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x02(%[src])               \n\t"   \
+        "gsldlc1      %[ftmp6],      0x0a(%[src])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x03(%[src])               \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp3],      %[ftmp0]    \n\t"   /* widen, multiply by taps, add low + high halves */ \
+        "punpckhbh    %[ftmp8],      %[ftmp3],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp4],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp4],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp5],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        "punpcklbh    %[ftmp7],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "punpckhbh    %[ftmp8],      %[ftmp6],      %[ftmp0]    \n\t"   \
+        "pmullh       %[ftmp7],      %[ftmp7],      %[ftmp1]    \n\t"   \
+        "pmullh       %[ftmp8],      %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddh        %[ftmp6],      %[ftmp7],      %[ftmp8]    \n\t"   \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10])           /* presumably a 4x4 halfword transpose - confirm in mmiutils.h */ \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   /* sum the four partial registers */ \
+        "paddh        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "paddh        %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   \
+        "gssdlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   /* store 4 int16_t results to tmp */ \
+        "gssdrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
+        /* Next 4 outputs: src += 4 bytes, tmp += 8 bytes. */           \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],        0x04        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+        /* Next row: reload x, apply *_step, src += stride, tmp += 0x80. */ \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        PTR_ADDIU    "%[src],        %[src],      " #src_step " \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],      " #tmp_step " \n\t"   \
+        PTR_ADDU     "%[src],        %[src],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [rtmp0]"=&r"(rtmp[0]),               \
+          [src]"+&r"(src), [tmp]"+&r"(tmp), [y]"+&r"(y),                \
+          [x]"+&r"(x)                                                   \
+        : [filter]"r"(filter), [stride]"r"(srcstride)                   \
+        : "memory"                                                      \
+    );                                                                  \
+    /* Pass 2: filter tmp_array columns with the my taps, store to dst. */ \
+    tmp    = tmp_array;                                                 \
+    filter = ff_hevc_qpel_filters[my - 1];                              \
+    x = width >> 2;     /* NOTE(review): dead; x is "=&r" and set by li below */ \
+    y = height;                                                         \
+    __asm__ volatile(                                                   \
+        MMI_LDC1(%[ftmp1], %[filter], 0x00)                             \
+        "li           %[rtmp0],      0x08                       \n\t"   \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpckhbh    %[ftmp2],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "punpcklbh    %[ftmp1],      %[ftmp0],      %[ftmp1]    \n\t"   \
+        "psrah        %[ftmp1],      %[ftmp1],      %[ftmp0]    \n\t"   \
+        "psrah        %[ftmp2],      %[ftmp2],      %[ftmp0]    \n\t"   \
+        "li           %[rtmp0],      0x06                       \n\t"   /* ftmp0 = 6: shift for the 32-bit sums */ \
+        "dmtc1        %[rtmp0],      %[ftmp0]                   \n\t"   \
+        "punpcklhw    %[offset],     %[offset],     %[offset]   \n\t"   /* replicate offset into all 4 halfwords */ \
+        "punpcklwd    %[offset],     %[offset],     %[offset]   \n\t"   \
+        /* ftmp1 = taps 0-3, ftmp2 = taps 4-7 (halfwords), ftmp0 = 6. */ \
+        "1:                                                     \n\t"   \
+        "li           %[x],        " #x_step "                  \n\t"   \
+        "2:                                                     \n\t"   \
+        "gsldlc1      %[ftmp3],      0x07(%[tmp])               \n\t"   /* ftmp3..ftmp10 = 4 int16_t from each of 8 rows, 0x80 bytes apart */ \
+        "gsldrc1      %[ftmp3],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp4],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp4],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp5],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp5],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp6],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp6],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp7],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp7],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp8],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp8],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp9],      0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp9],      0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "gsldlc1      %[ftmp10],     0x07(%[tmp])               \n\t"   \
+        "gsldrc1      %[ftmp10],     0x00(%[tmp])               \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        -0x380      \n\t"   /* back to the first of the 8 rows (7 * 0x80) */ \
+        TRANSPOSE_4H(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6],            \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        /* presumably one column per register afterwards - confirm */ \
+        TRANSPOSE_4H(%[ftmp7], %[ftmp8], %[ftmp9], %[ftmp10],           \
+                     %[ftmp11], %[ftmp12], %[ftmp13], %[ftmp14])        \
+        "pmaddhw      %[ftmp11],     %[ftmp3],      %[ftmp1]    \n\t"   /* multiply-add rows 0-3 / 4-7 with taps 0-3 / 4-7 */ \
+        "pmaddhw      %[ftmp12],     %[ftmp7],      %[ftmp2]    \n\t"   \
+        "pmaddhw      %[ftmp13],     %[ftmp4],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp14],     %[ftmp8],      %[ftmp2]    \n\t"   \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp3], %[ftmp4])          \
+        "paddw        %[ftmp3],      %[ftmp3],      %[ftmp4]    \n\t"   \
+        "psraw        %[ftmp3],      %[ftmp3],      %[ftmp0]    \n\t"   /* >> 6 */ \
+        "pmaddhw      %[ftmp11],     %[ftmp5],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp12],     %[ftmp9],      %[ftmp2]    \n\t"   \
+        "pmaddhw      %[ftmp13],     %[ftmp6],      %[ftmp1]    \n\t"   \
+        "pmaddhw      %[ftmp14],     %[ftmp10],     %[ftmp2]    \n\t"   \
+        "paddw        %[ftmp11],     %[ftmp11],     %[ftmp12]   \n\t"   \
+        "paddw        %[ftmp13],     %[ftmp13],     %[ftmp14]   \n\t"   \
+        TRANSPOSE_2W(%[ftmp11], %[ftmp13], %[ftmp5], %[ftmp6])          \
+        "paddw        %[ftmp5],      %[ftmp5],      %[ftmp6]    \n\t"   \
+        "psraw        %[ftmp5],      %[ftmp5],      %[ftmp0]    \n\t"   \
+        "packsswh     %[ftmp3],      %[ftmp3],      %[ftmp5]    \n\t"   /* 4 results as halfwords */ \
+        "paddh        %[ftmp3],      %[ftmp3],      %[offset]   \n\t"   /* + offset */ \
+        "psrah        %[ftmp3],      %[ftmp3],      %[shift]    \n\t"   /* >> shift */ \
+        "xor          %[ftmp7],      %[ftmp7],      %[ftmp7]    \n\t"   \
+        "pcmpgth      %[ftmp7],      %[ftmp3],      %[ftmp7]    \n\t"   /* mask of lanes > 0 */ \
+        "and          %[ftmp3],      %[ftmp3],      %[ftmp7]    \n\t"   /* zero the other lanes */ \
+        "packushb     %[ftmp3],      %[ftmp3],      %[ftmp3]    \n\t"   /* halfwords -> unsigned bytes */ \
+        "gsswlc1      %[ftmp3],      0x03(%[dst])               \n\t"   /* store 4 pixels */ \
+        "gsswrc1      %[ftmp3],      0x00(%[dst])               \n\t"   \
+        /* Next 4 pixels: tmp += 8 bytes, dst += 4 bytes. */            \
+        "daddi        %[x],          %[x],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x08        \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],        0x04        \n\t"   \
+        "bnez         %[x],          2b                         \n\t"   \
+        /* Next row: apply *_step, dst += stride, tmp += 0x80 bytes. */ \
+        "daddi        %[y],          %[y],         -0x01        \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],     " #tmp_step "  \n\t"   \
+        PTR_ADDIU    "%[dst],        %[dst],     " #dst_step "  \n\t"   \
+        PTR_ADDU     "%[dst],        %[dst],        %[stride]   \n\t"   \
+        PTR_ADDIU    "%[tmp],        %[tmp],        0x80        \n\t"   \
+        "bnez         %[y],          1b                         \n\t"   \
+        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),                 \
+          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),                 \
+          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),                 \
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),                 \
+          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),                 \
+          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),             \
+          [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),             \
+          [ftmp14]"=&f"(ftmp[14]),                                      \
+          [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x),   \
+          [offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0])                  \
+        : [filter]"r"(filter), [stride]"r"(dststride),                  \
+          [shift]"f"(shift)                                             \
+        : "memory"                                                      \
+    );                                                                  \
+}
+
+PUT_HEVC_QPEL_UNI_HV(4, 1, -4, -4, -8);         /* w = 4:  1 chunk of 4 pixels per row; steps undo one row's advance */
+PUT_HEVC_QPEL_UNI_HV(8, 2, -8, -8, -16);        /* w = 8:  2 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(12, 3, -12, -12, -24);     /* w = 12: 3 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(16, 4, -16, -16, -32);     /* w = 16: 4 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(24, 6, -24, -24, -48);     /* w = 24: 6 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(32, 8, -32, -32, -64);     /* w = 32: 8 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(48, 12, -48, -48, -96);    /* w = 48: 12 chunks per row */
+PUT_HEVC_QPEL_UNI_HV(64, 16, -64, -64, -128);   /* w = 64: 16 chunks per row */
diff --git a/libavcodec/mips/hpeldsp_init_mips.c b/libavcodec/mips/hpeldsp_init_mips.c
index 363a04514f48c..d6f7a9793dfde 100644
--- a/libavcodec/mips/hpeldsp_init_mips.c
+++ b/libavcodec/mips/hpeldsp_init_mips.c
@@ -113,10 +113,10 @@ static void ff_hpeldsp_init_mmi(HpelDSPContext *c, int flags)
 
 void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
 {
-#if HAVE_MSA
-    ff_hpeldsp_init_msa(c, flags);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     ff_hpeldsp_init_mmi(c, flags);
 #endif  // #if HAVE_MMI
+#if HAVE_MSA  /* MSA init now runs after the MMI init above */
+    ff_hpeldsp_init_msa(c, flags);  /* NOTE(review): presumably so MSA entries win when both are built - confirm */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/idctdsp_init_mips.c b/libavcodec/mips/idctdsp_init_mips.c
index bb33b5541986b..85b76ca47836a 100644
--- a/libavcodec/mips/idctdsp_init_mips.c
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -65,10 +65,10 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
 av_cold void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
                           unsigned high_bit_depth)
 {
-#if HAVE_MSA
-    idctdsp_init_msa(c, avctx, high_bit_depth);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     idctdsp_init_mmi(c, avctx, high_bit_depth);
 #endif /* HAVE_MMI */
+#if HAVE_MSA  /* MSA init now runs after the MMI init above */
+    idctdsp_init_msa(c, avctx, high_bit_depth);  /* NOTE(review): presumably so MSA entries win when both are built - confirm */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/mpegvideo_init_mips.c b/libavcodec/mips/mpegvideo_init_mips.c
index 1918da5f46b4e..be77308140134 100644
--- a/libavcodec/mips/mpegvideo_init_mips.c
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -49,10 +49,10 @@ static av_cold void dct_unquantize_init_mmi(MpegEncContext *s)
 
 av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
 {
-#if HAVE_MSA
-    dct_unquantize_init_msa(s);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     dct_unquantize_init_mmi(s);
 #endif /* HAVE_MMI */
+#if HAVE_MSA  /* MSA init now runs after the MMI init above */
+    dct_unquantize_init_msa(s);  /* NOTE(review): presumably so MSA entries win when both are built - confirm */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c
index 1b3741ea768f0..fd0238d79bb47 100644
--- a/libavcodec/mips/pixblockdsp_init_mips.c
+++ b/libavcodec/mips/pixblockdsp_init_mips.c
@@ -60,10 +60,10 @@ static av_cold void pixblockdsp_init_mmi(PixblockDSPContext *c,
 void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
                               unsigned high_bit_depth)
 {
-#if HAVE_MSA
-    pixblockdsp_init_msa(c, avctx, high_bit_depth);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     pixblockdsp_init_mmi(c, avctx, high_bit_depth);
 #endif /* HAVE_MMI */
+#if HAVE_MSA  /* MSA init now runs after the MMI init above */
+    pixblockdsp_init_msa(c, avctx, high_bit_depth);  /* NOTE(review): presumably so MSA entries win when both are built - confirm */
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/vp3dsp_idct_mmi.c b/libavcodec/mips/vp3dsp_idct_mmi.c
new file mode 100644
index 0000000000000..c5c4cf31270aa
--- /dev/null
+++ b/libavcodec/mips/vp3dsp_idct_mmi.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp3dsp_mips.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mips/mmiutils.h"
+#include "libavutil/common.h"
+#include "libavcodec/rnd_avg.h"
+/* Asm snippet: put integer `value` in all four halfwords of FP reg dst; uses %[tmp1] as scratch and needs %[ftmp10] == 0. */
+#define LOAD_CONST(dst, value)                        \
+    "li     %[tmp1],      "#value"              \n\t" \
+    "dmtc1  %[tmp1],      "#dst"                \n\t" \
+    "pshufh "#dst",       "#dst",     %[ftmp10] \n\t"
+/* VP3 IDCT stage done in place on the 64 int16_t at input: 16-bit lane j combines input[j + 8 * k], k = 0..7. */
+static void idct_row_mmi(int16_t *input)
+{
+    double ftmp[23];   /* storage behind the FP-register operands of the asm */
+    uint64_t tmp[2];   /* tmp[0]: pass counter, tmp[1]: scratch GPR for LOAD_CONST */
+    __asm__ volatile (
+        "xor        %[ftmp10],      %[ftmp10],        %[ftmp10] \n\t" /* ftmp10 = 0 */
+        LOAD_CONST(%[csth_1], 1) /* +1 in every halfword */
+        "li         %[tmp0],        0x02                        \n\t" /* two passes of four lanes */
+        "1:                                                     \n\t"
+        /* Load input: eight 64-bit words 16 bytes apart (called in0..in7 below) */
+        "ldc1       %[ftmp0],       0x00(%[input])              \n\t"
+        "ldc1       %[ftmp1],       0x10(%[input])              \n\t"
+        "ldc1       %[ftmp2],       0x20(%[input])              \n\t"
+        "ldc1       %[ftmp3],       0x30(%[input])              \n\t"
+        "ldc1       %[ftmp4],       0x40(%[input])              \n\t"
+        "ldc1       %[ftmp5],       0x50(%[input])              \n\t"
+        "ldc1       %[ftmp6],       0x60(%[input])              \n\t"
+        "ldc1       %[ftmp7],       0x70(%[input])              \n\t"
+        LOAD_CONST(%[ftmp8], 64277) /* ~ 65536 * cos(pi/16) */
+        LOAD_CONST(%[ftmp9], 12785) /* ~ 65536 * sin(pi/16) */
+        "pmulhh     %[A],           %[ftmp9],         %[ftmp7]  \n\t" /* signed high half of 12785 * in7 */
+        "pcmpgth    %[C],           %[ftmp10],        %[ftmp1]  \n\t" /* C = -1 where in1 < 0, else 0 */
+        "or         %[mask],        %[C],             %[csth_1] \n\t" /* mask = -1 or +1 per lane */
+        "pmullh     %[B],           %[ftmp1],         %[mask]   \n\t" /* B = in1 * mask (magnitude) */
+        "pmulhuh    %[B],           %[ftmp8],         %[B]      \n\t" /* unsigned high half: 64277 does not fit int16 */
+        "pmullh     %[B],           %[B],             %[mask]   \n\t" /* put the sign back */
+        "paddh      %[A],           %[A],             %[B]      \n\t"
+        "paddh      %[A],           %[A],             %[C]      \n\t" /* -1 on negative lanes (rounds down like >> 16) */
+        "pcmpgth    %[D],           %[ftmp10],        %[ftmp7]  \n\t" /* same sign trick for 64277 * in7 */
+        "or         %[mask],        %[D],             %[csth_1] \n\t"
+        "pmullh     %[ftmp7],       %[ftmp7],         %[mask]   \n\t"
+        "pmulhuh    %[B],           %[ftmp8],         %[ftmp7]  \n\t"
+        "pmullh     %[B],           %[B],             %[mask]   \n\t"
+        "pmulhh     %[C],           %[ftmp9],         %[ftmp1]  \n\t"
+        "psubh      %[B],           %[C],             %[B]      \n\t"
+        "psubh      %[B],           %[B],             %[D]      \n\t"
+        /* C = M(54491, in3) + M(36410, in5), D = M(54491, in5) - M(36410, in3), with M(c, x) = (c * x) >> 16 */
+        LOAD_CONST(%[ftmp8], 54491)
+        LOAD_CONST(%[ftmp9], 36410)
+        "pcmpgth    %[Ad],          %[ftmp10],        %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ad],            %[csth_1] \n\t"
+        "pmullh     %[ftmp1],       %[ftmp5],         %[mask]   \n\t"
+        "pmulhuh    %[C],           %[ftmp9],         %[ftmp1]  \n\t"
+        "pmullh     %[C],           %[C],             %[mask]   \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],        %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],            %[csth_1] \n\t"
+        "pmullh     %[D],           %[ftmp3],         %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp8],         %[D]      \n\t"
+        "pmullh     %[D],           %[D],             %[mask]   \n\t"
+        "paddh      %[C],           %[C],             %[D]      \n\t"
+        "paddh      %[C],           %[C],             %[Ad]     \n\t"
+        "paddh      %[C],           %[C],             %[Bd]     \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],        %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],            %[csth_1] \n\t"
+        "pmullh     %[ftmp1],       %[ftmp3],         %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp9],         %[ftmp1]  \n\t"
+        "pmullh     %[D],           %[D],             %[mask]   \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ed],            %[csth_1] \n\t"
+        "pmullh     %[Ad],          %[ftmp5],         %[mask]   \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]     \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
+        "psubh      %[D],           %[Ad],            %[D]      \n\t"
+        "paddh      %[D],           %[D],             %[Ed]     \n\t"
+        "psubh      %[D],           %[D],             %[Bd]     \n\t"
+        /* Ad = M(46341, A - C), Bd = M(46341, B - D), Cd = A + C, Dd = B + D; then A, B = M(46341, in0 +/- in4) */
+        LOAD_CONST(%[ftmp8], 46341)
+        "psubh      %[Ad],          %[A],             %[C]      \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]     \n\t"
+        "or         %[mask],        %[Bd],            %[csth_1] \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]     \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]   \n\t"
+        "paddh      %[Ad],          %[Ad],            %[Bd]     \n\t"
+        "psubh      %[Bd],          %[B],             %[D]      \n\t"
+        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]     \n\t"
+        "or         %[mask],        %[Cd],            %[csth_1] \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]   \n\t"
+        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]     \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]   \n\t"
+        "paddh      %[Bd],          %[Bd],            %[Cd]     \n\t"
+        "paddh      %[Cd],          %[A],             %[C]      \n\t"
+        "paddh      %[Dd],          %[B],             %[D]      \n\t"
+        "paddh      %[A],           %[ftmp0],         %[ftmp4]  \n\t"
+        "pcmpgth    %[B],           %[ftmp10],        %[A]      \n\t"
+        "or         %[mask],        %[B],             %[csth_1] \n\t"
+        "pmullh     %[A],           %[A],             %[mask]   \n\t"
+        "pmulhuh    %[A],           %[ftmp8],         %[A]      \n\t"
+        "pmullh     %[A],           %[A],             %[mask]   \n\t"
+        "paddh      %[A],           %[A],             %[B]      \n\t"
+        "psubh      %[B],           %[ftmp0],         %[ftmp4]  \n\t"
+        "pcmpgth    %[C],           %[ftmp10],        %[B]      \n\t"
+        "or         %[mask],        %[C],             %[csth_1] \n\t"
+        "pmullh     %[B],           %[B],             %[mask]   \n\t"
+        "pmulhuh    %[B],           %[ftmp8],         %[B]      \n\t"
+        "pmullh     %[B],           %[B],             %[mask]   \n\t"
+        "paddh      %[B],           %[B],             %[C]      \n\t"
+        /* C = M(60547, in2) + M(25080, in6), D = M(25080, in2) - M(60547, in6); then the output butterflies */
+        LOAD_CONST(%[ftmp8], 60547)
+        LOAD_CONST(%[ftmp9], 25080)
+        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]  \n\t"
+        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]  \n\t"
+        "or         %[mask],        %[D],             %[csth_1] \n\t"
+        "pmullh     %[Ed],          %[ftmp2],         %[mask]   \n\t"
+        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]     \n\t"
+        "pmullh     %[Ed],          %[Ed],            %[mask]   \n\t"
+        "paddh      %[C],           %[C],             %[Ed]     \n\t"
+        "paddh      %[C],           %[C],             %[D]      \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]  \n\t"
+        "or         %[mask],        %[Ed],            %[csth_1] \n\t"
+        "pmullh     %[ftmp6],       %[ftmp6],         %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp8],         %[ftmp6]  \n\t"
+        "pmullh     %[D],           %[D],             %[mask]   \n\t"
+        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]  \n\t"
+        "psubh      %[D],           %[Gd],            %[D]      \n\t"
+        "psubh      %[D],           %[D],             %[Ed]     \n\t"
+        "psubh      %[Ed],          %[A],             %[C]      \n\t"
+        "paddh      %[Gd],          %[A],             %[C]      \n\t"
+        "paddh      %[A],           %[B],             %[Ad]     \n\t"
+        "psubh      %[C],           %[B],             %[Ad]     \n\t"
+        "psubh      %[B],           %[Bd],            %[D]      \n\t"
+        "paddh      %[D],           %[Bd],            %[D]      \n\t"
+        /* Final sequence of operations over-write original inputs */
+        "paddh      %[ftmp0],       %[Gd],            %[Cd]     \n\t"
+        "paddh      %[ftmp1],       %[A],             %[D]      \n\t"
+        "psubh      %[ftmp2],       %[A],             %[D]      \n\t"
+        "paddh      %[ftmp3],       %[Ed],            %[Dd]     \n\t"
+        "psubh      %[ftmp4],       %[Ed],            %[Dd]     \n\t"
+        "paddh      %[ftmp5],       %[C],             %[B]      \n\t"
+        "psubh      %[ftmp6],       %[C],             %[B]      \n\t"
+        "psubh      %[ftmp7],       %[Gd],            %[Cd]     \n\t"
+        "sdc1       %[ftmp0],       0x00(%[input])              \n\t"
+        "sdc1       %[ftmp1],       0x10(%[input])              \n\t"
+        "sdc1       %[ftmp2],       0x20(%[input])              \n\t"
+        "sdc1       %[ftmp3],       0x30(%[input])              \n\t"
+        "sdc1       %[ftmp4],       0x40(%[input])              \n\t"
+        "sdc1       %[ftmp5],       0x50(%[input])              \n\t"
+        "sdc1       %[ftmp6],       0x60(%[input])              \n\t"
+        "sdc1       %[ftmp7],       0x70(%[input])              \n\t"
+        PTR_ADDIU  "%[tmp0],        %[tmp0],          -0x01     \n\t" /* immediate form, as in the column code */
+        PTR_ADDIU  "%[input],       %[input],         0x08      \n\t" /* next four lanes */
+        "bnez       %[tmp0],        1b                          \n\t"
+        : [input]"+&r"(input), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
+          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
+          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
+          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
+          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
+          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
+          [Gd]"=&f"(ftmp[21]), [csth_1]"=&f"(ftmp[22])
+        :
+        : "memory"
+    );
+}
+/* Column IDCT stage that writes pixels: from the 64 int16_t at input, stores 8 rows of 8 clamped bytes at dst, stride bytes apart. */
+static void idct_column_true_mmi(uint8_t *dst, int stride, int16_t *input)
+{
+    uint8_t temp_value[8], *dc = temp_value; /* dc: cursor the asm advances (must be a read-write operand) */
+    double ftmp[23];   /* storage behind the FP-register operands of the asm */
+    uint64_t tmp[2];   /* tmp[0]: pass counter, tmp[1]: scratch GPR / row pointer */
+    for (int i = 0; i < 8; ++i) /* DC-only pixel value for each group of 8 coefficients */
+        temp_value[i] = av_clip_uint8(128 + ((46341 * input[i << 3] + (8 << 16)) >> 20));
+    __asm__ volatile (
+        "xor        %[ftmp10],      %[ftmp10],          %[ftmp10] \n\t" /* ftmp10 = 0 */
+        "li         %[tmp0],        0x02                          \n\t" /* two passes, four output columns each */
+        "1:                                                       \n\t"
+        "ldc1       %[ftmp0],       0x00(%[input])                \n\t"
+        "ldc1       %[ftmp4],       0x08(%[input])                \n\t"
+        "ldc1       %[ftmp1],       0x10(%[input])                \n\t"
+        "ldc1       %[ftmp5],       0x18(%[input])                \n\t"
+        "ldc1       %[ftmp2],       0x20(%[input])                \n\t"
+        "ldc1       %[ftmp6],       0x28(%[input])                \n\t"
+        "ldc1       %[ftmp3],       0x30(%[input])                \n\t"
+        "ldc1       %[ftmp7],       0x38(%[input])                \n\t"
+        TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
+                     %[A], %[B], %[C], %[D])
+        TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
+                     %[A], %[B], %[C], %[D]) /* assuming a 4x4 halfword transpose: ftmpK lane r = input[8 * r + K] (inK below) */
+        LOAD_CONST(%[ftmp8], 64277) /* ~ 65536 * cos(pi/16) */
+        LOAD_CONST(%[ftmp9], 12785) /* ~ 65536 * sin(pi/16) */
+        LOAD_CONST(%[Gd], 1) /* Gd holds +1 per halfword until it is reused as a result below */
+        "pmulhh     %[A],           %[ftmp9],           %[ftmp7]  \n\t" /* signed high half of 12785 * in7 */
+        "pcmpgth    %[C],           %[ftmp10],          %[ftmp1]  \n\t" /* C = -1 where in1 < 0, else 0 */
+        "or         %[mask],        %[C],               %[Gd]     \n\t" /* mask = -1 or +1 per lane */
+        "pmullh     %[B],           %[ftmp1],           %[mask]   \n\t" /* B = in1 * mask (magnitude) */
+        "pmulhuh    %[B],           %[ftmp8],           %[B]      \n\t" /* unsigned high half: 64277 does not fit int16 */
+        "pmullh     %[B],           %[B],               %[mask]   \n\t" /* put the sign back */
+        "paddh      %[A],           %[A],               %[B]      \n\t"
+        "paddh      %[A],           %[A],               %[C]      \n\t" /* -1 on negative lanes (rounds down like >> 16) */
+        "pcmpgth    %[D],           %[ftmp10],          %[ftmp7]  \n\t"
+        "or         %[mask],        %[D],               %[Gd]     \n\t"
+        "pmullh     %[Ad],          %[ftmp7],           %[mask]   \n\t"
+        "pmulhuh    %[B],           %[ftmp8],           %[Ad]     \n\t"
+        "pmullh     %[B],           %[B],               %[mask]   \n\t"
+        "pmulhh     %[C],           %[ftmp9],           %[ftmp1]  \n\t"
+        "psubh      %[B],           %[C],               %[B]      \n\t"
+        "psubh      %[B],           %[B],               %[D]      \n\t"
+        /* C = M(54491, in3) + M(36410, in5), D = M(54491, in5) - M(36410, in3), with M(c, x) = (c * x) >> 16 */
+        LOAD_CONST(%[ftmp8], 54491)
+        LOAD_CONST(%[ftmp9], 36410)
+        "pcmpgth    %[Ad],          %[ftmp10],          %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ad],              %[Gd]     \n\t"
+        "pmullh     %[Cd],          %[ftmp5],           %[mask]   \n\t"
+        "pmulhuh    %[C],           %[ftmp9],           %[Cd]     \n\t"
+        "pmullh     %[C],           %[C],               %[mask]   \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],              %[Gd]     \n\t"
+        "pmullh     %[D],           %[ftmp3],           %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp8],           %[D]      \n\t"
+        "pmullh     %[D],           %[D],               %[mask]   \n\t"
+        "paddh      %[C],           %[C],               %[D]      \n\t"
+        "paddh      %[C],           %[C],               %[Ad]     \n\t"
+        "paddh      %[C],           %[C],               %[Bd]     \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],              %[Gd]     \n\t"
+        "pmullh     %[Cd],          %[ftmp3],           %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp9],           %[Cd]     \n\t"
+        "pmullh     %[D],           %[D],               %[mask]   \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],          %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ed],              %[Gd]     \n\t"
+        "pmullh     %[Ad],          %[ftmp5],           %[mask]   \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],           %[Ad]     \n\t"
+        "pmullh     %[Ad],          %[Ad],              %[mask]   \n\t"
+        "psubh      %[D],           %[Ad],              %[D]      \n\t"
+        "paddh      %[D],           %[D],               %[Ed]     \n\t"
+        "psubh      %[D],           %[D],               %[Bd]     \n\t"
+        /* Ad = M(46341, A - C), Bd = M(46341, B - D), Cd = A + C, Dd = B + D */
+        LOAD_CONST(%[ftmp8], 46341)
+        "psubh      %[Ad],          %[A],             %[C]        \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]       \n\t"
+        "or         %[mask],        %[Bd],            %[Gd]       \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]       \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
+        "paddh      %[Ad],          %[Ad],            %[Bd]       \n\t"
+        "psubh      %[Bd],          %[B],             %[D]        \n\t"
+        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]       \n\t"
+        "or         %[mask],        %[Cd],            %[Gd]       \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
+        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]       \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
+        "paddh      %[Bd],          %[Bd],            %[Cd]       \n\t"
+        "paddh      %[Cd],          %[A],             %[C]        \n\t"
+        "paddh      %[Dd],          %[B],             %[D]        \n\t"
+        /* A, B = M(46341, in0 +/- in4) + 2056; 2056 = 8 + (128 << 4), presumably rounding plus the +128 bias for the final >> 4 */
+        LOAD_CONST(%[Ed], 2056)
+        "paddh      %[A],           %[ftmp0],         %[ftmp4]    \n\t"
+        "pcmpgth    %[B],           %[ftmp10],        %[A]        \n\t"
+        "or         %[mask],        %[B],             %[Gd]       \n\t"
+        "pmullh     %[A],           %[A],             %[mask]     \n\t"
+        "pmulhuh    %[A],           %[ftmp8],         %[A]        \n\t"
+        "pmullh     %[A],           %[A],             %[mask]     \n\t"
+        "paddh      %[A],           %[A],             %[B]        \n\t"
+        "paddh      %[A],           %[A],             %[Ed]       \n\t"
+        "psubh      %[B],           %[ftmp0],         %[ftmp4]    \n\t"
+        "pcmpgth    %[C],           %[ftmp10],        %[B]        \n\t"
+        "or         %[mask],        %[C],             %[Gd]       \n\t"
+        "pmullh     %[B],           %[B],             %[mask]     \n\t"
+        "pmulhuh    %[B],           %[ftmp8],         %[B]        \n\t"
+        "pmullh     %[B],           %[B],             %[mask]     \n\t"
+        "paddh      %[B],           %[B],             %[C]        \n\t"
+        "paddh      %[B],           %[B],             %[Ed]       \n\t"
+        /* C = M(60547, in2) + M(25080, in6), D = M(25080, in2) - M(60547, in6); then the output butterflies */
+        LOAD_CONST(%[ftmp8], 60547)
+        LOAD_CONST(%[ftmp9], 25080)
+        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]    \n\t"
+        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]    \n\t"
+        "or         %[mask],        %[D],             %[Gd]       \n\t"
+        "pmullh     %[Ed],          %[ftmp2],         %[mask]     \n\t"
+        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]       \n\t"
+        "pmullh     %[Ed],          %[Ed],            %[mask]     \n\t"
+        "paddh      %[C],           %[C],             %[Ed]       \n\t"
+        "paddh      %[C],           %[C],             %[D]        \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]    \n\t"
+        "or         %[mask],        %[Ed],            %[Gd]       \n\t"
+        "pmullh     %[D],           %[ftmp6],         %[mask]     \n\t"
+        "pmulhuh    %[D],           %[ftmp8],         %[D]        \n\t"
+        "pmullh     %[D],           %[D],             %[mask]     \n\t"
+        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]    \n\t" /* Gd stops being the constant 1 here */
+        "psubh      %[D],           %[Gd],            %[D]        \n\t"
+        "psubh      %[D],           %[D],             %[Ed]       \n\t"
+        "psubh      %[Ed],          %[A],             %[C]        \n\t"
+        "paddh      %[Gd],          %[A],             %[C]        \n\t"
+        "paddh      %[A],           %[B],             %[Ad]       \n\t"
+        "psubh      %[C],           %[B],             %[Ad]       \n\t"
+        "psubh      %[B],           %[Bd],            %[D]        \n\t"
+        "paddh      %[D],           %[Bd],            %[D]        \n\t"
+        "or         %[mask],        %[ftmp1],         %[ftmp2]    \n\t" /* OR together in1..in7 */
+        "or         %[mask],        %[mask],          %[ftmp3]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp4]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp5]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp6]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp7]    \n\t"
+        "pcmpeqh    %[mask],        %[mask],          %[ftmp10]   \n\t" /* halfword = 0xffff where in1..in7 are all zero */
+        "packushb   %[mask],        %[mask],          %[ftmp10]   \n\t" /* NOTE(review): signed->unsigned saturating pack; 0xffff may become 0x00 -- confirm */
+        "li         %[tmp1],        0x04                          \n\t"
+        "dmtc1      %[tmp1],        %[ftmp8]                      \n\t" /* ftmp8 = shift count 4 */
+        "paddh      %[ftmp0],       %[Gd],            %[Cd]       \n\t"
+        "psrah      %[ftmp0],       %[ftmp0],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp1],       %[A],             %[D]        \n\t"
+        "psrah      %[ftmp1],       %[ftmp1],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp2],       %[A],             %[D]        \n\t"
+        "psrah      %[ftmp2],       %[ftmp2],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp3],       %[Ed],            %[Dd]       \n\t"
+        "psrah      %[ftmp3],       %[ftmp3],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp4],       %[Ed],            %[Dd]       \n\t"
+        "psrah      %[ftmp4],       %[ftmp4],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp5],       %[C],             %[B]        \n\t"
+        "psrah      %[ftmp5],       %[ftmp5],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp6],       %[C],             %[B]        \n\t"
+        "psrah      %[ftmp6],       %[ftmp6],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp7],       %[Gd],            %[Cd]       \n\t"
+        "psrah      %[ftmp7],       %[ftmp7],         %[ftmp8]    \n\t"
+        "pmaxsh     %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t" /* clamp: max with 0, then saturating pack to bytes */
+        "packushb   %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
+        /* Add the masked DC-only bytes (four per pass) to every row, then store four bytes per row */
+        "lwc1       %[Ed],          0x00(%[temp_value])           \n\t"
+        "and        %[Ed],          %[Ed],            %[mask]     \n\t"
+        "paddb      %[ftmp0],       %[ftmp0],         %[Ed]       \n\t"
+        "paddb      %[ftmp1],       %[ftmp1],         %[Ed]       \n\t"
+        "paddb      %[ftmp2],       %[ftmp2],         %[Ed]       \n\t"
+        "paddb      %[ftmp3],       %[ftmp3],         %[Ed]       \n\t"
+        "paddb      %[ftmp4],       %[ftmp4],         %[Ed]       \n\t"
+        "paddb      %[ftmp5],       %[ftmp5],         %[Ed]       \n\t"
+        "paddb      %[ftmp6],       %[ftmp6],         %[Ed]       \n\t"
+        "paddb      %[ftmp7],       %[ftmp7],         %[Ed]       \n\t"
+        "swc1       %[ftmp0],       0x00(%[dst])                  \n\t"
+        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
+        "swc1       %[ftmp1],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp2],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp3],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp4],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp5],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp6],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp7],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDIU  "%[dst],         %[dst],           0x04        \n\t" /* next four output columns */
+        PTR_ADDIU  "%[input],       %[input],         0x40        \n\t" /* next four groups of 8 coefficients */
+        PTR_ADDIU  "%[temp_value],  %[temp_value],    0x04        \n\t" /* next four DC-only bytes */
+        PTR_ADDIU  "%[tmp0],        %[tmp0],          -0x01       \n\t"
+        "bnez       %[tmp0],        1b                            \n\t"
+        : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
+          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
+          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
+          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
+          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
+          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
+          [Gd]"=&f"(ftmp[21]), [input]"+&r"(input), [temp_value]"+&r"(dc)
+        : [stride]"r"(stride)
+        : "memory"
+    );
+}
+
+static void idct_column_false_mmi(uint8_t *dst, int stride, int16_t *input)
+{
+    int16_t temp_value[8];
+    double ftmp[23];
+    uint64_t tmp[2];
+    for (int i = 0; i < 8; ++i)
+        temp_value[i] = (46341 * input[i << 3] + (8 << 16)) >> 20;
+    __asm__ volatile (
+        "xor        %[ftmp10],      %[ftmp10],          %[ftmp10] \n\t"
+        "li         %[tmp0],        0x02                          \n\t"
+        "1:                                                       \n\t"
+        "ldc1       %[ftmp0],       0x00(%[input])                \n\t"
+        "ldc1       %[ftmp4],       0x08(%[input])                \n\t"
+        "ldc1       %[ftmp1],       0x10(%[input])                \n\t"
+        "ldc1       %[ftmp5],       0x18(%[input])                \n\t"
+        "ldc1       %[ftmp2],       0x20(%[input])                \n\t"
+        "ldc1       %[ftmp6],       0x28(%[input])                \n\t"
+        "ldc1       %[ftmp3],       0x30(%[input])                \n\t"
+        "ldc1       %[ftmp7],       0x38(%[input])                \n\t"
+        TRANSPOSE_4H(%[ftmp0], %[ftmp1], %[ftmp2], %[ftmp3],
+                     %[A], %[B], %[C], %[D])
+        TRANSPOSE_4H(%[ftmp4], %[ftmp5], %[ftmp6], %[ftmp7],
+                     %[A], %[B], %[C], %[D])
+        LOAD_CONST(%[ftmp8], 64277)
+        LOAD_CONST(%[ftmp9], 12785)
+        LOAD_CONST(%[Gd], 1)
+        "pmulhh     %[A],           %[ftmp9],           %[ftmp7]  \n\t"
+        "pcmpgth    %[C],           %[ftmp10],          %[ftmp1]  \n\t"
+        "or         %[mask],        %[C],               %[Gd]     \n\t"
+        "pmullh     %[B],           %[ftmp1],           %[mask]   \n\t"
+        "pmulhuh    %[B],           %[ftmp8],           %[B]      \n\t"
+        "pmullh     %[B],           %[B],               %[mask]   \n\t"
+        "paddh      %[A],           %[A],               %[B]      \n\t"
+        "paddh      %[A],           %[A],               %[C]      \n\t"
+        "pcmpgth    %[D],           %[ftmp10],          %[ftmp7]  \n\t"
+        "or         %[mask],        %[D],               %[Gd]     \n\t"
+        "pmullh     %[Ad],          %[ftmp7],           %[mask]   \n\t"
+        "pmulhuh    %[B],           %[ftmp8],           %[Ad]     \n\t"
+        "pmullh     %[B],           %[B],               %[mask]   \n\t"
+        "pmulhh     %[C],           %[ftmp9],           %[ftmp1]  \n\t"
+        "psubh      %[B],           %[C],               %[B]      \n\t"
+        "psubh      %[B],           %[B],               %[D]      \n\t"
+
+        LOAD_CONST(%[ftmp8], 54491)
+        LOAD_CONST(%[ftmp9], 36410)
+        "pcmpgth    %[Ad],          %[ftmp10],          %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ad],              %[Gd]     \n\t"
+        "pmullh     %[Cd],          %[ftmp5],           %[mask]   \n\t"
+        "pmulhuh    %[C],           %[ftmp9],           %[Cd]     \n\t"
+        "pmullh     %[C],           %[C],               %[mask]   \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],              %[Gd]     \n\t"
+        "pmullh     %[D],           %[ftmp3],           %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp8],           %[D]      \n\t"
+        "pmullh     %[D],           %[D],               %[mask]   \n\t"
+        "paddh      %[C],           %[C],               %[D]      \n\t"
+        "paddh      %[C],           %[C],               %[Ad]     \n\t"
+        "paddh      %[C],           %[C],               %[Bd]     \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],          %[ftmp3]  \n\t"
+        "or         %[mask],        %[Bd],              %[Gd]     \n\t"
+        "pmullh     %[Cd],          %[ftmp3],           %[mask]   \n\t"
+        "pmulhuh    %[D],           %[ftmp9],           %[Cd]     \n\t"
+        "pmullh     %[D],           %[D],               %[mask]   \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],          %[ftmp5]  \n\t"
+        "or         %[mask],        %[Ed],              %[Gd]     \n\t"
+        "pmullh     %[Ad],          %[ftmp5],           %[mask]   \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],           %[Ad]     \n\t"
+        "pmullh     %[Ad],          %[Ad],              %[mask]   \n\t"
+        "psubh      %[D],           %[Ad],              %[D]      \n\t"
+        "paddh      %[D],           %[D],               %[Ed]     \n\t"
+        "psubh      %[D],           %[D],               %[Bd]     \n\t"
+
+        LOAD_CONST(%[ftmp8], 46341)
+        "psubh      %[Ad],          %[A],             %[C]        \n\t"
+        "pcmpgth    %[Bd],          %[ftmp10],        %[Ad]       \n\t"
+        "or         %[mask],        %[Bd],            %[Gd]       \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
+        "pmulhuh    %[Ad],          %[ftmp8],         %[Ad]       \n\t"
+        "pmullh     %[Ad],          %[Ad],            %[mask]     \n\t"
+        "paddh      %[Ad],          %[Ad],            %[Bd]       \n\t"
+        "psubh      %[Bd],          %[B],             %[D]        \n\t"
+        "pcmpgth    %[Cd],          %[ftmp10],        %[Bd]       \n\t"
+        "or         %[mask],        %[Cd],            %[Gd]       \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
+        "pmulhuh    %[Bd],          %[ftmp8],         %[Bd]       \n\t"
+        "pmullh     %[Bd],          %[Bd],            %[mask]     \n\t"
+        "paddh      %[Bd],          %[Bd],            %[Cd]       \n\t"
+        "paddh      %[Cd],          %[A],             %[C]        \n\t"
+        "paddh      %[Dd],          %[B],             %[D]        \n\t"
+
+        LOAD_CONST(%[Ed], 8)
+        "paddh      %[A],           %[ftmp0],         %[ftmp4]    \n\t"
+        "pcmpgth    %[B],           %[ftmp10],        %[A]        \n\t"
+        "or         %[mask],        %[B],             %[Gd]       \n\t"
+        "pmullh     %[A],           %[A],             %[mask]     \n\t"
+        "pmulhuh    %[A],           %[ftmp8],         %[A]        \n\t"
+        "pmullh     %[A],           %[A],             %[mask]     \n\t"
+        "paddh      %[A],           %[A],             %[B]        \n\t"
+        "paddh      %[A],           %[A],             %[Ed]       \n\t"
+        "psubh      %[B],           %[ftmp0],         %[ftmp4]    \n\t"
+        "pcmpgth    %[C],           %[ftmp10],        %[B]        \n\t"
+        "or         %[mask],        %[C],             %[Gd]       \n\t"
+        "pmullh     %[B],           %[B],             %[mask]     \n\t"
+        "pmulhuh    %[B],           %[ftmp8],         %[B]        \n\t"
+        "pmullh     %[B],           %[B],             %[mask]     \n\t"
+        "paddh      %[B],           %[B],             %[C]        \n\t"
+        "paddh      %[B],           %[B],             %[Ed]       \n\t"
+
+        LOAD_CONST(%[ftmp8], 60547)
+        LOAD_CONST(%[ftmp9], 25080)
+        "pmulhh     %[C],           %[ftmp9],         %[ftmp6]    \n\t"
+        "pcmpgth    %[D],           %[ftmp10],        %[ftmp2]    \n\t"
+        "or         %[mask],        %[D],             %[Gd]       \n\t"
+        "pmullh     %[Ed],          %[ftmp2],         %[mask]     \n\t"
+        "pmulhuh    %[Ed],          %[ftmp8],         %[Ed]       \n\t"
+        "pmullh     %[Ed],          %[Ed],            %[mask]     \n\t"
+        "paddh      %[C],           %[C],             %[Ed]       \n\t"
+        "paddh      %[C],           %[C],             %[D]        \n\t"
+        "pcmpgth    %[Ed],          %[ftmp10],        %[ftmp6]    \n\t"
+        "or         %[mask],        %[Ed],            %[Gd]       \n\t"
+        "pmullh     %[D],           %[ftmp6],         %[mask]     \n\t"
+        "pmulhuh    %[D],           %[ftmp8],         %[D]        \n\t"
+        "pmullh     %[D],           %[D],             %[mask]     \n\t"
+        "pmulhh     %[Gd],          %[ftmp9],         %[ftmp2]    \n\t"
+        "psubh      %[D],           %[Gd],            %[D]        \n\t"
+        "psubh      %[D],           %[D],             %[Ed]       \n\t"
+        "psubh      %[Ed],          %[A],             %[C]        \n\t"
+        "paddh      %[Gd],          %[A],             %[C]        \n\t"
+        "paddh      %[A],           %[B],             %[Ad]       \n\t"
+        "psubh      %[C],           %[B],             %[Ad]       \n\t"
+        "psubh      %[B],           %[Bd],            %[D]        \n\t"
+        "paddh      %[D],           %[Bd],            %[D]        \n\t"
+        "or         %[mask],        %[ftmp1],         %[ftmp2]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp3]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp4]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp5]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp6]    \n\t"
+        "or         %[mask],        %[mask],          %[ftmp7]    \n\t"
+        "pcmpeqh    %[mask],        %[mask],          %[ftmp10]   \n\t"
+        "li         %[tmp1],        0x04                          \n\t"
+        "dmtc1      %[tmp1],        %[ftmp8]                      \n\t"
+        "paddh      %[ftmp0],       %[Gd],            %[Cd]       \n\t"
+        "psrah      %[ftmp0],       %[ftmp0],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp1],       %[A],             %[D]        \n\t"
+        "psrah      %[ftmp1],       %[ftmp1],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp2],       %[A],             %[D]        \n\t"
+        "psrah      %[ftmp2],       %[ftmp2],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp3],       %[Ed],            %[Dd]       \n\t"
+        "psrah      %[ftmp3],       %[ftmp3],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp4],       %[Ed],            %[Dd]       \n\t"
+        "psrah      %[ftmp4],       %[ftmp4],         %[ftmp8]    \n\t"
+        "paddh      %[ftmp5],       %[C],             %[B]        \n\t"
+        "psrah      %[ftmp5],       %[ftmp5],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp6],       %[C],             %[B]        \n\t"
+        "psrah      %[ftmp6],       %[ftmp6],         %[ftmp8]    \n\t"
+        "psubh      %[ftmp7],       %[Gd],            %[Cd]       \n\t"
+        "psrah      %[ftmp7],       %[ftmp7],         %[ftmp8]    \n\t"
+
+        /* Load from dst */
+        "lwc1       %[A],           0x00(%[dst])                  \n\t"
+        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
+        "lwc1       %[B],           0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[C],           0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[D],           0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[Ad],          0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[Bd],          0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[Cd],          0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "lwc1       %[Dd],          0x00(%[tmp1])                 \n\t"
+        "punpcklbh  %[A],           %[A],             %[ftmp10]   \n\t"
+        "punpcklbh  %[B],           %[B],             %[ftmp10]   \n\t"
+        "punpcklbh  %[C],           %[C],             %[ftmp10]   \n\t"
+        "punpcklbh  %[D],           %[D],             %[ftmp10]   \n\t"
+        "punpcklbh  %[Ad],          %[Ad],            %[ftmp10]   \n\t"
+        "punpcklbh  %[Bd],          %[Bd],            %[ftmp10]   \n\t"
+        "punpcklbh  %[Cd],          %[Cd],            %[ftmp10]   \n\t"
+        "punpcklbh  %[Dd],          %[Dd],            %[ftmp10]   \n\t"
+        "ldc1       %[Ed],          0x00(%[temp_value])           \n\t"
+        "and        %[Ed],          %[Ed],            %[mask]     \n\t"
+        "nor        %[mask],        %[mask],          %[mask]     \n\t"
+        "and        %[ftmp0],       %[ftmp0],         %[mask]     \n\t"
+        "and        %[ftmp1],       %[ftmp1],         %[mask]     \n\t"
+        "and        %[ftmp2],       %[ftmp2],         %[mask]     \n\t"
+        "and        %[ftmp3],       %[ftmp3],         %[mask]     \n\t"
+        "and        %[ftmp4],       %[ftmp4],         %[mask]     \n\t"
+        "and        %[ftmp5],       %[ftmp5],         %[mask]     \n\t"
+        "and        %[ftmp6],       %[ftmp6],         %[mask]     \n\t"
+        "and        %[ftmp7],       %[ftmp7],         %[mask]     \n\t"
+        "paddh      %[ftmp0],       %[ftmp0],         %[A]        \n\t"
+        "paddh      %[ftmp1],       %[ftmp1],         %[B]        \n\t"
+        "paddh      %[ftmp2],       %[ftmp2],         %[C]        \n\t"
+        "paddh      %[ftmp3],       %[ftmp3],         %[D]        \n\t"
+        "paddh      %[ftmp4],       %[ftmp4],         %[Ad]       \n\t"
+        "paddh      %[ftmp5],       %[ftmp5],         %[Bd]       \n\t"
+        "paddh      %[ftmp6],       %[ftmp6],         %[Cd]       \n\t"
+        "paddh      %[ftmp7],       %[ftmp7],         %[Dd]       \n\t"
+        "paddh      %[ftmp0],       %[ftmp0],         %[Ed]       \n\t"
+        "paddh      %[ftmp1],       %[ftmp1],         %[Ed]       \n\t"
+        "paddh      %[ftmp2],       %[ftmp2],         %[Ed]       \n\t"
+        "paddh      %[ftmp3],       %[ftmp3],         %[Ed]       \n\t"
+        "paddh      %[ftmp4],       %[ftmp4],         %[Ed]       \n\t"
+        "paddh      %[ftmp5],       %[ftmp5],         %[Ed]       \n\t"
+        "paddh      %[ftmp6],       %[ftmp6],         %[Ed]       \n\t"
+        "paddh      %[ftmp7],       %[ftmp7],         %[Ed]       \n\t"
+        "pmaxsh     %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp0],       %[ftmp0],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp1],       %[ftmp1],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp2],       %[ftmp2],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp3],       %[ftmp3],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp4],       %[ftmp4],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp5],       %[ftmp5],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp6],       %[ftmp6],         %[ftmp10]   \n\t"
+        "pmaxsh     %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
+        "packushb   %[ftmp7],       %[ftmp7],         %[ftmp10]   \n\t"
+        "swc1       %[ftmp0],       0x00(%[dst])                  \n\t"
+        PTR_ADDU   "%[tmp1],        %[dst],           %[stride]   \n\t"
+        "swc1       %[ftmp1],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp2],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp3],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp4],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp5],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp6],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDU   "%[tmp1],        %[tmp1],          %[stride]   \n\t"
+        "swc1       %[ftmp7],       0x00(%[tmp1])                 \n\t"
+        PTR_ADDIU  "%[dst],         %[dst],           0x04        \n\t"
+        PTR_ADDIU  "%[input],       %[input],         0x40        \n\t"
+        PTR_ADDIU  "%[temp_value],  %[temp_value],    0x08        \n\t"
+        PTR_ADDIU  "%[tmp0],        %[tmp0],          -0x01       \n\t"
+        "bnez       %[tmp0],        1b                            \n\t"
+        : [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
+          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
+          [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [mask]"=&f"(ftmp[11]),
+          [A]"=&f"(ftmp[12]), [B]"=&f"(ftmp[13]), [C]"=&f"(ftmp[14]),
+          [D]"=&f"(ftmp[15]), [Ad]"=&f"(ftmp[16]), [Bd]"=&f"(ftmp[17]),
+          [Cd]"=&f"(ftmp[18]), [Dd]"=&f"(ftmp[19]), [Ed]"=&f"(ftmp[20]),
+          [Gd]"=&f"(ftmp[21]), [input]"+&r"(input)
+        : [stride]"r"(stride), [temp_value]"r"(temp_value)
+        : "memory"
+    );
+}
+static void idct_mmi(uint8_t *dst, int stride, int16_t *input, int type)
+{   /* Runs idct_row_mmi on input, then the column routine selected by type. */
+    idct_row_mmi(input);
+    if (type != 1)      /* every value other than 1 takes the "false" variant */
+        idct_column_false_mmi(dst, stride, input);
+    else                /* type == 1 */
+        idct_column_true_mmi(dst, stride, input);
+}
+
+void ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{   /* idct_mmi with type 1 (its idct_column_true_mmi path), then wipe block. */
+    idct_mmi(dest, line_size, block, 1);
+    memset(block, 0, 64 * sizeof(block[0]));    /* all 64 int16_t entries */
+}
+
+void ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{   /* idct_mmi with type 2 (its idct_column_false_mmi path), then wipe block. */
+    idct_mmi(dest, line_size, block, 2);
+    memset(block, 0, 64 * sizeof(block[0]));    /* all 64 int16_t entries */
+}
+void ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{   /* DC-only add: one value from block[0] is added to 8 rows of 8 bytes at dest. */
+    int dc = (block[0] + 15) >> 5;
+    /* ftmp0 stays zero, ftmp5 carries dc in every halfword, tmp0 counts the rows. */
+    double ftmp[7];
+    uint64_t tmp;
+    __asm__ volatile (
+        "xor        %[ftmp0],     %[ftmp0],           %[ftmp0]      \n\t" /* ftmp0 = 0 */
+        "mtc1       %[dc],        %[ftmp5]                          \n\t"
+        "pshufh     %[ftmp5],     %[ftmp5],           %[ftmp0]      \n\t" /* splat halfword 0 */
+        "li         %[tmp0],      0x08                              \n\t" /* 8 iterations */
+        "1:                                                         \n\t"
+        "ldc1       %[ftmp1],     0x00(%[dest])                     \n\t" /* 8 bytes of this row */
+        "punpcklbh  %[ftmp2],     %[ftmp1],           %[ftmp0]      \n\t" /* low 4 bytes -> halfwords */
+        "punpckhbh  %[ftmp3],     %[ftmp1],           %[ftmp0]      \n\t" /* high 4 bytes -> halfwords */
+        "paddh      %[ftmp4],     %[ftmp2],           %[ftmp5]      \n\t" /* + dc */
+        "paddh      %[ftmp6],     %[ftmp3],           %[ftmp5]      \n\t" /* + dc */
+        "packushb   %[ftmp4],     %[ftmp4],           %[ftmp0]      \n\t" /* halfwords -> bytes, unsigned saturation */
+        "packushb   %[ftmp6],     %[ftmp6],           %[ftmp0]      \n\t"
+        "swc1       %[ftmp4],     0x00(%[dest])                     \n\t" /* bytes 0..3 of the row */
+        "swc1       %[ftmp6],     0x04(%[dest])                     \n\t" /* bytes 4..7 of the row */
+        PTR_ADDU   "%[dest],      %[dest],            %[line_size]  \n\t" /* next row */
+        PTR_ADDIU  "%[tmp0],      %[tmp0],            -0x01         \n\t"
+        "bnez       %[tmp0],      1b                                \n\t"
+        : [dest]"+&r"(dest), [block]"+&r"(block), [tmp0]"=&r"(tmp), /* %[block] is never used in the template */
+          [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
+          [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+          [ftmp6]"=&f"(ftmp[6])
+        : [line_size]"r"(line_size), [dc]"r"(dc)
+        : "memory"
+    );
+    block[0] = 0;   /* only block[0] was read, and only it is cleared */
+}
+
+void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
+                                 const uint8_t *src2, ptrdiff_t stride, int h)
+{   /* Writes h rows of 8 bytes to dst, each combining src1 and src2 bytewise. */
+    double ftmp[6];
+    uint64_t tmp[2];
+    ptrdiff_t off = 0;
+    int row, x;
+
+    if (h != 8) {
+        /* Generic height: two 32-bit words per row through no_rnd_avg32(). */
+        for (row = 0; row < h; row++, off += stride) {
+            for (x = 0; x < 8; x += 4) {
+                const uint32_t a = AV_RN32(src1 + off + x);
+                const uint32_t b = AV_RN32(src2 + off + x);
+                AV_WN32A(dst + off + x, no_rnd_avg32(a, b));
+            }
+        }
+        return;
+    }
+    /* h == 8: each row is (a & b) + (((a ^ b) & 0xfe..fe) >> 1) over 64 bits. */
+    __asm__ volatile (
+        "li          %[tmp0],        0x08                            \n\t"
+        "li          %[tmp1],        0xfefefefe                      \n\t"
+        "dmtc1       %[tmp1],        %[ftmp4]                        \n\t"
+        "punpcklwd   %[ftmp4],       %[ftmp4],             %[ftmp4]  \n\t"
+        "li          %[tmp1],        0x01                            \n\t"
+        "dmtc1       %[tmp1],        %[ftmp5]                        \n\t"
+        "1:                                                          \n\t"
+        "gsldlc1     %[ftmp1],       0x07(%[src1])                   \n\t"
+        "gsldrc1     %[ftmp1],       0x00(%[src1])                   \n\t"
+        "gsldlc1     %[ftmp2],       0x07(%[src2])                   \n\t"
+        "gsldrc1     %[ftmp2],       0x00(%[src2])                   \n\t"
+        "xor         %[ftmp3],       %[ftmp1],             %[ftmp2]  \n\t"
+        "and         %[ftmp3],       %[ftmp3],             %[ftmp4]  \n\t"
+        "psrlw       %[ftmp3],       %[ftmp3],             %[ftmp5]  \n\t"
+        "and         %[ftmp6],       %[ftmp1],             %[ftmp2]  \n\t"
+        "paddw       %[ftmp3],       %[ftmp3],             %[ftmp6]  \n\t"
+        "sdc1        %[ftmp3],       0x00(%[dst])                    \n\t"
+        PTR_ADDU    "%[src1],        %[src1],              %[stride] \n\t"
+        PTR_ADDU    "%[src2],        %[src2],              %[stride] \n\t"
+        PTR_ADDU    "%[dst],         %[dst],               %[stride] \n\t"
+        PTR_ADDIU   "%[tmp0],        %[tmp0],              -0x01     \n\t"
+        "bnez        %[tmp0],        1b                              \n\t"
+        : [dst]"+&r"(dst), [src1]"+&r"(src1), [src2]"+&r"(src2),
+          [ftmp1]"=&f"(ftmp[0]), [ftmp2]"=&f"(ftmp[1]), [ftmp3]"=&f"(ftmp[2]),
+          [ftmp4]"=&f"(ftmp[3]), [ftmp5]"=&f"(ftmp[4]), [ftmp6]"=&f"(ftmp[5]),
+          [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1])
+        : [stride]"r"(stride)
+        : "memory"
+    );
+}
diff --git a/libavcodec/mips/vp3dsp_idct_msa.c b/libavcodec/mips/vp3dsp_idct_msa.c
new file mode 100644
index 0000000000000..5427ac559bcdf
--- /dev/null
+++ b/libavcodec/mips/vp3dsp_idct_msa.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vp3dsp_mips.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/rnd_avg.h"
+
+static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
+{   /* 8x8 inverse transform of input into dst; type 1 overwrites dst, else adds to it. */
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7, sign;
+    v4i32 r0_r, r0_l, r1_r, r1_l, r2_r, r2_l, r3_r, r3_l,
+          r4_r, r4_l, r5_r, r5_l, r6_r, r6_l, r7_r, r7_l;
+    v4i32 A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
+    v4i32 Ed, Gd, Add, Bdd, Fd, Hd;
+    v16u8 sign_l;
+    v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
+    v4i32 c0, c1, c2, c3, c4, c5, c6, c7;
+    v4i32 f0, f1, f2, f3, f4, f5, f6, f7;
+    v4i32 sign_t;
+    v16i8 zero = {0};
+    v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; /* byte 0 of each 32-bit lane */
+    v4i32 cnst64277w = {64277, 64277, 64277, 64277}; /* ~65536 * cos(1 * pi / 16) */
+    v4i32 cnst60547w = {60547, 60547, 60547, 60547}; /* ~65536 * cos(2 * pi / 16) */
+    v4i32 cnst54491w = {54491, 54491, 54491, 54491}; /* ~65536 * cos(3 * pi / 16) */
+    v4i32 cnst46341w = {46341, 46341, 46341, 46341}; /* ~65536 * cos(4 * pi / 16) */
+    v4i32 cnst36410w = {36410, 36410, 36410, 36410}; /* ~65536 * cos(5 * pi / 16) */
+    v4i32 cnst25080w = {25080, 25080, 25080, 25080}; /* ~65536 * cos(6 * pi / 16) */
+    v4i32 cnst12785w = {12785, 12785, 12785, 12785}; /* ~65536 * cos(7 * pi / 16) */
+    v4i32 cnst8w = {8, 8, 8, 8};                     /* rounding term for the final >> 4 */
+    v4i32 cnst2048w = {2048, 2048, 2048, 2048};      /* 128 << 4, added only when type == 1 */
+    v4i32 cnst128w = {128, 128, 128, 128};           /* same bias for the DC-only result */
+    int nstride = stride;
+
+    /* Widen input to 32 bits: sign (all-ones where < 0) supplies the upper halfwords */
+    LD_SH8(input, 8, r0, r1, r2, r3, r4, r5, r6, r7);
+    sign = __msa_clti_s_h(r0, 0);
+    r0_r = (v4i32) __msa_ilvr_h(sign, r0);
+    r0_l = (v4i32) __msa_ilvl_h(sign, r0);
+    sign = __msa_clti_s_h(r1, 0);
+    r1_r = (v4i32) __msa_ilvr_h(sign, r1);
+    r1_l = (v4i32) __msa_ilvl_h(sign, r1);
+    sign = __msa_clti_s_h(r2, 0);
+    r2_r = (v4i32) __msa_ilvr_h(sign, r2);
+    r2_l = (v4i32) __msa_ilvl_h(sign, r2);
+    sign = __msa_clti_s_h(r3, 0);
+    r3_r = (v4i32) __msa_ilvr_h(sign, r3);
+    r3_l = (v4i32) __msa_ilvl_h(sign, r3);
+    sign = __msa_clti_s_h(r4, 0);
+    r4_r = (v4i32) __msa_ilvr_h(sign, r4);
+    r4_l = (v4i32) __msa_ilvl_h(sign, r4);
+    sign = __msa_clti_s_h(r5, 0);
+    r5_r = (v4i32) __msa_ilvr_h(sign, r5);
+    r5_l = (v4i32) __msa_ilvl_h(sign, r5);
+    sign = __msa_clti_s_h(r6, 0);
+    r6_r = (v4i32) __msa_ilvr_h(sign, r6);
+    r6_l = (v4i32) __msa_ilvl_h(sign, r6);
+    sign = __msa_clti_s_h(r7, 0);
+    r7_r = (v4i32) __msa_ilvr_h(sign, r7);
+    r7_l = (v4i32) __msa_ilvl_h(sign, r7);
+
+    /* First pass, right part: lane-wise butterfly over the eight *_r vectors */
+    A = ((r1_r * cnst64277w) >> 16) + ((r7_r * cnst12785w) >> 16);
+    B = ((r1_r * cnst12785w) >> 16) - ((r7_r * cnst64277w) >> 16);
+    C = ((r3_r * cnst54491w) >> 16) + ((r5_r * cnst36410w) >> 16);
+    D = ((r5_r * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
+    Ad = ((A - C) * cnst46341w) >> 16;
+    Bd = ((B - D) * cnst46341w) >> 16;
+    Cd = A + C;
+    Dd = B + D;
+    E = ((r0_r + r4_r) * cnst46341w) >> 16;
+    F = ((r0_r - r4_r) * cnst46341w) >> 16;
+    G = ((r2_r * cnst60547w) >> 16) + ((r6_r * cnst25080w) >> 16);
+    H = ((r2_r * cnst25080w) >> 16) - ((r6_r * cnst60547w) >> 16);
+    Ed = E - G;
+    Gd = E + G;
+    Add = F + Ad;
+    Bdd = Bd - H;
+    Fd = F - Ad;
+    Hd = Bd + H;
+    r0_r = Gd + Cd;
+    r7_r = Gd - Cd;
+    r1_r = Add + Hd;
+    r2_r = Add - Hd;
+    r3_r = Ed + Dd;
+    r4_r = Ed - Dd;
+    r5_r = Fd + Bdd;
+    r6_r = Fd - Bdd;
+
+    /* First pass, left part: the same butterfly over the eight *_l vectors */
+    A = ((r1_l * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
+    B = ((r1_l * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
+    C = ((r3_l * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
+    D = ((r5_l * cnst54491w) >> 16) - ((r3_l * cnst36410w) >> 16);
+    Ad = ((A - C) * cnst46341w) >> 16;
+    Bd = ((B - D) * cnst46341w) >> 16;
+    Cd = A + C;
+    Dd = B + D;
+    E = ((r0_l + r4_l) * cnst46341w) >> 16;
+    F = ((r0_l - r4_l) * cnst46341w) >> 16;
+    G = ((r2_l * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
+    H = ((r2_l * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
+    Ed = E - G;
+    Gd = E + G;
+    Add = F + Ad;
+    Bdd = Bd - H;
+    Fd = F - Ad;
+    Hd = Bd + H;
+    r0_l = Gd + Cd;
+    r7_l = Gd - Cd;
+    r1_l = Add + Hd;
+    r2_l = Add - Hd;
+    r3_l = Ed + Dd;
+    r4_l = Ed - Dd;
+    r5_l = Fd + Bdd;
+    r6_l = Fd - Bdd;
+
+    /* Row 0 to 3: second pass; after the transposes its inputs 0..7 are r0_r..r3_r, r0_l..r3_l */
+    TRANSPOSE4x4_SW_SW(r0_r, r1_r, r2_r, r3_r,
+                       r0_r, r1_r, r2_r, r3_r);
+    TRANSPOSE4x4_SW_SW(r0_l, r1_l, r2_l, r3_l,
+                       r0_l, r1_l, r2_l, r3_l);
+    A = ((r1_r * cnst64277w) >> 16) + ((r3_l * cnst12785w) >> 16);
+    B = ((r1_r * cnst12785w) >> 16) - ((r3_l * cnst64277w) >> 16);
+    C = ((r3_r * cnst54491w) >> 16) + ((r1_l * cnst36410w) >> 16);
+    D = ((r1_l * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
+    Ad = ((A - C) * cnst46341w) >> 16;
+    Bd = ((B - D) * cnst46341w) >> 16;
+    Cd = A + C;
+    Dd = B + D;
+    E = ((r0_r + r0_l) * cnst46341w) >> 16;
+    E += cnst8w;
+    F = ((r0_r - r0_l) * cnst46341w) >> 16;
+    F += cnst8w;
+    if (type == 1) { // HACK: 2048 >> 4 == 128, so every type 1 output gains +128
+        E += cnst2048w;
+        F += cnst2048w;
+    }
+    G = ((r2_r * cnst60547w) >> 16) + ((r2_l * cnst25080w) >> 16);
+    H = ((r2_r * cnst25080w) >> 16) - ((r2_l * cnst60547w) >> 16);
+    Ed = E - G;
+    Gd = E + G;
+    Add = F + Ad;
+    Bdd = Bd - H;
+    Fd = F - Ad;
+    Hd = Bd + H;
+    A = (Gd + Cd) >> 4;     /* A, C, D, E, F, G, H, B pair with dst rows 0..7 below */
+    B = (Gd - Cd) >> 4;
+    C = (Add + Hd) >> 4;
+    D = (Add - Hd) >> 4;
+    E = (Ed + Dd) >> 4;
+    F = (Ed - Dd) >> 4;
+    G = (Fd + Bdd) >> 4;
+    H = (Fd - Bdd) >> 4;
+    if (type != 1) {        /* add variant: widen the current dst rows and add them */
+        LD_SB8(dst, stride, d0, d1, d2, d3, d4, d5, d6, d7);
+        ILVR_B4_SW(zero, d0, zero, d1, zero, d2, zero, d3,
+                   f0, f1, f2, f3);
+        ILVR_B4_SW(zero, d4, zero, d5, zero, d6, zero, d7,
+                   f4, f5, f6, f7);
+        ILVR_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
+                   c0, c1, c2, c3);
+        ILVR_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
+                   c4, c5, c6, c7);
+        A += c0;
+        B += c7;
+        C += c1;
+        D += c2;
+        E += c3;
+        F += c4;
+        G += c5;
+        H += c6;
+    }
+    A = CLIP_SW_0_255(A);
+    B = CLIP_SW_0_255(B);
+    C = CLIP_SW_0_255(C);
+    D = CLIP_SW_0_255(D);
+    E = CLIP_SW_0_255(E);
+    F = CLIP_SW_0_255(F);
+    G = CLIP_SW_0_255(G);
+    H = CLIP_SW_0_255(H);
+    sign_l = __msa_or_v((v16u8)r1_r, (v16u8)r2_r);   /* OR of the seven inputs other than r0_r */
+    sign_l = __msa_or_v(sign_l, (v16u8)r3_r);
+    sign_l = __msa_or_v(sign_l, (v16u8)r0_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r1_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r2_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r3_l);
+    sign_t = __msa_ceqi_w((v4i32)sign_l, 0);         /* all-ones in lanes where they are all zero */
+    Add = ((r0_r * cnst46341w) + (8 << 16)) >> 20;   /* DC-only value computed from r0_r alone */
+    if (type == 1) {
+        Bdd = Add + cnst128w;
+        Bdd = CLIP_SW_0_255(Bdd);
+        Ad = Bdd;
+        Bd = Bdd;
+        Cd = Bdd;
+        Dd = Bdd;
+        Ed = Bdd;
+        Fd = Bdd;
+        Gd = Bdd;
+        Hd = Bdd;
+    } else {
+        Ad = Add + c0;
+        Bd = Add + c1;
+        Cd = Add + c2;
+        Dd = Add + c3;
+        Ed = Add + c4;
+        Fd = Add + c5;
+        Gd = Add + c6;
+        Hd = Add + c7;
+        Ad = CLIP_SW_0_255(Ad);
+        Bd = CLIP_SW_0_255(Bd);
+        Cd = CLIP_SW_0_255(Cd);
+        Dd = CLIP_SW_0_255(Dd);
+        Ed = CLIP_SW_0_255(Ed);
+        Fd = CLIP_SW_0_255(Fd);
+        Gd = CLIP_SW_0_255(Gd);
+        Hd = CLIP_SW_0_255(Hd);
+    }
+    Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);   /* DC-only results kept where sign_t is set */
+    Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
+    Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
+    Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
+    Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
+    Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
+    Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
+    Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
+    sign_t = __msa_ceqi_w(sign_t, 0);                    /* invert the lane mask */
+    A = (v4i32)__msa_and_v((v16u8)A, (v16u8)sign_t);     /* full results kept in the other lanes */
+    B = (v4i32)__msa_and_v((v16u8)B, (v16u8)sign_t);
+    C = (v4i32)__msa_and_v((v16u8)C, (v16u8)sign_t);
+    D = (v4i32)__msa_and_v((v16u8)D, (v16u8)sign_t);
+    E = (v4i32)__msa_and_v((v16u8)E, (v16u8)sign_t);
+    F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
+    G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
+    H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
+    r0_r = Ad + A;          /* per lane only one of the two masked terms can be non-zero */
+    r1_r = Bd + C;
+    r2_r = Cd + D;
+    r3_r = Dd + E;
+    r0_l = Ed + F;
+    r1_l = Fd + G;
+    r2_l = Gd + H;
+    r3_l = Hd + B;
+
+    /* Row 4 to 7: same second pass on r4..r7; inputs 0..7 are r4_r..r7_r, r4_l..r7_l */
+    TRANSPOSE4x4_SW_SW(r4_r, r5_r, r6_r, r7_r,
+                       r4_r, r5_r, r6_r, r7_r);
+    TRANSPOSE4x4_SW_SW(r4_l, r5_l, r6_l, r7_l,
+                       r4_l, r5_l, r6_l, r7_l);
+    A = ((r5_r * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
+    B = ((r5_r * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
+    C = ((r7_r * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
+    D = ((r5_l * cnst54491w) >> 16) - ((r7_r * cnst36410w) >> 16);
+    Ad = ((A - C) * cnst46341w) >> 16;
+    Bd = ((B - D) * cnst46341w) >> 16;
+    Cd = A + C;
+    Dd = B + D;
+    E = ((r4_r + r4_l) * cnst46341w) >> 16;
+    E += cnst8w;
+    F = ((r4_r - r4_l) * cnst46341w) >> 16;
+    F += cnst8w;
+    if (type == 1) { // HACK: 2048 >> 4 == 128, so every type 1 output gains +128
+        E += cnst2048w;
+        F += cnst2048w;
+    }
+    G = ((r6_r * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
+    H = ((r6_r * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
+    Ed = E - G;
+    Gd = E + G;
+    Add = F + Ad;
+    Bdd = Bd - H;
+    Fd = F - Ad;
+    Hd = Bd + H;
+    A = (Gd + Cd) >> 4;
+    B = (Gd - Cd) >> 4;
+    C = (Add + Hd) >> 4;
+    D = (Add - Hd) >> 4;
+    E = (Ed + Dd) >> 4;
+    F = (Ed - Dd) >> 4;
+    G = (Fd + Bdd) >> 4;
+    H = (Fd - Bdd) >> 4;
+    if (type != 1) {        /* f0..f7 were filled in the Row 0 to 3 block; ILVL takes their other half */
+        ILVL_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
+                   c0, c1, c2, c3);
+        ILVL_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
+                   c4, c5, c6, c7);
+        A += c0;
+        B += c7;
+        C += c1;
+        D += c2;
+        E += c3;
+        F += c4;
+        G += c5;
+        H += c6;
+    }
+    A = CLIP_SW_0_255(A);
+    B = CLIP_SW_0_255(B);
+    C = CLIP_SW_0_255(C);
+    D = CLIP_SW_0_255(D);
+    E = CLIP_SW_0_255(E);
+    F = CLIP_SW_0_255(F);
+    G = CLIP_SW_0_255(G);
+    H = CLIP_SW_0_255(H);
+    sign_l = __msa_or_v((v16u8)r5_r, (v16u8)r6_r);   /* OR of the seven inputs other than r4_r */
+    sign_l = __msa_or_v(sign_l, (v16u8)r7_r);
+    sign_l = __msa_or_v(sign_l, (v16u8)r4_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r5_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r6_l);
+    sign_l = __msa_or_v(sign_l, (v16u8)r7_l);
+    sign_t = __msa_ceqi_w((v4i32)sign_l, 0);         /* all-ones in lanes where they are all zero */
+    Add = ((r4_r * cnst46341w) + (8 << 16)) >> 20;   /* DC-only value computed from r4_r alone */
+    if (type == 1) {
+        Bdd = Add + cnst128w;
+        Bdd = CLIP_SW_0_255(Bdd);
+        Ad = Bdd;
+        Bd = Bdd;
+        Cd = Bdd;
+        Dd = Bdd;
+        Ed = Bdd;
+        Fd = Bdd;
+        Gd = Bdd;
+        Hd = Bdd;
+    } else {
+        Ad = Add + c0;
+        Bd = Add + c1;
+        Cd = Add + c2;
+        Dd = Add + c3;
+        Ed = Add + c4;
+        Fd = Add + c5;
+        Gd = Add + c6;
+        Hd = Add + c7;
+        Ad = CLIP_SW_0_255(Ad);
+        Bd = CLIP_SW_0_255(Bd);
+        Cd = CLIP_SW_0_255(Cd);
+        Dd = CLIP_SW_0_255(Dd);
+        Ed = CLIP_SW_0_255(Ed);
+        Fd = CLIP_SW_0_255(Fd);
+        Gd = CLIP_SW_0_255(Gd);
+        Hd = CLIP_SW_0_255(Hd);
+    }
+    Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);   /* DC-only results kept where sign_t is set */
+    Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
+    Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
+    Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
+    Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
+    Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
+    Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
+    Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
+    sign_t = __msa_ceqi_w(sign_t, 0);                    /* invert the lane mask */
+    A = (v4i32)__msa_and_v((v16u8)A, (v16u8)sign_t);     /* full results kept in the other lanes */
+    B = (v4i32)__msa_and_v((v16u8)B, (v16u8)sign_t);
+    C = (v4i32)__msa_and_v((v16u8)C, (v16u8)sign_t);
+    D = (v4i32)__msa_and_v((v16u8)D, (v16u8)sign_t);
+    E = (v4i32)__msa_and_v((v16u8)E, (v16u8)sign_t);
+    F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
+    G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
+    H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
+    r4_r = Ad + A;
+    r5_r = Bd + C;
+    r6_r = Cd + D;
+    r7_r = Dd + E;
+    r4_l = Ed + F;
+    r5_l = Fd + G;
+    r6_l = Gd + H;
+    r7_l = Hd + B;
+    VSHF_B2_SB(r0_r, r4_r, r1_r, r5_r, mask, mask, d0, d1);   /* mask gathers byte 0 of each word: 8 bytes per row */
+    VSHF_B2_SB(r2_r, r6_r, r3_r, r7_r, mask, mask, d2, d3);
+    VSHF_B2_SB(r0_l, r4_l, r1_l, r5_l, mask, mask, d4, d5);
+    VSHF_B2_SB(r2_l, r6_l, r3_l, r7_l, mask, mask, d6, d7);
+
+    /* Final sequence of operations over-write original dst: d0..d7 go to rows 0..7 */
+    ST8x1_UB(d0, dst);
+    ST8x1_UB(d1, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d2, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d3, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d4, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d5, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d6, dst + nstride);
+    nstride += stride;
+    ST8x1_UB(d7, dst + nstride);
+}
+
+void ff_vp3_idct_put_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{   /* idct_msa with type 1 (result replaces dest), then wipe the coefficients. */
+    idct_msa(dest, line_size, block, 1);
+    memset(block, 0, 64 * sizeof(block[0]));    /* all 64 int16_t entries */
+}
+
+void ff_vp3_idct_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{   /* idct_msa with type 2 (result is added to dest), then wipe the coefficients. */
+    idct_msa(dest, line_size, block, 2);
+    memset(block, 0, 64 * sizeof(block[0]));    /* all 64 int16_t entries */
+}
+
+/*
+ * DC-only add for an 8x8 block.
+ *
+ * dc = (block[0] + 15) >> 5 is added to each of the 64 bytes found at dest
+ * (eight rows, line_size apart). The sums are clamped with CLIP_SW_0_255,
+ * written back over dest, and block[0] is reset to zero afterwards.
+ */
+void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    const int dc_val = (block[0] + 15) >> 5;
+    v4i32 dc = {dc_val, dc_val, dc_val, dc_val};
+    v16i8 row0, row1, row2, row3, row4, row5, row6, row7;
+    v4i32 h0, h1, h2, h3, h4, h5, h6, h7;
+    v4i32 lo0, lo1, lo2, lo3, lo4, lo5, lo6, lo7;
+    v4i32 hi0, hi1, hi2, hi3, hi4, hi5, hi6, hi7;
+    /* Shuffle control: byte 0 of every 32-bit lane, four lanes per input. */
+    v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
+    v16i8 zero = {0};
+    /* Byte offset of the row being stored; kept as int like the stride copy. */
+    int off = 0;
+
+    /* Fetch the eight rows and interleave their bytes with zero (widening). */
+    LD_SB8(dest, line_size, row0, row1, row2, row3, row4, row5, row6, row7);
+    ILVR_B4_SW(zero, row0, zero, row1, zero, row2, zero, row3,
+               h0, h1, h2, h3);
+    ILVR_B4_SW(zero, row4, zero, row5, zero, row6, zero, row7,
+               h4, h5, h6, h7);
+
+    /* Right part: ILVR_H widens once more to 32-bit lanes; add dc and clamp.
+     * The sum is parenthesised because CLIP_SW_0_255 is a macro. */
+    ILVR_H4_SW(zero, h0, zero, h1, zero, h2, zero, h3,
+               lo0, lo1, lo2, lo3);
+    ILVR_H4_SW(zero, h4, zero, h5, zero, h6, zero, h7,
+               lo4, lo5, lo6, lo7);
+    lo0 = CLIP_SW_0_255((lo0 + dc));
+    lo1 = CLIP_SW_0_255((lo1 + dc));
+    lo2 = CLIP_SW_0_255((lo2 + dc));
+    lo3 = CLIP_SW_0_255((lo3 + dc));
+    lo4 = CLIP_SW_0_255((lo4 + dc));
+    lo5 = CLIP_SW_0_255((lo5 + dc));
+    lo6 = CLIP_SW_0_255((lo6 + dc));
+    lo7 = CLIP_SW_0_255((lo7 + dc));
+
+    /* Left part: the same treatment for the other half taken by ILVL_H. */
+    ILVL_H4_SW(zero, h0, zero, h1, zero, h2, zero, h3,
+               hi0, hi1, hi2, hi3);
+    ILVL_H4_SW(zero, h4, zero, h5, zero, h6, zero, h7,
+               hi4, hi5, hi6, hi7);
+    hi0 = CLIP_SW_0_255((hi0 + dc));
+    hi1 = CLIP_SW_0_255((hi1 + dc));
+    hi2 = CLIP_SW_0_255((hi2 + dc));
+    hi3 = CLIP_SW_0_255((hi3 + dc));
+    hi4 = CLIP_SW_0_255((hi4 + dc));
+    hi5 = CLIP_SW_0_255((hi5 + dc));
+    hi6 = CLIP_SW_0_255((hi6 + dc));
+    hi7 = CLIP_SW_0_255((hi7 + dc));
+
+    /* Narrow each right/left pair back into one 8-byte row. */
+    VSHF_B2_SB(lo0, hi0, lo1, hi1, mask, mask, row0, row1);
+    VSHF_B2_SB(lo2, hi2, lo3, hi3, mask, mask, row2, row3);
+    VSHF_B2_SB(lo4, hi4, lo5, hi5, mask, mask, row4, row5);
+    VSHF_B2_SB(lo6, hi6, lo7, hi7, mask, mask, row6, row7);
+
+    /* Write the eight rows back over dest, stepping off by line_size. */
+    ST8x1_UB(row0, dest + off);
+    off += line_size;
+    ST8x1_UB(row1, dest + off);
+    off += line_size;
+    ST8x1_UB(row2, dest + off);
+    off += line_size;
+    ST8x1_UB(row3, dest + off);
+    off += line_size;
+    ST8x1_UB(row4, dest + off);
+    off += line_size;
+    ST8x1_UB(row5, dest + off);
+    off += line_size;
+    ST8x1_UB(row6, dest + off);
+    off += line_size;
+    ST8x1_UB(row7, dest + off);
+
+    /* block[0] is the only coefficient read here, and the only one cleared. */
+    block[0] = 0;
+}
+
+void ff_vp3_v_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
+                              int *bounding_values)
+{   /* Reads rows -2..+1 around first_pixel (8 pixels wide) and rewrites rows -1 and 0. */
+    int nstride = -stride;
+    v4i32 e0, e1, f0, f1, g0, g1;
+    v16i8 zero = {0};
+    v16i8 d0, d1, d2, d3;
+    v8i16 c0, c1, c2, c3;
+    v8i16 r0;
+    v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
+          cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
+    v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
+    int16_t temp_16[8];   /* scalar copy of the 8 filter indices */
+    int temp_32[8];       /* the 8 looked-up correction values */
+
+    LD_SB4(first_pixel + nstride * 2, stride, d0, d1, d2, d3);
+    ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
+               c0, c1, c2, c3);   /* c0..c3 = rows -2, -1, 0, +1 as 16-bit values */
+    r0 = (c0 - c3) + (c2 - c1) * cnst3h;   /* (p[-2] - p[+1]) + 3 * (p[0] - p[-1]) */
+    r0 += cnst4h;
+    r0 = r0 >> 3;                          /* index = (filter_value + 4) >> 3 */
+    /* Get filter_value from bounding_values one by one; the index is signed, so the caller is assumed to pass a pointer with valid negative offsets */
+    ST_SH(r0, temp_16);
+    for (int i = 0; i < 8; i++)
+        temp_32[i] = bounding_values[temp_16[i]];
+    LD_SW2(temp_32, 4, e0, e1);   /* e0 = values 0-3, e1 = values 4-7 */
+    ILVR_H2_SW(zero, c1, zero, c2, f0, g0);
+    ILVL_H2_SW(zero, c1, zero, c2, f1, g1);
+    f0 += e0;   /* row -1 gets +value */
+    f1 += e1;
+    g0 -= e0;   /* row 0 gets -value */
+    g1 -= e1;
+    f0 = CLIP_SW_0_255(f0);
+    f1 = CLIP_SW_0_255(f1);
+    g0 = CLIP_SW_0_255(g0);
+    g1 = CLIP_SW_0_255(g1);
+    VSHF_B2_SB(f0, f1, g0, g1, mask, mask, d1, d2);   /* mask: byte 0 of each 32-bit lane */
+
+    /* Final move to first_pixel: d1 is the row above the edge, d2 the row at first_pixel */
+    ST8x1_UB(d1, first_pixel + nstride);
+    ST8x1_UB(d2, first_pixel);
+}
+
+void ff_vp3_h_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
+                              int *bounding_values)
+{   /* Loads 8 rows starting at first_pixel - 2 and rewrites 2 bytes per row at first_pixel - 1. */
+    v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
+    v8i16 c0, c1, c2, c3, c4, c5, c6, c7;
+    v8i16 r0;
+    v4i32 e0, e1, f0, f1, g0, g1;
+    v16i8 zero = {0};
+    v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
+          cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
+    v16i8 mask = {0, 16, 4, 20, 8, 24, 12, 28, 0, 0, 0, 0, 0, 0, 0, 0};
+    int16_t temp_16[8];   /* scalar copy of the 8 filter indices */
+    int temp_32[8];       /* the 8 looked-up correction values */
+
+    LD_SB8(first_pixel - 2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
+    ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
+               c0, c1, c2, c3);
+    ILVR_B4_SH(zero, d4, zero, d5, zero, d6, zero, d7,
+               c4, c5, c6, c7);
+    TRANSPOSE8x8_SH_SH(c0, c1, c2, c3, c4, c5, c6, c7,
+                       c0, c1, c2, c3, c4, c5, c6, c7);   /* after this c0..c3 are columns -2..+1 */
+    r0 = (c0 - c3) + (c2 - c1) * cnst3h;   /* (p[-2] - p[+1]) + 3 * (p[0] - p[-1]) */
+    r0 += cnst4h;
+    r0 = r0 >> 3;                          /* index = (filter_value + 4) >> 3 */
+
+    /* Get filter_value from bounding_values one by one; the index is signed, so the caller is assumed to pass a pointer with valid negative offsets */
+    ST_SH(r0, temp_16);
+    for (int i = 0; i < 8; i++)
+        temp_32[i] = bounding_values[temp_16[i]];
+    LD_SW2(temp_32, 4, e0, e1);   /* e0 = values 0-3, e1 = values 4-7 */
+    ILVR_H2_SW(zero, c1, zero, c2, f0, g0);
+    ILVL_H2_SW(zero, c1, zero, c2, f1, g1);
+    f0 += e0;   /* column -1 gets +value */
+    f1 += e1;
+    g0 -= e0;   /* column 0 gets -value */
+    g1 -= e1;
+    f0 = CLIP_SW_0_255(f0);
+    f1 = CLIP_SW_0_255(f1);
+    g0 = CLIP_SW_0_255(g0);
+    g1 = CLIP_SW_0_255(g1);
+    VSHF_B2_SB(f0, g0, f1, g1, mask, mask, d1, d2);   /* mask alternates byte 0 of the f and g lanes: one pixel pair per row */
+    /* Final move to first_pixel: d1 covers the first four rows, d2 the rows from 4 * stride on */
+    ST2x4_UB(d1, 0, first_pixel - 1, stride);
+    ST2x4_UB(d2, 0, first_pixel - 1 + 4 * stride, stride);
+}
+
+void ff_put_no_rnd_pixels_l2_msa(uint8_t *dst, const uint8_t *src1,
+                                 const uint8_t *src2, ptrdiff_t stride, int h)
+{   /* Writes an 8-byte-wide, h-row combination of src1 and src2 to dst; all three share one stride. */
+    if (h == 8) {   /* vector path: the whole 8x8 block at once */
+        v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
+        v16i8 c0, c1, c2, c3;
+        v4i32 a0, a1, a2, a3, b0, b1, b2, b3;
+        v4i32 e0, e1, e2;
+        v4i32 f0, f1, f2;
+        v4u32 t0, t1, t2, t3;
+        v16i8 mask = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
+        int32_t value = 0xfefefefe;   /* clears bit 0 of every byte so the >> 1 cannot leak across bytes */
+        v4i32 fmask = {value, value, value, value};
+
+        LD_SB8(src1, stride, d0, d1, d2, d3, d4, d5, d6, d7);
+        VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);   /* pair rows: bytes 0-3 of both, then bytes 4-7 of both */
+        VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
+        a0 = (v4i32) __msa_pckev_d((v2i64)c1, (v2i64)c0);   /* bytes 0-3 of rows 0-3 */
+        a2 = (v4i32) __msa_pckod_d((v2i64)c1, (v2i64)c0);   /* bytes 4-7 of rows 0-3 */
+        a1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);   /* bytes 0-3 of rows 4-7 */
+        a3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);   /* bytes 4-7 of rows 4-7 */
+
+        LD_SB8(src2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
+        VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);
+        VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
+        b0 = (v4i32) __msa_pckev_d((v2i64)c1, (v2i64)c0);   /* same layout as a0..a3, taken from src2 */
+        b2 = (v4i32) __msa_pckod_d((v2i64)c1, (v2i64)c0);
+        b1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);
+        b3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);
+
+        e0 = (v4i32) __msa_xor_v((v16u8)a0, (v16u8)b0);     /* t = (a & b) + (((a ^ b) & 0xfefefefe) >> 1) */
+        e0 = (v4i32) __msa_and_v((v16u8)e0, (v16u8)fmask);
+        t0 = ((v4u32)e0) >> 1;
+        e2 = (v4i32) __msa_and_v((v16u8)a0, (v16u8)b0);
+        t0 = t0 + (v4u32)e2;
+
+        e1 = (v4i32) __msa_xor_v((v16u8)a1, (v16u8)b1);     /* same formula for a1/b1 */
+        e1 = (v4i32) __msa_and_v((v16u8)e1, (v16u8)fmask);
+        t1 = ((v4u32)e1) >> 1;
+        e2 = (v4i32) __msa_and_v((v16u8)a1, (v16u8)b1);
+        t1 = t1 + (v4u32)e2;
+
+        f0 = (v4i32) __msa_xor_v((v16u8)a2, (v16u8)b2);     /* same formula for a2/b2 */
+        f0 = (v4i32) __msa_and_v((v16u8)f0, (v16u8)fmask);
+        t2 = ((v4u32)f0) >> 1;
+        f2 = (v4i32) __msa_and_v((v16u8)a2, (v16u8)b2);
+        t2 = t2 + (v4u32)f2;
+
+        f1 = (v4i32) __msa_xor_v((v16u8)a3, (v16u8)b3);     /* same formula for a3/b3 */
+        f1 = (v4i32) __msa_and_v((v16u8)f1, (v16u8)fmask);
+        t3 = ((v4u32)f1) >> 1;
+        f2 = (v4i32) __msa_and_v((v16u8)a3, (v16u8)b3);
+        t3 = t3 + (v4u32)f2;
+
+        ST4x4_UB(t0, t0, 0, 1, 2, 3, dst, stride);                    /* bytes 0-3, rows 0-3 */
+        ST4x4_UB(t1, t1, 0, 1, 2, 3, dst + 4 * stride, stride);       /* bytes 0-3, rows 4-7 */
+        ST4x4_UB(t2, t2, 0, 1, 2, 3, dst + 4, stride);                /* bytes 4-7, rows 0-3 */
+        ST4x4_UB(t3, t3, 0, 1, 2, 3, dst + 4 + 4 * stride, stride);   /* bytes 4-7, rows 4-7 */
+    } else {        /* scalar path for any other h: two 32-bit words per row */
+        int i;
+
+        for (i = 0; i < h; i++) {
+            uint32_t a, b;
+
+            a = AV_RN32(&src1[i * stride]);                       /* bytes 0-3 of row i */
+            b = AV_RN32(&src2[i * stride]);
+            AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
+            a = AV_RN32(&src1[i * stride + 4]);                   /* bytes 4-7 of row i */
+            b = AV_RN32(&src2[i * stride + 4]);
+            AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
+        }
+    }
+}
diff --git a/libavcodec/mips/vp3dsp_init_mips.c b/libavcodec/mips/vp3dsp_init_mips.c
new file mode 100644
index 0000000000000..e183db35b6757
--- /dev/null
+++ b/libavcodec/mips/vp3dsp_init_mips.c
@@ -0,0 +1,60 @@
+
+/*
+ * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/vp3dsp.h"
+#include "vp3dsp_mips.h"
+
+#if HAVE_MSA
+static av_cold void vp3dsp_init_msa(VP3DSPContext *c, int flags)
+{   /* Install the MSA implementations into the VP3 DSP context; flags is not consulted. */
+    c->idct_put             = ff_vp3_idct_put_msa;
+    c->idct_add             = ff_vp3_idct_add_msa;
+    c->idct_dc_add          = ff_vp3_idct_dc_add_msa;
+
+    c->h_loop_filter        = ff_vp3_h_loop_filter_msa;
+    c->v_loop_filter        = ff_vp3_v_loop_filter_msa;
+    c->put_no_rnd_pixels_l2 = ff_put_no_rnd_pixels_l2_msa;
+}
+#endif /* HAVE_MSA */
+
+#if HAVE_MMI
+static av_cold void vp3dsp_init_mmi(VP3DSPContext *c, int flags)
+{   /* Install the MMI implementations into the VP3 DSP context; flags is not consulted. */
+    c->idct_put             = ff_vp3_idct_put_mmi;
+    c->idct_add             = ff_vp3_idct_add_mmi;
+    c->idct_dc_add          = ff_vp3_idct_dc_add_mmi;
+
+    c->put_no_rnd_pixels_l2 = ff_put_no_rnd_pixels_l2_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_vp3dsp_init_mips(VP3DSPContext *c, int flags)
+{   /* Selection is compile-time only (HAVE_MMI / HAVE_MSA); flags is merely forwarded. */
+#if HAVE_MMI
+    vp3dsp_init_mmi(c, flags);
+#endif /* HAVE_MMI */
+#if HAVE_MSA
+    vp3dsp_init_msa(c, flags);   /* called last, so it overwrites the hooks the MMI init also set */
+#endif /* HAVE_MSA */
+}
diff --git a/libavcodec/mips/vp3dsp_mips.h b/libavcodec/mips/vp3dsp_mips.h
new file mode 100644
index 0000000000000..4685a825cb9ea
--- /dev/null
+++ b/libavcodec/mips/vp3dsp_mips.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_VP3DSP_MIPS_H
+#define AVCODEC_MIPS_VP3DSP_MIPS_H
+
+#include "libavcodec/vp3dsp.h"
+#include <string.h>
+
+void ff_vp3_idct_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_vp3_idct_put_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_vp3_v_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
+                              int *bounding_values);
+void ff_put_no_rnd_pixels_l2_msa(uint8_t *dst, const uint8_t *src1,
+                                 const uint8_t *src2, ptrdiff_t stride, int h);
+void ff_vp3_h_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
+                              int *bounding_values);
+
+void ff_vp3_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_vp3_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_vp3_idct_dc_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_put_no_rnd_pixels_l2_mmi(uint8_t *dst, const uint8_t *src1,
+                                 const uint8_t *src2, ptrdiff_t stride, int h);
+
+#endif /* #ifndef AVCODEC_MIPS_VP3DSP_MIPS_H */
diff --git a/libavcodec/mips/vp8dsp_init_mips.c b/libavcodec/mips/vp8dsp_init_mips.c
index 3fc5f8e0af016..7fd8fb0d3291f 100644
--- a/libavcodec/mips/vp8dsp_init_mips.c
+++ b/libavcodec/mips/vp8dsp_init_mips.c
@@ -193,10 +193,10 @@ static av_cold void vp8dsp_init_mmi(VP8DSPContext *dsp)
 
 av_cold void ff_vp8dsp_init_mips(VP8DSPContext *dsp)
 {
-#if HAVE_MSA
-    vp8dsp_init_msa(dsp);
-#endif  // #if HAVE_MSA
 #if HAVE_MMI
     vp8dsp_init_mmi(dsp);
 #endif /* HAVE_MMI */
+#if HAVE_MSA
+    vp8dsp_init_msa(dsp);
+#endif  // #if HAVE_MSA
 }
diff --git a/libavcodec/mips/vp9_mc_mmi.c b/libavcodec/mips/vp9_mc_mmi.c
new file mode 100644
index 0000000000000..e7a83875b92be
--- /dev/null
+++ b/libavcodec/mips/vp9_mc_mmi.c
@@ -0,0 +1,628 @@
+/*
+ * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/mmiutils.h"
+#include "vp9dsp_mips.h"
+
+#define GET_DATA_H_MMI /* four 8-tap sums: (ftmp4,5),(ftmp6,7) -> srcl; (ftmp8,9),(ftmp10,11) -> srch */ \
+    "pmaddhw    %[ftmp4],    %[ftmp4],   %[filter1]    \n\t" \
+    "pmaddhw    %[ftmp5],    %[ftmp5],   %[filter2]    \n\t" \
+    "paddw      %[ftmp4],    %[ftmp4],   %[ftmp5]      \n\t" \
+    "punpckhwd  %[ftmp5],    %[ftmp4],   %[ftmp0]      \n\t" \
+    "paddw      %[ftmp4],    %[ftmp4],   %[ftmp5]      \n\t" \
+    "pmaddhw    %[ftmp6],    %[ftmp6],   %[filter1]    \n\t" \
+    "pmaddhw    %[ftmp7],    %[ftmp7],   %[filter2]    \n\t" \
+    "paddw      %[ftmp6],    %[ftmp6],   %[ftmp7]      \n\t" \
+    "punpckhwd  %[ftmp7],    %[ftmp6],   %[ftmp0]      \n\t" \
+    "paddw      %[ftmp6],    %[ftmp6],   %[ftmp7]      \n\t" \
+    "punpcklwd  %[srcl],     %[ftmp4],   %[ftmp6]      \n\t" \
+    "pmaddhw    %[ftmp8],    %[ftmp8],   %[filter1]    \n\t" \
+    "pmaddhw    %[ftmp9],    %[ftmp9],   %[filter2]    \n\t" \
+    "paddw      %[ftmp8],    %[ftmp8],   %[ftmp9]      \n\t" \
+    "punpckhwd  %[ftmp9],    %[ftmp8],   %[ftmp0]      \n\t" \
+    "paddw      %[ftmp8],    %[ftmp8],   %[ftmp9]      \n\t" \
+    "pmaddhw    %[ftmp10],   %[ftmp10],  %[filter1]    \n\t" \
+    "pmaddhw    %[ftmp11],   %[ftmp11],  %[filter2]    \n\t" \
+    "paddw      %[ftmp10],   %[ftmp10],  %[ftmp11]     \n\t" \
+    "punpckhwd  %[ftmp11],   %[ftmp10],  %[ftmp0]      \n\t" \
+    "paddw      %[ftmp10],   %[ftmp10],  %[ftmp11]     \n\t" \
+    "punpcklwd  %[srch],     %[ftmp8],   %[ftmp10]     \n\t"
+
+#define GET_DATA_V_MMI /* 8-tap sums down rows ftmp4..ftmp11: low halfword pairs -> srcl, high pairs -> srch */ \
+    "punpcklhw  %[srcl],     %[ftmp4],   %[ftmp5]      \n\t" \
+    "pmaddhw    %[srcl],     %[srcl],    %[filter10]   \n\t" \
+    "punpcklhw  %[ftmp12],   %[ftmp6],   %[ftmp7]      \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter32]   \n\t" \
+    "paddw      %[srcl],     %[srcl],    %[ftmp12]     \n\t" \
+    "punpcklhw  %[ftmp12],   %[ftmp8],   %[ftmp9]      \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter54]   \n\t" \
+    "paddw      %[srcl],     %[srcl],    %[ftmp12]     \n\t" \
+    "punpcklhw  %[ftmp12],   %[ftmp10],  %[ftmp11]     \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter76]   \n\t" \
+    "paddw      %[srcl],     %[srcl],    %[ftmp12]     \n\t" \
+    "punpckhhw  %[srch],     %[ftmp4],   %[ftmp5]      \n\t" \
+    "pmaddhw    %[srch],     %[srch],    %[filter10]   \n\t" \
+    "punpckhhw  %[ftmp12],   %[ftmp6],   %[ftmp7]      \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter32]   \n\t" \
+    "paddw      %[srch],     %[srch],    %[ftmp12]     \n\t" \
+    "punpckhhw  %[ftmp12],   %[ftmp8],   %[ftmp9]      \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter54]   \n\t" \
+    "paddw      %[srch],     %[srch],    %[ftmp12]     \n\t" \
+    "punpckhhw  %[ftmp12],   %[ftmp10],  %[ftmp11]     \n\t" \
+    "pmaddhw    %[ftmp12],   %[ftmp12],  %[filter76]   \n\t" \
+    "paddw      %[srch],     %[srch],    %[ftmp12]     \n\t"
+
+static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               const uint16_t *filter_x, int32_t w,
+                               int32_t h)
+{   /* 8-tap horizontal filter, 4 output pixels per inner pass; w must be a positive multiple of 4, h >= 1. */
+    double ftmp[15];
+    uint32_t tmp[2];
+    src -= 3;           /* the 8-byte window starts 3 pixels left of the output pixel */
+    src_stride -= w;    /* the inner loop already advances src and dst by w bytes per row */
+    dst_stride -= w;
+    __asm__ volatile (
+        "move       %[tmp1],    %[width]                   \n\t" /* keep w to reload per row */
+        "xor        %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
+        "gsldlc1    %[filter1], 0x07(%[filter])            \n\t" /* taps 0-3: +7/+0 unaligned 64-bit load pair */
+        "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
+        "gsldlc1    %[filter2], 0x0f(%[filter])            \n\t" /* taps 4-7: same idiom, 8 bytes further */
+        "gsldrc1    %[filter2], 0x08(%[filter])            \n\t"
+        "li         %[tmp0],    0x07                       \n\t"
+        "dmtc1      %[tmp0],    %[ftmp13]                  \n\t"
+        "punpcklwd  %[ftmp13],  %[ftmp13],   %[ftmp13]     \n\t" /* 7 in both 32-bit halves */
+        "1:                                                \n\t"
+        /* Get 8 data per row: four windows at src+0..src+3, each split into low/high 16-bit halves */
+        "gsldlc1    %[ftmp5],   0x07(%[src])               \n\t"
+        "gsldrc1    %[ftmp5],   0x00(%[src])               \n\t"
+        "gsldlc1    %[ftmp7],   0x08(%[src])               \n\t"
+        "gsldrc1    %[ftmp7],   0x01(%[src])               \n\t"
+        "gsldlc1    %[ftmp9],   0x09(%[src])               \n\t"
+        "gsldrc1    %[ftmp9],   0x02(%[src])               \n\t"
+        "gsldlc1    %[ftmp11],  0x0A(%[src])               \n\t"
+        "gsldrc1    %[ftmp11],  0x03(%[src])               \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp5],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp5],   %[ftmp5],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp6],   %[ftmp7],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp7],   %[ftmp7],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp8],   %[ftmp9],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp9],   %[ftmp9],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp10],  %[ftmp11],   %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp11],  %[ftmp11],   %[ftmp0]      \n\t"
+        PTR_ADDIU  "%[width],   %[width],    -0x04         \n\t"
+        /* Get raw data, then apply ROUND_POWER_OF_TWO_MMI (defined elsewhere; presumably a rounding shift by ftmp13) */
+        GET_DATA_H_MMI
+        ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        "packsswh   %[srcl],    %[srcl],     %[srch]       \n\t"
+        "packushb   %[ftmp12],  %[srcl],     %[ftmp0]      \n\t"
+        "swc1       %[ftmp12],  0x00(%[dst])               \n\t" /* store 4 result bytes */
+        PTR_ADDIU  "%[dst],     %[dst],      0x04          \n\t"
+        PTR_ADDIU  "%[src],     %[src],      0x04          \n\t"
+        /* Loop count: inner loop over the row, then reload width and step to the next row */
+        "bnez       %[width],   1b                         \n\t"
+        "move       %[width],   %[tmp1]                    \n\t"
+        PTR_ADDU   "%[src],     %[src],      %[src_stride] \n\t"
+        PTR_ADDU   "%[dst],     %[dst],      %[dst_stride] \n\t"
+        PTR_ADDIU  "%[height],  %[height],   -0x01         \n\t"
+        "bnez       %[height],  1b                         \n\t"
+        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
+          [filter1]"=&f"(ftmp[2]),  [filter2]"=&f"(ftmp[3]),
+          [ftmp0]"=&f"(ftmp[4]),    [ftmp4]"=&f"(ftmp[5]),
+          [ftmp5]"=&f"(ftmp[6]),    [ftmp6]"=&f"(ftmp[7]),
+          [ftmp7]"=&f"(ftmp[8]),    [ftmp8]"=&f"(ftmp[9]),
+          [ftmp9]"=&f"(ftmp[10]),   [ftmp10]"=&f"(ftmp[11]),
+          [ftmp11]"=&f"(ftmp[12]),  [ftmp12]"=&f"(ftmp[13]),
+          [tmp0]"=&r"(tmp[0]),      [tmp1]"=&r"(tmp[1]),
+          [src]"+&r"(src),          [width]"+&r"(w),
+          [dst]"+&r"(dst),          [height]"+&r"(h),
+          [ftmp13]"=&f"(ftmp[14])
+        : [filter]"r"(filter_x),
+          [src_stride]"r"((mips_reg)src_stride),
+          [dst_stride]"r"((mips_reg)dst_stride)
+        : "memory"
+    );
+}
+
+static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
+                              uint8_t *dst, int32_t dst_stride,
+                              const int16_t *filter_y, int32_t w,
+                              int32_t h)
+{   /* 8-tap vertical filter, 4 output pixels per inner pass; w must be a positive multiple of 4, h >= 1. */
+    double ftmp[17];
+    uint32_t tmp[1];
+    ptrdiff_t addr = src_stride;   /* full row pitch, used to step through the 8 source rows */
+    src_stride -= w;               /* the inner loop already advances src and dst by w bytes per row */
+    dst_stride -= w;
+
+    __asm__ volatile (
+        "xor        %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t"
+        "gsldlc1    %[ftmp4],    0x07(%[filter])           \n\t" /* taps 0-3: +7/+0 unaligned 64-bit load pair */
+        "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
+        "gsldlc1    %[ftmp5],    0x0f(%[filter])           \n\t" /* taps 4-7: same idiom, 8 bytes further */
+        "gsldrc1    %[ftmp5],    0x08(%[filter])           \n\t"
+        "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]      \n\t" /* taps (0,1) in both words */
+        "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]      \n\t" /* taps (2,3) */
+        "punpcklwd  %[filter54], %[ftmp5],   %[ftmp5]      \n\t" /* taps (4,5) */
+        "punpckhwd  %[filter76], %[ftmp5],   %[ftmp5]      \n\t" /* taps (6,7) */
+        "li         %[tmp0],     0x07                      \n\t"
+        "dmtc1      %[tmp0],     %[ftmp13]                 \n\t"
+        "punpcklwd  %[ftmp13],   %[ftmp13],  %[ftmp13]     \n\t" /* 7 in both 32-bit halves */
+        "1:                                                \n\t"
+        /* Get 8 data per column: one 8-byte load from each of 8 consecutive rows starting at src */
+        "gsldlc1    %[ftmp4],    0x07(%[src])              \n\t"
+        "gsldrc1    %[ftmp4],    0x00(%[src])              \n\t"
+        PTR_ADDU   "%[tmp0],     %[src],     %[addr]       \n\t"
+        "gsldlc1    %[ftmp5],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp5],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp6],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp6],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp7],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp7],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp8],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp8],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp9],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp9],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp10],   0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp10],   0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp11],   0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp11],   0x00(%[tmp0])             \n\t"
+        "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp5],    %[ftmp5],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp6],    %[ftmp6],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp7],    %[ftmp7],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp8],    %[ftmp8],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp9],    %[ftmp9],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp10],   %[ftmp10],  %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp11],   %[ftmp11],  %[ftmp0]      \n\t"
+        PTR_ADDIU  "%[width],    %[width],   -0x04         \n\t"
+        /* Get raw data, then apply ROUND_POWER_OF_TWO_MMI (defined elsewhere; presumably a rounding shift by ftmp13) */
+        GET_DATA_V_MMI
+        ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        "packsswh   %[srcl],     %[srcl],    %[srch]       \n\t"
+        "packushb   %[ftmp12],   %[srcl],    %[ftmp0]      \n\t"
+        "swc1       %[ftmp12],   0x00(%[dst])              \n\t" /* store 4 result bytes */
+        PTR_ADDIU  "%[dst],      %[dst],      0x04         \n\t"
+        PTR_ADDIU  "%[src],      %[src],      0x04         \n\t"
+        /* Loop count: width is rebuilt as addr - src_stride (the original w) before the next row */
+        "bnez       %[width],    1b                        \n\t"
+        PTR_SUBU   "%[width],    %[addr],    %[src_stride] \n\t"
+        PTR_ADDU   "%[src],      %[src],     %[src_stride] \n\t"
+        PTR_ADDU   "%[dst],      %[dst],     %[dst_stride] \n\t"
+        PTR_ADDIU  "%[height],   %[height],  -0x01         \n\t"
+        "bnez       %[height],   1b                        \n\t"
+        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
+          [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
+          [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
+          [ftmp0]"=&f"(ftmp[6]),    [ftmp4]"=&f"(ftmp[7]),
+          [ftmp5]"=&f"(ftmp[8]),    [ftmp6]"=&f"(ftmp[9]),
+          [ftmp7]"=&f"(ftmp[10]),   [ftmp8]"=&f"(ftmp[11]),
+          [ftmp9]"=&f"(ftmp[12]),   [ftmp10]"=&f"(ftmp[13]),
+          [ftmp11]"=&f"(ftmp[14]),  [ftmp12]"=&f"(ftmp[15]),
+          [src]"+&r"(src),          [dst]"+&r"(dst),
+          [width]"+&r"(w),          [height]"+&r"(h),
+          [tmp0]"=&r"(tmp[0]),      [ftmp13]"=&f"(ftmp[16])
+        : [filter]"r"(filter_y),
+          [src_stride]"r"((mips_reg)src_stride),
+          [dst_stride]"r"((mips_reg)dst_stride),
+          [addr]"r"((mips_reg)addr)
+        : "memory"
+    );
+}
+
+static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   const uint16_t *filter_x, int32_t w,
+                                   int32_t h)
+{   /* 8-tap horizontal filter averaged into dst as (dst + result + 1) >> 1; w a positive multiple of 4, h >= 1. */
+    double ftmp[15];
+    uint32_t tmp[2];
+    src -= 3;           /* the 8-byte window starts 3 pixels left of the output pixel */
+    src_stride -= w;    /* the inner loop already advances src and dst by w bytes per row */
+    dst_stride -= w;
+
+    __asm__ volatile (
+        "move       %[tmp1],    %[width]                   \n\t" /* keep w to reload per row */
+        "xor        %[ftmp0],   %[ftmp0],    %[ftmp0]      \n\t"
+        "gsldlc1    %[filter1], 0x07(%[filter])            \n\t" /* taps 0-3: +7/+0 unaligned 64-bit load pair */
+        "gsldrc1    %[filter1], 0x00(%[filter])            \n\t"
+        "gsldlc1    %[filter2], 0x0f(%[filter])            \n\t" /* taps 4-7: same idiom, 8 bytes further */
+        "gsldrc1    %[filter2], 0x08(%[filter])            \n\t"
+        "li         %[tmp0],    0x07                       \n\t"
+        "dmtc1      %[tmp0],    %[ftmp13]                  \n\t"
+        "punpcklwd  %[ftmp13],  %[ftmp13],   %[ftmp13]     \n\t" /* 7 in both 32-bit halves */
+        "1:                                                \n\t"
+        /* Get 8 data per row: four windows at src+0..src+3, each split into low/high 16-bit halves */
+        "gsldlc1    %[ftmp5],   0x07(%[src])               \n\t"
+        "gsldrc1    %[ftmp5],   0x00(%[src])               \n\t"
+        "gsldlc1    %[ftmp7],   0x08(%[src])               \n\t"
+        "gsldrc1    %[ftmp7],   0x01(%[src])               \n\t"
+        "gsldlc1    %[ftmp9],   0x09(%[src])               \n\t"
+        "gsldrc1    %[ftmp9],   0x02(%[src])               \n\t"
+        "gsldlc1    %[ftmp11],  0x0A(%[src])               \n\t"
+        "gsldrc1    %[ftmp11],  0x03(%[src])               \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp5],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp5],   %[ftmp5],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp6],   %[ftmp7],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp7],   %[ftmp7],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp8],   %[ftmp9],    %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp9],   %[ftmp9],    %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp10],  %[ftmp11],   %[ftmp0]      \n\t"
+        "punpckhbh  %[ftmp11],  %[ftmp11],   %[ftmp0]      \n\t"
+        PTR_ADDIU  "%[width],   %[width],    -0x04         \n\t"
+        /* Get raw data, then apply ROUND_POWER_OF_TWO_MMI (defined elsewhere; presumably a rounding shift by ftmp13) */
+        GET_DATA_H_MMI
+        ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        "packsswh   %[srcl],    %[srcl],     %[srch]       \n\t"
+        "packushb   %[ftmp12],  %[srcl],     %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp12],  %[ftmp12],   %[ftmp0]      \n\t"
+        "gsldlc1    %[ftmp4],   0x07(%[dst])               \n\t" /* NOTE(review): reads 8 dst bytes, only the low 4 are used */
+        "gsldrc1    %[ftmp4],   0x00(%[dst])               \n\t"
+        "punpcklbh  %[ftmp4],   %[ftmp4],    %[ftmp0]      \n\t"
+        "paddh      %[ftmp12],  %[ftmp12],   %[ftmp4]      \n\t"
+        "li         %[tmp0],    0x10001                    \n\t"
+        "dmtc1      %[tmp0],    %[ftmp5]                   \n\t"
+        "punpcklhw  %[ftmp5],   %[ftmp5],    %[ftmp5]      \n\t" /* halfwords of 1: rounding bias and shift operand */
+        "paddh      %[ftmp12],  %[ftmp12],   %[ftmp5]      \n\t"
+        "psrah      %[ftmp12],  %[ftmp12],   %[ftmp5]      \n\t"
+        "packushb   %[ftmp12],  %[ftmp12],   %[ftmp0]      \n\t"
+        "swc1       %[ftmp12],  0x00(%[dst])               \n\t" /* store 4 averaged bytes */
+        PTR_ADDIU  "%[dst],     %[dst],      0x04          \n\t"
+        PTR_ADDIU  "%[src],     %[src],      0x04          \n\t"
+        /* Loop count: inner loop over the row, then reload width and step to the next row */
+        "bnez       %[width],   1b                         \n\t"
+        "move       %[width],   %[tmp1]                    \n\t"
+        PTR_ADDU   "%[src],     %[src],      %[src_stride] \n\t"
+        PTR_ADDU   "%[dst],     %[dst],      %[dst_stride] \n\t"
+        PTR_ADDIU  "%[height],  %[height],   -0x01         \n\t"
+        "bnez       %[height],  1b                         \n\t"
+        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
+          [filter1]"=&f"(ftmp[2]),  [filter2]"=&f"(ftmp[3]),
+          [ftmp0]"=&f"(ftmp[4]),    [ftmp4]"=&f"(ftmp[5]),
+          [ftmp5]"=&f"(ftmp[6]),    [ftmp6]"=&f"(ftmp[7]),
+          [ftmp7]"=&f"(ftmp[8]),    [ftmp8]"=&f"(ftmp[9]),
+          [ftmp9]"=&f"(ftmp[10]),   [ftmp10]"=&f"(ftmp[11]),
+          [ftmp11]"=&f"(ftmp[12]),  [ftmp12]"=&f"(ftmp[13]),
+          [tmp0]"=&r"(tmp[0]),      [tmp1]"=&r"(tmp[1]),
+          [src]"+&r"(src),          [width]"+&r"(w),
+          [dst]"+&r"(dst),          [height]"+&r"(h),
+          [ftmp13]"=&f"(ftmp[14])
+        : [filter]"r"(filter_x),
+          [src_stride]"r"((mips_reg)src_stride),
+          [dst_stride]"r"((mips_reg)dst_stride)
+        : "memory"
+    );
+}
+
+static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  const int16_t *filter_y, int32_t w,
+                                  int32_t h)
+{
+    double ftmp[17];             /* backing store for the FP asm operands */
+    uint32_t tmp[1];             /* backing store for the GPR scratch operand */
+    ptrdiff_t addr = src_stride; /* full source pitch: step between rows */
+    src_stride -= w;             /* row-end -> next-row-start adjustment */
+    dst_stride -= w;             /* same adjustment for dst */
+    /* 8-tap vertical filter averaged into dst; w is consumed 4 pixels per pass. */
+    __asm__ volatile (
+        "xor        %[ftmp0],    %[ftmp0],   %[ftmp0]      \n\t" /* ftmp0 = 0 */
+        "gsldlc1    %[ftmp4],    0x07(%[filter])           \n\t" /* taps 0..3 (unaligned-safe: left part at +7) */
+        "gsldrc1    %[ftmp4],    0x00(%[filter])           \n\t"
+        "gsldlc1    %[ftmp5],    0x0f(%[filter])           \n\t" /* taps 4..7 (left part at +15) */
+        "gsldrc1    %[ftmp5],    0x08(%[filter])           \n\t"
+        "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]      \n\t" /* taps 0,1 in both words */
+        "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]      \n\t" /* taps 2,3 in both words */
+        "punpcklwd  %[filter54], %[ftmp5],   %[ftmp5]      \n\t" /* taps 4,5 in both words */
+        "punpckhwd  %[filter76], %[ftmp5],   %[ftmp5]      \n\t" /* taps 6,7 in both words */
+        "li         %[tmp0],     0x07                      \n\t" /* ftmp13 = 7 in both words: */
+        "dmtc1      %[tmp0],     %[ftmp13]                 \n\t" /* the count handed to       */
+        "punpcklwd  %[ftmp13],   %[ftmp13],  %[ftmp13]     \n\t" /* ROUND_POWER_OF_TWO_MMI    */
+        "1:                                                \n\t"
+        /* Load 8 bytes from each of 8 consecutive rows (addr bytes apart) */
+        "gsldlc1    %[ftmp4],    0x07(%[src])              \n\t"
+        "gsldrc1    %[ftmp4],    0x00(%[src])              \n\t"
+        PTR_ADDU   "%[tmp0],     %[src],     %[addr]       \n\t"
+        "gsldlc1    %[ftmp5],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp5],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp6],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp6],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp7],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp7],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp8],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp8],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp9],    0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp9],    0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp10],   0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp10],   0x00(%[tmp0])             \n\t"
+        PTR_ADDU   "%[tmp0],     %[tmp0],    %[addr]       \n\t"
+        "gsldlc1    %[ftmp11],   0x07(%[tmp0])             \n\t"
+        "gsldrc1    %[ftmp11],   0x00(%[tmp0])             \n\t"
+        "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t" /* widen the low 4 bytes of each row to 16 bit */
+        "punpcklbh  %[ftmp5],    %[ftmp5],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp6],    %[ftmp6],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp7],    %[ftmp7],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp8],    %[ftmp8],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp9],    %[ftmp9],   %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp10],   %[ftmp10],  %[ftmp0]      \n\t"
+        "punpcklbh  %[ftmp11],   %[ftmp11],  %[ftmp0]      \n\t"
+        PTR_ADDIU  "%[width],    %[width],   -0x04         \n\t" /* 4 pixels handled per pass */
+        /* Filter (macro defined elsewhere); sums presumably land in srcl/srch */
+        GET_DATA_V_MMI
+        ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
+                               %[ftmp6], %[tmp0])
+        "packsswh   %[srcl],     %[srcl],    %[srch]       \n\t" /* 4 results -> 16 bit */
+        "packushb   %[ftmp12],   %[srcl],    %[ftmp0]      \n\t" /* saturate to 8 bit   */
+        "punpcklbh  %[ftmp12],   %[ftmp12],  %[ftmp0]      \n\t" /* and widen again     */
+        "gsldlc1    %[ftmp4],    0x07(%[dst])              \n\t" /* pixels currently in dst */
+        "gsldrc1    %[ftmp4],    0x00(%[dst])              \n\t"
+        "punpcklbh  %[ftmp4],    %[ftmp4],   %[ftmp0]      \n\t"
+        "paddh      %[ftmp12],   %[ftmp12],  %[ftmp4]      \n\t" /* filtered + dst */
+        "li         %[tmp0],     0x10001                   \n\t" /* ftmp5 = 0x0001 in every halfword */
+        "dmtc1      %[tmp0],     %[ftmp5]                  \n\t"
+        "punpcklhw  %[ftmp5],    %[ftmp5],   %[ftmp5]      \n\t"
+        "paddh      %[ftmp12],   %[ftmp12],  %[ftmp5]      \n\t" /* + 1 (rounding) */
+        "psrah      %[ftmp12],   %[ftmp12],  %[ftmp5]      \n\t" /* NOTE(review): count comes from ftmp5, intended as >> 1 */
+        "packushb   %[ftmp12],   %[ftmp12],  %[ftmp0]      \n\t"
+        "swc1       %[ftmp12],   0x00(%[dst])              \n\t" /* store 4 pixels */
+        PTR_ADDIU  "%[dst],      %[dst],     0x04          \n\t"
+        PTR_ADDIU  "%[src],      %[src],     0x04          \n\t"
+        /* Inner loop over the row, then reset width and step to the next row */
+        "bnez       %[width],    1b                        \n\t"
+        PTR_SUBU   "%[width],    %[addr],    %[src_stride] \n\t" /* addr - (pitch - w) == w */
+        PTR_ADDU   "%[src],      %[src],     %[src_stride] \n\t"
+        PTR_ADDU   "%[dst],      %[dst],     %[dst_stride] \n\t"
+        PTR_ADDIU  "%[height],   %[height],  -0x01         \n\t"
+        "bnez       %[height],   1b                        \n\t"
+        : [srcl]"=&f"(ftmp[0]),     [srch]"=&f"(ftmp[1]),
+          [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
+          [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
+          [ftmp0]"=&f"(ftmp[6]),    [ftmp4]"=&f"(ftmp[7]),
+          [ftmp5]"=&f"(ftmp[8]),    [ftmp6]"=&f"(ftmp[9]),
+          [ftmp7]"=&f"(ftmp[10]),   [ftmp8]"=&f"(ftmp[11]),
+          [ftmp9]"=&f"(ftmp[12]),   [ftmp10]"=&f"(ftmp[13]),
+          [ftmp11]"=&f"(ftmp[14]),  [ftmp12]"=&f"(ftmp[15]),
+          [src]"+&r"(src),          [dst]"+&r"(dst),
+          [width]"+&r"(w),          [height]"+&r"(h),
+          [tmp0]"=&r"(tmp[0]),      [ftmp13]"=&f"(ftmp[16])
+        : [filter]"r"(filter_y),
+          [src_stride]"r"((mips_reg)src_stride),
+          [dst_stride]"r"((mips_reg)dst_stride),
+          [addr]"r"((mips_reg)addr)
+        : "memory"
+    );
+}
+
+static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t w, int32_t h)
+{
+    double ftmp[4];     /* backing store for the FP asm operands */
+    uint32_t tmp[2];    /* backing store for the GPR scratch operands */
+    src_stride -= w;    /* row-end -> next-row-start adjustment */
+    dst_stride -= w;    /* same adjustment for dst */
+    /* Rounded average of src and dst written back to dst, 4 pixels per pass. */
+    __asm__ volatile (
+        "move       %[tmp1],    %[width]                  \n\t" /* remember w for the next row */
+        "xor        %[ftmp0],   %[ftmp0],   %[ftmp0]      \n\t" /* ftmp0 = 0 */
+        "li         %[tmp0],    0x10001                   \n\t" /* ftmp3 = 0x0001 in every halfword */
+        "dmtc1      %[tmp0],    %[ftmp3]                  \n\t"
+        "punpcklhw  %[ftmp3],   %[ftmp3],   %[ftmp3]      \n\t"
+        "1:                                               \n\t"
+        "gslwlc1    %[ftmp1],   0x03(%[src])              \n\t" /* unaligned 32-bit load: left part at +3 */
+        "gslwrc1    %[ftmp1],   0x00(%[src])              \n\t"
+        "gslwlc1    %[ftmp2],   0x03(%[dst])              \n\t" /* same for the 4 pixels already in dst */
+        "gslwrc1    %[ftmp2],   0x00(%[dst])              \n\t"
+        "punpcklbh  %[ftmp1],   %[ftmp1],   %[ftmp0]      \n\t" /* widen both to 16 bit */
+        "punpcklbh  %[ftmp2],   %[ftmp2],   %[ftmp0]      \n\t"
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp2]      \n\t" /* src + dst */
+        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" /* + 1 (rounding) */
+        "psrah      %[ftmp1],   %[ftmp1],   %[ftmp3]      \n\t" /* shift right, count taken from ftmp3 */
+        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp0]      \n\t" /* back to 8 bit with saturation */
+        "swc1       %[ftmp1],   0x00(%[dst])              \n\t" /* store 4 pixels */
+        PTR_ADDIU  "%[width],   %[width],   -0x04         \n\t"
+        PTR_ADDIU  "%[dst],     %[dst],     0x04          \n\t"
+        PTR_ADDIU  "%[src],     %[src],     0x04          \n\t"
+        "bnez       %[width],   1b                        \n\t" /* inner loop over the row */
+        "move       %[width],   %[tmp1]                   \n\t" /* restore w */
+        PTR_ADDU   "%[dst],     %[dst],     %[dst_stride] \n\t"
+        PTR_ADDU   "%[src],     %[src],     %[src_stride] \n\t"
+        PTR_ADDIU  "%[height],  %[height],  -0x01         \n\t"
+        "bnez       %[height],  1b                        \n\t" /* outer loop over the rows */
+        : [ftmp0]"=&f"(ftmp[0]),  [ftmp1]"=&f"(ftmp[1]),
+          [ftmp2]"=&f"(ftmp[2]),  [ftmp3]"=&f"(ftmp[3]),
+          [tmp0]"=&r"(tmp[0]),    [tmp1]"=&r"(tmp[1]),
+          [src]"+&r"(src),        [dst]"+&r"(dst),
+          [width]"+&r"(w),        [height]"+&r"(h)
+        : [src_stride]"r"((mips_reg)src_stride),
+          [dst_stride]"r"((mips_reg)dst_stride)
+        : "memory"
+    );
+}
+
+static const int16_t vp9_subpel_filters_mmi[3][15][8] = { /* [filter set][subpel position - 1][tap] */
+    [FILTER_8TAP_REGULAR] = { /* NOTE(review): spot-checked rows sum to 128; presumably true for all */
+         {0, 1, -5, 126, 8, -3, 1, 0},
+         {-1, 3, -10, 122, 18, -6, 2, 0},
+         {-1, 4, -13, 118, 27, -9, 3, -1},
+         {-1, 4, -16, 112, 37, -11, 4, -1},
+         {-1, 5, -18, 105, 48, -14, 4, -1},
+         {-1, 5, -19, 97, 58, -16, 5, -1},
+         {-1, 6, -19, 88, 68, -18, 5, -1},
+         {-1, 6, -19, 78, 78, -19, 6, -1},
+         {-1, 5, -18, 68, 88, -19, 6, -1},
+         {-1, 5, -16, 58, 97, -19, 5, -1},
+         {-1, 4, -14, 48, 105, -18, 5, -1},
+         {-1, 4, -11, 37, 112, -16, 4, -1},
+         {-1, 3, -9, 27, 118, -13, 4, -1},
+         {0, 2, -6, 18, 122, -10, 3, -1},
+         {0, 1, -3, 8, 126, -5, 1, 0},
+    }, [FILTER_8TAP_SHARP] = {
+        {-1, 3, -7, 127, 8, -3, 1, 0},
+        {-2, 5, -13, 125, 17, -6, 3, -1},
+        {-3, 7, -17, 121, 27, -10, 5, -2},
+        {-4, 9, -20, 115, 37, -13, 6, -2},
+        {-4, 10, -23, 108, 48, -16, 8, -3},
+        {-4, 10, -24, 100, 59, -19, 9, -3},
+        {-4, 11, -24, 90, 70, -21, 10, -4},
+        {-4, 11, -23, 80, 80, -23, 11, -4},
+        {-4, 10, -21, 70, 90, -24, 11, -4},
+        {-3, 9, -19, 59, 100, -24, 10, -4},
+        {-3, 8, -16, 48, 108, -23, 10, -4},
+        {-2, 6, -13, 37, 115, -20, 9, -4},
+        {-2, 5, -10, 27, 121, -17, 7, -3},
+        {-1, 3, -6, 17, 125, -13, 5, -2},
+        {0, 1, -3, 8, 127, -7, 3, -1},
+    }, [FILTER_8TAP_SMOOTH] = {
+        {-3, -1, 32, 64, 38, 1, -3, 0},
+        {-2, -2, 29, 63, 41, 2, -3, 0},
+        {-2, -2, 26, 63, 43, 4, -4, 0},
+        {-2, -3, 24, 62, 46, 5, -4, 0},
+        {-2, -3, 21, 60, 49, 7, -4, 0},
+        {-1, -4, 18, 59, 51, 9, -4, 0},
+        {-1, -4, 16, 57, 53, 12, -4, -1},
+        {-1, -4, 14, 55, 55, 14, -4, -1},
+        {-1, -4, 12, 53, 57, 16, -4, -1},
+        {0, -4, 9, 51, 59, 18, -4, -1},
+        {0, -4, 7, 49, 60, 21, -3, -2},
+        {0, -4, 5, 46, 62, 24, -3, -2},
+        {0, -4, 4, 43, 63, 26, -2, -2},
+        {0, -3, 2, 41, 63, 29, -2, -2},
+        {0, -3, 1, 38, 64, 32, -1, -3},
+    }
+}; /* the wrappers below index this table as [TYPE_IDX][mx-1] / [TYPE_IDX][my-1] */
+
+#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)                           \
+void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1];            \
+    /* Table row is (subpel position - 1); mx == 0 is not handled here. */     \
+    convolve_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);       \
+}                                                                              \
+/* Vertical put: the filter is fed src moved up by 3 rows. */                  \
+void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];            \
+                                                                               \
+    src -= (3 * srcstride);                                                    \
+    convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);        \
+}                                                                              \
+/* 2-D put: horizontal pass into temp (h + 7 rows, pitch 64), then vertical */ \
+void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,    \
+                                         const uint8_t *src,                   \
+                                         ptrdiff_t srcstride,                  \
+                                         int h, int mx, int my)                \
+{                                                                              \
+    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1];           \
+    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];           \
+                                                                               \
+    int tmp_h = h + 7;                                                         \
+    uint8_t temp[64 * 71];  /* room for 64 + 7 rows of 64 bytes */             \
+    src -= (3 * srcstride);                                                    \
+    convolve_horiz_mmi(src, srcstride, temp, 64, hfilter, SIZE, tmp_h);        \
+    convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h);             \
+}                                                                              \
+/* avg variants: identical set-up, dispatched to convolve_avg_* helpers. */    \
+void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1];            \
+                                                                               \
+    convolve_avg_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);   \
+}                                                                              \
+                                                                               \
+void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];            \
+                                                                               \
+    src -= (3 * srcstride);                                                    \
+    convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);    \
+}                                                                              \
+/* 2-D avg: full put-style filtering into temp1, then averaged into dst. */    \
+void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,    \
+                                         const uint8_t *src,                   \
+                                         ptrdiff_t srcstride,                  \
+                                         int h, int mx, int my)                \
+{                                                                              \
+    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1];           \
+    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1];           \
+                                                                               \
+    uint8_t temp1[64 * 64];  /* filtered block, pitch 64 */                    \
+    uint8_t temp2[64 * 71];  /* room for 64 + 7 rows of 64 bytes */            \
+    int tmp_h = h + 7;                                                         \
+    src -= (3 * srcstride);                                                    \
+    convolve_horiz_mmi(src, srcstride, temp2, 64, hfilter, SIZE, tmp_h);       \
+    convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h);                 \
+    convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h);                      \
+}
+
+VP9_8TAP_MIPS_MMI_FUNC(64, regular, FILTER_8TAP_REGULAR); /* regular taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_MIPS_MMI_FUNC(64, sharp, FILTER_8TAP_SHARP); /* sharp taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_MIPS_MMI_FUNC(64, smooth, FILTER_8TAP_SMOOTH); /* smooth taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+#undef VP9_8TAP_MIPS_MMI_FUNC /* generator is not needed past this point */
diff --git a/libavcodec/mips/vp9dsp_init_mips.c b/libavcodec/mips/vp9dsp_init_mips.c
index c8a48908af30e..5990fa69528f0 100644
--- a/libavcodec/mips/vp9dsp_init_mips.c
+++ b/libavcodec/mips/vp9dsp_init_mips.c
@@ -168,8 +168,50 @@ static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp)
 }
 #endif  // #if HAVE_MSA
 
+#if HAVE_MMI
+static av_cold void vp9dsp_mc_init_mmi(VP9DSPContext *dsp)
+{   /* Point the 8-tap entries of dsp->mc[] at the MMI put/avg functions. */
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)  \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_smooth_##sz##dir##_mmi;             \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_regular_##sz##dir##_mmi;            \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_sharp_##sz##dir##_mmi;
+    /* First mc[] index selects the block width: 0..4 -> 64, 32, 16, 8, 4. */
+#define init_subpel2(idx, idxh, idxv, dir, type)      \
+    init_subpel1(0, idx, idxh, idxv, 64, dir, type);  \
+    init_subpel1(1, idx, idxh, idxv, 32, dir, type);  \
+    init_subpel1(2, idx, idxh, idxv, 16, dir, type);  \
+    init_subpel1(3, idx, idxh, idxv,  8, dir, type);  \
+    init_subpel1(4, idx, idxh, idxv,  4, dir, type)
+    /* Last two mc[] indices: [1][1] -> hv, [0][1] -> v, [1][0] -> h functions. */
+#define init_subpel3(idx, type)         \
+    init_subpel2(idx, 1, 1, hv, type);  \
+    init_subpel2(idx, 0, 1, v, type);   \
+    init_subpel2(idx, 1, 0, h, type)
+    /* Third mc[] index: 0 receives the put functions, 1 the avg functions. */
+    init_subpel3(0, put);
+    init_subpel3(1, avg);
+
+#undef init_subpel1
+#undef init_subpel2
+#undef init_subpel3
+}
+
+static av_cold void vp9dsp_init_mmi(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp != 8) /* the MMI functions are only installed for bpp == 8 */
+        return;
+    vp9dsp_mc_init_mmi(dsp);
+}
+#endif  // #if HAVE_MMI
+
 av_cold void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp)
 {
+#if HAVE_MMI
+    vp9dsp_init_mmi(dsp, bpp);
+#endif  // #if HAVE_MMI
 #if HAVE_MSA
     vp9dsp_init_msa(dsp, bpp);
 #endif  // #if HAVE_MSA
diff --git a/libavcodec/mips/vp9dsp_mips.h b/libavcodec/mips/vp9dsp_mips.h
index 4d7303888d023..0b6ce7cd7d8cc 100644
--- a/libavcodec/mips/vp9dsp_mips.h
+++ b/libavcodec/mips/vp9dsp_mips.h
@@ -234,4 +234,54 @@ void ff_tm_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
 void ff_tm_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
                      const uint8_t *top);
 
+#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, type, type_idx)                         \
+void ff_put_8tap_##type##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+/* type_idx is unused: it mirrors the signature of the defining macro. */    \
+void ff_put_8tap_##type##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+                                                                             \
+void ff_put_8tap_##type##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,  \
+                                         const uint8_t *src,                 \
+                                         ptrdiff_t srcstride,                \
+                                         int h, int mx, int my);             \
+/* avg counterparts of the three put prototypes above. */                    \
+void ff_avg_8tap_##type##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+                                                                             \
+void ff_avg_8tap_##type##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+                                                                             \
+void ff_avg_8tap_##type##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,  \
+                                         const uint8_t *src,                 \
+                                         ptrdiff_t srcstride,                \
+                                         int h, int mx, int my) /* terminated by the semicolon at the use site */
+
+VP9_8TAP_MIPS_MMI_FUNC(64, regular, FILTER_8TAP_REGULAR); /* prototypes: regular taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MMI_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_MIPS_MMI_FUNC(64, sharp, FILTER_8TAP_SHARP); /* prototypes: sharp taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MMI_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_MIPS_MMI_FUNC(64, smooth, FILTER_8TAP_SMOOTH); /* prototypes: smooth taps, widths 64..4 */
+VP9_8TAP_MIPS_MMI_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MMI_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+#undef VP9_8TAP_MIPS_MMI_FUNC /* keep the helper macro out of including files */
+
 #endif  // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c
index a858707d54586..37d7bb82286d8 100644
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -70,8 +70,7 @@ static int mjpegb_decode_frame(AVCodecContext *avctx,
 
     skip_bits(&hgb, 32); /* reserved zeros */
 
-    if (get_bits_long(&hgb, 32) != MKBETAG('m','j','p','g'))
-    {
+    if (get_bits_long(&hgb, 32) != MKBETAG('m','j','p','g')) {
         av_log(avctx, AV_LOG_WARNING, "not mjpeg-b (bad fourcc)\n");
         return AVERROR_INVALIDDATA;
     }
@@ -85,19 +84,17 @@ static int mjpegb_decode_frame(AVCodecContext *avctx,
 
     dqt_offs = read_offs(avctx, &hgb, buf_end - buf_ptr, "dqt is %d and size is %d\n");
     av_log(avctx, AV_LOG_DEBUG, "dqt offs: 0x%"PRIx32"\n", dqt_offs);
-    if (dqt_offs)
-    {
+    if (dqt_offs) {
         init_get_bits(&s->gb, buf_ptr+dqt_offs, (buf_end - (buf_ptr+dqt_offs))*8);
         s->start_code = DQT;
-        if (ff_mjpeg_decode_dqt(s) < 0 &&
-            (avctx->err_recognition & AV_EF_EXPLODE))
-          return AVERROR_INVALIDDATA;
+        ret = ff_mjpeg_decode_dqt(s);
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
+            return ret;
     }
 
     dht_offs = read_offs(avctx, &hgb, buf_end - buf_ptr, "dht is %d and size is %d\n");
     av_log(avctx, AV_LOG_DEBUG, "dht offs: 0x%"PRIx32"\n", dht_offs);
-    if (dht_offs)
-    {
+    if (dht_offs) {
         init_get_bits(&s->gb, buf_ptr+dht_offs, (buf_end - (buf_ptr+dht_offs))*8);
         s->start_code = DHT;
         ff_mjpeg_decode_dht(s);
@@ -105,37 +102,34 @@ static int mjpegb_decode_frame(AVCodecContext *avctx,
 
     sof_offs = read_offs(avctx, &hgb, buf_end - buf_ptr, "sof is %d and size is %d\n");
     av_log(avctx, AV_LOG_DEBUG, "sof offs: 0x%"PRIx32"\n", sof_offs);
-    if (sof_offs)
-    {
+    if (sof_offs) {
         init_get_bits(&s->gb, buf_ptr+sof_offs, (buf_end - (buf_ptr+sof_offs))*8);
         s->start_code = SOF0;
-        if (ff_mjpeg_decode_sof(s) < 0)
-            return -1;
+        if ((ret = ff_mjpeg_decode_sof(s)) < 0)
+            return ret;
     }
 
     sos_offs = read_offs(avctx, &hgb, buf_end - buf_ptr, "sos is %d and size is %d\n");
     av_log(avctx, AV_LOG_DEBUG, "sos offs: 0x%"PRIx32"\n", sos_offs);
     sod_offs = read_offs(avctx, &hgb, buf_end - buf_ptr, "sof is %d and size is %d\n");
     av_log(avctx, AV_LOG_DEBUG, "sod offs: 0x%"PRIx32"\n", sod_offs);
-    if (sos_offs)
-    {
+    if (sos_offs) {
         init_get_bits(&s->gb, buf_ptr + sos_offs,
                       8 * FFMIN(field_size, buf_end - buf_ptr - sos_offs));
         s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
         s->start_code = SOS;
-        if (ff_mjpeg_decode_sos(s, NULL, 0, NULL) < 0 &&
-            (avctx->err_recognition & AV_EF_EXPLODE))
-          return AVERROR_INVALIDDATA;
+        ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL);
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
+            return ret;
     }
 
     if (s->interlaced) {
         s->bottom_field ^= 1;
         /* if not bottom field, do not output image yet */
-        if (s->bottom_field != s->interlace_polarity && second_field_offs)
-        {
+        if (s->bottom_field != s->interlace_polarity && second_field_offs) {
             buf_ptr = buf + second_field_offs;
             goto read_header;
-            }
+        }
     }
 
     //XXX FIXME factorize, this looks very similar to the EOI code
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index b0cb3ffc83925..e82c185433017 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -43,6 +43,7 @@
 #include "mjpeg.h"
 #include "mjpegdec.h"
 #include "jpeglsdec.h"
+#include "profiles.h"
 #include "put_bits.h"
 #include "tiff.h"
 #include "exif.h"
@@ -72,34 +73,45 @@ static int build_vlc(VLC *vlc, const uint8_t *bits_table,
                               huff_code, 2, 2, huff_sym, 2, 2, use_static);
 }
 
-static int build_basic_mjpeg_vlc(MJpegDecodeContext *s)
+static int init_default_huffman_tables(MJpegDecodeContext *s)
 {
-    int ret;
-
-    if ((ret = build_vlc(&s->vlcs[0][0], avpriv_mjpeg_bits_dc_luminance,
-                         avpriv_mjpeg_val_dc, 12, 0, 0)) < 0)
-        return ret;
-
-    if ((ret = build_vlc(&s->vlcs[0][1], avpriv_mjpeg_bits_dc_chrominance,
-                         avpriv_mjpeg_val_dc, 12, 0, 0)) < 0)
-        return ret;
-
-    if ((ret = build_vlc(&s->vlcs[1][0], avpriv_mjpeg_bits_ac_luminance,
-                         avpriv_mjpeg_val_ac_luminance, 251, 0, 1)) < 0)
-        return ret;
-
-    if ((ret = build_vlc(&s->vlcs[1][1], avpriv_mjpeg_bits_ac_chrominance,
-                         avpriv_mjpeg_val_ac_chrominance, 251, 0, 1)) < 0)
-        return ret;
-
-    if ((ret = build_vlc(&s->vlcs[2][0], avpriv_mjpeg_bits_ac_luminance,
-                         avpriv_mjpeg_val_ac_luminance, 251, 0, 0)) < 0)
-        return ret;
-
-    if ((ret = build_vlc(&s->vlcs[2][1], avpriv_mjpeg_bits_ac_chrominance,
-                         avpriv_mjpeg_val_ac_chrominance, 251, 0, 0)) < 0)
-        return ret;
+    static const struct {
+        int class;              /* first vlcs[] index; classes 0 and 1 are also kept as raw tables */
+        int index;              /* second vlcs[] index */
+        const uint8_t *bits;    /* handed to build_vlc(); bytes 1..16 are copied as the raw lengths */
+        const uint8_t *values;  /* handed to build_vlc(); first `length` bytes are copied as raw values */
+        int codes;              /* code count handed to build_vlc() */
+        int length;             /* number of bytes of values[] copied into raw_huffman_values */
+    } ht[] = {
+        { 0, 0, avpriv_mjpeg_bits_dc_luminance,
+                avpriv_mjpeg_val_dc, 12, 12 },
+        { 0, 1, avpriv_mjpeg_bits_dc_chrominance,
+                avpriv_mjpeg_val_dc, 12, 12 },
+        { 1, 0, avpriv_mjpeg_bits_ac_luminance,
+                avpriv_mjpeg_val_ac_luminance,   251, 162 },
+        { 1, 1, avpriv_mjpeg_bits_ac_chrominance,
+                avpriv_mjpeg_val_ac_chrominance, 251, 162 },
+        { 2, 0, avpriv_mjpeg_bits_ac_luminance,
+                avpriv_mjpeg_val_ac_luminance,   251, 162 },
+        { 2, 1, avpriv_mjpeg_bits_ac_chrominance,
+                avpriv_mjpeg_val_ac_chrominance, 251, 162 },
+    };
+    int i, ret;
+    /* Build every default VLC; stop at the first build_vlc() failure. */
+    for (i = 0; i < FF_ARRAY_ELEMS(ht); i++) {
+        ret = build_vlc(&s->vlcs[ht[i].class][ht[i].index],
+                        ht[i].bits, ht[i].values, ht[i].codes,
+                        0, ht[i].class == 1); /* last argument is set for class 1 only, as in the calls this loop replaces */
+        if (ret < 0)
+            return ret;
 
+        if (ht[i].class < 2) { /* class 2 reuses the class 1 tables, so no raw copy is kept for it */
+            memcpy(s->raw_huffman_lengths[ht[i].class][ht[i].index],
+                   ht[i].bits + 1, 16);
+            memcpy(s->raw_huffman_values[ht[i].class][ht[i].index],
+                   ht[i].values, ht[i].length);
+        }
+    }
 
     return 0;
 }
@@ -150,7 +162,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
     avctx->colorspace = AVCOL_SPC_BT470BG;
     s->hwaccel_pix_fmt = s->hwaccel_sw_pix_fmt = AV_PIX_FMT_NONE;
 
-    if ((ret = build_basic_mjpeg_vlc(s)) < 0)
+    if ((ret = init_default_huffman_tables(s)) < 0)
         return ret;
 
     if (s->extern_huff) {
@@ -160,7 +172,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
         if (ff_mjpeg_decode_dht(s)) {
             av_log(avctx, AV_LOG_ERROR,
                    "error using external huffman table, switching back to internal\n");
-            build_basic_mjpeg_vlc(s);
+            init_default_huffman_tables(s);
         }
     }
     if (avctx->field_order == AV_FIELD_BB) { /* quicktime icefloe 019 */
@@ -301,7 +313,6 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     memset(s->upscale_h, 0, sizeof(s->upscale_h));
     memset(s->upscale_v, 0, sizeof(s->upscale_v));
 
-    /* XXX: verify len field validity */
     len     = get_bits(&s->gb, 16);
     bits    = get_bits(&s->gb, 8);
 
@@ -355,6 +366,11 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                                       "bits/component or 16-bit gray");
         return AVERROR_PATCHWELCOME;
     }
+    if (len != 8 + 3 * nb_components) {
+        av_log(s->avctx, AV_LOG_ERROR, "decode_sof0: error, len(%d) mismatch %d components\n", len, nb_components);
+        return AVERROR_INVALIDDATA;
+    }
+
     s->nb_components = nb_components;
     s->h_max         = 1;
     s->v_max         = 1;
@@ -476,7 +492,8 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         if (s->rgb)
             s->avctx->pix_fmt = s->bits <= 9 ? AV_PIX_FMT_BGR24 : AV_PIX_FMT_BGR48;
         else {
-            if (s->component_id[0] == 'Q' && s->component_id[1] == 'F' && s->component_id[2] == 'A') {
+            if (   s->adobe_transform == 0
+                || s->component_id[0] == 'R' - 1 && s->component_id[1] == 'G' - 1 && s->component_id[2] == 'B' - 1) {
                 s->avctx->pix_fmt = s->bits <= 8 ? AV_PIX_FMT_GBRP : AV_PIX_FMT_GBRP16;
             } else {
                 if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_YUVJ444P;
@@ -699,8 +716,6 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
             s->width, s->height, s->linesize[0], s->linesize[1],
             s->interlaced, s->avctx->height);
 
-    if (len != (8 + (3 * nb_components)))
-        av_log(s->avctx, AV_LOG_DEBUG, "decode_sof0: error, len(%d) mismatch\n", len);
     }
 
     if ((s->rgb && !s->lossless && !s->ls) ||
@@ -1206,25 +1221,25 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
                             || v * mb_y + y >= s->height) {
                             // Nothing to do
                         } else if (bits<=8) {
-                        ptr = s->picture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
-                        if(y==0 && toprow){
-                            if(x==0 && leftcol){
-                                pred= 1 << (bits - 1);
-                            }else{
-                                pred= ptr[-1];
-                            }
-                        }else{
-                            if(x==0 && leftcol){
-                                pred= ptr[-linesize];
+                            ptr = s->picture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+                            if(y==0 && toprow){
+                                if(x==0 && leftcol){
+                                    pred= 1 << (bits - 1);
+                                }else{
+                                    pred= ptr[-1];
+                                }
                             }else{
-                                PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                                if(x==0 && leftcol){
+                                    pred= ptr[-linesize];
+                                }else{
+                                    PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                                }
                             }
-                        }
 
-                        if (s->interlaced && s->bottom_field)
-                            ptr += linesize >> 1;
-                        pred &= mask;
-                        *ptr= pred + ((unsigned)dc << point_transform);
+                            if (s->interlaced && s->bottom_field)
+                                ptr += linesize >> 1;
+                            pred &= mask;
+                            *ptr= pred + ((unsigned)dc << point_transform);
                         }else{
                             ptr16 = (uint16_t*)(s->picture_ptr->data[c] + 2*(linesize * (v * mb_y + y)) + 2*(h * mb_x + x)); //FIXME optimize this crap
                             if(y==0 && toprow){
@@ -1888,6 +1903,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
         type   = get_bits(&s->gb, 8);
         len -= 4;
 
+        av_freep(&s->stereo3d);
         s->stereo3d = av_stereo3d_alloc();
         if (!s->stereo3d) {
             goto out;
@@ -2796,6 +2812,7 @@ AVCodec ff_mjpeg_decoder = {
     .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .priv_class     = &mjpegdec_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
     .hw_configs     = (const AVCodecHWConfigInternal*[]) {
diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index d2fcb8e19199a..0ea7bd3d1030e 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c
@@ -38,6 +38,7 @@
 #include "mpegvideo.h"
 #include "mjpeg.h"
 #include "mjpegenc.h"
+#include "profiles.h"
 
 static int alloc_huffman(MpegEncContext *s)
 {
@@ -418,6 +419,7 @@ AVCodec ff_mjpeg_encoder = {
         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE
     },
     .priv_class     = &mjpeg_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
 };
 #endif
 
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 8caa266b7e723..3139a0172fa0f 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -1195,7 +1195,7 @@ static int read_access_unit(AVCodecContext *avctx, void* data,
         }
 
         if (length < header_size + substr_header_size) {
-            av_log(m->avctx, AV_LOG_ERROR, "Insuffient data for headers\n");
+            av_log(m->avctx, AV_LOG_ERROR, "Insufficient data for headers\n");
             goto error;
         }
 
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 8b5ce2117afc8..759eea479d598 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -633,7 +633,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
                 if(P[i][1] > (c->ymax<<shift)) P[i][1]= (c->ymax<<shift);
             }
 
-        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
+        dmin4 = epzs_motion_search2(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift, 1);
 
         dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);
 
@@ -795,7 +795,7 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
             P_MV1[0]= mx; //FIXME not correct if block != field_select
             P_MV1[1]= my / 2;
 
-            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);
+            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1, 0);
 
             dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);
 
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 0c21bbfe1a4ab..014038e54fc46 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -989,76 +989,16 @@ int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
     }
 }
 
-static int epzs_motion_search4(MpegEncContext * s,
-                             int *mx_ptr, int *my_ptr, int P[10][2],
-                             int src_index, int ref_index, int16_t (*last_mv)[2],
-                             int ref_mv_scale)
-{
-    MotionEstContext * const c= &s->me;
-    int best[2]={0, 0};
-    int d, dmin;
-    unsigned map_generation;
-    const int penalty_factor= c->penalty_factor;
-    const int size=1;
-    const int h=8;
-    const int ref_mv_stride= s->mb_stride;
-    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
-    me_cmp_func cmpf, chroma_cmpf;
-    LOAD_COMMON
-    int flags= c->flags;
-    LOAD_COMMON2
-
-    cmpf        = s->mecc.me_cmp[size];
-    chroma_cmpf = s->mecc.me_cmp[size + 1];
-
-    map_generation= update_map_generation(c);
-
-    dmin = 1000000;
-
-    /* first line */
-    if (s->first_slice_line) {
-        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
-        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
-                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
-        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
-    }else{
-        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
-        //FIXME try some early stop
-        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
-        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
-        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
-        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
-        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
-                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
-    }
-    if(dmin>64*4){
-        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
-                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
-        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
-            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
-                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
-    }
-
-    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
-
-    *mx_ptr= best[0];
-    *my_ptr= best[1];
-
-    return dmin;
-}
-
-//try to merge with above FIXME (needs PSNR test)
 static int epzs_motion_search2(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int P[10][2],
                              int src_index, int ref_index, int16_t (*last_mv)[2],
-                             int ref_mv_scale)
+                             int ref_mv_scale, const int size)
 {
     MotionEstContext * const c= &s->me;
     int best[2]={0, 0};
     int d, dmin;
     unsigned map_generation;
     const int penalty_factor= c->penalty_factor;
-    const int size=0; //FIXME pass as arg
     const int h=8;
     const int ref_mv_stride= s->mb_stride;
     const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
diff --git a/libavcodec/mpeg2_metadata_bsf.c b/libavcodec/mpeg2_metadata_bsf.c
index e787cb37824ee..ba3a74afda692 100644
--- a/libavcodec/mpeg2_metadata_bsf.c
+++ b/libavcodec/mpeg2_metadata_bsf.c
@@ -214,7 +214,7 @@ static int mpeg2_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
 
     if (err < 0)
         av_packet_unref(out);
@@ -255,13 +255,15 @@ static int mpeg2_metadata_init(AVBSFContext *bsf)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
     return err;
 }
 
 static void mpeg2_metadata_close(AVBSFContext *bsf)
 {
     MPEG2MetadataContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->cbc, &ctx->fragment);
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/mpeg4_unpack_bframes_bsf.c b/libavcodec/mpeg4_unpack_bframes_bsf.c
index e9c535f3907f4..1daf133ce5ece 100644
--- a/libavcodec/mpeg4_unpack_bframes_bsf.c
+++ b/libavcodec/mpeg4_unpack_bframes_bsf.c
@@ -21,50 +21,36 @@
 
 #include "avcodec.h"
 #include "bsf.h"
+#include "internal.h"
 #include "mpeg4video.h"
 
 typedef struct UnpackBFramesBSFContext {
     AVPacket *b_frame;
 } UnpackBFramesBSFContext;
 
-/* search next start code */
-static unsigned int find_startcode(const uint8_t *buf, int buf_size, int *pos)
-{
-    unsigned int startcode = 0xFF;
-
-    for (; *pos < buf_size;) {
-        startcode = ((startcode << 8) | buf[*pos]) & 0xFFFFFFFF;
-        *pos +=1;
-        if ((startcode & 0xFFFFFF00) != 0x100)
-            continue;  /* no startcode */
-        return startcode;
-    }
-
-    return 0;
-}
-
 /* determine the position of the packed marker in the userdata,
  * the number of VOPs and the position of the second VOP */
 static void scan_buffer(const uint8_t *buf, int buf_size,
                         int *pos_p, int *nb_vop, int *pos_vop2) {
-    unsigned int startcode;
-    int pos, i;
+    uint32_t startcode;
+    const uint8_t *end = buf + buf_size, *pos = buf;
 
-    for (pos = 0; pos < buf_size;) {
-        startcode = find_startcode(buf, buf_size, &pos);
+    while (pos < end) {
+        startcode = -1;
+        pos = avpriv_find_start_code(pos, end, &startcode);
 
         if (startcode == USER_DATA_STARTCODE && pos_p) {
             /* check if the (DivX) userdata string ends with 'p' (packed) */
-            for (i = 0; i < 255 && pos + i + 1 < buf_size; i++) {
-                if (buf[pos + i] == 'p' && buf[pos + i + 1] == '\0') {
-                    *pos_p = pos + i;
+            for (int i = 0; i < 255 && pos + i + 1 < end; i++) {
+                if (pos[i] == 'p' && pos[i + 1] == '\0') {
+                    *pos_p = pos + i - buf;
                     break;
                 }
             }
         } else if (startcode == VOP_STARTCODE && nb_vop) {
             *nb_vop += 1;
             if (*nb_vop == 2 && pos_vop2) {
-                *pos_vop2 = pos - 4; /* subtract 4 bytes startcode */
+                *pos_vop2 = pos - buf - 4; /* subtract 4 bytes startcode */
             }
         }
     }
diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index dd0a59038d497..1a5da3192864c 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h
@@ -163,7 +163,7 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n,
 void ff_set_mpeg4_time(MpegEncContext *s);
 int ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
 
-int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb);
+int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb, int header);
 void ff_mpeg4_encode_video_packet_header(MpegEncContext *s);
 void ff_mpeg4_clean_buffers(MpegEncContext *s);
 void ff_mpeg4_stuffing(PutBitContext *pbc);
diff --git a/libavcodec/mpeg4video_parser.c b/libavcodec/mpeg4video_parser.c
index 9ebb09a63e7ad..9ca0f1497679c 100644
--- a/libavcodec/mpeg4video_parser.c
+++ b/libavcodec/mpeg4video_parser.c
@@ -89,13 +89,13 @@ static int mpeg4_decode_header(AVCodecParserContext *s1, AVCodecContext *avctx,
 
     if (avctx->extradata_size && pc->first_picture) {
         init_get_bits(gb, avctx->extradata, avctx->extradata_size * 8);
-        ret = ff_mpeg4_decode_picture_header(dec_ctx, gb);
-        if (ret < -1)
+        ret = ff_mpeg4_decode_picture_header(dec_ctx, gb, 1);
+        if (ret < 0)
             av_log(avctx, AV_LOG_WARNING, "Failed to parse extradata\n");
     }
 
     init_get_bits(gb, buf, 8 * buf_size);
-    ret = ff_mpeg4_decode_picture_header(dec_ctx, gb);
+    ret = ff_mpeg4_decode_picture_header(dec_ctx, gb, 0);
     if (s->width && (!avctx->width || !avctx->height ||
                      !avctx->coded_width || !avctx->coded_height)) {
         ret = ff_set_dimensions(avctx, s->width, s->height);
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index f435a520c8b42..b6f2ae7b7b522 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -402,7 +402,7 @@ static int mpeg4_decode_sprite_trajectory(Mpeg4DecContext *ctx, GetBitContext *g
                 llabs(sprite_offset[0][i] + sprite_delta[i][1] * (h+16LL)) >= INT_MAX ||
                 llabs(sprite_offset[0][i] + sprite_delta[i][0] * (w+16LL) + sprite_delta[i][1] * (h+16LL)) >= INT_MAX ||
                 llabs(sprite_delta[i][0] * (w+16LL)) >= INT_MAX ||
-                llabs(sprite_delta[i][1] * (w+16LL)) >= INT_MAX ||
+                llabs(sprite_delta[i][1] * (h+16LL)) >= INT_MAX ||
                 llabs(sd[0]) >= INT_MAX ||
                 llabs(sd[1]) >= INT_MAX ||
                 llabs(sprite_offset[0][i] + sd[0] * (w+16LL)) >= INT_MAX ||
@@ -1899,14 +1899,20 @@ static int mpeg4_decode_studio_block(MpegEncContext *s, int32_t block[64], int n
             code >>= 1;
             run = (1 << (additional_code_len - 1)) + code;
             idx += run;
+            if (idx > 63)
+                return AVERROR_INVALIDDATA;
             j = scantable[idx++];
             block[j] = sign ? 1 : -1;
         } else if (group >= 13 && group <= 20) {
             /* Level value (Table B.49) */
+            if (idx > 63)
+                return AVERROR_INVALIDDATA;
             j = scantable[idx++];
             block[j] = get_xbits(&s->gb, additional_code_len);
         } else if (group == 21) {
             /* Escape */
+            if (idx > 63)
+                return AVERROR_INVALIDDATA;
             j = scantable[idx++];
             additional_code_len = s->avctx->bits_per_raw_sample + s->dct_precision + 4;
             flc = get_bits(&s->gb, additional_code_len);
@@ -3056,6 +3062,8 @@ static int decode_studio_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
     if (get_bits_left(gb) <= 32)
         return 0;
 
+    s->partitioned_frame = 0;
+    s->interlaced_dct = 0;
     s->decode_mb = mpeg4_decode_studio_mb;
 
     decode_smpte_tc(ctx, gb);
@@ -3201,11 +3209,13 @@ static int decode_studio_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 
 /**
  * Decode MPEG-4 headers.
- * @return <0 if no VOP found (or a damaged one)
+ *
+ * @param  header If set the absence of a VOP is not treated as error; otherwise, it is treated as such.
+ * @return <0 if an error occurred
  *         FRAME_SKIPPED if a not coded VOP is found
- *         0 if a VOP is found
+ *         0 else
  */
-int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb)
+int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb, int header)
 {
     MpegEncContext *s = &ctx->m;
     unsigned startcode, v;
@@ -3234,6 +3244,8 @@ int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb)
                 (ctx->divx_version >= 0 || ctx->xvid_build >= 0) || s->codec_tag == AV_RL32("QMP4")) {
                 av_log(s->avctx, AV_LOG_VERBOSE, "frame skip %d\n", gb->size_in_bits);
                 return FRAME_SKIPPED;  // divx bug
+            } else if (header && get_bits_count(gb) == gb->size_in_bits) {
+                return 0; // ordinary return value for parsing of extradata
             } else
                 return AVERROR_INVALIDDATA;  // end of stream
         }
diff --git a/libavcodec/mpeg_er.c b/libavcodec/mpeg_er.c
index ada1a1692f339..f54cb8548ba07 100644
--- a/libavcodec/mpeg_er.c
+++ b/libavcodec/mpeg_er.c
@@ -78,6 +78,8 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
     ff_update_block_index(s);
 
     s->bdsp.clear_blocks(s->block[0]);
+    if (!s->chroma_y_shift)
+        s->bdsp.clear_blocks(s->block[6]);
 
     s->dest[0] = s->current_picture.f->data[0] +
                  s->mb_y * 16 * s->linesize +
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index a109f12701b14..1005e89aaeabd 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -101,7 +101,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                             "MP3ADU full parser");
                         *poutbuf = NULL;
                         *poutbuf_size = 0;
-                        return 0; /* parsers must not return error codes */
+                        return buf_size; /* parsers must not return error codes */
                     }
 
                     break;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index d4d3bea6498f9..dbb6ab9b393b7 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -538,6 +538,8 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
     s->avctx->width         = s1->avctx->width;
     s->avctx->height        = s1->avctx->height;
 
+    s->quarter_sample       = s1->quarter_sample;
+
     s->coded_picture_number = s1->coded_picture_number;
     s->picture_number       = s1->picture_number;
 
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 9fdab31a25fdc..ae3b131229f6b 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -2005,7 +2005,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 av_log(s->avctx, AV_LOG_ERROR,
                        "Internal error, negative bits\n");
 
-            assert(s->repeat_first_field == 0);
+            av_assert1(s->repeat_first_field == 0);
 
             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
@@ -3056,7 +3056,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         if(r % d == 0){
                             current_packet_size=0;
                             s->pb.buf_ptr= s->ptr_lastgob;
-                            assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
+                            av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
                         }
                     }
 
@@ -3592,8 +3592,8 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
         }
     }
 
-    assert(put_bits_count(&src->pb) % 8 ==0);
-    assert(put_bits_count(&dst->pb) % 8 ==0);
+    av_assert1(put_bits_count(&src->pb) % 8 ==0);
+    av_assert1(put_bits_count(&dst->pb) % 8 ==0);
     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
     flush_put_bits(&dst->pb);
 }
@@ -3642,11 +3642,11 @@ static void set_frame_distances(MpegEncContext * s){
 
     if(s->pict_type==AV_PICTURE_TYPE_B){
         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
-        assert(s->pb_time > 0 && s->pb_time < s->pp_time);
+        av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
     }else{
         s->pp_time= s->time - s->last_non_b_time;
         s->last_non_b_time= s->time;
-        assert(s->picture_number==0 || s->pp_time > 0);
+        av_assert1(s->picture_number==0 || s->pp_time > 0);
     }
 }
 
diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c
index 457a37e745806..16b67192b51ee 100644
--- a/libavcodec/msmpeg4dec.c
+++ b/libavcodec/msmpeg4dec.c
@@ -412,6 +412,14 @@ int ff_msmpeg4_decode_picture_header(MpegEncContext * s)
 {
     int code;
 
+    // A valid frame requires at least one bit per macroblock; we discard
+    // frames much smaller than this. Frames smaller than 1/8 of the smallest
+    // "black/skip" frame generally contain little recoverable content,
+    // while at the same time having the highest computational requirements
+    // per byte.
+    if (get_bits_left(&s->gb) * 8LL < (s->width+15)/16 * ((s->height+15)/16))
+        return AVERROR_INVALIDDATA;
+
     if(s->msmpeg4_version==1){
         int start_code = get_bits_long(&s->gb, 32);
         if(start_code!=0x00000100){
diff --git a/libavcodec/msrle.c b/libavcodec/msrle.c
index adb55b1302c07..1ab8a419851f8 100644
--- a/libavcodec/msrle.c
+++ b/libavcodec/msrle.c
@@ -95,6 +95,9 @@ static int msrle_decode_frame(AVCodecContext *avctx,
     s->buf = buf;
     s->size = buf_size;
 
+    if (buf_size < 2) //Minimally an end of picture code should be there
+        return AVERROR_INVALIDDATA;
+
     if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
 
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 3180af1d607bd..2eb366ee780c7 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -174,7 +174,7 @@ static int decode_pal_v2(MSS12Context *ctx, const uint8_t *buf, int buf_size)
     return 1 + ncol * 3;
 }
 
-static int decode_555(GetByteContext *gB, uint16_t *dst, ptrdiff_t stride,
+static int decode_555(AVCodecContext *avctx, GetByteContext *gB, uint16_t *dst, ptrdiff_t stride,
                       int keyframe, int w, int h)
 {
     int last_symbol = 0, repeat = 0, prev_avail = 0;
@@ -212,7 +212,7 @@ static int decode_555(GetByteContext *gB, uint16_t *dst, ptrdiff_t stride,
                     repeat = 0;
                     while (b-- > 130) {
                         if (repeat >= (INT_MAX >> 8) - 1) {
-                            av_log(NULL, AV_LOG_ERROR, "repeat overflow\n");
+                            av_log(avctx, AV_LOG_ERROR, "repeat overflow\n");
                             return AVERROR_INVALIDDATA;
                         }
                         repeat = (repeat << 8) + bytestream2_get_byte(gB) + 1;
@@ -634,7 +634,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (is_555) {
         bytestream2_init(&gB, buf, buf_size);
 
-        if (decode_555(&gB, (uint16_t *)c->rgb_pic, c->rgb_stride >> 1,
+        if (decode_555(avctx, &gB, (uint16_t *)c->rgb_pic, c->rgb_stride >> 1,
                        keyframe, avctx->width, avctx->height))
             return AVERROR_INVALIDDATA;
 
diff --git a/libavcodec/msvideo1.c b/libavcodec/msvideo1.c
index 29700f54b66e9..de048d8b6fb9a 100644
--- a/libavcodec/msvideo1.c
+++ b/libavcodec/msvideo1.c
@@ -62,6 +62,9 @@ static av_cold int msvideo1_decode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
+    if (avctx->width < 4 || avctx->height < 4)
+        return AVERROR_INVALIDDATA;
+
     /* figure out the colorspace based on the presence of a palette */
     if (s->avctx->bits_per_coded_sample == 8) {
         s->mode_8bit = 1;
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index 32ed65899b13d..75b14bce5b0a3 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -365,4 +365,5 @@ AVCodec ff_nuv_decoder = {
     .close          = decode_end,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index e779be3a45f1c..b60da24301dff 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -26,6 +26,7 @@
 #include "libavutil/error.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
 
@@ -34,6 +35,11 @@
 #include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct NVDECDecoder {
     CUvideodecoder decoder;
 
@@ -50,6 +56,8 @@ typedef struct NVDECFramePool {
     unsigned int nb_allocated;
 } NVDECFramePool;
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
+
 static int map_avcodec_id(enum AVCodecID id)
 {
     switch (id) {
@@ -86,7 +94,7 @@ static int map_chroma_format(enum AVPixelFormat pix_fmt)
 static int nvdec_test_capabilities(NVDECDecoder *decoder,
                                    CUVIDDECODECREATEINFO *params, void *logctx)
 {
-    CUresult err;
+    int ret;
     CUVIDDECODECAPS caps = { 0 };
 
     caps.eCodecType      = params->CodecType;
@@ -105,11 +113,9 @@ static int nvdec_test_capabilities(NVDECDecoder *decoder,
         return 0;
     }
 
-    err = decoder->cvdl->cuvidGetDecoderCaps(&caps);
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Failed querying decoder capabilities\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
+    if (ret < 0)
+        return ret;
 
     av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
     av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
@@ -149,8 +155,13 @@ static void nvdec_decoder_free(void *opaque, uint8_t *data)
 {
     NVDECDecoder *decoder = (NVDECDecoder*)data;
 
-    if (decoder->decoder)
-        decoder->cvdl->cuvidDestroyDecoder(decoder->decoder);
+    if (decoder->decoder) {
+        void *logctx = decoder->hw_device_ref->data;
+        CUcontext dummy;
+        CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+        CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
+        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
+    }
 
     av_buffer_unref(&decoder->hw_device_ref);
 
@@ -169,7 +180,6 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
     NVDECDecoder *decoder;
 
     CUcontext dummy;
-    CUresult err;
     int ret;
 
     decoder = av_mallocz(sizeof(*decoder));
@@ -198,25 +208,21 @@ static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
         goto fail;
     }
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = nvdec_test_capabilities(decoder, params, logctx);
     if (ret < 0) {
-        decoder->cudl->cuCtxPopCurrent(&dummy);
+        CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
         goto fail;
     }
 
-    err = decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params);
+    ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));
 
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Error creating a NVDEC decoder: %d\n", err);
-        ret = AVERROR_UNKNOWN;
+    if (ret < 0) {
         goto fail;
     }
 
@@ -273,7 +279,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
 
     CUVIDDECODECREATEINFO params = { 0 };
 
-    int cuvid_codec_type, cuvid_chroma_format;
+    cudaVideoSurfaceFormat output_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
     int ret = 0;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -291,6 +298,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
         return AVERROR(ENOSYS);
     }
+    chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     if (!avctx->hw_frames_ctx) {
         ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
@@ -298,6 +306,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
             return ret;
     }
 
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
+                                     cudaVideoSurfaceFormat_NV12;
+        break;
+    case 10:
+    case 12:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
+                                     cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
+        return AVERROR(ENOSYS);
+    }
+
     frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
 
     params.ulWidth             = avctx->coded_width;
@@ -305,8 +328,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
     params.ulTargetWidth       = avctx->coded_width;
     params.ulTargetHeight      = avctx->coded_height;
     params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
-    params.OutputFormat        = params.bitDepthMinus8 ?
-                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.OutputFormat        = output_format;
     params.CodecType           = cuvid_codec_type;
     params.ChromaFormat        = cuvid_chroma_format;
     params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
@@ -360,21 +382,18 @@ static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
 {
     NVDECFrame *unmap_data = (NVDECFrame*)data;
     NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
+    void *logctx = decoder->hw_device_ref->data;
     CUdeviceptr devptr = (CUdeviceptr)opaque;
-    CUresult err;
+    int ret;
     CUcontext dummy;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
         goto finish;
-    }
 
-    err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
-    if (err != CUDA_SUCCESS)
-        av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
+    CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
 
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
 finish:
     av_buffer_unref(&unmap_data->idx_ref);
@@ -388,32 +407,31 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
     NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
 
+    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
-    CUresult err;
     CUcontext dummy;
     CUdeviceptr devptr;
 
     unsigned int pitch, i;
     unsigned int offset = 0;
+    int shift_h = 0, shift_v = 0;
     int ret = 0;
 
     vpp.progressive_frame = 1;
     vpp.output_stream = decoder->stream;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    err = decoder->cvdl->cuvidMapVideoFrame(decoder->decoder, cf->idx, &devptr,
-                                            &pitch, &vpp);
-    if (err != CUDA_SUCCESS) {
-        av_log(logctx, AV_LOG_ERROR, "Error mapping a picture with CUVID: %d\n",
-               err);
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
+                                                     cf->idx, &devptr,
+                                                     &pitch, &vpp));
+    if (ret < 0)
         goto finish;
-    }
 
     unmap_data = av_mallocz(sizeof(*unmap_data));
     if (!unmap_data) {
@@ -433,24 +451,25 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
     unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
 
+    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
     for (i = 0; frame->linesize[i]; i++) {
         frame->data[i] = (uint8_t*)(devptr + offset);
         frame->linesize[i] = pitch;
-        offset += pitch * (frame->height >> (i ? 1 : 0));
+        offset += pitch * (frame->height >> (i ? shift_v : 0));
     }
 
     goto finish;
 
 copy_fail:
     if (!frame->buf[1]) {
-        decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+        CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
         av_freep(&unmap_data);
     } else {
         av_buffer_unref(&frame->buf[1]);
     }
 
 finish:
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
     return ret;
 }
 
@@ -500,9 +519,9 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
 {
     NVDECContext     *ctx = avctx->internal->hwaccel_priv_data;
     NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
+    void *logctx          = avctx;
     CUVIDPICPARAMS    *pp = &ctx->pic_params;
 
-    CUresult err;
     CUcontext dummy;
 
     int ret = 0;
@@ -512,20 +531,16 @@ int ff_nvdec_end_frame(AVCodecContext *avctx)
     pp->nNumSlices        = ctx->nb_slices;
     pp->pSliceDataOffsets = ctx->slice_offsets;
 
-    err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    err = decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params);
-    if (err != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Error decoding a picture with NVDEC: %d\n",
-               err);
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params));
+    if (ret < 0)
         goto finish;
-    }
 
 finish:
-    decoder->cudl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
 
     return ret;
 }
@@ -572,11 +587,12 @@ static AVBufferRef *nvdec_alloc_dummy(int size)
 
 int ff_nvdec_frame_params(AVCodecContext *avctx,
                           AVBufferRef *hw_frames_ctx,
-                          int dpb_size)
+                          int dpb_size,
+                          int supports_444)
 {
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     const AVPixFmtDescriptor *sw_desc;
-    int cuvid_codec_type, cuvid_chroma_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!sw_desc)
@@ -593,11 +609,16 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
         return AVERROR(EINVAL);
     }
+    chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     frames_ctx->format            = AV_PIX_FMT_CUDA;
     frames_ctx->width             = (avctx->coded_width + 1) & ~1;
     frames_ctx->height            = (avctx->coded_height + 1) & ~1;
-    frames_ctx->initial_pool_size = dpb_size;
+    /*
+     * We add two extra frames to the pool to account for deinterlacing filters
+     * holding onto their frames.
+     */
+    frames_ctx->initial_pool_size = dpb_size + 2;
 
     frames_ctx->free = nvdec_free_dummy;
     frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
@@ -607,13 +628,13 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
 
     switch (sw_desc->comp[0].depth) {
     case 8:
-        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         break;
     case 10:
-        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         break;
     case 12:
-        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         break;
     default:
         return AVERROR(EINVAL);
diff --git a/libavcodec/nvdec.h b/libavcodec/nvdec.h
index 85a0fcf7259cb..09ae8c37e6bcb 100644
--- a/libavcodec/nvdec.h
+++ b/libavcodec/nvdec.h
@@ -61,6 +61,8 @@ typedef struct NVDECContext {
     unsigned     *slice_offsets;
     int           nb_slices;
     unsigned int  slice_offsets_allocated;
+
+    int           supports_444;
 } NVDECContext;
 
 int ff_nvdec_decode_init(AVCodecContext *avctx);
@@ -72,7 +74,8 @@ int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
                                  uint32_t size);
 int ff_nvdec_frame_params(AVCodecContext *avctx,
                           AVBufferRef *hw_frames_ctx,
-                          int dpb_size);
+                          int dpb_size,
+                          int supports_444);
 int ff_nvdec_get_ref_idx(AVFrame *frame);
 
 #endif /* AVCODEC_NVDEC_H */
diff --git a/libavcodec/nvdec_h264.c b/libavcodec/nvdec_h264.c
index 25b30329d0eff..116bd4fb5de44 100644
--- a/libavcodec/nvdec_h264.c
+++ b/libavcodec/nvdec_h264.c
@@ -166,7 +166,7 @@ static int nvdec_h264_frame_params(AVCodecContext *avctx,
 {
     const H264Context *h = avctx->priv_data;
     const SPS       *sps = h->ps.sps;
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->ref_frame_count + sps->num_reorder_frames);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->ref_frame_count + sps->num_reorder_frames, 0);
 }
 
 const AVHWAccel ff_h264_nvdec_hwaccel = {
diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c
index e04a701f3ab5f..590278ba046ec 100644
--- a/libavcodec/nvdec_hevc.c
+++ b/libavcodec/nvdec_hevc.c
@@ -131,6 +131,17 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
             .IdrPicFlag                                   = IS_IDR(s),
             .bit_depth_luma_minus8                        = sps->bit_depth - 8,
             .bit_depth_chroma_minus8                      = sps->bit_depth - 8,
+#if NVDECAPI_CHECK_VERSION(9, 0)
+            .sps_range_extension_flag                     = sps->sps_range_extension_flag,
+            .transform_skip_rotation_enabled_flag         = sps->transform_skip_rotation_enabled_flag,
+            .transform_skip_context_enabled_flag          = sps->transform_skip_context_enabled_flag,
+            .implicit_rdpcm_enabled_flag                  = sps->implicit_rdpcm_enabled_flag,
+            .explicit_rdpcm_enabled_flag                  = sps->explicit_rdpcm_enabled_flag,
+            .extended_precision_processing_flag           = sps->extended_precision_processing_flag,
+            .intra_smoothing_disabled_flag                = sps->intra_smoothing_disabled_flag,
+            .persistent_rice_adaptation_enabled_flag      = sps->persistent_rice_adaptation_enabled_flag,
+            .cabac_bypass_alignment_enabled_flag          = sps->cabac_bypass_alignment_enabled_flag,
+#endif
 
             .dependent_slice_segments_enabled_flag        = pps->dependent_slice_segments_enabled_flag,
             .slice_segment_header_extension_present_flag  = pps->slice_header_extension_present_flag,
@@ -164,6 +175,13 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
             .uniform_spacing_flag                         = pps->uniform_spacing_flag,
             .num_tile_columns_minus1                      = pps->num_tile_columns - 1,
             .num_tile_rows_minus1                         = pps->num_tile_rows - 1,
+#if NVDECAPI_CHECK_VERSION(9, 0)
+            .pps_range_extension_flag                     = pps->pps_range_extensions_flag,
+            .cross_component_prediction_enabled_flag      = pps->cross_component_prediction_enabled_flag,
+            .chroma_qp_offset_list_enabled_flag           = pps->chroma_qp_offset_list_enabled_flag,
+            .diff_cu_chroma_qp_offset_depth               = pps->diff_cu_chroma_qp_offset_depth,
+            .chroma_qp_offset_list_len_minus1             = pps->chroma_qp_offset_list_len_minus1,
+#endif
 
             .NumBitsForShortTermRPSInSlice                = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
             .NumDeltaPocsOfRefRpsIdx                      = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
@@ -185,6 +203,18 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
     for (i = 0; i < pps->num_tile_rows; i++)
         ppc->row_height_minus1[i] = pps->row_height[i] - 1;
 
+#if NVDECAPI_CHECK_VERSION(9, 0)
+    if (pps->chroma_qp_offset_list_len_minus1 >= FF_ARRAY_ELEMS(ppc->cb_qp_offset_list) ||
+        pps->chroma_qp_offset_list_len_minus1 >= FF_ARRAY_ELEMS(ppc->cr_qp_offset_list)) {
+        av_log(avctx, AV_LOG_ERROR, "Too many chroma_qp_offsets\n");
+        return AVERROR(ENOSYS); /* len_minus1 is the last index written below, so it must stay under the array size */
+    }
+    for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { /* inclusive bound: copies len_minus1 + 1 entries */
+        ppc->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
+        ppc->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
+    }
+#endif
+
     if (s->rps[LT_CURR].nb_refs     > FF_ARRAY_ELEMS(ppc->RefPicSetLtCurr)       ||
         s->rps[ST_CURR_BEF].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrBefore) ||
         s->rps[ST_CURR_AFT].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrAfter)) {
@@ -269,7 +299,13 @@ static int nvdec_hevc_frame_params(AVCodecContext *avctx,
 {
     const HEVCContext *s = avctx->priv_data;
     const HEVCSPS *sps = s->ps.sps;
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + 1);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + 1, 1);
+}
+
+static int nvdec_hevc_decode_init(AVCodecContext *avctx)
+{
+    ((NVDECContext *)avctx->internal->hwaccel_priv_data)->supports_444 = 1; /* set before the shared init runs */
+    return ff_nvdec_decode_init(avctx);
 }
 
 const AVHWAccel ff_hevc_nvdec_hwaccel = {
@@ -281,7 +317,7 @@ const AVHWAccel ff_hevc_nvdec_hwaccel = {
     .end_frame            = ff_nvdec_end_frame,
     .decode_slice         = nvdec_hevc_decode_slice,
     .frame_params         = nvdec_hevc_frame_params,
-    .init                 = ff_nvdec_decode_init,
+    .init                 = nvdec_hevc_decode_init,
     .uninit               = ff_nvdec_decode_uninit,
     .priv_data_size       = sizeof(NVDECContext),
 };
diff --git a/libavcodec/nvdec_mjpeg.c b/libavcodec/nvdec_mjpeg.c
index 7e404246cec73..be39d23bae34f 100644
--- a/libavcodec/nvdec_mjpeg.c
+++ b/libavcodec/nvdec_mjpeg.c
@@ -66,7 +66,7 @@ static int nvdec_mjpeg_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // Only need storage for the current frame
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 1);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 1, 0);
 }
 
 #if CONFIG_MJPEG_NVDEC_HWACCEL
diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c
index 7293d50555480..300e1d3d88d7f 100644
--- a/libavcodec/nvdec_mpeg12.c
+++ b/libavcodec/nvdec_mpeg12.c
@@ -87,7 +87,7 @@ static int nvdec_mpeg12_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // Each frame can at most have one P and one B reference
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2, 0);
 }
 
 #if CONFIG_MPEG2_NVDEC_HWACCEL
diff --git a/libavcodec/nvdec_mpeg4.c b/libavcodec/nvdec_mpeg4.c
index 907af1391a929..739b049933ef8 100644
--- a/libavcodec/nvdec_mpeg4.c
+++ b/libavcodec/nvdec_mpeg4.c
@@ -103,7 +103,7 @@ static int nvdec_mpeg4_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // Each frame can at most have one P and one B reference
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2, 0);
 }
 
 const AVHWAccel ff_mpeg4_nvdec_hwaccel = {
diff --git a/libavcodec/nvdec_vc1.c b/libavcodec/nvdec_vc1.c
index 7257692d66a35..10e7b5ab0d74c 100644
--- a/libavcodec/nvdec_vc1.c
+++ b/libavcodec/nvdec_vc1.c
@@ -107,7 +107,7 @@ static int nvdec_vc1_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // Each frame can at most have one P and one B reference
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2, 0);
 }
 
 const AVHWAccel ff_vc1_nvdec_hwaccel = {
diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c
index 7b37445613b13..9c4608d8cfa50 100644
--- a/libavcodec/nvdec_vp8.c
+++ b/libavcodec/nvdec_vp8.c
@@ -87,7 +87,7 @@ static int nvdec_vp8_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // VP8 uses a fixed size pool of 3 possible reference frames
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 3);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 3, 0);
 }
 
 AVHWAccel ff_vp8_nvdec_hwaccel = {
diff --git a/libavcodec/nvdec_vp9.c b/libavcodec/nvdec_vp9.c
index 3b665a9bc701c..a76bcf9943318 100644
--- a/libavcodec/nvdec_vp9.c
+++ b/libavcodec/nvdec_vp9.c
@@ -166,7 +166,7 @@ static int nvdec_vp9_frame_params(AVCodecContext *avctx,
                                   AVBufferRef *hw_frames_ctx)
 {
     // VP9 uses a fixed size pool of 8 possible reference frames
-    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8, 0);
 }
 
 const AVHWAccel ff_vp9_nvdec_hwaccel = {
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index e180d7b99380f..d3413b3fd7258 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -25,12 +25,15 @@
 
 #include "libavutil/hwcontext_cuda.h"
 #include "libavutil/hwcontext.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/pixdesc.h"
 #include "internal.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
+
 #define NVENC_CAP 0x30
 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR ||             \
                     rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
@@ -119,7 +122,19 @@ static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
 
 static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
 {
-#if NVENCAPI_CHECK_VERSION(8, 1)
+#if NVENCAPI_CHECK_VERSION(9, 0)
+# if defined(_WIN32) || defined(__CYGWIN__)
+    const char *minver = "418.81";
+# else
+    const char *minver = "418.30";
+# endif /* _WIN32 || __CYGWIN__ */
+#elif NVENCAPI_CHECK_VERSION(8, 2)
+# if defined(_WIN32) || defined(__CYGWIN__)
+    const char *minver = "397.93";
+# else
+    const char *minver = "396.24";
+# endif /* _WIN32 || __CYGWIN__ */
+#elif NVENCAPI_CHECK_VERSION(8, 1)
 # if defined(_WIN32) || defined(__CYGWIN__)
     const char *minver = "390.77";
 # else
@@ -183,37 +198,23 @@ static int nvenc_push_context(AVCodecContext *avctx)
 {
     NvencContext *ctx            = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
 
     if (ctx->d3d11_device)
         return 0;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
 }
 
 static int nvenc_pop_context(AVCodecContext *avctx)
 {
     NvencContext *ctx            = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
     CUcontext dummy;
 
     if (ctx->d3d11_device)
         return 0;
 
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
 }
 
 static av_cold int nvenc_open_session(AVCodecContext *avctx)
@@ -406,32 +407,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
     NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
     char name[128] = { 0};
     int major, minor, ret;
-    CUresult cu_res;
     CUdevice cu_device;
     int loglevel = AV_LOG_VERBOSE;
 
     if (ctx->device == LIST_DEVICES)
         loglevel = AV_LOG_INFO;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Cannot access the CUDA device %d\n",
-               idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
+    if (ret < 0)
+        return ret;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
+    if (ret < 0)
+        return ret;
 
-    cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
+    if (ret < 0)
+        return ret;
 
     av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
     if (((major << 4) | minor) < NVENC_CAP) {
@@ -442,11 +434,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
     if (ctx->device != idx && ctx->device != ANY_DEVICE)
         return -1;
 
-    cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
+    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
+    if (ret < 0)
         goto fail;
-    }
 
     ctx->cu_context = ctx->cu_context_internal;
 
@@ -477,7 +467,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
         return ret;
 
 fail2:
-    dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
     ctx->cu_context_internal = NULL;
 
 fail:
@@ -555,17 +545,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
     } else {
         int i, nb_devices = 0;
 
-        if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot init CUDA\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
             return AVERROR_UNKNOWN;
-        }
 
-        if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot enumerate the CUDA devices\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
             return AVERROR_UNKNOWN;
-        }
 
         if (!nb_devices) {
             av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
@@ -1098,6 +1082,10 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
 
     hevc->tier = ctx->tier;
 
+#ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
+    hevc->useBFramesAsRef = ctx->b_ref_mode;
+#endif
+
     return 0;
 }
 
@@ -1460,7 +1448,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
     ctx->nvencoder = NULL;
 
     if (ctx->cu_context_internal)
-        dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+        CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
     ctx->cu_context = ctx->cu_context_internal = NULL;
 
 #if CONFIG_D3D11VA
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index ebb7a80fc327a..ddd6168409093 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -49,6 +49,11 @@ typedef void ID3D11Device;
 #define NVENC_HAVE_QP_MAP_MODE
 #endif
 
+// SDK 9.0 compile time feature checks
+#if NVENCAPI_CHECK_VERSION(9, 0)
+#define NVENC_HAVE_HEVC_BFRAME_REF_MODE
+#endif
+
 typedef struct NvencSurface
 {
     NV_ENC_INPUT_PTR input_surface;
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 0df7eab8cd2b0..d567d960baac4 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -116,6 +116,17 @@ static const AVOption options[] = {
                                                             OFFSET(cqp),          AV_OPT_TYPE_INT,   { .i64 = -1 }, -1, 51, VE },
     { "weighted_pred","Set 1 to enable weighted prediction",
                                                             OFFSET(weighted_pred),AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, 1, VE },
+#ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
+    { "b_ref_mode",   "Use B frames as references",         OFFSET(b_ref_mode),   AV_OPT_TYPE_INT,   { .i64 = NV_ENC_BFRAME_REF_MODE_DISABLED }, NV_ENC_BFRAME_REF_MODE_DISABLED, NV_ENC_BFRAME_REF_MODE_MIDDLE, VE, "b_ref_mode" },
+    { "disabled",     "B frames will not be used for reference", 0,               AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_DISABLED }, 0, 0, VE, "b_ref_mode" },
+    { "each",         "Each B frame will be used for reference", 0,               AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_EACH }, 0, 0, VE, "b_ref_mode" },
+    { "middle",       "Only (number of B frames)/2 will be used for reference", 0,AV_OPT_TYPE_CONST, { .i64 = NV_ENC_BFRAME_REF_MODE_MIDDLE }, 0, 0, VE, "b_ref_mode" },
+#else
+    { "b_ref_mode",   "(not supported)",                    OFFSET(b_ref_mode),   AV_OPT_TYPE_INT,   { .i64 = 0 }, 0, INT_MAX, VE, "b_ref_mode" },
+    { "disabled",     "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0,       VE, "b_ref_mode" },
+    { "each",         "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0,       VE, "b_ref_mode" },
+    { "middle",       "",                                   0,                    AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0,       VE, "b_ref_mode" },
+#endif
     { NULL }
 };
 
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 099261e1684aa..a3235bcd57b99 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -67,6 +67,7 @@ static const AVOption avcodec_options[] = {
 {"ilme", "interlaced motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"},
 {"cgop", "closed GOP", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"},
 {"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"},
+{"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D, "flags2"},
 {"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
 {"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
 {"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
@@ -217,34 +218,11 @@ static const AVOption avcodec_options[] = {
 {"buffers", "picture buffer allocations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"},
 {"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|A|D, "debug"},
 {"nomc", "skip motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_NOMC }, INT_MIN, INT_MAX, V|A|D, "debug"},
-{"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"ildctcmp", "interlaced DCT compare function", OFFSET(ildct_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #if FF_API_PRIVATE_OPT
 {"preme", "pre motion estimation", OFFSET(pre_me), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
-{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"sad", "sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"sse", "sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"satd", "sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"dct", "sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"bit", "number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"rd", "rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"zero", "0", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"vsad", "sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"vsse", "sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"nsse", "noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-#if CONFIG_SNOW_ENCODER
-{"w53", "5/3 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"w97", "9/7 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-#endif
-{"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
 {"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -271,7 +249,6 @@ static const AVOption avcodec_options[] = {
 {"nr", "noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
 {"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-{"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D, "flags2"},
 {"threads", "set the number of threads", OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
 {"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"},
 {"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, -8, 16, V|E},
@@ -310,6 +287,29 @@ static const AVOption avcodec_options[] = {
 {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 #endif
+{"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"ildctcmp", "interlaced DCT compare function", OFFSET(ildct_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sad", "sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sse", "sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"satd", "sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"dct", "sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"psnr", "sum of squared quantization errors (avoid, low quality)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"bit", "number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"rd", "rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"zero", "0", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsad", "sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"vsse", "sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"nsse", "noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#if CONFIG_SNOW_ENCODER
+{"w53", "5/3 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w97", "9/7 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#endif
+{"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"msad", "sum of absolute differences, median predicted", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_MEDIAN_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
 {"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
 #if FF_API_PRIVATE_OPT
@@ -479,6 +479,7 @@ static const AVOption avcodec_options[] = {
 {"allow_high_depth", "allow to output YUV pixel formats with a different chroma sampling than 4:2:0 and/or other than 8 bits per component", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
 {"allow_profile_mismatch", "attempt to decode anyway if HW accelerated decoder's supported profiles do not exactly match the stream", 0, AV_OPT_TYPE_CONST, {.i64 = AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH }, INT_MIN, INT_MAX, V | D, "hwaccel_flags"},
 {"extra_hw_frames", "Number of extra hardware frames to allocate for the user", OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, V|D },
+{"discard_damaged_percentage", "Percentage of damaged samples to discard a frame", OFFSET(discard_damaged_percentage), AV_OPT_TYPE_INT, {.i64 = 95 }, 0, 100, V|D },
 {NULL},
 };
 
diff --git a/libavcodec/opus.c b/libavcodec/opus.c
index aa827b604c533..f74278a7e3ad8 100644
--- a/libavcodec/opus.c
+++ b/libavcodec/opus.c
@@ -31,6 +31,7 @@
 
 #include "opus_celt.h"
 #include "opustab.h"
+#include "internal.h"
 #include "vorbis.h"
 
 static const uint16_t opus_frame_duration[32] = {
@@ -326,6 +327,8 @@ av_cold int ff_opus_parse_extradata(AVCodecContext *avctx,
     }
 
     avctx->delay = AV_RL16(extradata + 10);
+    if (avctx->internal)
+        avctx->internal->skip_samples = avctx->delay;
 
     channels = avctx->extradata ? extradata[9] : (avctx->channels == 1) ? 1 : 2;
     if (!channels) {
diff --git a/libavcodec/opus_rc.c b/libavcodec/opus_rc.c
index 3972bb0b02830..c432eb90c9116 100644
--- a/libavcodec/opus_rc.c
+++ b/libavcodec/opus_rc.c
@@ -167,7 +167,7 @@ void ff_opus_rc_put_raw(OpusRangeCoder *rc, uint32_t val, uint32_t count)
     rc->rb.cachelen = (rc->rb.cachelen + to_write) % 32;
 
     if (!rc->rb.cachelen && count) {
-        AV_WB32(rc->rb.position, rc->rb.cacheval);
+        AV_WB32((uint8_t *)rc->rb.position, rc->rb.cacheval);
         rc->rb.bytes    += 4;
         rc->rb.position -= 4;
         rc->rb.cachelen = count - to_write;
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 578785f4b417b..3c08ebcf69584 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -543,7 +543,7 @@ static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
     } else {
         ff_opus_psy_signal_eof(&s->psyctx);
-        if (!s->afq.remaining_samples)
+        if (!s->afq.remaining_samples || !avctx->frame_number)
             return 0; /* We've been flushed and there's nothing left to encode */
     }
 
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index f01cad4c84e8a..33a71de8a0fab 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -40,7 +40,9 @@ extern AVCodecParser ff_dvbsub_parser;
 extern AVCodecParser ff_dvdsub_parser;
 extern AVCodecParser ff_dvd_nav_parser;
 extern AVCodecParser ff_flac_parser;
+extern AVCodecParser ff_g723_1_parser;
 extern AVCodecParser ff_g729_parser;
+extern AVCodecParser ff_gif_parser;
 extern AVCodecParser ff_gsm_parser;
 extern AVCodecParser ff_h261_parser;
 extern AVCodecParser ff_h263_parser;
diff --git a/libavcodec/pcm-dvdenc.c b/libavcodec/pcm-dvdenc.c
new file mode 100644
index 0000000000000..d26eaf071caeb
--- /dev/null
+++ b/libavcodec/pcm-dvdenc.c
@@ -0,0 +1,197 @@
+/*
+ * LPCM codecs for PCM formats found in Video DVD streams
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+typedef struct PCMDVDContext { // Private state of the PCM-DVD encoder
+    uint8_t header[3];       // 3-byte header copied to the start of every packet
+    int block_size;          // Size of a block of samples in bytes
+    int samples_per_block;   // Number of samples per channel per block
+    int groups_per_block;    // Number of 20/24-bit sample groups per block (only assigned for S32 input)
+    uint8_t *extra_samples;  // Pointer to leftover samples from a frame (not referenced elsewhere in this file)
+    int extra_sample_count;  // Number of leftover samples in the buffer (not referenced elsewhere in this file)
+} PCMDVDContext;
+
+static av_cold int pcm_dvd_encode_init(AVCodecContext *avctx)
+{
+    PCMDVDContext *s = avctx->priv_data;
+    int quant, freq, frame_size;
+    /* Derives the 3-byte packet header and the block layout; returns 0 or AVERROR(EINVAL). */
+    switch (avctx->sample_rate) {
+    case 48000:
+        freq = 0;
+        break;
+    case 96000:
+        freq = 1;
+        break;
+    default: /* fail cleanly rather than read freq uninitialized below */
+        return AVERROR(EINVAL);
+    }
+
+    switch (avctx->sample_fmt) {
+    case AV_SAMPLE_FMT_S16:
+        quant = 0;
+        break;
+    case AV_SAMPLE_FMT_S32:
+        quant = 2;
+        break;
+    default: /* fail cleanly rather than read quant uninitialized below */
+        return AVERROR(EINVAL);
+    }
+
+    avctx->bits_per_coded_sample = 16 + quant * 4; /* 16 for S16, 24 for S32 */
+    avctx->block_align           = avctx->channels * avctx->bits_per_coded_sample / 8;
+    avctx->bit_rate              = avctx->block_align * 8LL * avctx->sample_rate;
+    if (avctx->bit_rate > 9800000) {
+        av_log(avctx, AV_LOG_ERROR, "Too big bitrate: reduce sample rate, bitdepth or channels.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->sample_fmt == AV_SAMPLE_FMT_S16) {
+        s->samples_per_block = 1;
+        s->block_size        = avctx->channels * 2;
+        frame_size           = 2008 / s->block_size;
+    } else {
+        switch (avctx->channels) {
+        case 1:
+        case 2:
+        case 4:
+            /* one group has all the samples needed */
+            s->block_size        = 4 * avctx->bits_per_coded_sample / 8;
+            s->samples_per_block = 4 / avctx->channels;
+            s->groups_per_block  = 1;
+            break;
+        case 8:
+            /* two groups have all the samples needed */
+            s->block_size        = 8 * avctx->bits_per_coded_sample / 8;
+            s->samples_per_block = 1;
+            s->groups_per_block  = 2;
+            break;
+        default:
+            /* need avctx->channels groups */
+            s->block_size        = 4 * avctx->channels * avctx->bits_per_coded_sample / 8;
+            s->samples_per_block = 4;
+            s->groups_per_block  = avctx->channels;
+            break;
+        }
+        frame_size = FFALIGN(2008 / s->block_size, s->samples_per_block);
+    }
+
+    s->header[0] = 0x0c;
+    s->header[1] = (quant << 6) | (freq << 4) | (avctx->channels - 1);
+    s->header[2] = 0x80;
+
+    if (!avctx->frame_size)
+        avctx->frame_size = frame_size;
+
+    return 0;
+}
+
+static int pcm_dvd_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                                const AVFrame *frame, int *got_packet_ptr)
+{
+    PCMDVDContext *s = avctx->priv_data;
+    int samples = frame->nb_samples * avctx->channels;
+    int64_t pkt_size = (frame->nb_samples / s->samples_per_block) * s->block_size + 3;
+    int blocks = (pkt_size - 3) / s->block_size;
+    const int16_t *src16;
+    const int32_t *src32;
+    PutByteContext pb;
+    int ret;
+    /* Packs one frame as header + big-endian samples; returns 0 or the allocation error. */
+    if ((ret = ff_alloc_packet2(avctx, avpkt, pkt_size, 0)) < 0)
+        return ret;
+
+    memcpy(avpkt->data, s->header, 3);
+
+    src16 = (const int16_t *)frame->data[0];
+    src32 = (const int32_t *)frame->data[0];
+
+    bytestream2_init_writer(&pb, avpkt->data + 3, avpkt->size - 3);
+    /* Every loop tests its counter first, so a zero count writes (and reads) nothing. */
+    switch (avctx->sample_fmt) {
+    case AV_SAMPLE_FMT_S16:
+        while (samples-- > 0) {
+            bytestream2_put_be16(&pb, *src16++);
+        }
+        break;
+    case AV_SAMPLE_FMT_S32:
+        if (avctx->channels == 1) {
+            while (blocks-- > 0) {
+                for (int i = 2; i; i--) {
+                    bytestream2_put_be16(&pb, src32[0] >> 16); /* top 16 bits of each sample */
+                    bytestream2_put_be16(&pb, src32[1] >> 16);
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8)); /* then bits 8..15 */
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8));
+                }
+            }
+        } else {
+            while (blocks-- > 0) {
+                for (int i = s->groups_per_block; i; i--) {
+                    bytestream2_put_be16(&pb, src32[0] >> 16); /* top 16 bits of four samples */
+                    bytestream2_put_be16(&pb, src32[1] >> 16);
+                    bytestream2_put_be16(&pb, src32[2] >> 16);
+                    bytestream2_put_be16(&pb, src32[3] >> 16);
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8)); /* then bits 8..15 */
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8));
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8));
+                    bytestream2_put_byte(&pb, (uint8_t)((*src32++) >> 8));
+                }
+            }
+        }
+        break;
+    }
+
+    avpkt->pts      = frame->pts;
+    avpkt->size     = pkt_size;
+    avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
+    *got_packet_ptr = 1;
+
+    return 0;
+}
+
+static av_cold int pcm_dvd_encode_close(AVCodecContext *avctx)
+{
+    return 0; /* no-op: nothing in this file allocates state that would need freeing */
+}
+
+AVCodec ff_pcm_dvd_encoder = {
+    .name                  = "pcm_dvd",
+    .long_name             = NULL_IF_CONFIG_SMALL("PCM signed 16|20|24-bit big-endian for DVD media"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_PCM_DVD,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME,
+    .priv_data_size        = sizeof(PCMDVDContext),
+    .init                  = pcm_dvd_encode_init,
+    .encode2               = pcm_dvd_encode_frame,
+    .close                 = pcm_dvd_encode_close,
+    /* Each list below ends with its final 0 / AV_SAMPLE_FMT_NONE entry. */
+    .supported_samplerates = (const int[]) { 48000, 96000, 0},
+    .sample_fmts           = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_NONE,
+    },
+    .channel_layouts       = (const uint64_t[]) {
+        AV_CH_LAYOUT_MONO, AV_CH_LAYOUT_STEREO,
+        AV_CH_LAYOUT_5POINT1, AV_CH_LAYOUT_7POINT1, 0,
+    },
+};
diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 8c326c68294c7..ffcbccc77db58 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -42,6 +42,9 @@ static av_cold int pcm_encode_init(AVCodecContext *avctx)
     case AV_CODEC_ID_PCM_MULAW:
         pcm_ulaw_tableinit();
         break;
+    case AV_CODEC_ID_PCM_VIDC:
+        pcm_vidc_tableinit();
+        break;
     default:
         break;
     }
@@ -216,6 +219,12 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             *dst++ = linear_to_ulaw[(v + 32768) >> 2];
         }
         break;
+    case AV_CODEC_ID_PCM_VIDC:
+        for (; n > 0; n--) {
+            v      = *samples++;
+            *dst++ = linear_to_vidc[(v + 32768) >> 2];
+        }
+        break;
     default:
         return -1;
     }
@@ -249,6 +258,10 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx)
         for (i = 0; i < 256; i++)
             s->table[i] = ulaw2linear(i);
         break;
+    case AV_CODEC_ID_PCM_VIDC:
+        for (i = 0; i < 256; i++)
+            s->table[i] = vidc2linear(i);
+        break;
     case AV_CODEC_ID_PCM_F16LE:
     case AV_CODEC_ID_PCM_F24LE:
         s->scale = 1. / (1 << (avctx->bits_per_coded_sample - 1));
@@ -485,6 +498,7 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
         break;
     case AV_CODEC_ID_PCM_ALAW:
     case AV_CODEC_ID_PCM_MULAW:
+    case AV_CODEC_ID_PCM_VIDC:
         for (; n > 0; n--) {
             AV_WN16A(samples, s->table[*src++]);
             samples += 2;
@@ -612,3 +626,4 @@ PCM_CODEC  (PCM_U32LE,        AV_SAMPLE_FMT_S32, pcm_u32le,        "PCM unsigned
 PCM_DECODER(PCM_ZORK,         AV_SAMPLE_FMT_U8,  pcm_zork,         "PCM Zork");
 PCM_CODEC  (PCM_S64BE,        AV_SAMPLE_FMT_S64, pcm_s64be,        "PCM signed 64-bit big-endian");
 PCM_CODEC  (PCM_S64LE,        AV_SAMPLE_FMT_S64, pcm_s64le,        "PCM signed 64-bit little-endian");
+PCM_CODEC  (PCM_VIDC,         AV_SAMPLE_FMT_S16, pcm_vidc,         "PCM Archimedes VIDC");
diff --git a/libavcodec/pcm_tablegen.c b/libavcodec/pcm_tablegen.c
index bf8e7fb707547..473a47f6d96f8 100644
--- a/libavcodec/pcm_tablegen.c
+++ b/libavcodec/pcm_tablegen.c
@@ -29,11 +29,13 @@ int main(void)
 {
     pcm_alaw_tableinit();
     pcm_ulaw_tableinit();
+    pcm_vidc_tableinit();
 
     write_fileheader();
 
     WRITE_ARRAY("static const", uint8_t, linear_to_alaw);
     WRITE_ARRAY("static const", uint8_t, linear_to_ulaw);
+    WRITE_ARRAY("static const", uint8_t, linear_to_vidc);
 
     return 0;
 }
diff --git a/libavcodec/pcm_tablegen.h b/libavcodec/pcm_tablegen.h
index 7ce147f768b59..d8763abc40d1c 100644
--- a/libavcodec/pcm_tablegen.h
+++ b/libavcodec/pcm_tablegen.h
@@ -36,6 +36,12 @@
 
 #define         BIAS            (0x84)      /* Bias for linear code. */
 
+#define         VIDC_SIGN_BIT    (1)
+#define         VIDC_QUANT_MASK  (0x1E)
+#define         VIDC_QUANT_SHIFT (1)
+#define         VIDC_SEG_SHIFT   (5)
+#define         VIDC_SEG_MASK    (0xE0)
+
 /* alaw2linear() - Convert an A-law value to 16-bit linear PCM */
 static av_cold int alaw2linear(unsigned char a_val)
 {
@@ -69,14 +75,30 @@ static av_cold int ulaw2linear(unsigned char u_val)
         return (u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS);
 }
 
+static av_cold int vidc2linear(unsigned char u_val)
+{
+        /* Bit 0 is the sign; the masks select a 4-bit step and a 3-bit segment. */
+        const int step         = (u_val & VIDC_QUANT_MASK) >> VIDC_QUANT_SHIFT;
+        const unsigned segment = ((unsigned)u_val & VIDC_SEG_MASK) >> VIDC_SEG_SHIFT;
+        /* Bias the scaled step, then shift it up by the segment number. */
+        int magnitude = ((step << 3) + BIAS) << segment;
+
+        /* Take the bias out again; a set sign bit yields the negated value. */
+        if (u_val & VIDC_SIGN_BIT)
+                return BIAS - magnitude;
+        return magnitude - BIAS;
+}
+
 #if CONFIG_HARDCODED_TABLES
 #define pcm_alaw_tableinit()
 #define pcm_ulaw_tableinit()
+#define pcm_vidc_tableinit()
 #include "libavcodec/pcm_tables.h"
 #else
 /* 16384 entries per table */
 static uint8_t linear_to_alaw[16384];
 static uint8_t linear_to_ulaw[16384];
+static uint8_t linear_to_vidc[16384];
 
 static av_cold void build_xlaw_table(uint8_t *linear_to_xlaw,
                              int (*xlaw2linear)(unsigned char),
@@ -111,6 +133,11 @@ static void pcm_ulaw_tableinit(void)
 {
     build_xlaw_table(linear_to_ulaw, ulaw2linear, 0xff);
 }
+
+static void pcm_vidc_tableinit(void)
+{
+    build_xlaw_table(linear_to_vidc, vidc2linear, 0xff); /* fills linear_to_vidc; same 0xff mask argument as the ulaw call */
+}
 #endif /* CONFIG_HARDCODED_TABLES */
 
 #endif /* AVCODEC_PCM_TABLEGEN_H */
diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index b897d72aab351..8c10f6d573313 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -676,6 +676,11 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
              */
             break;
         case DISPLAY_SEGMENT:
+            if (*data_size) {
+                av_log(avctx, AV_LOG_ERROR, "Duplicate display segment\n");
+                ret = AVERROR_INVALIDDATA;
+                break;
+            }
             ret = display_end_segment(avctx, data, buf, segment_length);
             if (ret >= 0)
                 *data_size = ret;
diff --git a/libavcodec/pictordec.c b/libavcodec/pictordec.c
index b29a484534150..65d2d49060b4f 100644
--- a/libavcodec/pictordec.c
+++ b/libavcodec/pictordec.c
@@ -236,6 +236,9 @@ static int decode_frame(AVCodecContext *avctx,
             }
         }
 
+        if (s->nb_planes - plane > 1)
+            return AVERROR_INVALIDDATA;
+
         if (plane < s->nb_planes && x < avctx->width) {
             int run = (y + 1) * avctx->width - x;
             if (bits_per_plane == 8)
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 01144680f200b..189bb9a4c1733 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -578,6 +578,10 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, PNGDecContext *s,
     }
     s->color_type       = bytestream2_get_byte(&s->gb);
     s->compression_type = bytestream2_get_byte(&s->gb);
+    if (s->compression_type) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid compression method %d\n", s->compression_type);
+        goto error;
+    }
     s->filter_type      = bytestream2_get_byte(&s->gb);
     s->interlace_type   = bytestream2_get_byte(&s->gb);
     bytestream2_skip(&s->gb, 4); /* crc */
diff --git a/libavcodec/pnm.c b/libavcodec/pnm.c
index b06a6e81b5190..17926f256f4d7 100644
--- a/libavcodec/pnm.c
+++ b/libavcodec/pnm.c
@@ -36,13 +36,15 @@ static void pnm_get(PNMContext *sc, char *str, int buf_size)
 {
     char *s;
     int c;
+    uint8_t *bs  = sc->bytestream;
+    const uint8_t *end = sc->bytestream_end;
 
     /* skip spaces and comments */
-    while (sc->bytestream < sc->bytestream_end) {
-        c = *sc->bytestream++;
+    while (bs < end) {
+        c = *bs++;
         if (c == '#')  {
-            while (c != '\n' && sc->bytestream < sc->bytestream_end) {
-                c = *sc->bytestream++;
+            while (c != '\n' && bs < end) {
+                c = *bs++;
             }
         } else if (!pnm_space(c)) {
             break;
@@ -50,12 +52,13 @@ static void pnm_get(PNMContext *sc, char *str, int buf_size)
     }
 
     s = str;
-    while (sc->bytestream < sc->bytestream_end && !pnm_space(c)) {
+    while (bs < end && !pnm_space(c)) {
         if ((s - str)  < buf_size - 1)
             *s++ = c;
-        c = *sc->bytestream++;
+        c = *bs++;
     }
     *s = '\0';
+    sc->bytestream = bs;
 }
 
 int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s)
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index f510544dda581..d8a3baa34d6f8 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -625,7 +625,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
     q1 = newq1;                                                                              \
 }
 
-static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
+static void h264_v_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {
 
     if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
         register vec_u8 p2 = vec_ld(-3*stride, pix);
@@ -642,7 +642,7 @@ static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
     }
 }
 
-static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
+static void h264_h_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {
 
     register vec_u8 line0, line1, line2, line3, line4, line5;
     if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
diff --git a/libavcodec/ppc/hevcdsp.c b/libavcodec/ppc/hevcdsp.c
index dcae43305a678..c1d562a4094aa 100644
--- a/libavcodec/ppc/hevcdsp.c
+++ b/libavcodec/ppc/hevcdsp.c
@@ -58,7 +58,13 @@ static av_always_inline void transform4x4(vec_s16 src_01, vec_s16 src_23,
     e1 = vec_msums(src_02, trans4[2], zero);
     o1 = vec_msums(src_13, trans4[3], zero);
 
-    add = vec_sl(vec_splat_s32(1), vec_splat_u32(shift - 1));
+    switch(shift) {
+    case  7: add = vec_sl(vec_splat_s32(1), vec_splat_u32( 7 - 1)); break;
+    case 10: add = vec_sl(vec_splat_s32(1), vec_splat_u32(10 - 1)); break;
+    case 12: add = vec_sl(vec_splat_s32(1), vec_splat_u32(12 - 1)); break;
+    default: abort();
+    }
+
     e0 = vec_add(e0, add);
     e1 = vec_add(e1, add);
 
@@ -72,7 +78,14 @@ static av_always_inline void scale(vec_s32 res[4], vec_s16 res_packed[2],
                                    const int shift)
 {
     int i;
-    vec_u32 v_shift = vec_splat_u32(shift);
+    vec_u32 v_shift;
+
+    switch(shift) {
+    case  7: v_shift = vec_splat_u32(7) ; break;
+    case 10: v_shift = vec_splat_u32(10); break;
+    case 12: v_shift = vec_splat_u32(12); break;
+    default: abort();
+    }
 
     for (i = 0; i < 4; i++)
         res[i] = vec_sra(res[i], v_shift);
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index c31399f83e142..eaf0d68d329dd 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c
@@ -151,4 +151,29 @@ const AVProfile ff_sbc_profiles[] = {
     { FF_PROFILE_UNKNOWN },
 };
 
+const AVProfile ff_prores_profiles[] = { /* FF_PROFILE_PRORES_* value paired with its name string */
+    { FF_PROFILE_PRORES_PROXY,    "Proxy"    },
+    { FF_PROFILE_PRORES_LT,       "LT"       },
+    { FF_PROFILE_PRORES_STANDARD, "Standard" },
+    { FF_PROFILE_PRORES_HQ,       "HQ"       },
+    { FF_PROFILE_PRORES_4444,     "4444"     },
+    { FF_PROFILE_PRORES_XQ,       "XQ"       },
+    { FF_PROFILE_UNKNOWN } /* last entry of the table */
+};
+
+const AVProfile ff_mjpeg_profiles[] = { /* FF_PROFILE_MJPEG_* value paired with its name string */
+    { FF_PROFILE_MJPEG_HUFFMAN_BASELINE_DCT,            "Baseline"    },
+    { FF_PROFILE_MJPEG_HUFFMAN_EXTENDED_SEQUENTIAL_DCT, "Sequential"  },
+    { FF_PROFILE_MJPEG_HUFFMAN_PROGRESSIVE_DCT,         "Progressive" },
+    { FF_PROFILE_MJPEG_HUFFMAN_LOSSLESS,                "Lossless"    },
+    { FF_PROFILE_MJPEG_JPEG_LS,                         "JPEG LS"     },
+    { FF_PROFILE_UNKNOWN } /* last entry of the table */
+};
+
+const AVProfile ff_arib_caption_profiles[] = { /* FF_PROFILE_ARIB_* value paired with its name string */
+    { FF_PROFILE_ARIB_PROFILE_A, "Profile A" },
+    { FF_PROFILE_ARIB_PROFILE_C, "Profile C" },
+    { FF_PROFILE_UNKNOWN } /* last entry of the table */
+};
+
 #endif /* !CONFIG_SMALL */
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index 9d7e211e15da3..a53b67e7f2ea6 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h
@@ -33,5 +33,8 @@ extern const AVProfile ff_vc1_profiles[];
 extern const AVProfile ff_vp9_profiles[];
 extern const AVProfile ff_av1_profiles[];
 extern const AVProfile ff_sbc_profiles[];
+extern const AVProfile ff_prores_profiles[];
+extern const AVProfile ff_mjpeg_profiles[];
+extern const AVProfile ff_arib_caption_profiles[];
 
 #endif /* AVCODEC_PROFILES_H */
diff --git a/libavcodec/prores_metadata_bsf.c b/libavcodec/prores_metadata_bsf.c
new file mode 100644
index 0000000000000..0510d3520a565
--- /dev/null
+++ b/libavcodec/prores_metadata_bsf.c
@@ -0,0 +1,172 @@
+/*
+ * Prores Metadata bitstream filter
+ * Copyright (c) 2018 Jokyo Images
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Prores Metadata bitstream filter
+ * set frame colorspace property
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "bsf.h"
+
+typedef struct ProresMetadataContext {
+    const AVClass *class;
+    /* Option values; -1 means the corresponding frame header byte is left unchanged. */
+    int color_primaries;          /* written to buf[8+14] by prores_metadata() */
+    int transfer_characteristics; /* written to buf[8+15] by prores_metadata() */
+    int matrix_coefficients;      /* written to buf[8+16] by prores_metadata() */
+} ProresMetadataContext;
+
+/**
+ * Rewrite the color description bytes of one ProRes frame in place.
+ *
+ * Returns a negative error code if no packet could be fetched, if it cannot be
+ * made writable, or if the frame header check fails (pkt is then unreferenced).
+ */
+static int prores_metadata(AVBSFContext *bsf, AVPacket *pkt)
+{
+    ProresMetadataContext *ctx = bsf->priv_data;
+    const char *problem = NULL;
+    uint8_t *frame;
+    int err;
+
+    /* Fetch the next packet; on failure it is returned without unreferencing. */
+    err = ff_bsf_get_packet_ref(bsf, pkt);
+    if (err < 0)
+        return err;
+
+    /* The header bytes are patched in place, so the data must be writable. */
+    err = av_packet_make_writable(pkt);
+    if (err < 0) {
+        av_packet_unref(pkt);
+        return err;
+    }
+    frame = pkt->data;
+
+    /* Validate the start of the frame: size, "icpf" tag, header size field. */
+    if (pkt->size < 28)
+        problem = "not enough data in prores frame\n";
+    else if (AV_RL32(frame + 4) != AV_RL32("icpf"))
+        problem = "invalid frame header\n";
+    else if (AV_RB16(frame + 8) < 28)
+        problem = "invalid frame header size\n";
+
+    if (problem) {
+        av_log(bsf, AV_LOG_ERROR, "%s", problem);
+        av_packet_unref(pkt);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Overwrite only the fields that were requested (-1 keeps the original). */
+    if (ctx->color_primaries != -1)
+        frame[8 + 14] = ctx->color_primaries;
+    if (ctx->transfer_characteristics != -1)
+        frame[8 + 15] = ctx->transfer_characteristics;
+    if (ctx->matrix_coefficients != -1)
+        frame[8 + 16] = ctx->matrix_coefficients;
+
+    return err;
+}
+
+static const enum AVCodecID codec_ids[] = {
+    AV_CODEC_ID_PRORES, AV_CODEC_ID_NONE, /* ProRes only; AV_CODEC_ID_NONE is the closing entry */
+};
+
+static int prores_metadata_init(AVBSFContext *bsf)
+{
+    /* Accepted values; -1 and 0 are what the "auto" and "unknown" option constants map to. */
+    static const int primaries_ok[] = {
+        -1, 0, AVCOL_PRI_BT709, AVCOL_PRI_BT470BG, AVCOL_PRI_SMPTE170M,
+        AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432,
+    };
+    static const int matrix_ok[] = {
+        -1, 0, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M, AVCOL_SPC_BT2020_NCL,
+    };
+
+    ProresMetadataContext *ctx = bsf->priv_data;
+    int i, found;
+
+    /* Color primaries are validated first; the first failure is logged and returned. */
+    for (found = 0, i = 0; i < (int)(sizeof(primaries_ok) / sizeof(primaries_ok[0])); i++)
+        found |= ctx->color_primaries == primaries_ok[i];
+    if (!found) {
+        av_log(bsf, AV_LOG_ERROR, "Color primaries %d is not a valid value\n", ctx->color_primaries);
+        return AVERROR(EINVAL);
+    }
+
+    /* Matrix coefficients (the "colorspace" option) are validated second. */
+    for (found = 0, i = 0; i < (int)(sizeof(matrix_ok) / sizeof(matrix_ok[0])); i++)
+        found |= ctx->matrix_coefficients == matrix_ok[i];
+    if (!found) {
+        av_log(bsf, AV_LOG_ERROR, "Colorspace %d is not a valid value\n", ctx->matrix_coefficients);
+        return AVERROR(EINVAL);
+    }
+
+    /* transfer_characteristics is not checked in this function. */
+    return 0;
+}
+
+#define OFFSET(x) offsetof(ProresMetadataContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_BSF_PARAM)
+static const AVOption options[] = { /* each option defaults to -1 ("auto") and is followed by its named constants */
+    {"color_primaries", "select color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_PRI_SMPTE432, FLAGS, "color_primaries"},
+    {"auto", "keep the same color primaries",  0, AV_OPT_TYPE_CONST, {.i64=-1},                     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"unknown",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=0},                      INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt709",                           NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT709},        INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt470bg",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT470BG},      INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte170m",                       NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE170M},    INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt2020",                          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT2020},       INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte431",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE431},     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte432",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE432},     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    /* transfer characteristics option and its named constants */
+    {"color_trc", "select color transfer", OFFSET(transfer_characteristics), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_TRC_BT709, FLAGS, "color_trc"},
+    {"auto", "keep the same color transfer",  0, AV_OPT_TYPE_CONST, {.i64=-1},                               INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"unknown",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=0},                                INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt709",                          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_BT709},                  INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    /* matrix coefficients ("colorspace") option and its named constants */
+    {"colorspace", "select colorspace", OFFSET(matrix_coefficients), AV_OPT_TYPE_INT, {.i64=-1}, -1,  AVCOL_SPC_BT2020_NCL, FLAGS, "colorspace"},
+    {"auto", "keep the same colorspace",  0, AV_OPT_TYPE_CONST, {.i64=-1},                            INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"unknown",                    NULL,  0, AV_OPT_TYPE_CONST, {.i64=0},                             INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt709",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT709},               INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"smpte170m",                  NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_SMPTE170M},           INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt2020nc",                   NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT2020_NCL},          INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    /* closing entry of the option table */
+    { NULL },
+};
+
+static const AVClass prores_metadata_class = { /* referenced by ff_prores_metadata_bsf.priv_class */
+    .class_name = "prores_metadata_bsf",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the color_primaries / color_trc / colorspace table */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+const AVBitStreamFilter ff_prores_metadata_bsf = {
+    .name           = "prores_metadata",
+    .codec_ids      = codec_ids,              /* list containing only ProRes */
+    .priv_class     = &prores_metadata_class, /* carries the option table */
+    .priv_data_size = sizeof(ProresMetadataContext),
+    .init           = prores_metadata_init,
+    .filter         = prores_metadata,
+};
diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h
index 14ede5d16bf26..06e41dd09ad08 100644
--- a/libavcodec/proresdec.h
+++ b/libavcodec/proresdec.h
@@ -22,6 +22,7 @@
 #ifndef AVCODEC_PRORESDEC_H
 #define AVCODEC_PRORESDEC_H
 
+#include "get_bits.h"
 #include "blockdsp.h"
 #include "proresdsp.h"
 
@@ -50,6 +51,7 @@ typedef struct {
     const uint8_t *scan;
     int first_field;
     int alpha_info;
+    void (*unpack_alpha)(GetBitContext *gb, uint16_t *dst, int num_coeffs, const int num_bits);
 } ProresContext;
 
 #endif /* AVCODEC_PRORESDEC_H */
diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
index d818e5d8da582..6209c229c9d01 100644
--- a/libavcodec/proresdec2.c
+++ b/libavcodec/proresdec2.c
@@ -33,6 +33,7 @@
 #include "get_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
+#include "profiles.h"
 #include "simple_idct.h"
 #include "proresdec.h"
 #include "proresdata.h"
@@ -45,15 +46,138 @@ static void permute(uint8_t *dst, const uint8_t *src, const uint8_t permutation[
         dst[i] = permutation[src[i]];
 }
 
+#define ALPHA_SHIFT_16_TO_10(alpha_val) ((alpha_val) >> 6)
+#define ALPHA_SHIFT_8_TO_10(alpha_val)  (((alpha_val) << 2) | ((alpha_val) >> 6)) /* low bits filled from the MSBs: 0xFF -> 0x3FF */
+#define ALPHA_SHIFT_16_TO_12(alpha_val) ((alpha_val) >> 4)
+#define ALPHA_SHIFT_8_TO_12(alpha_val)  (((alpha_val) << 4) | ((alpha_val) >> 4)) /* low bits filled from the MSBs: 0xFF -> 0xFFF */
+
+/* Decode num_coeffs delta/run-length coded alpha samples of num_bits (8 or 16)
+ * bits from gb into dst, rescaled to decode_precision (10 or 12) bits. */
+static inline void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
+                                const int num_bits, const int decode_precision) {
+    const int mask = (1 << num_bits) - 1;
+    int i, val, idx = 0, alpha_val = mask; /* the predictor starts at full scale */
+
+    do {
+        do {
+            if (get_bits1(gb)) { /* flag 1: a full num_bits-wide delta follows */
+                val = get_bits(gb, num_bits);
+            } else { /* flag 0: short delta whose lowest bit is the sign */
+                int sign;
+                val  = get_bits(gb, num_bits == 16 ? 7 : 4);
+                sign = val & 1;
+                val  = (val + 2) >> 1;
+                if (sign)
+                    val = -val;
+            }
+            alpha_val = (alpha_val + val) & mask; /* deltas wrap modulo 2^num_bits */
+            if (num_bits == 16) {
+                if (decode_precision == 10) {
+                    dst[idx++] = ALPHA_SHIFT_16_TO_10(alpha_val);
+                } else { /* 12b */
+                    dst[idx++] = ALPHA_SHIFT_16_TO_12(alpha_val);
+                }
+            } else {
+                if (decode_precision == 10) {
+                    dst[idx++] = ALPHA_SHIFT_8_TO_10(alpha_val);
+                } else { /* 12b */
+                    dst[idx++] = ALPHA_SHIFT_8_TO_12(alpha_val);
+                }
+            }
+            if (idx >= num_coeffs)
+                break;
+        } while (get_bits_left(gb)>0 && get_bits1(gb)); /* flag 1: another value follows directly */
+        val = get_bits(gb, 4); /* repeat count; 0 escapes to an 11-bit count */
+        if (!val)
+            val = get_bits(gb, 11);
+        if (idx + val > num_coeffs) /* never write past the requested sample count */
+            val = num_coeffs - idx;
+        if (num_bits == 16) {
+            for (i = 0; i < val; i++) {
+                if (decode_precision == 10) {
+                    dst[idx++] = ALPHA_SHIFT_16_TO_10(alpha_val);
+                } else { /* 12b */
+                    dst[idx++] = ALPHA_SHIFT_16_TO_12(alpha_val);
+                }
+            }
+        } else {
+            for (i = 0; i < val; i++) {
+                if (decode_precision == 10) {
+                    dst[idx++] = ALPHA_SHIFT_8_TO_10(alpha_val);
+                } else { /* 12b */
+                    dst[idx++] = ALPHA_SHIFT_8_TO_12(alpha_val);
+                }
+            }
+        }
+    } while (idx < num_coeffs);
+}
+
+static void unpack_alpha_10(GetBitContext *gb, uint16_t *dst, int num_coeffs,
+                            const int num_bits)
+{ /* 10-bit output variant of unpack_alpha(); decode_init() installs it as ctx->unpack_alpha */
+    if (num_bits == 16) { /* literal bit depths per branch: presumably lets the inlined body be specialised -- confirm */
+        unpack_alpha(gb, dst, num_coeffs, 16, 10);
+    } else { /* 8 bits alpha */
+        unpack_alpha(gb, dst, num_coeffs, 8, 10);
+    }
+}
+
+static void unpack_alpha_12(GetBitContext *gb, uint16_t *dst, int num_coeffs,
+                            const int num_bits)
+{ /* 12-bit output variant of unpack_alpha(); decode_init() installs it as ctx->unpack_alpha */
+    if (num_bits == 16) { /* literal bit depths per branch: presumably lets the inlined body be specialised -- confirm */
+        unpack_alpha(gb, dst, num_coeffs, 16, 12);
+    } else { /* 8 bits alpha */
+        unpack_alpha(gb, dst, num_coeffs, 8, 12);
+    }
+}
+
 static av_cold int decode_init(AVCodecContext *avctx)
 {
+    int ret = 0;
     ProresContext *ctx = avctx->priv_data;
     uint8_t idct_permutation[64];
 
     avctx->bits_per_raw_sample = 10;
 
+    switch (avctx->codec_tag) { /* map the container fourcc to a profile; only ap4h/ap4x select 12-bit decoding */
+    case MKTAG('a','p','c','o'):
+        avctx->profile = FF_PROFILE_PRORES_PROXY;
+        break;
+    case MKTAG('a','p','c','s'):
+        avctx->profile = FF_PROFILE_PRORES_LT;
+        break;
+    case MKTAG('a','p','c','n'):
+        avctx->profile = FF_PROFILE_PRORES_STANDARD;
+        break;
+    case MKTAG('a','p','c','h'):
+        avctx->profile = FF_PROFILE_PRORES_HQ;
+        break;
+    case MKTAG('a','p','4','h'):
+        avctx->profile = FF_PROFILE_PRORES_4444;
+        avctx->bits_per_raw_sample = 12;
+        break;
+    case MKTAG('a','p','4','x'):
+        avctx->profile = FF_PROFILE_PRORES_XQ;
+        avctx->bits_per_raw_sample = 12;
+        break;
+    default: /* unrecognised tag: keep the 10-bit default set before the switch */
+        avctx->profile = FF_PROFILE_UNKNOWN;
+        av_log(avctx, AV_LOG_WARNING, "Unknown prores profile %u\n", avctx->codec_tag); /* codec_tag is unsigned */
+    }
+
+    if (avctx->bits_per_raw_sample == 10) {
+        av_log(avctx, AV_LOG_DEBUG, "Auto bitdepth precision. Use 10b decoding based on codec tag.\n");
+    } else { /* 12b */
+        av_log(avctx, AV_LOG_DEBUG, "Auto bitdepth precision. Use 12b decoding based on codec tag.\n");
+    }
+
     ff_blockdsp_init(&ctx->bdsp, avctx);
-    ff_proresdsp_init(&ctx->prodsp, avctx);
+    ret = ff_proresdsp_init(&ctx->prodsp, avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Fail to init proresdsp for bits per raw sample %d\n", avctx->bits_per_raw_sample);
+        return ret;
+    }
 
     ff_init_scantable_permutation(idct_permutation,
                                   ctx->prodsp.idct_permutation_type);
@@ -61,7 +185,15 @@ static av_cold int decode_init(AVCodecContext *avctx)
     permute(ctx->progressive_scan, ff_prores_progressive_scan, idct_permutation);
     permute(ctx->interlaced_scan, ff_prores_interlaced_scan, idct_permutation);
 
-    return 0;
+    if (avctx->bits_per_raw_sample == 10){
+        ctx->unpack_alpha = unpack_alpha_10;
+    } else if (avctx->bits_per_raw_sample == 12){
+        ctx->unpack_alpha = unpack_alpha_12;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Fail to set unpack_alpha for bits per raw sample %d\n", avctx->bits_per_raw_sample);
+        return AVERROR_BUG;
+    }
+    return ret;
 }
 
 static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
@@ -87,10 +219,14 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
 
     width  = AV_RB16(buf + 8);
     height = AV_RB16(buf + 10);
+
     if (width != avctx->width || height != avctx->height) {
-        av_log(avctx, AV_LOG_ERROR, "picture resolution change: %dx%d -> %dx%d\n",
+        int ret;
+
+        av_log(avctx, AV_LOG_WARNING, "picture resolution change: %dx%d -> %dx%d\n",
                avctx->width, avctx->height, width, height);
-        return AVERROR_PATCHWELCOME;
+        if ((ret = ff_set_dimensions(avctx, width, height)) < 0)
+            return ret;
     }
 
     ctx->frame_type = (buf[12] >> 2) & 3;
@@ -113,9 +249,17 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
     }
 
     if (ctx->alpha_info) {
-        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUVA444P10 : AV_PIX_FMT_YUVA422P10;
+        if (avctx->bits_per_raw_sample == 10) {
+            avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUVA444P10 : AV_PIX_FMT_YUVA422P10;
+        } else { /* 12b */
+            avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUVA444P12 : AV_PIX_FMT_YUVA422P12;
+        }
     } else {
-        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV422P10;
+        if (avctx->bits_per_raw_sample == 10) {
+            avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV422P10;
+        } else { /* 12b */
+            avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P12 : AV_PIX_FMT_YUV422P12;
+        }
     }
 
     avctx->color_primaries = buf[14];
@@ -436,51 +580,6 @@ static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
     return 0;
 }
 
-static void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
-                         const int num_bits)
-{
-    const int mask = (1 << num_bits) - 1;
-    int i, idx, val, alpha_val;
-
-    idx       = 0;
-    alpha_val = mask;
-    do {
-        do {
-            if (get_bits1(gb)) {
-                val = get_bits(gb, num_bits);
-            } else {
-                int sign;
-                val  = get_bits(gb, num_bits == 16 ? 7 : 4);
-                sign = val & 1;
-                val  = (val + 2) >> 1;
-                if (sign)
-                    val = -val;
-            }
-            alpha_val = (alpha_val + val) & mask;
-            if (num_bits == 16) {
-                dst[idx++] = alpha_val >> 6;
-            } else {
-                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
-            }
-            if (idx >= num_coeffs)
-                break;
-        } while (get_bits_left(gb)>0 && get_bits1(gb));
-        val = get_bits(gb, 4);
-        if (!val)
-            val = get_bits(gb, 11);
-        if (idx + val > num_coeffs)
-            val = num_coeffs - idx;
-        if (num_bits == 16) {
-            for (i = 0; i < val; i++)
-                dst[idx++] = alpha_val >> 6;
-        } else {
-            for (i = 0; i < val; i++)
-                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
-
-        }
-    } while (idx < num_coeffs);
-}
-
 /**
  * Decode alpha slice plane.
  */
@@ -500,12 +599,13 @@ static void decode_slice_alpha(ProresContext *ctx,
     init_get_bits(&gb, buf, buf_size << 3);
 
     if (ctx->alpha_info == 2) {
-        unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 16);
+        ctx->unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 16);
     } else {
-        unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 8);
+        ctx->unpack_alpha(&gb, blocks, blocks_per_slice * 4 * 64, 8);
     }
 
     block = blocks;
+
     for (i = 0; i < 16; i++) {
         memcpy(dst, block, 16 * blocks_per_slice * sizeof(*dst));
         dst   += dst_stride >> 1;
@@ -527,6 +627,7 @@ static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int
     LOCAL_ALIGNED_16(int16_t, qmat_chroma_scaled,[64]);
     int mb_x_shift;
     int ret;
+    uint16_t val_no_chroma;
 
     slice->ret = -1;
     //av_log(avctx, AV_LOG_INFO, "slice %d mb width %d mb x %d y %d\n",
@@ -564,7 +665,8 @@ static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int
         chroma_stride = pic->linesize[1] << 1;
     }
 
-    if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
+    if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10 ||
+        avctx->pix_fmt == AV_PIX_FMT_YUV444P12 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P12) {
         mb_x_shift = 5;
         log2_chroma_blocks_per_mb = 2;
     } else {
@@ -605,10 +707,15 @@ static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int
     else {
         size_t mb_max_x = slice->mb_count << (mb_x_shift - 1);
         size_t i, j;
+        if (avctx->bits_per_raw_sample == 10) {
+            val_no_chroma = 511;
+        } else { /* 12b */
+            val_no_chroma = 511 * 4;
+        }
         for (i = 0; i < 16; ++i)
             for (j = 0; j < mb_max_x; ++j) {
-                *(uint16_t*)(dest_u + (i * chroma_stride) + (j << 1)) = 511;
-                *(uint16_t*)(dest_v + (i * chroma_stride) + (j << 1)) = 511;
+                *(uint16_t*)(dest_u + (i * chroma_stride) + (j << 1)) = val_no_chroma;
+                *(uint16_t*)(dest_v + (i * chroma_stride) + (j << 1)) = val_no_chroma;
             }
     }
 
@@ -730,4 +837,5 @@ AVCodec ff_prores_decoder = {
     .close          = decode_close,
     .decode         = decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 };
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 5b5ada21fe0bd..a3c618cdd1ccf 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -27,42 +27,71 @@
 #include "proresdsp.h"
 #include "simple_idct.h"
 
-#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
-#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
-#define CLIP_MAX (1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
+#define CLIP_MIN (1 << 2)                       ///< minimum value for clipping resulting pixels
+#define CLIP_MAX_10 ((1 << 10) - CLIP_MIN - 1)  ///< maximum value for clipping resulting 10-bit pixels
+#define CLIP_MAX_12 ((1 << 12) - CLIP_MIN - 1)  ///< maximum value for clipping resulting 12-bit pixels
 
-#define CLIP(x) (av_clip((x), CLIP_MIN, CLIP_MAX))
+#define CLIP_10(x) (av_clip((x), CLIP_MIN, CLIP_MAX_10))
+#define CLIP_12(x) (av_clip((x), CLIP_MIN, CLIP_MAX_12))
 
 /**
  * Add bias value, clamp and output pixels of a slice
  */
-static void put_pixels(uint16_t *dst, ptrdiff_t linesize, const int16_t *in)
-{
+
+static inline void put_pixel(uint16_t *dst, ptrdiff_t linesize, const int16_t *in, int bits_per_raw_sample) {
     int x, y, src_offset, dst_offset;
 
     for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += linesize) {
         for (x = 0; x < 8; x++) {
             src_offset = (y << 3) + x;
 
-            dst[dst_offset + x] = CLIP(in[src_offset]);
+            if (bits_per_raw_sample == 10) {
+                dst[dst_offset + x] = CLIP_10(in[src_offset]);
+            } else {//12b
+                dst[dst_offset + x] = CLIP_12(in[src_offset]);
+            }
         }
     }
 }
 
-static void prores_idct_put_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
+static void put_pixels_10(uint16_t *dst, ptrdiff_t linesize, const int16_t *in)
+{
+    put_pixel(dst, linesize, in, 10); /* stores the 8x8 block clamped to [CLIP_MIN, CLIP_MAX_10] */
+}
+
+static void put_pixels_12(uint16_t *dst, ptrdiff_t linesize, const int16_t *in)
+{
+    put_pixel(dst, linesize, in, 12); /* stores the 8x8 block clamped to [CLIP_MIN, CLIP_MAX_12] */
+}
+
+static void prores_idct_put_10_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
+{
+    ff_prores_idct_10(block, qmat);           /* presumably dequantizes with qmat and transforms block in place -- confirm in simple_idct */
+    put_pixels_10(out, linesize >> 1, block); /* put_pixel() indexes uint16_t elements; assumes linesize is in bytes -- confirm */
+}
+
+static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
 {
-    ff_prores_idct(block, qmat);
-    put_pixels(out, linesize >> 1, block);
+    ff_prores_idct_12(block, qmat);
+    put_pixels_12(out, linesize >> 1, block);
 }
 
-av_cold void ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx)
+av_cold int ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx)
 {
-    dsp->idct_put = prores_idct_put_c;
-    dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
+    if (avctx->bits_per_raw_sample == 10) {
+        dsp->idct_put = prores_idct_put_10_c;
+        dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
+    } else if (avctx->bits_per_raw_sample == 12) {
+        dsp->idct_put = prores_idct_put_12_c;
+        dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
+    } else {
+        return AVERROR_BUG;
+    }
 
     if (ARCH_X86)
         ff_proresdsp_init_x86(dsp, avctx);
 
     ff_init_scantable_permutation(dsp->idct_permutation,
                                   dsp->idct_permutation_type);
+    return 0;
 }
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
index 558fae53bf10a..37ba76b8e49c8 100644
--- a/libavcodec/proresdsp.h
+++ b/libavcodec/proresdsp.h
@@ -27,15 +27,13 @@
 #include <stdint.h>
 #include "avcodec.h"
 
-#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
-
 typedef struct ProresDSPContext {
     int idct_permutation_type;
     uint8_t idct_permutation[64];
     void (*idct_put)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat);
 } ProresDSPContext;
 
-void ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx);
+int ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx);
 
 void ff_proresdsp_init_x86(ProresDSPContext *dsp, AVCodecContext *avctx);
 
diff --git a/libavcodec/proresenc_anatoliy.c b/libavcodec/proresenc_anatoliy.c
index 0516066163a51..0fc79fc1dead1 100644
--- a/libavcodec/proresenc_anatoliy.c
+++ b/libavcodec/proresenc_anatoliy.c
@@ -2,6 +2,7 @@
  * Apple ProRes encoder
  *
  * Copyright (c) 2011 Anatoliy Wasserman
+ * Copyright (c) 2012 Konstantin Shishkov
  *
  * This file is part of FFmpeg.
  *
@@ -23,47 +24,42 @@
 /**
  * @file
  * Apple ProRes encoder (Anatoliy Wasserman version)
- * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'acpo' (Proxy)
+ * Known FOURCCs: 'ap4x' (444 XQ), 'ap4h' (444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'apco' (Proxy)
  */
 
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "dct.h"
 #include "internal.h"
+#include "profiles.h"
+#include "proresdata.h"
 #include "put_bits.h"
 #include "bytestream.h"
 #include "fdctdsp.h"
 
 #define DEFAULT_SLICE_MB_WIDTH 8
 
-#define FF_PROFILE_PRORES_PROXY     0
-#define FF_PROFILE_PRORES_LT        1
-#define FF_PROFILE_PRORES_STANDARD  2
-#define FF_PROFILE_PRORES_HQ        3
-
 static const AVProfile profiles[] = {
     { FF_PROFILE_PRORES_PROXY,    "apco"},
     { FF_PROFILE_PRORES_LT,       "apcs"},
     { FF_PROFILE_PRORES_STANDARD, "apcn"},
     { FF_PROFILE_PRORES_HQ,       "apch"},
+    { FF_PROFILE_PRORES_4444,     "ap4h"},
+    { FF_PROFILE_PRORES_XQ,       "ap4x"},
     { FF_PROFILE_UNKNOWN }
 };
 
-static const int qp_start_table[4] = { 4, 1, 1, 1 };
-static const int qp_end_table[4]   = { 8, 9, 6, 6 };
-static const int bitrate_table[5]  = { 1000, 2100, 3500, 5400 };
-
-static const uint8_t progressive_scan[64] = {
-     0,  1,  8,  9,  2,  3, 10, 11,
-    16, 17, 24, 25, 18, 19, 26, 27,
-     4,  5, 12, 20, 13,  6,  7, 14,
-    21, 28, 29, 22, 15, 23, 30, 31,
-    32, 33, 40, 48, 41, 34, 35, 42,
-    49, 56, 57, 50, 43, 36, 37, 44,
-    51, 58, 59, 52, 45, 38, 39, 46,
-    53, 60, 61, 54, 47, 55, 62, 63
-};
+static const int qp_start_table[6] = {  8, 3, 2, 1, 1, 1};
+static const int qp_end_table[6]   = { 13, 9, 6, 6, 5, 4};
+static const int bitrate_table[6]  = { 1000, 2100, 3500, 5400, 7000, 10000};
 
-static const uint8_t QMAT_LUMA[4][64] = {
+static const int valid_primaries[9]  = { AVCOL_PRI_RESERVED0, AVCOL_PRI_BT709, AVCOL_PRI_UNSPECIFIED, AVCOL_PRI_BT470BG,
+                                         AVCOL_PRI_SMPTE170M, AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432,INT_MAX };
+static const int valid_trc[4]        = { AVCOL_TRC_RESERVED0, AVCOL_TRC_BT709, AVCOL_TRC_UNSPECIFIED, INT_MAX };
+static const int valid_colorspace[5] = { AVCOL_SPC_BT709, AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_SMPTE170M,
+                                         AVCOL_SPC_BT2020_NCL, INT_MAX };
+
+static const uint8_t QMAT_LUMA[6][64] = {
     {
          4,  7,  9, 11, 13, 14, 15, 63,
          7,  7, 11, 12, 14, 15, 63, 63,
@@ -100,10 +96,28 @@ static const uint8_t QMAT_LUMA[4][64] = {
          4,  4,  4,  4,  4,  5,  5,  6,
          4,  4,  4,  4,  5,  5,  6,  7,
          4,  4,  4,  4,  5,  6,  7,  7
+    }, { /* 444 */
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  5,
+        4,  4,  4,  4,  4,  4,  5,  5,
+        4,  4,  4,  4,  4,  5,  5,  6,
+        4,  4,  4,  4,  5,  5,  6,  7,
+        4,  4,  4,  4,  5,  6,  7,  7
+    }, { /* 444 XQ */
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  2,
+        2,  2,  2,  2,  2,  2,  2,  3,
+        2,  2,  2,  2,  2,  2,  3,  3,
+        2,  2,  2,  2,  2,  3,  3,  3,
+        2,  2,  2,  2,  3,  3,  3,  4,
+        2,  2,  2,  2,  3,  3,  4,  4,
     }
 };
 
-static const uint8_t QMAT_CHROMA[4][64] = {
+static const uint8_t QMAT_CHROMA[6][64] = {
     {
          4,  7,  9, 11, 13, 14, 63, 63,
          7,  7, 11, 12, 14, 63, 63, 63,
@@ -140,18 +154,45 @@ static const uint8_t QMAT_CHROMA[4][64] = {
          4,  4,  4,  4,  4,  5,  5,  6,
          4,  4,  4,  4,  5,  5,  6,  7,
          4,  4,  4,  4,  5,  6,  7,  7
+    }, { /* 444 */
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  5,
+        4,  4,  4,  4,  4,  4,  5,  5,
+        4,  4,  4,  4,  4,  5,  5,  6,
+        4,  4,  4,  4,  5,  5,  6,  7,
+        4,  4,  4,  4,  5,  6,  7,  7
+    }, { /* 444 xq */
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  4,
+        4,  4,  4,  4,  4,  4,  4,  5,
+        4,  4,  4,  4,  4,  4,  5,  5,
+        4,  4,  4,  4,  4,  5,  5,  6,
+        4,  4,  4,  4,  5,  5,  6,  7,
+        4,  4,  4,  4,  5,  6,  7,  7
     }
 };
 
 
 typedef struct {
+    AVClass *class;
     FDCTDSPContext fdsp;
     uint8_t* fill_y;
     uint8_t* fill_u;
     uint8_t* fill_v;
+    uint8_t* fill_a;
 
     int qmat_luma[16][64];
     int qmat_chroma[16][64];
+    const uint8_t *scantable;
+
+    int is_422;
+    int need_alpha;
+    int is_interlaced;
+
+    char *vendor;
 } ProresContext;
 
 static void encode_codeword(PutBitContext *pb, int val, int codebook)
@@ -229,15 +270,15 @@ static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
 static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
         0x28, 0x28, 0x28, 0x4C };
 
-static void encode_ac_coeffs(AVCodecContext *avctx, PutBitContext *pb,
-        int16_t *in, int blocks_per_slice, int *qmat)
+static void encode_ac_coeffs(PutBitContext *pb,
+        int16_t *in, int blocks_per_slice, int *qmat, const uint8_t ff_prores_scan[64])
 {
     int prev_run = 4;
     int prev_level = 2;
 
     int run = 0, level, code, i, j;
     for (i = 1; i < 64; i++) {
-        int indp = progressive_scan[i];
+        int indp = ff_prores_scan[i];
         for (j = 0; j < blocks_per_slice; j++) {
             int val = QSCALE(qmat, indp, in[(j << 6) + indp]);
             if (val) {
@@ -278,81 +319,190 @@ static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t*
     fdsp->fdct(block);
 }
 
-static int encode_slice_plane(AVCodecContext *avctx, int mb_count,
-        uint8_t *src, int src_stride, uint8_t *buf, unsigned buf_size,
-        int *qmat, int chroma)
+static void calc_plane_dct(FDCTDSPContext *fdsp, uint8_t *src, int16_t * blocks, int src_stride, int mb_count, int chroma, int is_422)
 {
-    ProresContext* ctx = avctx->priv_data;
-    FDCTDSPContext *fdsp = &ctx->fdsp;
-    LOCAL_ALIGNED(16, int16_t, blocks, [DEFAULT_SLICE_MB_WIDTH << 8]);
     int16_t *block;
-    int i, blocks_per_slice;
-    PutBitContext pb;
+    int i;
 
     block = blocks;
-    for (i = 0; i < mb_count; i++) {
-        fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
-        fdct_get(fdsp, src + 8 * src_stride, src_stride, block + ((2 - chroma) << 6));
-        if (!chroma) {
+
+    if (!chroma) { /* Luma plane */
+        for (i = 0; i < mb_count; i++) {
+            fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
             fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6));
+            fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (2 << 6));
             fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
+
+            block += 256;
+            src   += 32;
         }
+    } else if (chroma && is_422){ /* chroma plane 422 */
+        for (i = 0; i < mb_count; i++) {
+            fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
+            fdct_get(fdsp, src + 8 * src_stride, src_stride, block + (1 << 6));
+            block += (256 >> 1);
+            src   += (32  >> 1);
+        }
+    } else { /* chroma plane 444 */
+        for (i = 0; i < mb_count; i++) {
+            fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
+            fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (1 << 6));
+            fdct_get(fdsp, src + 16,                  src_stride, block + (2 << 6));
+            fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
 
-        block += (256 >> chroma);
-        src   += (32  >> chroma);
+            block += 256;
+            src   += 32;
+        }
     }
+}
+
+static int encode_slice_plane(int16_t *blocks, int mb_count, uint8_t *buf, unsigned buf_size, int *qmat, int sub_sample_chroma,
+                              const uint8_t ff_prores_scan[64])
+{
+    int blocks_per_slice;
+    PutBitContext pb;
 
-    blocks_per_slice = mb_count << (2 - chroma);
+    blocks_per_slice = mb_count << (2 - sub_sample_chroma);
     init_put_bits(&pb, buf, buf_size);
 
     encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
-    encode_ac_coeffs(avctx, &pb, blocks, blocks_per_slice, qmat);
+    encode_ac_coeffs(&pb, blocks, blocks_per_slice, qmat, ff_prores_scan);
 
     flush_put_bits(&pb);
     return put_bits_ptr(&pb) - pb.buf;
 }
 
 static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
-        uint8_t *dest_y, uint8_t *dest_u, uint8_t *dest_v, int luma_stride,
-        int chroma_stride, unsigned mb_count, uint8_t *buf, unsigned data_size,
-        unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
-        int qp)
+                                                   int16_t * blocks_y, int16_t * blocks_u, int16_t * blocks_v,
+                                                   unsigned mb_count, uint8_t *buf, unsigned data_size,
+                                                   unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
+                                                   int qp)
 {
     ProresContext* ctx = avctx->priv_data;
 
-    *y_data_size = encode_slice_plane(avctx, mb_count, dest_y, luma_stride,
-            buf, data_size, ctx->qmat_luma[qp - 1], 0);
+    *y_data_size = encode_slice_plane(blocks_y, mb_count,
+                                      buf, data_size, ctx->qmat_luma[qp - 1], 0, ctx->scantable);
 
     if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
-        *u_data_size = encode_slice_plane(avctx, mb_count, dest_u,
-                chroma_stride, buf + *y_data_size, data_size - *y_data_size,
-                ctx->qmat_chroma[qp - 1], 1);
+        *u_data_size = encode_slice_plane(blocks_u, mb_count, buf + *y_data_size, data_size - *y_data_size,
+                                          ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
 
-        *v_data_size = encode_slice_plane(avctx, mb_count, dest_v,
-                chroma_stride, buf + *y_data_size + *u_data_size,
-                data_size - *y_data_size - *u_data_size,
-                ctx->qmat_chroma[qp - 1], 1);
+        *v_data_size = encode_slice_plane(blocks_v, mb_count, buf + *y_data_size + *u_data_size,
+                                          data_size - *y_data_size - *u_data_size,
+                                          ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
     }
 
     return *y_data_size + *u_data_size + *v_data_size;
 }
 
-static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
-        unsigned stride, unsigned width, unsigned height, uint16_t *dst,
-        unsigned dst_width, unsigned dst_height)
+static void put_alpha_diff(PutBitContext *pb, int cur, int prev)
 {
+    const int abits = 16;               /* width of an alpha sample in the long form */
+    const int dbits = 7;                /* width of the short form: 6 magnitude bits + 1 sign bit */
+    const int dsize = 1 << (dbits - 1); /* largest magnitude the short form can carry (64) */
+    int diff = cur - prev;
+
+    diff = av_mod_uintp2(diff, abits);  /* difference modulo 2^abits, in 0 .. 2^abits - 1 */
+    if (diff >= (1 << abits) - dsize)   /* re-centre small negative steps into -dsize .. -1 */
+        diff -= 1 << abits;
+    if (diff < -dsize || diff > dsize || !diff) {
+        put_bits(pb, 1, 1);             /* flag 1: long form, abits-wide value (diff is >= 0 here) */
+        put_bits(pb, abits, diff);
+    } else {
+        put_bits(pb, 1, 0);             /* flag 0: short form, magnitude - 1 followed by the sign */
+        put_bits(pb, dbits - 1, FFABS(diff) - 1);
+        put_bits(pb, 1, diff < 0);
+    }
+}
 
+static inline void put_alpha_run(PutBitContext *pb, int run)
+{
+    if (run) {
+        put_bits(pb, 1, 0);        /* flag 0: a repeat count follows */
+        if (run < 0x10)
+            put_bits(pb, 4, run);  /* short form: non-zero 4-bit count */
+        else
+            put_bits(pb, 15, run); /* long form: reads back as 4 zero bits + 11-bit count only while run < 2048 */
+    } else {
+        put_bits(pb, 1, 1);        /* flag 1: no repeat, the next value follows directly */
+    }
+}
+
+static av_always_inline int encode_alpha_slice_data(AVCodecContext *avctx, int8_t * src_a,
+                                                   unsigned mb_count, uint8_t *buf, unsigned data_size, unsigned* a_data_size)
+{ /* writes the alpha samples of one slice into buf as value differences plus repeat counts; returns 0 or AVERROR_BUG */
+    const int abits = 16;
+    const int mask  = (1 << abits) - 1;
+    const int num_coeffs = mb_count * 256; /* 256 alpha samples are consumed per macroblock */
+    int prev = mask, cur;                  /* predictor starts at full scale, as in the decoder's unpack_alpha() */
+    int idx = 0;
+    int run = 0;                           /* number of samples equal to prev seen since the last emitted value */
+    int16_t * blocks = (int16_t *)src_a;   /* samples are read as 16-bit values despite the int8_t pointer type */
+    PutBitContext pb;
+    init_put_bits(&pb, buf, data_size);
+
+    cur = blocks[idx++];                   /* the first sample is always coded against the initial predictor */
+    put_alpha_diff(&pb, cur, prev);
+    prev = cur;
+    do {                                   /* do/while: a second sample is always read, so mb_count must be >= 1 */
+        cur = blocks[idx++];
+        if (cur != prev) {
+            put_alpha_run (&pb, run);      /* close the pending run (possibly empty) before the new value */
+            put_alpha_diff(&pb, cur, prev);
+            prev = cur;
+            run  = 0;
+        } else {
+            run++;
+        }
+    } while (idx < num_coeffs);
+    if (run)                               /* flush a run still open at the end of the slice */
+        put_alpha_run(&pb, run);
+    flush_put_bits(&pb);
+    *a_data_size = put_bits_count(&pb) >> 3; /* bits written so far, converted to bytes */
+
+    if (put_bits_left(&pb) < 0) {          /* NOTE(review): checked only after all writes were issued -- confirm put_bits() cannot overrun buf first */
+        av_log(avctx, AV_LOG_ERROR,
+               "Underestimated required buffer size.\n");
+        return AVERROR_BUG;
+    } else {
+        return 0;
+    }
+}
+
+static inline void subimage_with_fill_template(uint16_t *src, unsigned x, unsigned y,
+                                               unsigned stride, unsigned width, unsigned height, uint16_t *dst,
+                                               unsigned dst_width, unsigned dst_height, int is_alpha_plane,
+                                               int is_interlaced, int is_top_field)
+{
     int box_width = FFMIN(width - x, dst_width);
-    int box_height = FFMIN(height - y, dst_height);
-    int i, j, src_stride = stride >> 1;
+    int i, j, src_stride, box_height;
     uint16_t last_pix, *last_line;
 
-    src += y * src_stride + x;
+    if (!is_interlaced) {
+        src_stride = stride >> 1;
+        src += y * src_stride + x;
+        box_height = FFMIN(height - y, dst_height);
+    } else {
+        src_stride = stride; /* 2 lines stride */
+        src += y * src_stride + x;
+        box_height = FFMIN(height/2 - y, dst_height);
+        if (!is_top_field)
+            src += stride >> 1;
+    }
+
     for (i = 0; i < box_height; ++i) {
         for (j = 0; j < box_width; ++j) {
-            dst[j] = src[j];
+            if (!is_alpha_plane) {
+                dst[j] = src[j];
+            } else {
+                dst[j] = src[j] << 6; /* alpha 10b to 16b */
+            }
+        }
+        /* Replicate the last converted sample into the padding columns.
+         * dst[j - 1] was written by the loop above, so for the alpha plane it
+         * is already scaled to 16 bits; shifting it by 6 a second time would
+         * overflow uint16_t and pad the row with a wrong alpha value. */
+        last_pix = dst[j - 1];
-        last_pix = dst[j - 1];
         for (; j < dst_width; j++)
             dst[j] = last_pix;
         src += src_stride;
@@ -367,64 +517,113 @@ static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
     }
 }
 
+static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
+        unsigned stride, unsigned width, unsigned height, uint16_t *dst,
+        unsigned dst_width, unsigned dst_height, int is_interlaced, int is_top_field)
+{
+    subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 0, is_interlaced, is_top_field);
+}
+
+/* reorganize alpha data and convert 10b -> 16b */
+static void subimage_alpha_with_fill(uint16_t *src, unsigned x, unsigned y,
+                               unsigned stride, unsigned width, unsigned height, uint16_t *dst,
+                               unsigned dst_width, unsigned dst_height, int is_interlaced, int is_top_field)
+{
+    subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 1, is_interlaced, is_top_field);
+}
+
 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
         int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
-        int unsafe, int *qp)
+        int unsafe, int *qp, int is_interlaced, int is_top_field)
 {
-    int luma_stride, chroma_stride;
-    int hdr_size = 6, slice_size;
-    uint8_t *dest_y, *dest_u, *dest_v;
-    unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0;
+    int luma_stride, chroma_stride, alpha_stride = 0;
     ProresContext* ctx = avctx->priv_data;
+    int hdr_size = 6 + (ctx->need_alpha * 2); /* v data size is written when there is alpha */
+    int ret = 0, slice_size;
+    uint8_t *dest_y, *dest_u, *dest_v;
+    unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0, a_data_size = 0;
+    FDCTDSPContext *fdsp = &ctx->fdsp;
     int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
     int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
     int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;
 
+    LOCAL_ALIGNED(16, int16_t, blocks_y, [DEFAULT_SLICE_MB_WIDTH << 8]);
+    LOCAL_ALIGNED(16, int16_t, blocks_u, [DEFAULT_SLICE_MB_WIDTH << 8]);
+    LOCAL_ALIGNED(16, int16_t, blocks_v, [DEFAULT_SLICE_MB_WIDTH << 8]);
+
     luma_stride   = pic->linesize[0];
     chroma_stride = pic->linesize[1];
 
-    dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
-    dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << 4);
-    dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << 4);
+    if (ctx->need_alpha)
+        alpha_stride = pic->linesize[3];
 
-    if (unsafe) {
+    if (!is_interlaced) {
+        dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
+        dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
+        dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
+    } else {
+        dest_y = pic->data[0] + (mb_y << 4) * luma_stride * 2   + (mb_x << 5);
+        dest_u = pic->data[1] + (mb_y << 4) * chroma_stride * 2 + (mb_x << (5 - ctx->is_422));
+        dest_v = pic->data[2] + (mb_y << 4) * chroma_stride * 2 + (mb_x << (5 - ctx->is_422));
+        if (!is_top_field){ /* bottom field, offset dest */
+            dest_y += luma_stride;
+            dest_u += chroma_stride;
+            dest_v += chroma_stride;
+        }
+    }
 
+    if (unsafe) {
         subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
                 luma_stride, avctx->width, avctx->height,
-                (uint16_t *) ctx->fill_y, mb_count << 4, 16);
-        subimage_with_fill((uint16_t *) pic->data[1], mb_x << 3, mb_y << 4,
-                chroma_stride, avctx->width >> 1, avctx->height,
-                (uint16_t *) ctx->fill_u, mb_count << 3, 16);
-        subimage_with_fill((uint16_t *) pic->data[2], mb_x << 3, mb_y << 4,
-                chroma_stride, avctx->width >> 1, avctx->height,
-                (uint16_t *) ctx->fill_v, mb_count << 3, 16);
-
-        encode_slice_data(avctx, ctx->fill_y, ctx->fill_u, ctx->fill_v,
-                mb_count << 5, mb_count << 4, mb_count, buf + hdr_size,
-                data_size - hdr_size, &y_data_size, &u_data_size, &v_data_size,
-                *qp);
+                (uint16_t *) ctx->fill_y, mb_count << 4, 16, is_interlaced, is_top_field);
+        subimage_with_fill((uint16_t *) pic->data[1], mb_x << (4 - ctx->is_422), mb_y << 4,
+                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
+                           (uint16_t *) ctx->fill_u, mb_count << (4 - ctx->is_422), 16, is_interlaced, is_top_field);
+        subimage_with_fill((uint16_t *) pic->data[2], mb_x << (4 - ctx->is_422), mb_y << 4,
+                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
+                           (uint16_t *) ctx->fill_v, mb_count << (4 - ctx->is_422), 16, is_interlaced, is_top_field);
+
+        /* no need for interlaced special case, data already reorganized in subimage_with_fill */
+        calc_plane_dct(fdsp, ctx->fill_y, blocks_y, mb_count <<  5,                mb_count, 0, 0);
+        calc_plane_dct(fdsp, ctx->fill_u, blocks_u, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
+        calc_plane_dct(fdsp, ctx->fill_v, blocks_v, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
+
+        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
+                          mb_count, buf + hdr_size, data_size - hdr_size,
+                          &y_data_size, &u_data_size, &v_data_size,
+                          *qp);
     } else {
-        slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
-                luma_stride, chroma_stride, mb_count, buf + hdr_size,
-                data_size - hdr_size, &y_data_size, &u_data_size, &v_data_size,
-                *qp);
+        if (!is_interlaced) {
+            calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride, mb_count, 0, 0);
+            calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride, mb_count, 1, ctx->is_422);
+            calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride, mb_count, 1, ctx->is_422);
+        } else {
+            calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride   * 2, mb_count, 0, 0);
+            calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride * 2, mb_count, 1, ctx->is_422);
+            calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride * 2, mb_count, 1, ctx->is_422);
+        }
+
+        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
+                          mb_count, buf + hdr_size, data_size - hdr_size,
+                          &y_data_size, &u_data_size, &v_data_size,
+                          *qp);
 
         if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) {
             do {
                 *qp += 1;
-                slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
-                        luma_stride, chroma_stride, mb_count, buf + hdr_size,
-                        data_size - hdr_size, &y_data_size, &u_data_size,
-                        &v_data_size, *qp);
+                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
+                                               mb_count, buf + hdr_size, data_size - hdr_size,
+                                               &y_data_size, &u_data_size, &v_data_size,
+                                               *qp);
             } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
         } else if (slice_size < low_bytes && *qp
                 > qp_start_table[avctx->profile]) {
             do {
                 *qp -= 1;
-                slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
-                        luma_stride, chroma_stride, mb_count, buf + hdr_size,
-                        data_size - hdr_size, &y_data_size, &u_data_size,
-                        &v_data_size, *qp);
+                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
+                                               mb_count, buf + hdr_size, data_size - hdr_size,
+                                               &y_data_size, &u_data_size, &v_data_size,
+                                               *qp);
             } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
         }
     }
@@ -434,20 +633,47 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
     AV_WB16(buf + 2, y_data_size);
     AV_WB16(buf + 4, u_data_size);
 
-    return hdr_size + y_data_size + u_data_size + v_data_size;
+    if (ctx->need_alpha) {
+        AV_WB16(buf + 6, v_data_size); /* write v data size only if there is alpha */
+
+        subimage_alpha_with_fill((uint16_t *) pic->data[3], mb_x << 4, mb_y << 4,
+                           alpha_stride, avctx->width, avctx->height,
+                           (uint16_t *) ctx->fill_a, mb_count << 4, 16, is_interlaced, is_top_field);
+        ret = encode_alpha_slice_data(avctx, ctx->fill_a, mb_count,
+                                      buf + hdr_size + slice_size,
+                                      data_size - hdr_size - slice_size, &a_data_size);
+    }
+
+    if (ret != 0) {
+        return ret;
+    }
+    return hdr_size + y_data_size + u_data_size + v_data_size + a_data_size;
 }
 
 static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
-        uint8_t *buf, const int buf_size)
+        uint8_t *buf, const int buf_size, const int picture_index, const int is_top_field)
 {
+    ProresContext *ctx = avctx->priv_data;
     int mb_width = (avctx->width + 15) >> 4;
-    int mb_height = (avctx->height + 15) >> 4;
     int hdr_size, sl_size, i;
-    int mb_y, sl_data_size, qp;
+    int mb_y, sl_data_size, qp, mb_height, picture_height, unsafe_mb_height_limit;
     int unsafe_bot, unsafe_right;
     uint8_t *sl_data, *sl_data_sizes;
     int slice_per_line = 0, rem = mb_width;
 
+    if (!ctx->is_interlaced) { /* progressive encoding */
+        mb_height = (avctx->height + 15) >> 4;
+        unsafe_mb_height_limit = mb_height;
+    } else {
+        if (is_top_field) {
+            picture_height = (avctx->height + 1) / 2;
+        } else {
+            picture_height = avctx->height / 2;
+        }
+        mb_height = (picture_height + 15) >> 4;
+        unsafe_mb_height_limit = mb_height;
+    }
+
     for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) {
         slice_per_line += rem >> i;
         rem &= (1 << i) - 1;
@@ -464,11 +690,14 @@ static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
             while (mb_width - mb_x < slice_mb_count)
                 slice_mb_count >>= 1;
 
-            unsafe_bot = (avctx->height & 0xf) && (mb_y == mb_height - 1);
+            unsafe_bot = (avctx->height & 0xf) && (mb_y == unsafe_mb_height_limit - 1);
             unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width);
 
             sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
-                    sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp);
+                    sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp, ctx->is_interlaced, is_top_field);
+            if (sl_size < 0){
+                return sl_size;
+            }
 
             bytestream_put_be16(&sl_data_sizes, sl_size);
             sl_data           += sl_size;
@@ -479,8 +708,8 @@ static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
 
     buf[0] = hdr_size << 3;
     AV_WB32(buf + 1, sl_data - buf);
-    AV_WB16(buf + 5, slice_per_line * mb_height);
-    buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4;
+    AV_WB16(buf + 5, slice_per_line * mb_height); /* number of slices */
+    buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4; /* log2 of slice width in MBs, high nibble */
 
     return sl_data - buf;
 }
@@ -488,9 +717,11 @@ static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
 static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                const AVFrame *pict, int *got_packet)
 {
+    ProresContext *ctx = avctx->priv_data;
     int header_size = 148;
     uint8_t *buf;
-    int pic_size, ret;
+    int compress_frame_size, pic_size, ret, is_top_field_first = 0;
+    uint8_t frame_flags;
     int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit
 
 
@@ -498,31 +729,71 @@ static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return ret;
 
     buf = pkt->data;
-    pic_size = prores_encode_picture(avctx, pict, buf + header_size + 8,
-            pkt->size - header_size - 8);
+    compress_frame_size = 8 + header_size;
 
-    bytestream_put_be32(&buf, pic_size + 8 + header_size);
+    bytestream_put_be32(&buf, compress_frame_size);/* frame size will be updated after picture(s) encoding */
     bytestream_put_buffer(&buf, "icpf", 4);
 
     bytestream_put_be16(&buf, header_size);
-    bytestream_put_be16(&buf, 0);
-    bytestream_put_buffer(&buf, "fmpg", 4);
+    bytestream_put_be16(&buf, 0); /* version */
+    bytestream_put_buffer(&buf, ctx->vendor, 4);
     bytestream_put_be16(&buf, avctx->width);
     bytestream_put_be16(&buf, avctx->height);
-    *buf++ = 0x83; // {10}(422){00}{00}(frame){11}
-    *buf++ = 0;
-    *buf++ = 2;
-    *buf++ = 2;
-    *buf++ = 6;
-    *buf++ = 32;
-    *buf++ = 0;
-    *buf++ = 3;
+    frame_flags = 0x82; /* 422 not interlaced */
+    if (avctx->profile >= FF_PROFILE_PRORES_4444) /* 4444 or 4444 Xq */
+        frame_flags |= 0x40; /* 444 chroma */
+    if (ctx->is_interlaced) {
+        if (pict->top_field_first || !pict->interlaced_frame) { /* tff frame, or progressive frame interpreted as tff */
+            av_log(avctx, AV_LOG_DEBUG, "use interlaced encoding, top field first\n");
+            frame_flags |= 0x04; /* interlaced tff */
+            is_top_field_first = 1;
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "use interlaced encoding, bottom field first\n");
+            frame_flags |= 0x08; /* interlaced bff */
+        }
+    } else {
+        av_log(avctx, AV_LOG_DEBUG, "use progressive encoding\n");
+    }
+    *buf++ = frame_flags;
+    *buf++ = 0; /* reserved */
+    /* only write color properties, if valid value. set to unspecified otherwise */
+    *buf++ = ff_int_from_list_or_default(avctx, "frame color primaries", pict->color_primaries, valid_primaries, 0);
+    *buf++ = ff_int_from_list_or_default(avctx, "frame color trc", pict->color_trc, valid_trc, 0);
+    *buf++ = ff_int_from_list_or_default(avctx, "frame colorspace", pict->colorspace, valid_colorspace, 0);
+    if (avctx->profile >= FF_PROFILE_PRORES_4444) {
+        if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
+            *buf++ = 0xA0;/* src b64a and no alpha */
+        } else {
+            *buf++ = 0xA2;/* src b64a and 16b alpha */
+        }
+    } else {
+        *buf++ = 32;/* src v210 and no alpha */
+    }
+    *buf++ = 0; /* reserved */
+    *buf++ = 3; /* luma and chroma matrix present */
 
     bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64);
     bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);
 
+    pic_size = prores_encode_picture(avctx, pict, buf,
+                                     pkt->size - compress_frame_size, 0, is_top_field_first);/* encode progressive or first field */
+    if (pic_size < 0) {
+        return pic_size;
+    }
+    compress_frame_size += pic_size;
+
+    if (ctx->is_interlaced) { /* encode second field */
+        pic_size = prores_encode_picture(avctx, pict, pkt->data + compress_frame_size,
+                                         pkt->size - compress_frame_size, 1, !is_top_field_first);
+        if (pic_size < 0) {
+            return pic_size;
+        }
+        compress_frame_size += pic_size;
+    }
+
+    AV_WB32(pkt->data, compress_frame_size);/* update frame size */
     pkt->flags |= AV_PKT_FLAG_KEY;
-    pkt->size = pic_size + 8 + header_size;
+    pkt->size = compress_frame_size;
     *got_packet = 1;
 
     return 0;
@@ -540,11 +811,14 @@ static av_cold int prores_encode_init(AVCodecContext *avctx)
     int i;
     ProresContext* ctx = avctx->priv_data;
 
-    if (avctx->pix_fmt != AV_PIX_FMT_YUV422P10) {
-        av_log(avctx, AV_LOG_ERROR, "need YUV422P10\n");
-        return AVERROR_PATCHWELCOME;
-    }
     avctx->bits_per_raw_sample = 10;
+    ctx->need_alpha = 0;
+    ctx->is_interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
+    if (ctx->is_interlaced) {
+        ctx->scantable = ff_prores_interlaced_scan;
+    } else {
+        ctx->scantable = ff_prores_progressive_scan;
+    }
 
     if (avctx->width & 0x1) {
         av_log(avctx, AV_LOG_ERROR,
@@ -558,27 +832,71 @@ static av_cold int prores_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
-        ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
-        if (!ctx->fill_y)
-            return AVERROR(ENOMEM);
-        ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
-        ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
+    if (strlen(ctx->vendor) != 4) {
+        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
+        return AVERROR(EINVAL);
     }
 
     if (avctx->profile == FF_PROFILE_UNKNOWN) {
-        avctx->profile = FF_PROFILE_PRORES_STANDARD;
-        av_log(avctx, AV_LOG_INFO,
+        if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
+            avctx->profile = FF_PROFILE_PRORES_STANDARD;
+            av_log(avctx, AV_LOG_INFO,
                 "encoding with ProRes standard (apcn) profile\n");
-
+        } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
+            avctx->profile = FF_PROFILE_PRORES_4444;
+            av_log(avctx, AV_LOG_INFO,
+                   "encoding with ProRes 4444 (ap4h) profile\n");
+        } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
+            avctx->profile = FF_PROFILE_PRORES_4444;
+            av_log(avctx, AV_LOG_INFO,
+                   "encoding with ProRes 4444+ (ap4h) profile\n");
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Unknown pixel format\n");
+            return AVERROR(EINVAL);
+        }
     } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
-            || avctx->profile > FF_PROFILE_PRORES_HQ) {
+            || avctx->profile > FF_PROFILE_PRORES_XQ) {
         av_log(
                 avctx,
                 AV_LOG_ERROR,
-                "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch]\n",
+                "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch, 4 - ap4h, 5 - ap4x]\n",
                 avctx->profile);
         return AVERROR(EINVAL);
+    } else if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P10) && (avctx->profile > FF_PROFILE_PRORES_HQ)){
+        av_log(avctx, AV_LOG_ERROR,
+               "encoding with ProRes 444/Xq (ap4h/ap4x) profile, need YUV444P10 input\n");
+        return AVERROR(EINVAL);
+    }  else if ((avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10)
+                && (avctx->profile < FF_PROFILE_PRORES_4444)){
+        av_log(avctx, AV_LOG_ERROR,
+               "encoding with ProRes Proxy/LT/422/422 HQ (apco, apcs, apcn, apch) profile, need YUV422P10 input\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->profile < FF_PROFILE_PRORES_4444) { /* 422 versions */
+        ctx->is_422 = 1;
+        if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
+            ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
+            if (!ctx->fill_y)
+                return AVERROR(ENOMEM);
+            ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
+            ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
+        }
+    } else { /* 444 */
+        ctx->is_422 = 0;
+        if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
+            ctx->fill_y = av_malloc(3 * (DEFAULT_SLICE_MB_WIDTH << 9));
+            if (!ctx->fill_y)
+                return AVERROR(ENOMEM);
+            ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
+            ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 9);
+        }
+        if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
+            ctx->need_alpha = 1;
+            ctx->fill_a = av_malloc(DEFAULT_SLICE_MB_WIDTH << 9); /* 8 blocks x 16px x 16px x sizeof (uint16) */
+            if (!ctx->fill_a)
+                return AVERROR(ENOMEM);
+        }
     }
 
     ff_fdctdsp_init(&ctx->fdsp, avctx);
@@ -597,10 +915,33 @@ static av_cold int prores_encode_close(AVCodecContext *avctx)
 {
     ProresContext* ctx = avctx->priv_data;
     av_freep(&ctx->fill_y);
+    av_freep(&ctx->fill_a);
 
     return 0;
 }
 
+#define OFFSET(x) offsetof(ProresContext, x)
+#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+
+static const AVOption options[] = {
+    { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "fmpg" }, CHAR_MIN, CHAR_MAX, VE },
+    { NULL }
+};
+
+static const AVClass proresaw_enc_class = {
+    .class_name = "ProResAw encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass prores_enc_class = {
+    .class_name = "ProRes encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_prores_aw_encoder = {
     .name           = "prores_aw",
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
@@ -610,9 +951,10 @@ AVCodec ff_prores_aw_encoder = {
     .init           = prores_encode_init,
     .close          = prores_encode_close,
     .encode2        = prores_encode_frame,
-    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE},
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
-    .profiles       = profiles
+    .priv_class     = &proresaw_enc_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 };
 
 AVCodec ff_prores_encoder = {
@@ -624,7 +966,8 @@ AVCodec ff_prores_encoder = {
     .init           = prores_encode_init,
     .close          = prores_encode_close,
     .encode2        = prores_encode_frame,
-    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE},
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE},
     .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
-    .profiles       = profiles
+    .priv_class     = &prores_enc_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 };
diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index 81f3865ea6ef4..e045a972f1b75 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c
@@ -28,6 +28,7 @@
 #include "avcodec.h"
 #include "fdctdsp.h"
 #include "put_bits.h"
+#include "profiles.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "proresdata.h"
@@ -221,6 +222,7 @@ typedef struct ProresThreadData {
     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
     int16_t custom_q[64];
+    int16_t custom_chroma_q[64];
     struct TrellisNode *nodes;
 } ProresThreadData;
 
@@ -231,6 +233,7 @@ typedef struct ProresContext {
     int16_t quants[MAX_STORED_Q][64];
     int16_t quants_chroma[MAX_STORED_Q][64];
     int16_t custom_q[64];
+    int16_t custom_chroma_q[64];
     const uint8_t *quant_mat;
     const uint8_t *quant_chroma_mat;
     const uint8_t *scantable;
@@ -573,7 +576,7 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
         qmat_chroma = ctx->quants_chroma[quant];
     } else {
         qmat = ctx->custom_q;
-        qmat_chroma = ctx->custom_q;
+        qmat_chroma = ctx->custom_chroma_q;
         for (i = 0; i < 64; i++) {
             qmat[i] = ctx->quant_mat[i] * quant;
             qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
@@ -901,7 +904,7 @@ static int find_slice_quant(AVCodecContext *avctx,
                 qmat_chroma = ctx->quants_chroma[q];
             } else {
                 qmat = td->custom_q;
-                qmat_chroma = td->custom_q;
+                qmat_chroma = td->custom_chroma_q;
                 for (i = 0; i < 64; i++) {
                     qmat[i] = ctx->quant_mat[i] * q;
                     qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
@@ -1431,4 +1434,5 @@ AVCodec ff_prores_ks_encoder = {
                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
                       },
     .priv_class     = &proresenc_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
 };
diff --git a/libavcodec/prosumer.c b/libavcodec/prosumer.c
index 6e98677b556d0..ce3cbdbb734f4 100644
--- a/libavcodec/prosumer.c
+++ b/libavcodec/prosumer.c
@@ -38,7 +38,7 @@ typedef struct ProSumerContext {
 
     unsigned stride;
     unsigned size;
-    uint32_t lut[0x10000];
+    uint32_t lut[0x2000];
     uint8_t *initial_line;
     uint8_t *decbuffer;
 } ProSumerContext;
@@ -57,27 +57,25 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
     b = lut[2 * idx];
 
     while (1) {
-        if (bytestream2_get_bytes_left_p(pb) <= 0)
+        if (bytestream2_get_bytes_left_p(pb) <= 0 || bytestream2_get_eof(pb))
             return 0;
-        if (((b & 0xFF00u) != 0x8000u) || (b & 0xFFu)) {
+        if ((b & 0xFF00u) != 0x8000u || (b & 0xFFu)) {
             if ((b & 0xFF00u) != 0x8000u) {
                 bytestream2_put_le16(pb, b);
-            } else if (b & 0xFFu) {
+            } else {
                 idx = 0;
                 for (int i = 0; i < (b & 0xFFu); i++)
                     bytestream2_put_le32(pb, 0);
             }
             c = b >> 16;
             if (c & 0xFF00u) {
-                c = (((c >> 8) & 0xFFu) | (c & 0xFF00)) & 0xF00F;
                 fill = lut[2 * idx + 1];
-                if ((c & 0xFF00u) == 0x1000) {
+                if ((c & 0xF000u) == 0x1000) {
                     bytestream2_put_le16(pb, fill);
-                    c &= 0xFFFF00FFu;
                 } else {
                     bytestream2_put_le32(pb, fill);
-                    c &= 0xFFFF00FFu;
                 }
+                c = (c >> 8) & 0x0Fu;
             }
             while (c) {
                 a <<= 4;
@@ -86,21 +84,20 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
                     if (bytestream2_get_bytes_left(gb) <= 0) {
                         if (!a)
                             return 0;
-                        cnt = 4;
                     } else {
-                        pos = bytestream2_tell(gb) ^ 2;
-                        bytestream2_seek(gb, pos, SEEK_SET);
+                        pos = bytestream2_tell(gb);
+                        bytestream2_seek(gb, pos ^ 2, SEEK_SET);
                         AV_WN16(&a, bytestream2_peek_le16(gb));
-                        pos = pos ^ 2;
-                        bytestream2_seek(gb, pos, SEEK_SET);
-                        bytestream2_skip(gb, 2);
-                        cnt = 4;
+                        bytestream2_seek(gb, pos + 2, SEEK_SET);
                     }
+                    cnt = 4;
                 }
                 c--;
             }
             idx = a >> 20;
             b = lut[2 * idx];
+            if (!b)
+                return AVERROR_INVALIDDATA;
             continue;
         }
         idx = 2;
@@ -119,12 +116,10 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
                 }
                 return 0;
             }
-            pos = bytestream2_tell(gb) ^ 2;
-            bytestream2_seek(gb, pos, SEEK_SET);
+            pos = bytestream2_tell(gb);
+            bytestream2_seek(gb, pos ^ 2, SEEK_SET);
             AV_WN16(&a, bytestream2_peek_le16(gb));
-            pos = pos ^ 2;
-            bytestream2_seek(gb, pos, SEEK_SET);
-            bytestream2_skip(gb, 2);
+            bytestream2_seek(gb, pos + 2, SEEK_SET);
             cnt = 4;
             idx--;
         }
@@ -161,8 +156,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     memset(s->decbuffer, 0, s->size);
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
     bytestream2_init_writer(&s->pb, s->decbuffer, s->size);
-
-    decompress(&s->gb, AV_RL32(avpkt->data + 28) >> 1, &s->pb, s->lut);
+    ret = decompress(&s->gb, AV_RL32(avpkt->data + 28) >> 1, &s->pb, s->lut);
+    if (ret < 0)
+        return ret;
     vertical_predict((uint32_t *)s->decbuffer, 0, (uint32_t *)s->initial_line, s->stride, 1);
     vertical_predict((uint32_t *)s->decbuffer, s->stride, (uint32_t *)s->decbuffer, s->stride, avctx->height - 1);
 
diff --git a/libavcodec/psd.c b/libavcodec/psd.c
index 43814471da14b..a31f73857a521 100644
--- a/libavcodec/psd.c
+++ b/libavcodec/psd.c
@@ -337,6 +337,30 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         }
         avctx->pix_fmt = AV_PIX_FMT_PAL8;
         break;
+    case PSD_CMYK:
+        if (s->channel_count == 4) {
+            if (s->channel_depth == 8) {
+                avctx->pix_fmt = AV_PIX_FMT_GBRP;
+            } else if (s->channel_depth == 16) {
+                avctx->pix_fmt = AV_PIX_FMT_GBRP16BE;
+            } else {
+                avpriv_report_missing_feature(avctx, "channel depth %d for cmyk", s->channel_depth);
+                return AVERROR_PATCHWELCOME;
+            }
+        } else if (s->channel_count == 5) {
+            if (s->channel_depth == 8) {
+                avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+            } else if (s->channel_depth == 16) {
+                avctx->pix_fmt = AV_PIX_FMT_GBRAP16BE;
+            } else {
+                avpriv_report_missing_feature(avctx, "channel depth %d for cmyk", s->channel_depth);
+                return AVERROR_PATCHWELCOME;
+            }
+        } else {
+            avpriv_report_missing_feature(avctx, "channel count %d for cmyk", s->channel_count);
+            return AVERROR_PATCHWELCOME;
+        }
+        break;
     case PSD_RGB:
         if (s->channel_count == 3) {
             if (s->channel_depth == 8) {
@@ -435,6 +459,66 @@ static int decode_frame(AVCodecContext *avctx, void *data,
                 }
             }
         }
+    } else if (s->color_mode == PSD_CMYK) {
+        uint8_t *dst[4] = { picture->data[0], picture->data[1], picture->data[2], picture->data[3] };
+        const uint8_t *src[5] = { ptr_data };
+        src[1] = src[0] + s->line_size * s->height;
+        src[2] = src[1] + s->line_size * s->height;
+        src[3] = src[2] + s->line_size * s->height;
+        src[4] = src[3] + s->line_size * s->height;
+        if (s->channel_depth == 8) {
+            for (y = 0; y < s->height; y++) {
+                for (x = 0; x < s->width; x++) {
+                    int k = src[3][x];
+                    int r = src[0][x] * k;
+                    int g = src[1][x] * k;
+                    int b = src[2][x] * k;
+                    dst[0][x] = g * 257 >> 16;
+                    dst[1][x] = b * 257 >> 16;
+                    dst[2][x] = r * 257 >> 16;
+                }
+                dst[0] += picture->linesize[0];
+                dst[1] += picture->linesize[1];
+                dst[2] += picture->linesize[2];
+                src[0] += s->line_size;
+                src[1] += s->line_size;
+                src[2] += s->line_size;
+                src[3] += s->line_size;
+            }
+            if (avctx->pix_fmt == AV_PIX_FMT_GBRAP) {
+                for (y = 0; y < s->height; y++) {
+                    memcpy(dst[3], src[4], s->line_size);
+                    src[4] += s->line_size;
+                    dst[3] += picture->linesize[3];
+                }
+            }
+        } else {
+            for (y = 0; y < s->height; y++) {
+                for (x = 0; x < s->width; x++) {
+                    int64_t k = AV_RB16(&src[3][x * 2]);
+                    int64_t r = AV_RB16(&src[0][x * 2]) * k;
+                    int64_t g = AV_RB16(&src[1][x * 2]) * k;
+                    int64_t b = AV_RB16(&src[2][x * 2]) * k;
+                    AV_WB16(&dst[0][x * 2], g * 65537 >> 32);
+                    AV_WB16(&dst[1][x * 2], b * 65537 >> 32);
+                    AV_WB16(&dst[2][x * 2], r * 65537 >> 32);
+                }
+                dst[0] += picture->linesize[0];
+                dst[1] += picture->linesize[1];
+                dst[2] += picture->linesize[2];
+                src[0] += s->line_size;
+                src[1] += s->line_size;
+                src[2] += s->line_size;
+                src[3] += s->line_size;
+            }
+            if (avctx->pix_fmt == AV_PIX_FMT_GBRAP16BE) {
+                for (y = 0; y < s->height; y++) {
+                    memcpy(dst[3], src[4], s->line_size);
+                    src[4] += s->line_size;
+                    dst[3] += picture->linesize[3];
+                }
+            }
+        }
     } else {/* Planar */
         if (s->channel_count == 1)/* gray 8 or gray 16be */
             eq_channel[0] = 0;/* assign first channel, to first plane */
diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c
index e1210c1972060..d4195c5f0b7f1 100644
--- a/libavcodec/qpeg.c
+++ b/libavcodec/qpeg.c
@@ -80,16 +80,27 @@ static void qpeg_decode_intra(QpegContext *qctx, uint8_t *dst,
 
             p = bytestream2_get_byte(&qctx->buffer);
             for(i = 0; i < run; i++) {
-                dst[filled++] = p;
+                int step = FFMIN(run - i, width - filled);
+                memset(dst+filled, p, step);
+                filled += step;
+                i      += step - 1;
                 if (filled >= width) {
                     filled = 0;
                     dst -= stride;
                     rows_to_go--;
+                    while (run - i > width && rows_to_go > 0) {
+                        memset(dst, p, width);
+                        dst -= stride;
+                        rows_to_go--;
+                        i += width;
+                    }
                     if(rows_to_go <= 0)
                         break;
                 }
             }
         } else {
+            if (bytestream2_get_bytes_left(&qctx->buffer) < copy)
+                copy = bytestream2_get_bytes_left(&qctx->buffer);
             for(i = 0; i < copy; i++) {
                 dst[filled++] = bytestream2_get_byte(&qctx->buffer);
                 if (filled >= width) {
diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 22e7a46a851dc..4a0be811fb92d 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -372,6 +372,8 @@ static int qsv_decode(AVCodecContext *avctx, QSVContext *q,
         ++q->zero_consume_run;
         if (q->zero_consume_run > 1)
             ff_qsv_print_warning(avctx, ret, "A decode call did not consume any data");
+    } else if (!*sync && bs.DataOffset) {
+        ++q->buffered_count;
     } else {
         q->zero_consume_run = 0;
     }
@@ -499,6 +501,8 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
         if (!q->avctx_internal)
             return AVERROR(ENOMEM);
 
+        q->avctx_internal->codec_id = avctx->codec_id;
+
         q->parser = av_parser_init(avctx->codec_id);
         if (!q->parser)
             return AVERROR(ENOMEM);
@@ -526,6 +530,16 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
                                            AV_PIX_FMT_NONE,
                                            AV_PIX_FMT_NONE };
         enum AVPixelFormat qsv_format;
+        AVPacket zero_pkt = {0};
+
+        if (q->buffered_count) {
+            q->reinit_flag = 1;
+            /* decode zero-size pkt to flush the buffered pkt before reinit */
+            q->buffered_count--;
+            return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
+        }
+
+        q->reinit_flag = 0;
 
         qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
         if (qsv_format < 0) {
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 5b7b03a48b5cd..111536caba335 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -53,6 +53,8 @@ typedef struct QSVContext {
 
     AVFifoBuffer *async_fifo;
     int zero_consume_run;
+    int buffered_count;
+    int reinit_flag;
 
     // the internal parser and codec context for parsing the data
     AVCodecParserContext *parser;
diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index d9d2318d1a423..9b49f5506e574 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -146,10 +146,11 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data,
             /* no more data */
             if (av_fifo_size(s->packet_fifo) < sizeof(AVPacket))
                 return avpkt->size ? avpkt->size : ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, avpkt);
-
-            av_packet_unref(&s->buffer_pkt);
-
-            av_fifo_generic_read(s->packet_fifo, &s->buffer_pkt, sizeof(s->buffer_pkt), NULL);
+            /* reinit in progress: do not read from the fifo, keep the current buffer_pkt */
+            if (!s->qsv.reinit_flag) {
+                av_packet_unref(&s->buffer_pkt);
+                av_fifo_generic_read(s->packet_fifo, &s->buffer_pkt, sizeof(s->buffer_pkt), NULL);
+            }
         }
 
         ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->buffer_pkt);
@@ -159,6 +160,8 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data,
             av_packet_unref(&s->buffer_pkt);
             return ret;
         }
+        if (s->qsv.reinit_flag)
+            continue;
 
         s->buffer_pkt.size -= ret;
         s->buffer_pkt.data += ret;
@@ -175,12 +178,6 @@ static void qsv_decode_flush(AVCodecContext *avctx)
     ff_qsv_decode_flush(avctx, &s->qsv);
 }
 
-#if defined(_WIN32)
-#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_SW
-#else
-#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_HW
-#endif
-
 #define OFFSET(x) offsetof(QSVH2645Context, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
@@ -188,7 +185,7 @@ static void qsv_decode_flush(AVCodecContext *avctx)
 static const AVOption hevc_options[] = {
     { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
 
-    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_HW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
     { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VD, "load_plugin" },
     { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VD, "load_plugin" },
     { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VD, "load_plugin" },
diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
index 993c7a8e807f0..03251d2c8536d 100644
--- a/libavcodec/qsvdec_other.c
+++ b/libavcodec/qsvdec_other.c
@@ -132,9 +132,11 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data,
             /* no more data */
             if (av_fifo_size(s->packet_fifo) < sizeof(AVPacket))
                 return avpkt->size ? avpkt->size : ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, avpkt);
-
-            av_packet_unref(&s->input_ref);
-            av_fifo_generic_read(s->packet_fifo, &s->input_ref, sizeof(s->input_ref), NULL);
+            /* reinit in progress: do not read from the fifo, keep the current input_ref */
+            if (!s->qsv.reinit_flag) {
+                av_packet_unref(&s->input_ref);
+                av_fifo_generic_read(s->packet_fifo, &s->input_ref, sizeof(s->input_ref), NULL);
+            }
         }
 
         ret = ff_qsv_process_data(avctx, &s->qsv, frame, got_frame, &s->input_ref);
@@ -145,6 +147,8 @@ static int qsv_decode_frame(AVCodecContext *avctx, void *data,
 
             return ret;
         }
+        if (s->qsv.reinit_flag)
+            continue;
 
         s->input_ref.size -= ret;
         s->input_ref.data += ret;
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 948751daf4ab8..5aa020d47b3c1 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -136,6 +136,9 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
 #if QSV_HAVE_CO2
     mfxExtCodingOption2 *co2 = (mfxExtCodingOption2*)coding_opts[1];
 #endif
+#if QSV_HAVE_CO3
+    mfxExtCodingOption3 *co3 = (mfxExtCodingOption3*)coding_opts[2];
+#endif
 
     av_log(avctx, AV_LOG_VERBOSE, "profile: %s; level: %"PRIu16"\n",
            print_profile(info->CodecProfile), info->CodecLevel);
@@ -158,8 +161,8 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
 #endif
         ) {
         av_log(avctx, AV_LOG_VERBOSE,
-               "BufferSizeInKB: %"PRIu16"; InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"\n",
-               info->BufferSizeInKB, info->InitialDelayInKB, info->TargetKbps, info->MaxKbps);
+               "BufferSizeInKB: %"PRIu16"; InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"; BRCParamMultiplier: %"PRIu16"\n",
+               info->BufferSizeInKB, info->InitialDelayInKB, info->TargetKbps, info->MaxKbps, info->BRCParamMultiplier);
     } else if (info->RateControlMethod == MFX_RATECONTROL_CQP) {
         av_log(avctx, AV_LOG_VERBOSE, "QPI: %"PRIu16"; QPP: %"PRIu16"; QPB: %"PRIu16"\n",
                info->QPI, info->QPP, info->QPB);
@@ -167,8 +170,8 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
 #if QSV_HAVE_AVBR
     else if (info->RateControlMethod == MFX_RATECONTROL_AVBR) {
         av_log(avctx, AV_LOG_VERBOSE,
-               "TargetKbps: %"PRIu16"; Accuracy: %"PRIu16"; Convergence: %"PRIu16"\n",
-               info->TargetKbps, info->Accuracy, info->Convergence);
+               "TargetKbps: %"PRIu16"; Accuracy: %"PRIu16"; Convergence: %"PRIu16"; BRCParamMultiplier: %"PRIu16"\n",
+               info->TargetKbps, info->Accuracy, info->Convergence, info->BRCParamMultiplier);
     }
 #endif
 #if QSV_HAVE_LA
@@ -178,8 +181,8 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
 #endif
              ) {
         av_log(avctx, AV_LOG_VERBOSE,
-               "TargetKbps: %"PRIu16"; LookAheadDepth: %"PRIu16"\n",
-               info->TargetKbps, co2->LookAheadDepth);
+               "TargetKbps: %"PRIu16"; LookAheadDepth: %"PRIu16"; BRCParamMultiplier: %"PRIu16"\n",
+               info->TargetKbps, co2->LookAheadDepth, info->BRCParamMultiplier);
     }
 #endif
 #if QSV_HAVE_ICQ
@@ -190,7 +193,12 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
                info->ICQQuality, co2->LookAheadDepth);
     }
 #endif
-
+#if QSV_HAVE_QVBR
+    else if (info->RateControlMethod == MFX_RATECONTROL_QVBR) {
+        av_log(avctx, AV_LOG_VERBOSE, "QVBRQuality: %"PRIu16"\n",
+               co3->QVBRQuality);
+    }
+#endif
     av_log(avctx, AV_LOG_VERBOSE, "NumSlice: %"PRIu16"; NumRefFrame: %"PRIu16"\n",
            info->NumSlice, info->NumRefFrame);
     av_log(avctx, AV_LOG_VERBOSE, "RateDistortionOpt: %s\n",
@@ -226,6 +234,10 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
     av_log(avctx, AV_LOG_VERBOSE, "\n");
 #endif
 
+#if QSV_HAVE_VDENC
+    av_log(avctx, AV_LOG_VERBOSE, "VDENC: %s\n", print_threestate(info->LowPower));
+#endif
+
 #if QSV_VERSION_ATLEAST(1, 8)
     av_log(avctx, AV_LOG_VERBOSE,
            "RepeatPPS: %s; NumMbPerSlice: %"PRIu16"; LookAheadDS: ",
@@ -263,6 +275,10 @@ static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q,
                print_threestate(co->NalHrdConformance), print_threestate(co->SingleSeiNalUnit),
                print_threestate(co->VuiVclHrdParameters), print_threestate(co->VuiNalHrdParameters));
     }
+
+    av_log(avctx, AV_LOG_VERBOSE, "FrameRateExtD: %"PRIu32"; FrameRateExtN: %"PRIu32" \n",
+           info->FrameInfo.FrameRateExtD, info->FrameInfo.FrameRateExtN);
+
 }
 
 static int select_rc_mode(AVCodecContext *avctx, QSVEncContext *q)
@@ -322,7 +338,7 @@ static int select_rc_mode(AVCodecContext *avctx, QSVEncContext *q)
     }
 #endif
 #if QSV_HAVE_ICQ
-    else if (avctx->global_quality > 0) {
+    else if (avctx->global_quality > 0 && !avctx->rc_max_rate) {
         rc_mode = MFX_RATECONTROL_ICQ;
         rc_desc = "intelligent constant quality (ICQ)";
     }
@@ -336,6 +352,12 @@ static int select_rc_mode(AVCodecContext *avctx, QSVEncContext *q)
         rc_mode = MFX_RATECONTROL_AVBR;
         rc_desc = "average variable bitrate (AVBR)";
     }
+#endif
+#if QSV_HAVE_QVBR
+    else if (avctx->global_quality > 0) {
+        rc_mode = MFX_RATECONTROL_QVBR;
+        rc_desc = "constant quality with VBR algorithm (QVBR)";
+    }
 #endif
     else {
         rc_mode = MFX_RATECONTROL_VBR;
@@ -443,7 +465,16 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
                                    avctx->sw_pix_fmt : avctx->pix_fmt;
     const AVPixFmtDescriptor *desc;
     float quant;
+    int target_bitrate_kbps, max_bitrate_kbps, brc_param_multiplier;
+    int buffer_size_in_kilobytes, initial_delay_in_kilobytes;
     int ret;
+    mfxVersion ver;
+
+    ret = MFXQueryVersion(q->session,&ver);
+    if (ret != MFX_ERR_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting the session handle\n");
+        return AVERROR_UNKNOWN;
+    }
 
     ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
     if (ret < 0)
@@ -464,6 +495,9 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
         }
     }
 
+#if QSV_HAVE_VDENC
+    q->param.mfx.LowPower           = q->low_power ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+#endif
     q->param.mfx.CodecProfile       = q->profile;
     q->param.mfx.TargetUsage        = avctx->compression_level;
     q->param.mfx.GopPicSize         = FFMAX(0, avctx->gop_size);
@@ -493,10 +527,10 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     q->param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
     q->param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
 
-    // TODO:  detect version of MFX--if the minor version is greater than
-    // or equal to 19, then can use the same alignment settings as H.264
-    // for HEVC
-    q->width_align = avctx->codec_id == AV_CODEC_ID_HEVC ? 32 : 16;
+    // If the runtime API version is at least 1.19, HEVC can use
+    // the same width alignment as H.264 (16); otherwise it needs 32
+    q->width_align = (avctx->codec_id != AV_CODEC_ID_HEVC ||
+                      QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 19)) ? 16 : 32;
     q->param.mfx.FrameInfo.Width = FFALIGN(avctx->width, q->width_align);
 
     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
@@ -534,16 +568,32 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     if (ret < 0)
         return ret;
 
+    // libmfx BRC parameters are 16-bit fields and may overflow, so they are scaled down by BRCParamMultiplier
+    buffer_size_in_kilobytes   = avctx->rc_buffer_size / 8000;
+    initial_delay_in_kilobytes = avctx->rc_initial_buffer_occupancy / 1000;
+    target_bitrate_kbps        = avctx->bit_rate / 1000;
+    max_bitrate_kbps           = avctx->rc_max_rate / 1000;
+    brc_param_multiplier       = (FFMAX(FFMAX3(target_bitrate_kbps, max_bitrate_kbps, buffer_size_in_kilobytes),
+                                  initial_delay_in_kilobytes) + 0x10000) / 0x10000;
+
     switch (q->param.mfx.RateControlMethod) {
     case MFX_RATECONTROL_CBR:
     case MFX_RATECONTROL_VBR:
 #if QSV_HAVE_VCM
     case MFX_RATECONTROL_VCM:
 #endif
-        q->param.mfx.BufferSizeInKB   = avctx->rc_buffer_size / 8000;
-        q->param.mfx.InitialDelayInKB = avctx->rc_initial_buffer_occupancy / 1000;
-        q->param.mfx.TargetKbps       = avctx->bit_rate / 1000;
-        q->param.mfx.MaxKbps          = avctx->rc_max_rate / 1000;
+#if QSV_HAVE_QVBR
+    case MFX_RATECONTROL_QVBR:
+#endif
+        q->param.mfx.BufferSizeInKB   = buffer_size_in_kilobytes / brc_param_multiplier;
+        q->param.mfx.InitialDelayInKB = initial_delay_in_kilobytes / brc_param_multiplier;
+        q->param.mfx.TargetKbps       = target_bitrate_kbps / brc_param_multiplier;
+        q->param.mfx.MaxKbps          = max_bitrate_kbps / brc_param_multiplier;
+        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
+#if QSV_HAVE_QVBR
+        if (q->param.mfx.RateControlMethod == MFX_RATECONTROL_QVBR)
+            q->extco3.QVBRQuality = av_clip(avctx->global_quality, 0, 51);
+#endif
         break;
     case MFX_RATECONTROL_CQP:
         quant = avctx->global_quality / FF_QP2LAMBDA;
@@ -555,15 +605,17 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
         break;
 #if QSV_HAVE_AVBR
     case MFX_RATECONTROL_AVBR:
-        q->param.mfx.TargetKbps  = avctx->bit_rate / 1000;
+        q->param.mfx.TargetKbps  = target_bitrate_kbps / brc_param_multiplier;
         q->param.mfx.Convergence = q->avbr_convergence;
         q->param.mfx.Accuracy    = q->avbr_accuracy;
+        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
         break;
 #endif
 #if QSV_HAVE_LA
     case MFX_RATECONTROL_LA:
-        q->param.mfx.TargetKbps  = avctx->bit_rate / 1000;
+        q->param.mfx.TargetKbps  = target_bitrate_kbps / brc_param_multiplier;
         q->extco2.LookAheadDepth = q->look_ahead_depth;
+        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
         break;
 #if QSV_HAVE_ICQ
     case MFX_RATECONTROL_LA_ICQ:
@@ -611,8 +663,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
         q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco;
 
-#if QSV_HAVE_CO2
         if (avctx->codec_id == AV_CODEC_ID_H264) {
+#if QSV_HAVE_CO2
             q->extco2.Header.BufferId     = MFX_EXTBUFF_CODING_OPTION2;
             q->extco2.Header.BufferSz     = sizeof(q->extco2);
 
@@ -641,11 +693,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             q->extco2.Trellis = q->trellis;
 #endif
 
-#if QSV_HAVE_LA_DS
+#if QSV_VERSION_ATLEAST(1, 8)
             q->extco2.LookAheadDS = q->look_ahead_downsampling;
-#endif
+            q->extco2.RepeatPPS   = q->repeat_pps ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
 
-#if QSV_HAVE_BREF_TYPE
 #if FF_API_PRIVATE_OPT
 FF_DISABLE_DEPRECATION_WARNINGS
             if (avctx->b_frame_strategy >= 0)
@@ -675,13 +726,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
 #endif
             q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco2;
-        }
 #endif
+
 #if QSV_HAVE_MF
-        if (avctx->codec_id == AV_CODEC_ID_H264) {
-            mfxVersion    ver;
-            ret = MFXQueryVersion(q->session,&ver);
-            if (ret >= MFX_ERR_NONE && QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
+            if (QSV_RUNTIME_VERSION_ATLEAST(ver, 1, 25)) {
                 q->extmfp.Header.BufferId     = MFX_EXTBUFF_MULTI_FRAME_PARAM;
                 q->extmfp.Header.BufferSz     = sizeof(q->extmfp);
 
@@ -689,7 +737,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 av_log(avctx,AV_LOG_VERBOSE,"MFMode:%d\n", q->extmfp.MFMode);
                 q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extmfp;
             }
+#endif
         }
+#if QSV_HAVE_CO3
+        q->extco3.Header.BufferId      = MFX_EXTBUFF_CODING_OPTION3;
+        q->extco3.Header.BufferSz      = sizeof(q->extco3);
+        q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco3;
 #endif
     }
 
@@ -712,7 +765,7 @@ static int qsv_retrieve_enc_jpeg_params(AVCodecContext *avctx, QSVEncContext *q)
         return ff_qsv_print_error(avctx, ret,
                                   "Error calling GetVideoParam");
 
-    q->packet_size = q->param.mfx.BufferSizeInKB * 1000;
+    q->packet_size = q->param.mfx.BufferSizeInKB * q->param.mfx.BRCParamMultiplier * 1000;
 
     // for qsv mjpeg the return value maybe 0 so alloc the buffer
     if (q->packet_size == 0)
@@ -745,12 +798,21 @@ static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
         .Header.BufferSz = sizeof(co2),
     };
 #endif
+#if QSV_HAVE_CO3
+    mfxExtCodingOption3 co3 = {
+        .Header.BufferId = MFX_EXTBUFF_CODING_OPTION3,
+        .Header.BufferSz = sizeof(co3),
+    };
+#endif
 
     mfxExtBuffer *ext_buffers[] = {
         (mfxExtBuffer*)&extradata,
         (mfxExtBuffer*)&co,
 #if QSV_HAVE_CO2
         (mfxExtBuffer*)&co2,
+#endif
+#if QSV_HAVE_CO3
+        (mfxExtBuffer*)&co3,
 #endif
     };
 
@@ -765,7 +827,7 @@ static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
         return ff_qsv_print_error(avctx, ret,
                                   "Error calling GetVideoParam");
 
-    q->packet_size = q->param.mfx.BufferSizeInKB * 1000;
+    q->packet_size = q->param.mfx.BufferSizeInKB * q->param.mfx.BRCParamMultiplier * 1000;
 
     if (!extradata.SPSBufSize || (need_pps && !extradata.PPSBufSize)) {
         av_log(avctx, AV_LOG_ERROR, "No extradata returned from libmfx.\n");
@@ -1192,6 +1254,13 @@ static int encode_frame(AVCodecContext *avctx, QSVEncContext *q,
     if (qsv_frame) {
         surf = &qsv_frame->surface;
         enc_ctrl = &qsv_frame->enc_ctrl;
+        memset(enc_ctrl, 0, sizeof(mfxEncodeCtrl));
+
+        if (frame->pict_type == AV_PICTURE_TYPE_I) {
+            enc_ctrl->FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF;
+            if (q->forced_idr)
+                enc_ctrl->FrameType |= MFX_FRAMETYPE_IDR;
+        }
     }
 
     ret = av_new_packet(&new_pkt, q->packet_size);
@@ -1320,16 +1389,22 @@ int ff_qsv_encode(AVCodecContext *avctx, QSVEncContext *q,
         new_pkt.pts  = av_rescale_q(bs->TimeStamp,       (AVRational){1, 90000}, avctx->time_base);
         new_pkt.size = bs->DataLength;
 
-        if (bs->FrameType & MFX_FRAMETYPE_IDR ||
-            bs->FrameType & MFX_FRAMETYPE_xIDR)
+        if (bs->FrameType & MFX_FRAMETYPE_IDR || bs->FrameType & MFX_FRAMETYPE_xIDR) {
             new_pkt.flags |= AV_PKT_FLAG_KEY;
-
-        if (bs->FrameType & MFX_FRAMETYPE_I || bs->FrameType & MFX_FRAMETYPE_xI)
+            pict_type = AV_PICTURE_TYPE_I;
+        } else if (bs->FrameType & MFX_FRAMETYPE_I || bs->FrameType & MFX_FRAMETYPE_xI)
             pict_type = AV_PICTURE_TYPE_I;
         else if (bs->FrameType & MFX_FRAMETYPE_P || bs->FrameType & MFX_FRAMETYPE_xP)
             pict_type = AV_PICTURE_TYPE_P;
         else if (bs->FrameType & MFX_FRAMETYPE_B || bs->FrameType & MFX_FRAMETYPE_xB)
             pict_type = AV_PICTURE_TYPE_B;
+        else if (bs->FrameType == MFX_FRAMETYPE_UNKNOWN) {
+            pict_type = AV_PICTURE_TYPE_NONE;
+            av_log(avctx, AV_LOG_WARNING, "Unknown FrameType, set pict_type to AV_PICTURE_TYPE_NONE.\n");
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Invalid FrameType:%d.\n", bs->FrameType);
+            return AVERROR_INVALIDDATA;
+        }
 
 #if FF_API_CODED_FRAME
 FF_DISABLE_DEPRECATION_WARNINGS
diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index 50cc4267e7c05..00afbd80aafd6 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h
@@ -44,6 +44,7 @@
 #define QSV_HAVE_LA     QSV_VERSION_ATLEAST(1, 7)
 #define QSV_HAVE_LA_DS  QSV_VERSION_ATLEAST(1, 8)
 #define QSV_HAVE_LA_HRD QSV_VERSION_ATLEAST(1, 11)
+#define QSV_HAVE_VDENC  QSV_VERSION_ATLEAST(1, 15)
 
 #if defined(_WIN32) || defined(__CYGWIN__)
 #define QSV_HAVE_AVBR   QSV_VERSION_ATLEAST(1, 3)
@@ -53,9 +54,9 @@
 #define QSV_HAVE_MF     0
 #else
 #define QSV_HAVE_AVBR   0
-#define QSV_HAVE_ICQ    0
+#define QSV_HAVE_ICQ    QSV_VERSION_ATLEAST(1, 28)
 #define QSV_HAVE_VCM    0
-#define QSV_HAVE_QVBR   0
+#define QSV_HAVE_QVBR   QSV_VERSION_ATLEAST(1, 28)
 #define QSV_HAVE_MF     QSV_VERSION_ATLEAST(1, 25)
 #endif
 
@@ -87,6 +88,7 @@
 { "adaptive_i",     "Adaptive I-frame placement",             OFFSET(qsv.adaptive_i),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
 { "adaptive_b",     "Adaptive B-frame placement",             OFFSET(qsv.adaptive_b),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
 { "b_strategy",     "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "forced_idr",     "Forcing I frames as IDR frames",         OFFSET(qsv.forced_idr),     AV_OPT_TYPE_BOOL,{ .i64 = 0  },  0,          1, VE },                         \
 
 typedef int SetEncodeCtrlCB (AVCodecContext *avctx,
                              const AVFrame *frame, mfxEncodeCtrl* enc_ctrl);
@@ -109,6 +111,9 @@ typedef struct QSVEncContext {
 #if QSV_HAVE_CO2
     mfxExtCodingOption2 extco2;
 #endif
+#if QSV_HAVE_CO3
+    mfxExtCodingOption3 extco3;
+#endif
 #if QSV_HAVE_MF
     mfxExtMultiFrameParam   extmfp;
     mfxExtMultiFrameControl extmfc;
@@ -117,7 +122,7 @@ typedef struct QSVEncContext {
     mfxFrameSurface1       **opaque_surfaces;
     AVBufferRef             *opaque_alloc_buf;
 
-    mfxExtBuffer  *extparam_internal[2 + QSV_HAVE_CO2 + (QSV_HAVE_MF * 2)];
+    mfxExtBuffer  *extparam_internal[2 + QSV_HAVE_CO2 + QSV_HAVE_CO3 + (QSV_HAVE_MF * 2)];
     int         nb_extparam_internal;
 
     mfxExtBuffer **extparam;
@@ -161,6 +166,9 @@ typedef struct QSVEncContext {
     int int_ref_qp_delta;
     int recovery_point_sei;
 
+    int repeat_pps;
+    int low_power;
+
     int a53_cc;
 
 #if QSV_HAVE_MF
@@ -168,6 +176,7 @@ typedef struct QSVEncContext {
 #endif
     char *load_plugins;
     SetEncodeCtrlCB *set_encode_ctrl_cb;
+    int forced_idr;
 } QSVEncContext;
 
 int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q);
diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c
index 07c9d64e6b81f..f458137848cb4 100644
--- a/libavcodec/qsvenc_h264.c
+++ b/libavcodec/qsvenc_h264.c
@@ -154,6 +154,12 @@ static const AVOption options[] = {
     { "auto"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_MF_AUTO     }, INT_MIN, INT_MAX,     VE, "mfmode" },
 #endif
 
+#if QSV_HAVE_VDENC
+    { "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 =  0 }, 0, 1, VE},
+#endif
+
+    { "repeat_pps", "repeat pps for every frame", OFFSET(qsv.repeat_pps), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+
     { NULL },
 };
 
diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
index 4339b316a3d5a..1c615b4e818d8 100644
--- a/libavcodec/qsvenc_hevc.c
+++ b/libavcodec/qsvenc_hevc.c
@@ -217,12 +217,6 @@ static av_cold int qsv_enc_close(AVCodecContext *avctx)
     return ff_qsv_enc_close(avctx, &q->qsv);
 }
 
-#if defined(_WIN32)
-#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_SW
-#else
-#define LOAD_PLUGIN_DEFAULT LOAD_PLUGIN_HEVC_HW
-#endif
-
 #define OFFSET(x) offsetof(QSVHEVCEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
@@ -230,7 +224,7 @@ static const AVOption options[] = {
 
     { "idr_interval", "Distance (in I-frames) between IDR frames", OFFSET(qsv.idr_interval), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, INT_MAX, VE, "idr_interval" },
     { "begin_only", "Output an IDR-frame only at the beginning of the stream", 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, VE, "idr_interval" },
-    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_HW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
     { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VE, "load_plugin" },
     { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VE, "load_plugin" },
     { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VE, "load_plugin" },
diff --git a/libavcodec/qsvenc_jpeg.c b/libavcodec/qsvenc_jpeg.c
index c18fe91940bb3..1e7785a8261de 100644
--- a/libavcodec/qsvenc_jpeg.c
+++ b/libavcodec/qsvenc_jpeg.c
@@ -64,6 +64,7 @@ static av_cold int qsv_enc_close(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(QSVMJPEGEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
+    { "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VE },
     { NULL },
 };
 
@@ -74,6 +75,11 @@ static const AVClass class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
+static const AVCodecDefault qsv_enc_defaults[] = {
+    { "global_quality",  "80" },
+    { NULL },
+};
+
 AVCodec ff_mjpeg_qsv_encoder = {
     .name           = "mjpeg_qsv",
     .long_name      = NULL_IF_CONFIG_SMALL("MJPEG (Intel Quick Sync Video acceleration)"),
@@ -88,5 +94,6 @@ AVCodec ff_mjpeg_qsv_encoder = {
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
     .priv_class     = &class,
+    .defaults       = qsv_enc_defaults,
     .wrapper_name   = "qsv",
 };
diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index 6155b4f3e3c97..a744d7ba681d9 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -297,10 +297,11 @@ static void qtrle_decode_16bpp(QtrleContext *s, int row_ptr, int lines_to_change
 
 static void qtrle_decode_24bpp(QtrleContext *s, int row_ptr, int lines_to_change)
 {
-    int rle_code;
+    int rle_code, rle_code_half;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    uint8_t r, g, b;
+    uint8_t b;
+    uint16_t rg;
     uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
 
@@ -318,25 +319,31 @@ static void qtrle_decode_24bpp(QtrleContext *s, int row_ptr, int lines_to_change
             } else if (rle_code < 0) {
                 /* decode the run length code */
                 rle_code = -rle_code;
-                r = bytestream2_get_byte(&s->g);
-                g = bytestream2_get_byte(&s->g);
+                rg = bytestream2_get_ne16(&s->g);
                 b = bytestream2_get_byte(&s->g);
 
                 CHECK_PIXEL_PTR(rle_code * 3);
 
                 while (rle_code--) {
-                    rgb[pixel_ptr++] = r;
-                    rgb[pixel_ptr++] = g;
-                    rgb[pixel_ptr++] = b;
+                    AV_WN16(rgb + pixel_ptr, rg); /* unaligned store: pixel_ptr advances in steps of 3 */
+                    rgb[pixel_ptr + 2] = b;
+                    pixel_ptr += 3;
                 }
             } else {
                 CHECK_PIXEL_PTR(rle_code * 3);
 
-                /* copy pixels directly to output */
-                while (rle_code--) {
-                    rgb[pixel_ptr++] = bytestream2_get_byte(&s->g);
-                    rgb[pixel_ptr++] = bytestream2_get_byte(&s->g);
-                    rgb[pixel_ptr++] = bytestream2_get_byte(&s->g);
+                rle_code_half = rle_code / 2;
+
+                while (rle_code_half--) { /* copy two raw RGB pixels (6 bytes) per iteration */
+                    AV_WN32(rgb + pixel_ptr, bytestream2_get_ne32(&s->g)); /* r g b r; unaligned store */
+                    AV_WN16(rgb + pixel_ptr + 4, bytestream2_get_ne16(&s->g)); /* g b */
+                    pixel_ptr += 6;
+                }
+
+                if (rle_code % 2 != 0){ /* odd count: copy the one remaining pixel */
+                    AV_WN16(rgb + pixel_ptr, bytestream2_get_ne16(&s->g));
+                    rgb[pixel_ptr + 2] = bytestream2_get_byte(&s->g);
+                    pixel_ptr += 3;
                 }
             }
         }
@@ -346,7 +353,7 @@ static void qtrle_decode_24bpp(QtrleContext *s, int row_ptr, int lines_to_change
 
 static void qtrle_decode_32bpp(QtrleContext *s, int row_ptr, int lines_to_change)
 {
-    int rle_code;
+    int rle_code, rle_code_half;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
     unsigned int argb;
@@ -367,7 +374,7 @@ static void qtrle_decode_32bpp(QtrleContext *s, int row_ptr, int lines_to_change
             } else if (rle_code < 0) {
                 /* decode the run length code */
                 rle_code = -rle_code;
-                argb = bytestream2_get_be32(&s->g);
+                argb = bytestream2_get_ne32(&s->g);
 
                 CHECK_PIXEL_PTR(rle_code * 4);
 
@@ -379,10 +386,15 @@ static void qtrle_decode_32bpp(QtrleContext *s, int row_ptr, int lines_to_change
                 CHECK_PIXEL_PTR(rle_code * 4);
 
                 /* copy pixels directly to output */
-                while (rle_code--) {
-                    argb = bytestream2_get_be32(&s->g);
-                    AV_WN32A(rgb + pixel_ptr, argb);
-                    pixel_ptr  += 4;
+                rle_code_half = rle_code / 2;
+                while (rle_code_half--) { /* copy two raw ARGB pixels (8 bytes) per iteration */
+                    AV_WN64(rgb + pixel_ptr, bytestream2_get_ne64(&s->g)); /* unaligned store: pixel_ptr advances in 4-byte steps, so 8-byte alignment is not guaranteed */
+                    pixel_ptr += 8;
+                }
+
+                if (rle_code % 2 != 0){ /* odd count: copy the one remaining pixel */
+                    AV_WN32A(rgb + pixel_ptr, bytestream2_get_ne32(&s->g));
+                    pixel_ptr += 4;
                 }
             }
         }
@@ -416,7 +428,7 @@ static av_cold int qtrle_decode_init(AVCodecContext *avctx)
         break;
 
     case 32:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_ARGB;
         break;
 
     default:
diff --git a/libavcodec/r210dec.c b/libavcodec/r210dec.c
index dbc94c76bd3ce..407684c7fcd60 100644
--- a/libavcodec/r210dec.c
+++ b/libavcodec/r210dec.c
@@ -27,11 +27,7 @@
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
-    if ((avctx->codec_tag & 0xFFFFFF) == MKTAG('r', '1', '0', 0)) {
-        avctx->pix_fmt = AV_PIX_FMT_BGR48;
-    } else {
-        avctx->pix_fmt = AV_PIX_FMT_RGB48;
-    }
+    avctx->pix_fmt = AV_PIX_FMT_GBRP10;
     avctx->bits_per_raw_sample = 10;
 
     return 0;
@@ -45,7 +41,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     const uint32_t *src = (const uint32_t *)avpkt->data;
     int aligned_width = FFALIGN(avctx->width,
                                 avctx->codec_id == AV_CODEC_ID_R10K ? 1 : 64);
-    uint8_t *dst_line;
+    uint8_t *g_line, *b_line, *r_line;
     int r10 = (avctx->codec_tag & 0xFFFFFF) == MKTAG('r', '1', '0', 0);
     int le = avctx->codec_tag == MKTAG('R', '1', '0', 'k') &&
              avctx->extradata_size >= 12 && !memcmp(&avctx->extradata[4], "DpxE", 4) &&
@@ -61,10 +57,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     pic->pict_type = AV_PICTURE_TYPE_I;
     pic->key_frame = 1;
-    dst_line = pic->data[0];
+    g_line = pic->data[0];
+    b_line = pic->data[1];
+    r_line = pic->data[2];
 
     for (h = 0; h < avctx->height; h++) {
-        uint16_t *dst = (uint16_t *)dst_line;
+        uint16_t *dstg = (uint16_t *)g_line;
+        uint16_t *dstb = (uint16_t *)b_line;
+        uint16_t *dstr = (uint16_t *)r_line;
         for (w = 0; w < avctx->width; w++) {
             uint32_t pixel;
             uint16_t r, g, b;
@@ -73,21 +73,27 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             } else {
                 pixel = av_be2ne32(*src++);
             }
-            if (avctx->codec_id == AV_CODEC_ID_R210 || r10) {
-                b =  pixel <<  6;
-                g = (pixel >>  4) & 0xffc0;
-                r = (pixel >> 14) & 0xffc0;
+            if (avctx->codec_id == AV_CODEC_ID_R210) {
+                b =  pixel & 0x3ff;
+                g = (pixel >> 10) & 0x3ff;
+                r = (pixel >> 20) & 0x3ff;
+            } else if (r10) {
+                r =  pixel & 0x3ff;
+                g = (pixel >> 10) & 0x3ff;
+                b = (pixel >> 20) & 0x3ff;
             } else {
-                b = (pixel <<  4) & 0xffc0;
-                g = (pixel >>  6) & 0xffc0;
-                r = (pixel >> 16) & 0xffc0;
+                b = (pixel >>  2) & 0x3ff;
+                g = (pixel >> 12) & 0x3ff;
+                r = (pixel >> 22) & 0x3ff;
             }
-            *dst++ = r | (r >> 10);
-            *dst++ = g | (g >> 10);
-            *dst++ = b | (b >> 10);
+            *dstr++ = r;
+            *dstg++ = g;
+            *dstb++ = b;
         }
         src += aligned_width - avctx->width;
-        dst_line += pic->linesize[0];
+        g_line += pic->linesize[0];
+        b_line += pic->linesize[1];
+        r_line += pic->linesize[2];
     }
 
     *got_frame      = 1;
diff --git a/libavcodec/r210enc.c b/libavcodec/r210enc.c
index a55e5434f317f..02412f3684a91 100644
--- a/libavcodec/r210enc.c
+++ b/libavcodec/r210enc.c
@@ -43,22 +43,26 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int aligned_width = FFALIGN(avctx->width,
                                 avctx->codec_id == AV_CODEC_ID_R10K ? 1 : 64);
     int pad = (aligned_width - avctx->width) * 4;
-    uint8_t *src_line;
+    uint8_t *srcr_line, *srcg_line, *srcb_line;
     uint8_t *dst;
 
     if ((ret = ff_alloc_packet2(avctx, pkt, 4 * aligned_width * avctx->height, 0)) < 0)
         return ret;
 
-    src_line = pic->data[0];
+    srcg_line = pic->data[0];
+    srcb_line = pic->data[1];
+    srcr_line = pic->data[2];
     dst = pkt->data;
 
     for (i = 0; i < avctx->height; i++) {
-        uint16_t *src = (uint16_t *)src_line;
+        uint16_t *srcr = (uint16_t *)srcr_line;
+        uint16_t *srcg = (uint16_t *)srcg_line;
+        uint16_t *srcb = (uint16_t *)srcb_line;
         for (j = 0; j < avctx->width; j++) {
             uint32_t pixel;
-            uint16_t r = *src++ >> 6;
-            uint16_t g = *src++ >> 6;
-            uint16_t b = *src++ >> 6;
+            uint16_t r = *srcr++;
+            uint16_t g = *srcg++;
+            uint16_t b = *srcb++;
             if (avctx->codec_id == AV_CODEC_ID_R210)
                 pixel = (r << 20) | (g << 10) | b;
             else
@@ -70,7 +74,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
         memset(dst, 0, pad);
         dst += pad;
-        src_line += pic->linesize[0];
+        srcr_line += pic->linesize[2];
+        srcg_line += pic->linesize[0];
+        srcb_line += pic->linesize[1];
     }
 
     pkt->flags |= AV_PKT_FLAG_KEY;
@@ -87,7 +93,7 @@ AVCodec ff_r210_encoder = {
     .id             = AV_CODEC_ID_R210,
     .init           = encode_init,
     .encode2        = encode_frame,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_GBRP10, AV_PIX_FMT_NONE },
     .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
@@ -99,7 +105,7 @@ AVCodec ff_r10k_encoder = {
     .id             = AV_CODEC_ID_R10K,
     .init           = encode_init,
     .encode2        = encode_frame,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_GBRP10, AV_PIX_FMT_NONE },
     .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
@@ -111,7 +117,7 @@ AVCodec ff_avrp_encoder = {
     .id             = AV_CODEC_ID_AVRP,
     .init           = encode_init,
     .encode2        = encode_frame,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_GBRP10, AV_PIX_FMT_NONE },
     .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
diff --git a/libavcodec/rangecoder.c b/libavcodec/rangecoder.c
index 0d53bef076073..a6a3f082efcfc 100644
--- a/libavcodec/rangecoder.c
+++ b/libavcodec/rangecoder.c
@@ -106,8 +106,10 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p)
 }
 
 /* Return the number of bytes written. */
-int ff_rac_terminate(RangeCoder *c)
+int ff_rac_terminate(RangeCoder *c, int version)
 {
+    if (version == 1)
+        put_rac(c, (uint8_t[]) { 129 }, 0);
     c->range = 0xFF;
     c->low  += 0xFF;
     renorm_encoder(c);
@@ -119,3 +121,22 @@ int ff_rac_terminate(RangeCoder *c)
 
     return c->bytestream - c->bytestream_start;
 }
+
+int ff_rac_check_termination(RangeCoder *c, int version) /* returns 0 if the termination looks valid, AVERROR_INVALIDDATA otherwise */
+{
+    if (version == 1) {
+        RangeCoder tmp = *c; /* snapshot of the coder state before the terminator symbol is read */
+        get_rac(c, (uint8_t[]) { 129 }); /* advance c past the symbol; the decoded bit is not examined here */
+
+        if (c->bytestream == tmp.bytestream && c->bytestream > c->bytestream_start) /* c consumed no byte and the read position is past the buffer start */
+            tmp.low -= *--tmp.bytestream; /* step the snapshot back one byte; NOTE(review): presumably undoes read-ahead, confirm against get_rac() */
+        tmp.bytestream_end = tmp.bytestream; /* the snapshot's input now ends at its current position */
+
+        if (get_rac(&tmp, (uint8_t[]) { 129 })) /* re-decode from the snapshot; a nonzero bit is rejected (ff_rac_terminate() codes 0 for version 1) */
+            return AVERROR_INVALIDDATA;
+    } else { /* any other version: valid only when the read position is exactly at the end of the buffer */
+        if (c->bytestream_end != c->bytestream)
+            return AVERROR_INVALIDDATA;
+    }
+    return 0;
+}
diff --git a/libavcodec/rangecoder.h b/libavcodec/rangecoder.h
index 44af88b8f57f1..4d4ca4d526c33 100644
--- a/libavcodec/rangecoder.h
+++ b/libavcodec/rangecoder.h
@@ -48,7 +48,24 @@ typedef struct RangeCoder {
 
 void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size);
 void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size);
-int ff_rac_terminate(RangeCoder *c);
+
+/**
+ * Terminates the range coder
+ * @param version version 0 requires the decoder to know the data size in bytes
+ *                version 1 needs about 1 bit more space but does not need to
+ *                          carry the size from encoder to decoder
+ */
+int ff_rac_terminate(RangeCoder *c, int version);
+
+/**
+ * Check if at the current position there is a valid looking termination
+ * @param version version 0 requires the decoder to know the data size in bytes
+ *                version 1 needs about 1 bit more space but does not need to
+ *                          carry the size from encoder to decoder
+ * @returns a negative AVERROR code on error, a non-negative value otherwise.
+ */
+int ff_rac_check_termination(RangeCoder *c, int version);
+
 void ff_build_rac_states(RangeCoder *c, int factor, int max_p);
 
 static inline void renorm_encoder(RangeCoder *c)
diff --git a/libavcodec/rasc.c b/libavcodec/rasc.c
index e8e0740ddd5f3..21fc43f325c76 100644
--- a/libavcodec/rasc.c
+++ b/libavcodec/rasc.c
@@ -215,7 +215,7 @@ static int decode_move(AVCodecContext *avctx,
     bytestream2_skip(gb, 8);
     compression = bytestream2_get_le32(gb);
 
-    if (nb_moves > INT32_MAX / 16)
+    if (nb_moves > INT32_MAX / 16 || nb_moves > avctx->width * avctx->height)
         return AVERROR_INVALIDDATA;
 
     uncompressed_size = 16 * nb_moves;
@@ -353,6 +353,8 @@ static int decode_dlta(AVCodecContext *avctx,
     compression = bytestream2_get_le32(gb);
 
     if (compression == 1) {
+        if (w * h * s->bpp * 3 < uncompressed_size)
+            return AVERROR_INVALIDDATA;
         ret = decode_zlib(avctx, avpkt, size, uncompressed_size);
         if (ret < 0)
             return ret;
@@ -680,6 +682,9 @@ static int decode_frame(AVCodecContext *avctx,
     while (bytestream2_get_bytes_left(gb) > 0) {
         unsigned type, size = 0;
 
+        if (bytestream2_get_bytes_left(gb) < 8)
+            return AVERROR_INVALIDDATA;
+
         type = bytestream2_get_le32(gb);
         if (type == KBND || type == BNDL) {
             intra = type == KBND;
@@ -718,12 +723,12 @@ static int decode_frame(AVCodecContext *avctx,
             return ret;
     }
 
-    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
-        return ret;
-
     if (!s->frame2->data[0] || !s->frame1->data[0])
         return AVERROR_INVALIDDATA;
 
+    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
+        return ret;
+
     copy_plane(avctx, s->frame2, s->frame);
     if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
         memcpy(s->frame->data[1], s->frame2->data[1], 1024);
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index d731c087d1e18..b6fb91c1c64f0 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -177,6 +177,10 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUVA422P10BE, MKTAG(10 , 10 , '4', 'Y') },
     { AV_PIX_FMT_YUVA444P10LE, MKTAG('Y', '4',  0 , 10 ) },
     { AV_PIX_FMT_YUVA444P10BE, MKTAG(10 ,  0 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA422P12LE, MKTAG('Y', '4', 10 , 12 ) },
+    { AV_PIX_FMT_YUVA422P12BE, MKTAG(12 , 10 , '4', 'Y') },
+    { AV_PIX_FMT_YUVA444P12LE, MKTAG('Y', '4',  0 , 12 ) },
+    { AV_PIX_FMT_YUVA444P12BE, MKTAG(12 ,  0 , '4', 'Y') },
     { AV_PIX_FMT_YUVA420P16LE, MKTAG('Y', '4', 11 , 16 ) },
     { AV_PIX_FMT_YUVA420P16BE, MKTAG(16 , 11 , '4', 'Y') },
     { AV_PIX_FMT_YUVA422P16LE, MKTAG('Y', '4', 10 , 16 ) },
diff --git a/libavcodec/rpza.c b/libavcodec/rpza.c
index b71ebd1cbe946..8e1efa24450ef 100644
--- a/libavcodec/rpza.c
+++ b/libavcodec/rpza.c
@@ -73,13 +73,12 @@ typedef struct RpzaContext {
 static int rpza_decode_stream(RpzaContext *s)
 {
     int width = s->avctx->width;
-    int stride = s->frame->linesize[0] / 2;
-    int row_inc = stride - 4;
+    int stride, row_inc, ret;
     int chunk_size;
     uint16_t colorA = 0, colorB;
     uint16_t color4[4];
     uint16_t ta, tb;
-    uint16_t *pixels = (uint16_t *)s->frame->data[0];
+    uint16_t *pixels;
 
     int row_ptr = 0;
     int pixel_ptr = 0;
@@ -106,6 +105,15 @@ static int rpza_decode_stream(RpzaContext *s)
     /* Number of 4x4 blocks in frame. */
     total_blocks = ((s->avctx->width + 3) / 4) * ((s->avctx->height + 3) / 4);
 
+    if (total_blocks / 32 > bytestream2_get_bytes_left(&s->gb))
+        return AVERROR_INVALIDDATA;
+
+    if ((ret = ff_reget_buffer(s->avctx, s->frame)) < 0)
+        return ret;
+    pixels = (uint16_t *)s->frame->data[0];
+    stride = s->frame->linesize[0] / 2;
+    row_inc = stride - 4;
+
     /* Process chunk data */
     while (bytestream2_get_bytes_left(&s->gb)) {
         uint8_t opcode = bytestream2_get_byte(&s->gb); /* Get opcode */
@@ -256,9 +264,6 @@ static int rpza_decode_frame(AVCodecContext *avctx,
 
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
-        return ret;
-
     ret = rpza_decode_stream(s);
     if (ret < 0)
         return ret;
diff --git a/libavcodec/rscc.c b/libavcodec/rscc.c
index 7921f149ed5dd..7d4e842cd3f81 100644
--- a/libavcodec/rscc.c
+++ b/libavcodec/rscc.c
@@ -64,6 +64,7 @@ typedef struct RsccContext {
     /* zlib interaction */
     uint8_t *inflated_buf;
     uLongf inflated_size;
+    int valid_pixels;
 } RsccContext;
 
 static av_cold int rscc_init(AVCodecContext *avctx)
@@ -347,8 +348,11 @@ static int rscc_decode_frame(AVCodecContext *avctx, void *data,
         }
         memcpy (frame->data[1], ctx->palette, AVPALETTE_SIZE);
     }
-
-    *got_frame = 1;
+    // Only return a picture when enough of it is undamaged; this avoids copying nearly broken frames around.
+    if (ctx->valid_pixels < ctx->inflated_size)
+        ctx->valid_pixels += pixel_size;
+    if (ctx->valid_pixels >= ctx->inflated_size * (100 - avctx->discard_damaged_percentage) / 100)
+        *got_frame = 1;
 
     ret = avpkt->size;
 end:
diff --git a/libavcodec/sbrdsp_fixed.c b/libavcodec/sbrdsp_fixed.c
index 57d98da97997f..91fa664c0872e 100644
--- a/libavcodec/sbrdsp_fixed.c
+++ b/libavcodec/sbrdsp_fixed.c
@@ -34,32 +34,36 @@
 static SoftFloat sbr_sum_square_c(int (*x)[2], int n)
 {
     SoftFloat ret;
-    uint64_t accu, round;
+    uint64_t accu = 0, round;
     uint64_t accu0 = 0, accu1 = 0, accu2 = 0, accu3 = 0;
     int i, nz, nz0;
     unsigned u;
 
+    nz = 0;
     for (i = 0; i < n; i += 2) {
-        // Larger values are inavlid and could cause overflows of accu.
-        av_assert2(FFABS(x[i + 0][0]) >> 30 == 0);
         accu0 += (int64_t)x[i + 0][0] * x[i + 0][0];
-        av_assert2(FFABS(x[i + 0][1]) >> 30 == 0);
         accu1 += (int64_t)x[i + 0][1] * x[i + 0][1];
-        av_assert2(FFABS(x[i + 1][0]) >> 30 == 0);
         accu2 += (int64_t)x[i + 1][0] * x[i + 1][0];
-        av_assert2(FFABS(x[i + 1][1]) >> 30 == 0);
         accu3 += (int64_t)x[i + 1][1] * x[i + 1][1];
+        if ((accu0|accu1|accu2|accu3) > UINT64_MAX - INT32_MIN*(int64_t)INT32_MIN || i+2>=n) {
+            accu0 >>= nz;
+            accu1 >>= nz;
+            accu2 >>= nz;
+            accu3 >>= nz;
+            while ((accu0|accu1|accu2|accu3) > (UINT64_MAX - accu) >> 2) {
+                accu0 >>= 1;
+                accu1 >>= 1;
+                accu2 >>= 1;
+                accu3 >>= 1;
+                accu  >>= 1;
+                nz ++;
+            }
+            accu += accu0 + accu1 + accu2 + accu3;
+            accu0 = accu1 = accu2 = accu3 = 0;
+        }
     }
 
-    nz0 = 15;
-    while ((accu0|accu1|accu2|accu3) >> 62) {
-        accu0 >>= 1;
-        accu1 >>= 1;
-        accu2 >>= 1;
-        accu3 >>= 1;
-        nz0 --;
-    }
-    accu = accu0 + accu1 + accu2 + accu3;
+    nz0 = 15 - nz;
 
     u = accu >> 32;
     if (u) {
diff --git a/libavcodec/scpr.c b/libavcodec/scpr.c
index e41fbbec13c7b..750cf59fe49ee 100644
--- a/libavcodec/scpr.c
+++ b/libavcodec/scpr.c
@@ -27,44 +27,13 @@
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "scpr.h"
+#include "scpr3.h"
 
 #define TOP  0x01000000
 #define BOT    0x010000
 
-typedef struct RangeCoder {
-    unsigned   code;
-    unsigned   range;
-    unsigned   code1;
-} RangeCoder;
-
-typedef struct PixelModel {
-    unsigned    freq[256];
-    unsigned    lookup[16];
-    unsigned    total_freq;
-} PixelModel;
-
-typedef struct SCPRContext {
-    AVFrame        *last_frame;
-    AVFrame        *current_frame;
-    GetByteContext  gb;
-    RangeCoder      rc;
-    PixelModel      pixel_model[3][4096];
-    unsigned        op_model[6][7];
-    unsigned        run_model[6][257];
-    unsigned        range_model[257];
-    unsigned        count_model[257];
-    unsigned        fill_model[6];
-    unsigned        sxy_model[4][17];
-    unsigned        mv_model[2][513];
-    unsigned        nbx, nby;
-    unsigned        nbcount;
-    unsigned       *blocks;
-    unsigned        cbits;
-    int             cxshift;
-
-    int           (*get_freq)(RangeCoder *rc, unsigned total_freq, unsigned *freq);
-    int           (*decode)(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigned freq, unsigned total_freq);
-} SCPRContext;
+#include "scpr3.c"
 
 static void init_rangecoder(RangeCoder *rc, GetByteContext *gb)
 {
@@ -90,14 +59,14 @@ static void reinit_tables(SCPRContext *s)
     }
 
     for (j = 0; j < 6; j++) {
-        unsigned *p = s->run_model[j];
+        uint32_t *p = s->run_model[j];
         for (i = 0; i < 256; i++)
             p[i] = 1;
         p[256] = 256;
     }
 
     for (j = 0; j < 6; j++) {
-        unsigned *op = s->op_model[j];
+        uint32_t *op = s->op_model[j];
         for (i = 0; i < 6; i++)
             op[i] = 1;
         op[6] = 6;
@@ -130,13 +99,13 @@ static void reinit_tables(SCPRContext *s)
     s->mv_model[1][512] = 512;
 }
 
-static int decode(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigned freq, unsigned total_freq)
+static int decode(GetByteContext *gb, RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t total_freq)
 {
     rc->code -= cumFreq * rc->range;
     rc->range *= freq;
 
     while (rc->range < TOP && bytestream2_get_bytes_left(gb) > 0) {
-        unsigned byte = bytestream2_get_byteu(gb);
+        uint32_t byte = bytestream2_get_byteu(gb);
         rc->code = (rc->code << 8) | byte;
         rc->range <<= 8;
     }
@@ -144,7 +113,7 @@ static int decode(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigned
     return 0;
 }
 
-static int get_freq(RangeCoder *rc, unsigned total_freq, unsigned *freq)
+static int get_freq(RangeCoder *rc, uint32_t total_freq, uint32_t *freq)
 {
     if (total_freq == 0)
         return AVERROR_INVALIDDATA;
@@ -159,9 +128,9 @@ static int get_freq(RangeCoder *rc, unsigned total_freq, unsigned *freq)
     return 0;
 }
 
-static int decode0(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigned freq, unsigned total_freq)
+static int decode0(GetByteContext *gb, RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t total_freq)
 {
-    unsigned t;
+    uint32_t t;
 
     if (total_freq == 0)
         return AVERROR_INVALIDDATA;
@@ -172,7 +141,7 @@ static int decode0(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigne
     rc->range = rc->range * (uint64_t)(freq + cumFreq) / total_freq - (t + 1);
 
     while (rc->range < TOP && bytestream2_get_bytes_left(gb) > 0) {
-        unsigned byte = bytestream2_get_byteu(gb);
+        uint32_t byte = bytestream2_get_byteu(gb);
         rc->code = (rc->code << 8) | byte;
         rc->code1 <<= 8;
         rc->range <<= 8;
@@ -181,7 +150,7 @@ static int decode0(GetByteContext *gb, RangeCoder *rc, unsigned cumFreq, unsigne
     return 0;
 }
 
-static int get_freq0(RangeCoder *rc, unsigned total_freq, unsigned *freq)
+static int get_freq0(RangeCoder *rc, uint32_t total_freq, uint32_t *freq)
 {
     if (rc->range == 0)
         return AVERROR_INVALIDDATA;
@@ -191,13 +160,13 @@ static int get_freq0(RangeCoder *rc, unsigned total_freq, unsigned *freq)
     return 0;
 }
 
-static int decode_value(SCPRContext *s, unsigned *cnt, unsigned maxc, unsigned step, unsigned *rval)
+static int decode_value(SCPRContext *s, uint32_t *cnt, uint32_t maxc, uint32_t step, uint32_t *rval)
 {
     GetByteContext *gb = &s->gb;
     RangeCoder *rc = &s->rc;
-    unsigned totfr = cnt[maxc];
-    unsigned value;
-    unsigned c = 0, cumfr = 0, cnt_c = 0;
+    uint32_t totfr = cnt[maxc];
+    uint32_t value;
+    uint32_t c = 0, cumfr = 0, cnt_c = 0;
     int i, ret;
 
     if ((ret = s->get_freq(rc, totfr, &value)) < 0)
@@ -223,7 +192,7 @@ static int decode_value(SCPRContext *s, unsigned *cnt, unsigned maxc, unsigned s
     if (totfr > BOT) {
         totfr = 0;
         for (i = 0; i < maxc; i++) {
-            unsigned nc = (cnt[i] >> 1) + 1;
+            uint32_t nc = (cnt[i] >> 1) + 1;
             cnt[i] = nc;
             totfr += nc;
         }
@@ -235,12 +204,12 @@ static int decode_value(SCPRContext *s, unsigned *cnt, unsigned maxc, unsigned s
     return 0;
 }
 
-static int decode_unit(SCPRContext *s, PixelModel *pixel, unsigned step, unsigned *rval)
+static int decode_unit(SCPRContext *s, PixelModel *pixel, uint32_t step, uint32_t *rval)
 {
     GetByteContext *gb = &s->gb;
     RangeCoder *rc = &s->rc;
-    unsigned totfr = pixel->total_freq;
-    unsigned value, x = 0, cumfr = 0, cnt_x = 0;
+    uint32_t totfr = pixel->total_freq;
+    uint32_t value, x = 0, cumfr = 0, cnt_x = 0;
     int i, j, ret, c, cnt_c;
 
     if ((ret = s->get_freq(rc, totfr, &value)) < 0)
@@ -278,13 +247,13 @@ static int decode_unit(SCPRContext *s, PixelModel *pixel, unsigned step, unsigne
     if (totfr > BOT) {
         totfr = 0;
         for (i = 0; i < 256; i++) {
-            unsigned nc = (pixel->freq[i] >> 1) + 1;
+            uint32_t nc = (pixel->freq[i] >> 1) + 1;
             pixel->freq[i] = nc;
             totfr += nc;
         }
         for (i = 0; i < 16; i++) {
-            unsigned sum = 0;
-            unsigned i16_17 = i << 4;
+            uint32_t sum = 0;
+            uint32_t i16_17 = i << 4;
             for (j = 0; j < 16; j++)
                 sum += pixel->freq[i16_17 + j];
             pixel->lookup[i] = sum;
@@ -297,7 +266,7 @@ static int decode_unit(SCPRContext *s, PixelModel *pixel, unsigned step, unsigne
     return 0;
 }
 
-static int decode_units(SCPRContext *s, unsigned *r, unsigned *g, unsigned *b,
+static int decode_units(SCPRContext *s, uint32_t *r, uint32_t *g, uint32_t *b,
                         int *cx, int *cx1)
 {
     const int cxshift = s->cxshift;
@@ -329,10 +298,10 @@ static int decompress_i(AVCodecContext *avctx, uint32_t *dst, int linesize)
 {
     SCPRContext *s = avctx->priv_data;
     GetByteContext *gb = &s->gb;
-    int cx = 0, cx1 = 0, k = 0, clr = 0;
-    int run, off, y = 0, x = 0, z, ret;
-    unsigned r, g, b, backstep = linesize - avctx->width;
-    unsigned lx, ly, ptype;
+    int cx = 0, cx1 = 0, k = 0;
+    int run, off, y = 0, x = 0, ret;
+    uint32_t clr = 0, r, g, b, backstep = linesize - avctx->width;
+    uint32_t lx, ly, ptype;
 
     reinit_tables(s);
     bytestream2_skip(gb, 2);
@@ -387,120 +356,11 @@ static int decompress_i(AVCodecContext *avctx, uint32_t *dst, int linesize)
         if (run <= 0)
             return AVERROR_INVALIDDATA;
 
-        switch (ptype) {
-        case 0:
-            while (run-- > 0) {
-                if (y >= avctx->height)
-                    return AVERROR_INVALIDDATA;
-
-                dst[y * linesize + x] = clr;
-                lx = x;
-                ly = y;
-                x++;
-                if (x >= avctx->width) {
-                    x = 0;
-                    y++;
-                }
-            }
-            break;
-        case 1:
-            while (run-- > 0) {
-                if (y >= avctx->height)
-                    return AVERROR_INVALIDDATA;
-
-                dst[y * linesize + x] = dst[ly * linesize + lx];
-                lx = x;
-                ly = y;
-                x++;
-                if (x >= avctx->width) {
-                    x = 0;
-                    y++;
-                }
-            }
-            clr = dst[ly * linesize + lx];
-            break;
-        case 2:
-            while (run-- > 0) {
-                if (y < 1 || y >= avctx->height)
-                    return AVERROR_INVALIDDATA;
-
-                clr = dst[y * linesize + x + off + 1];
-                dst[y * linesize + x] = clr;
-                lx = x;
-                ly = y;
-                x++;
-                if (x >= avctx->width) {
-                    x = 0;
-                    y++;
-                }
-            }
-            break;
-        case 4:
-            while (run-- > 0) {
-                uint8_t *odst = (uint8_t *)dst;
-
-                if (y < 1 || y >= avctx->height ||
-                    (y == 1 && x == 0))
-                    return AVERROR_INVALIDDATA;
-
-                if (x == 0) {
-                    z = backstep;
-                } else {
-                    z = 0;
-                }
-
-                r = odst[(ly * linesize + lx) * 4] +
-                    odst[((y * linesize + x) + off) * 4 + 4] -
-                    odst[((y * linesize + x) + off - z) * 4];
-                g = odst[(ly * linesize + lx) * 4 + 1] +
-                    odst[((y * linesize + x) + off) * 4 + 5] -
-                    odst[((y * linesize + x) + off - z) * 4 + 1];
-                b = odst[(ly * linesize + lx) * 4 + 2] +
-                    odst[((y * linesize + x) + off) * 4 + 6] -
-                    odst[((y * linesize + x) + off - z) * 4 + 2];
-                clr = ((b & 0xFF) << 16) + ((g & 0xFF) << 8) + (r & 0xFF);
-                dst[y * linesize + x] = clr;
-                lx = x;
-                ly = y;
-                x++;
-                if (x >= avctx->width) {
-                    x = 0;
-                    y++;
-                }
-            }
-            break;
-        case 5:
-            while (run-- > 0) {
-                if (y < 1 || y >= avctx->height ||
-                    (y == 1 && x == 0))
-                    return AVERROR_INVALIDDATA;
-
-                if (x == 0) {
-                    z = backstep;
-                } else {
-                    z = 0;
-                }
-
-                clr = dst[y * linesize + x + off - z];
-                dst[y * linesize + x] = clr;
-                lx = x;
-                ly = y;
-                x++;
-                if (x >= avctx->width) {
-                    x = 0;
-                    y++;
-                }
-            }
-            break;
-        }
-
-        if (avctx->bits_per_coded_sample == 16) {
-            cx1 = (clr & 0x3F00) >> 2;
-            cx = (clr & 0x3FFFFF) >> 16;
-        } else {
-            cx1 = (clr & 0xFC00) >> 4;
-            cx = (clr & 0xFFFFFF) >> 18;
-        }
+        ret = decode_run_i(avctx, ptype, run, &x, &y, clr,
+                           dst, linesize, &lx, &ly,
+                           backstep, off, &cx, &cx1);
+        if (ret < 0) /* run is passed by value and is > 0 here, so the helper's result is what must be tested */
+            return ret;
     }
 
     return 0;
@@ -512,7 +372,7 @@ static int decompress_p(AVCodecContext *avctx,
 {
     SCPRContext *s = avctx->priv_data;
     GetByteContext *gb = &s->gb;
-    int ret, temp, min, max, x, y, cx = 0, cx1 = 0;
+    int ret, temp = 0, min, max, x, y, cx = 0, cx1 = 0;
     int backstep = linesize - avctx->width;
 
     if (bytestream2_get_byte(gb) == 0)
@@ -589,8 +449,8 @@ static int decompress_p(AVCodecContext *avctx,
                     }
                 }
             } else {
-                int run, z, bx = x * 16 + sx1, by = y * 16 + sy1;
-                unsigned r, g, b, clr, ptype = 0;
+                int run, bx = x * 16 + sx1, by = y * 16 + sy1;
+                uint32_t r, g, b, clr, ptype = 0;
 
                 for (; by < y * 16 + sy2 && by < avctx->height;) {
                     ret = decode_value(s, s->op_model[ptype], 6, 1000, &ptype);
@@ -611,134 +471,11 @@ static int decompress_p(AVCodecContext *avctx,
                     if (run <= 0)
                         return AVERROR_INVALIDDATA;
 
-                    switch (ptype) {
-                    case 0:
-                        while (run-- > 0) {
-                            if (by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    case 1:
-                        while (run-- > 0) {
-                            if (bx == 0) {
-                                if (by < 1)
-                                    return AVERROR_INVALIDDATA;
-                                z = backstep;
-                            } else {
-                                z = 0;
-                            }
-
-                            if (by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            clr = dst[by * linesize + bx - 1 - z];
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    case 2:
-                        while (run-- > 0) {
-                            if (by < 1 || by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            clr = dst[(by - 1) * linesize + bx];
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    case 3:
-                        while (run-- > 0) {
-                            if (by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            clr = prev[by * plinesize + bx];
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    case 4:
-                        while (run-- > 0) {
-                            uint8_t *odst = (uint8_t *)dst;
-
-                            if (by < 1 || by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            if (bx == 0) {
-                                if (by < 2)
-                                    return AVERROR_INVALIDDATA;
-                                z = backstep;
-                            } else {
-                                z = 0;
-                            }
-
-                            r = odst[((by - 1) * linesize + bx) * 4] +
-                                odst[(by * linesize + bx - 1 - z) * 4] -
-                                odst[((by - 1) * linesize + bx - 1 - z) * 4];
-                            g = odst[((by - 1) * linesize + bx) * 4 + 1] +
-                                odst[(by * linesize + bx - 1 - z) * 4 + 1] -
-                                odst[((by - 1) * linesize + bx - 1 - z) * 4 + 1];
-                            b = odst[((by - 1) * linesize + bx) * 4 + 2] +
-                                odst[(by * linesize + bx - 1 - z) * 4 + 2] -
-                                odst[((by - 1) * linesize + bx - 1 - z) * 4 + 2];
-                            clr = ((b & 0xFF) << 16) + ((g & 0xFF) << 8) + (r & 0xFF);
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    case 5:
-                        while (run-- > 0) {
-                            if (by < 1 || by >= avctx->height)
-                                return AVERROR_INVALIDDATA;
-
-                            if (bx == 0) {
-                                if (by < 2)
-                                    return AVERROR_INVALIDDATA;
-                                z = backstep;
-                            } else {
-                                z = 0;
-                            }
-
-                            clr = dst[(by - 1) * linesize + bx - 1 - z];
-                            dst[by * linesize + bx] = clr;
-                            bx++;
-                            if (bx >= x * 16 + sx2 || bx >= avctx->width) {
-                                bx = x * 16 + sx1;
-                                by++;
-                            }
-                        }
-                        break;
-                    }
-
-                    if (avctx->bits_per_coded_sample == 16) {
-                        cx1 = (clr & 0x3F00) >> 2;
-                        cx = (clr & 0x3FFFFF) >> 16;
-                    } else {
-                        cx1 = (clr & 0xFC00) >> 4;
-                        cx = (clr & 0xFFFFFF) >> 18;
-                    }
+                    ret = decode_run_p(avctx, ptype, run, x, y, clr,
+                                       dst, prev, linesize, plinesize, &bx, &by,
+                                       backstep, sx1, sx2, &cx, &cx1);
+                    if (ret < 0)
+                        return ret;
                 }
             }
         }
@@ -768,18 +505,25 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     type = bytestream2_peek_byte(gb);
 
     if (type == 2) {
+        s->version = 1;
         s->get_freq = get_freq0;
         s->decode = decode0;
         frame->key_frame = 1;
         ret = decompress_i(avctx, (uint32_t *)s->current_frame->data[0],
                            s->current_frame->linesize[0] / 4);
     } else if (type == 18) {
+        s->version = 2;
         s->get_freq = get_freq;
         s->decode = decode;
         frame->key_frame = 1;
         ret = decompress_i(avctx, (uint32_t *)s->current_frame->data[0],
                            s->current_frame->linesize[0] / 4);
-    } else if (type == 17) {
+    } else if (type == 34) {
+        frame->key_frame = 1;
+        s->version = 3;
+        ret = decompress_i3(avctx, (uint32_t *)s->current_frame->data[0],
+                            s->current_frame->linesize[0] / 4);
+    } else if (type == 17 || type == 33) {
         uint32_t clr, *dst = (uint32_t *)s->current_frame->data[0];
         int x, y;
 
@@ -809,10 +553,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         if (ret < 0)
             return ret;
 
-        ret = decompress_p(avctx, (uint32_t *)s->current_frame->data[0],
-                           s->current_frame->linesize[0] / 4,
-                           (uint32_t *)s->last_frame->data[0],
-                           s->last_frame->linesize[0] / 4);
+        if (s->version == 1 || s->version == 2)
+            ret = decompress_p(avctx, (uint32_t *)s->current_frame->data[0],
+                               s->current_frame->linesize[0] / 4,
+                               (uint32_t *)s->last_frame->data[0],
+                               s->last_frame->linesize[0] / 4);
+        else
+            ret = decompress_p3(avctx, (uint32_t *)s->current_frame->data[0],
+                                s->current_frame->linesize[0] / 4,
+                                (uint32_t *)s->last_frame->data[0],
+                                s->last_frame->linesize[0] / 4);
         if (ret == 1)
             return avpkt->size;
     } else {
diff --git a/libavcodec/scpr.h b/libavcodec/scpr.h
new file mode 100644
index 0000000000000..15cb87c464e68
--- /dev/null
+++ b/libavcodec/scpr.h
@@ -0,0 +1,365 @@
+/*
+ * ScreenPressor decoder
+ *
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SCPR_H
+#define AVCODEC_SCPR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "scpr3.h"
+
+typedef struct RangeCoder { /* range decoder state; SCPRContext embeds one as rc */
+    uint32_t   code;  /* current code word; decode3() updates it and refills it byte by byte */
+    uint32_t   range; /* NOTE(review): presumably the interval width of the v1/v2 coder - confirm in scpr.c */
+    uint32_t   code1; /* NOTE(review): usage not visible in this header - confirm in scpr.c */
+} RangeCoder;
+
+typedef struct PixelModel { /* pixel statistics; SCPRContext keeps a 3 x 4096 array of these */
+    uint32_t    freq[256];  /* NOTE(review): presumably one frequency per byte value - confirm */
+    uint32_t    lookup[16]; /* NOTE(review): presumably partial sums over groups of freq[] - confirm */
+    uint32_t    total_freq; /* NOTE(review): presumably the sum of freq[] - confirm */
+} PixelModel;
+
+typedef struct SCPRContext { /* private decoder context (avctx->priv_data) */
+    int             version;       /* set by decode_frame(): 1, 2 or 3 for keyframe types 2, 18 and 34 */
+    AVFrame        *last_frame;    /* its data is handed to decompress_p()/decompress_p3() as the previous picture */
+    AVFrame        *current_frame; /* picture being decoded into */
+    GetByteContext  gb;
+    RangeCoder      rc;
+    PixelModel      pixel_model[3][4096];
+    uint32_t        op_model[6][7];
+    uint32_t        run_model[6][257];
+    uint32_t        range_model[257];
+    uint32_t        count_model[257];
+    uint32_t        fill_model[6];
+    uint32_t        sxy_model[4][17];
+    uint32_t        mv_model[2][513];
+    uint32_t        nbx, nby;
+    uint32_t        nbcount;
+    uint32_t       *blocks;
+    uint32_t        cbits;
+    int             cxshift;
+
+    PixelModel3     pixel_model3[3][4096]; /* the *3 members below are the models reset by reinit_tables3() */
+    RunModel3       run_model3[6];
+    RunModel3       range_model3;
+    RunModel3       count_model3;
+    FillModel3      fill_model3;
+    SxyModel3       sxy_model3[4];
+    MVModel3        mv_model3[2];
+    OpModel3        op_model3[6];
+
+    int           (*get_freq)(RangeCoder *rc, uint32_t total_freq, uint32_t *freq); /* chosen per keyframe type in decode_frame() */
+    int           (*decode)(GetByteContext *gb, RangeCoder *rc, uint32_t cumFreq, uint32_t freq, uint32_t total_freq); /* chosen together with get_freq */
+} SCPRContext;
+
+static int decode_run_i(AVCodecContext *avctx, uint32_t ptype, int run,
+                        int *px, int *py, uint32_t clr, uint32_t *dst,
+                        int linesize, uint32_t *plx, uint32_t *ply,
+                        uint32_t backstep, int off, int *cx, int *cx1)
+{ /* Write run pixels of operation ptype into dst from (*px, *py); 0 on success, AVERROR_INVALIDDATA otherwise. */
+    uint32_t r, g, b;
+    int z;
+    int x = *px,
+        y = *py;
+    uint32_t lx = *plx, /* (lx, ly): position of the most recently written pixel */
+             ly = *ply;
+
+    if (y >= avctx->height) /* the run must start inside the picture */
+        return AVERROR_INVALIDDATA;
+
+    switch (ptype) { /* a ptype without a case (e.g. 3) writes no pixels */
+    case 0: /* fill with the colour passed in clr */
+        while (run-- > 0) {
+            dst[y * linesize + x] = clr;
+            lx = x;
+            ly = y;
+            (x)++;
+            if (x >= avctx->width) { /* end of row: continue at the start of the next one */
+                x = 0;
+                (y)++;
+                if (y >= avctx->height && run) /* ran off the bottom with pixels still pending */
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+        break;
+    case 1: /* repeat the pixel at (lx, ly) */
+        while (run-- > 0) {
+            dst[y * linesize + x] = dst[ly * linesize + lx];
+            lx = x;
+            ly = y;
+            (x)++;
+            if (x >= avctx->width) {
+                x = 0;
+                (y)++;
+                if (y >= avctx->height && run)
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+        clr = dst[ly * linesize + lx]; /* colour used for the context update below */
+        break;
+    case 2: /* copy the pixel at offset off + 1 from the current one; presumably the pixel above - confirm off in the caller */
+        if (y < 1)
+            return AVERROR_INVALIDDATA;
+
+        while (run-- > 0) {
+            clr = dst[y * linesize + x + off + 1];
+            dst[y * linesize + x] = clr;
+            lx = x;
+            ly = y;
+            (x)++;
+            if (x >= avctx->width) {
+                x = 0;
+                (y)++;
+                if (y >= avctx->height && run)
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+        break;
+    case 4: /* per byte: last pixel + pixel at (off + 1) - pixel at off */
+        if (y < 1 || (y == 1 && x == 0))
+            return AVERROR_INVALIDDATA;
+
+        while (run-- > 0) {
+            uint8_t *odst = (uint8_t *)dst;
+            int off1 = (ly * linesize + lx) * 4;        /* byte offset of the last written pixel */
+            int off2 = ((y * linesize + x) + off) * 4;  /* byte offset of the pixel at offset off */
+
+            if (x == 0) { /* in column 0 the off neighbour is read backstep pixels earlier */
+                z = backstep * 4;
+            } else {
+                z = 0;
+            }
+
+            r = odst[off1] +
+                odst[off2 + 4] -
+                odst[off2 - z ];
+            g = odst[off1 + 1] +
+                odst[off2 + 5] -
+                odst[off2 - z  + 1];
+            b = odst[off1 + 2] +
+                odst[off2 + 6] -
+                odst[off2 - z  + 2];
+            clr = ((b & 0xFF) << 16) + ((g & 0xFF) << 8) + (r & 0xFF); /* each sum wraps to 8 bits */
+            dst[y * linesize + x] = clr;
+            lx = x;
+            ly = y;
+            (x)++;
+            if (x >= avctx->width) {
+                x = 0;
+                (y)++;
+                if (y >= avctx->height && run)
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+        break;
+    case 5: /* copy the pixel at offset off (minus backstep in column 0) */
+        if (y < 1 || (y == 1 && x == 0))
+            return AVERROR_INVALIDDATA;
+
+        while (run-- > 0) {
+            if (x == 0) {
+                z = backstep;
+            } else {
+                z = 0;
+            }
+
+            clr = dst[y * linesize + x + off - z];
+            dst[y * linesize + x] = clr;
+            lx = x;
+            ly = y;
+            (x)++;
+            if (x >= avctx->width) {
+                x = 0;
+                (y)++;
+                if (y >= avctx->height && run)
+                    return AVERROR_INVALIDDATA;
+            }
+        }
+        break;
+    }
+
+    *px = x; /* positions are written back only on success */
+    *py = y;
+    *plx= lx;
+    *ply= ly;
+
+    if (avctx->bits_per_coded_sample == 16) { /* derive the two context values from the last colour */
+        *cx1 = (clr & 0x3F00) >> 2;
+        *cx = (clr & 0x3FFFFF) >> 16;
+    } else {
+        *cx1 = (clr & 0xFC00) >> 4;
+        *cx = (clr & 0xFFFFFF) >> 18;
+    }
+
+    return 0;
+}
+
+static int decode_run_p(AVCodecContext *avctx, uint32_t ptype, int run,
+                        int x, int y, uint32_t clr,
+                        uint32_t *dst, uint32_t *prev,
+                        int linesize, int plinesize,
+                        uint32_t *bx, uint32_t *by,
+                        uint32_t backstep, int sx1, int sx2,
+                        int *cx, int *cx1)
+{ /* Write run pixels of operation ptype into dst from (*bx, *by); 0 on success, AVERROR_INVALIDDATA otherwise. */
+    uint32_t r, g, b;
+    int z;
+
+    switch (ptype) { /* *bx and *by are advanced in place, even when an error is returned later */
+    case 0: /* fill with the colour passed in clr */
+        while (run-- > 0) {
+            if (*by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) { /* past column x * 16 + sx2 or the picture edge */
+                *bx = x * 16 + sx1; /* restart at column x * 16 + sx1 of the next row */
+                (*by)++;
+            }
+        }
+        break;
+    case 1: /* copy the pixel stored just before the current one */
+        while (run-- > 0) {
+            if (*bx == 0) { /* in column 0 the source is a further backstep pixels back */
+                if (*by < 1)
+                    return AVERROR_INVALIDDATA;
+                z = backstep;
+            } else {
+                z = 0;
+            }
+
+            if (*by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            clr = dst[*by * linesize + *bx - 1 - z];
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) {
+                *bx = x * 16 + sx1;
+                (*by)++;
+            }
+        }
+        break;
+    case 2: /* copy the pixel directly above */
+        while (run-- > 0) {
+            if (*by < 1 || *by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            clr = dst[(*by - 1) * linesize + *bx];
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) {
+                *bx = x * 16 + sx1;
+                (*by)++;
+            }
+        }
+        break;
+    case 3: /* copy the pixel at the same position in prev */
+        while (run-- > 0) {
+            if (*by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            clr = prev[*by * plinesize + *bx];
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) {
+                *bx = x * 16 + sx1;
+                (*by)++;
+            }
+        }
+        break;
+    case 4: /* per byte: above + left - above-left */
+        while (run-- > 0) {
+            uint8_t *odst = (uint8_t *)dst;
+
+            if (*by < 1 || *by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            if (*bx == 0) { /* column 0 reads backstep pixels further back, so two rows above are needed */
+                if (*by < 2)
+                    return AVERROR_INVALIDDATA;
+                z = backstep;
+            } else {
+                z = 0;
+            }
+
+            r = odst[((*by - 1) * linesize + *bx) * 4] +
+                odst[(*by * linesize + *bx - 1 - z) * 4] -
+                odst[((*by - 1) * linesize + *bx - 1 - z) * 4];
+            g = odst[((*by - 1) * linesize + *bx) * 4 + 1] +
+                odst[(*by * linesize + *bx - 1 - z) * 4 + 1] -
+                odst[((*by - 1) * linesize + *bx - 1 - z) * 4 + 1];
+            b = odst[((*by - 1) * linesize + *bx) * 4 + 2] +
+                odst[(*by * linesize + *bx - 1 - z) * 4 + 2] -
+                odst[((*by - 1) * linesize + *bx - 1 - z) * 4 + 2];
+            clr = ((b & 0xFF) << 16) + ((g & 0xFF) << 8) + (r & 0xFF); /* each sum wraps to 8 bits */
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) {
+                *bx = x * 16 + sx1;
+                (*by)++;
+            }
+        }
+        break;
+    case 5: /* copy the above-left pixel */
+        while (run-- > 0) {
+            if (*by < 1 || *by >= avctx->height)
+                return AVERROR_INVALIDDATA;
+
+            if (*bx == 0) {
+                if (*by < 2)
+                    return AVERROR_INVALIDDATA;
+                z = backstep;
+            } else {
+                z = 0;
+            }
+
+            clr = dst[(*by - 1) * linesize + *bx - 1 - z];
+            dst[*by * linesize + *bx] = clr;
+            (*bx)++;
+            if (*bx >= x * 16 + sx2 || *bx >= avctx->width) {
+                *bx = x * 16 + sx1;
+                (*by)++;
+            }
+        }
+        break;
+    }
+
+    if (avctx->bits_per_coded_sample == 16) { /* derive the two context values from the last colour */
+        *cx1 = (clr & 0x3F00) >> 2;
+        *cx = (clr & 0x3FFFFF) >> 16;
+    } else {
+        *cx1 = (clr & 0xFC00) >> 4;
+        *cx = (clr & 0xFFFFFF) >> 18;
+    }
+
+    return 0;
+}
+
+#endif /* AVCODEC_SCPR_H */
diff --git a/libavcodec/scpr3.c b/libavcodec/scpr3.c
new file mode 100644
index 0000000000000..b59a8cc28f6aa
--- /dev/null
+++ b/libavcodec/scpr3.c
@@ -0,0 +1,1207 @@
+/*
+ * ScreenPressor version 3 decoder
+ *
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libavutil/qsort.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "scpr.h"
+
+static void renew_table3(uint32_t nsym, uint32_t *cntsum,
+                         uint16_t *freqs, uint16_t *freqs1,
+                         uint16_t *cnts, uint8_t *dectab)
+{   /* Reset a model to nsym equally wide symbols on a 4096 scale. */
+    const uint32_t step = 4096 / nsym, count = step - (step >> 1);
+    uint32_t base = 0; /* cumulative start of the current symbol */
+
+    *cntsum = count * nsym;
+    for (uint32_t sym = 0; sym < nsym; sym++, base += step) {
+        int first = (base + 127) >> 7, end = ((base + step - 1) >> 7) + 1;
+
+        freqs[sym]  = step;
+        freqs1[sym] = base;
+        cnts[sym]   = count;
+        while (first < end) /* 128-wide slots whose start lies in [base, base + step) */
+            dectab[first++] = sym;
+    }
+}
+
+static void reinit_tables3(SCPRContext * s)
+{   /* Return every version-3 model in s to its initial state. */
+    for (int plane = 0; plane < 3; plane++) {
+        PixelModel3 *row = s->pixel_model3[plane];
+
+        for (int ctx = 0; ctx < 4096; ctx++)
+            row[ctx].type = 0; /* only the type tag is reset here */
+    }
+
+    for (int k = 0; k < 6; k++) {
+        RunModel3 *rm = &s->run_model3[k];
+        renew_table3(256, &rm->cntsum, rm->freqs[0], rm->freqs[1],
+                     rm->cnts, rm->dectab);
+    }
+
+    renew_table3(256, &s->range_model3.cntsum,
+                 s->range_model3.freqs[0], s->range_model3.freqs[1],
+                 s->range_model3.cnts, s->range_model3.dectab);
+
+    renew_table3(5, &s->fill_model3.cntsum,
+                 s->fill_model3.freqs[0], s->fill_model3.freqs[1],
+                 s->fill_model3.cnts, s->fill_model3.dectab);
+
+    renew_table3(256, &s->count_model3.cntsum,
+                 s->count_model3.freqs[0], s->count_model3.freqs[1],
+                 s->count_model3.cnts, s->count_model3.dectab);
+
+    for (int k = 0; k < 4; k++) {
+        SxyModel3 *sm = &s->sxy_model3[k];
+        renew_table3(16, &sm->cntsum, sm->freqs[0], sm->freqs[1],
+                     sm->cnts, sm->dectab);
+    }
+
+    for (int k = 0; k < 2; k++) {
+        MVModel3 *mm = &s->mv_model3[k];
+        renew_table3(512, &mm->cntsum, mm->freqs[0], mm->freqs[1],
+                     mm->cnts, mm->dectab);
+    }
+
+    for (int k = 0; k < 6; k++) {
+        OpModel3 *om = &s->op_model3[k];
+        renew_table3(6, &om->cntsum, om->freqs[0], om->freqs[1],
+                     om->cnts, om->dectab);
+    }
+}
+
+static int decode3(GetByteContext *gb, RangeCoder *rc, uint32_t a, uint32_t b)
+{   /* Update rc->code from a and b (12-bit split), then refill it from gb; always returns 0. */
+    uint32_t state = rc->code;
+
+    state = a * (state >> 12) + (state & 0xFFF) - b;
+    while (state < 0x800000 && bytestream2_get_bytes_left(gb) > 0) /* pull bytes until the state is large enough again */
+        state = (state << 8) | bytestream2_get_byteu(gb);
+    rc->code = state;
+    return 0;
+}
+
+static void rescale(PixelModel3 *m, int *totfr)
+{   /* Halve every frequency (rounding up) and report the new total in *totfr. */
+    uint32_t total = 256 - m->size; /* one unit for each symbol not in the model */
+
+    for (int i = 0; i < m->size; i++) {
+        uint32_t halved = m->freqs[i] - (m->freqs[i] >> 1);
+
+        m->freqs[i] = halved;
+        total += halved;
+    }
+    *totfr = total;
+}
+
+static int add_symbol(PixelModel3 *m, int index, uint32_t symbol, int *totfr, int max)
+{   /* Insert symbol at index with frequency 50; returns 0 if the model already holds max entries, else 1. */
+    const int initial = 50;
+
+    if (m->size == max)
+        return 0;
+
+    for (int pos = m->size; pos > index; pos--) { /* shift the tail up by one entry */
+        m->symbols[pos] = m->symbols[pos - 1];
+        m->freqs[pos]   = m->freqs[pos - 1];
+    }
+    m->symbols[index] = symbol;
+    m->freqs[index]   = initial;
+    m->size++;
+    if (m->maxpos >= index) /* keep maxpos on the entry it referred to */
+        m->maxpos++;
+
+    *totfr += initial;
+    if (*totfr + initial > 4096) /* no headroom left for another increment */
+        rescale(m, totfr);
+
+    return 1;
+}
+
+static int decode_adaptive45(PixelModel3 *m, int rccode, uint32_t *value,
+                             uint16_t *a, uint16_t *b, uint32_t *c, int max)
+{ /* Map rccode to a symbol of the sorted model m; outputs *value, width *a, start *b, new total *c. */
+    uint32_t q, g, maxpos, d, e = *c, totfr = *c;
+    int ret;
+
+    for (d = 0; e <= 2048; d++) /* d = number of doublings that lift the total above 2048 */
+        e <<= 1;
+    maxpos = m->maxpos;
+    rccode >>= d;
+    *c = m->freqs[maxpos]; /* *c temporarily holds the saved frequency of the maxpos entry */
+    m->freqs[maxpos] += 4096 - e >> d; /* i.e. (4096 - e) >> d; undone on every exit path below */
+
+    for (q = 0, g = 0, e = 0; q < m->size; q++) { /* e: cumulative position, g: first symbol value after the previous entry */
+        uint32_t f = m->symbols[q];
+        uint32_t p = e + f - g; /* start of entry q; each value skipped between entries takes one unit */
+        uint32_t k = m->freqs[q];
+
+        if (rccode < p) { /* falls into the gap before entry q: a value not yet in the model */
+            *value = rccode - e + g;
+            *b = rccode << d;
+            *a = 1 << d;
+            m->freqs[maxpos] = *c;
+            ret = add_symbol(m, q, *value, &totfr, max); /* 0 when the model already holds max entries */
+            *c = totfr;
+            return ret;
+        }
+
+        if (p + k > rccode) { /* falls inside entry q */
+            *value = f;
+            e += *value - g;
+            *b = e << d;
+            *a = k << d;
+            m->freqs[maxpos] = *c;
+            m->freqs[q] += 50;
+            totfr += 50;
+            if ((q != maxpos) && (m->freqs[q] > m->freqs[maxpos])) /* entry q is now the most frequent */
+                m->maxpos = q;
+            if (totfr + 50 > 4096)
+                rescale(m, &totfr);
+            *c = totfr;
+            return 1;
+        }
+
+        e += f - g + k;
+        g = f + 1;
+    }
+
+    m->freqs[maxpos] = *c; /* beyond the last entry: a new value appended at index q == size */
+    *value = g + rccode - e;
+    *b = rccode << d;
+    *a = 1 << d;
+    ret = add_symbol(m, q, *value, &totfr, max);
+    *c = totfr;
+    return ret;
+}
+
+static int update_model6_to_7(PixelModel3 *m)
+{ /* Replace m by a type-7 model whose tables are indexed directly by symbol value; returns 0. */
+    PixelModel3 n = {0};
+    int c, d, e, f, k, p, length, i, j, index;
+    uint16_t *freqs, *freqs1, *cnts;
+
+    n.type = 7;
+
+    length = m->length;
+    freqs = n.freqs;
+    freqs1 = n.freqs1;
+    cnts = n.cnts;
+    n.cntsum = m->cnts[length]; /* the old total is kept at cnts[length] */
+    for (i = 0; i < length; i++) { /* scatter the existing entries to their symbol's index */
+        if (!m->cnts[i])
+            continue;
+        index = m->symbols[i];
+        freqs[index] = m->freqs[2 * i];
+        freqs1[index] = m->freqs[2 * i + 1];
+        cnts[index] = m->cnts[i];
+    }
+    c = 1 << m->fshift; /* width given to a symbol that had no entry */
+    d = c - (c >> 1);   /* its count: c halved, rounded up */
+    for (j = 0, e = 0; j < 256; j++) { /* e accumulates the widths seen so far */
+        f = freqs[j];
+        if (!f) { /* no entry copied above: install the defaults */
+            f = c;
+            freqs[j] = c;
+            freqs1[j] = e;
+            cnts[j] = d;
+        }
+        p = (e + 127) >> 7;
+        k = ((f + e - 1) >> 7) + 1;
+        for (i = 0; i < k - p; i++) /* slots p .. k - 1 of dectab map to symbol j */
+            n.dectab[p + i] = j;
+        e += f;
+    }
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static void calc_sum(PixelModel3 *m)
+{   /* Store in cnts[length] the sum of cnts[0 .. length - 1] plus a share for symbols not in the model. */
+    const int n = m->length;
+    const int shift = m->fshift > 0 ? m->fshift - 1 : 0;
+    uint32_t sum = (256 - m->size) << shift;
+
+    for (int i = 0; i < n; i++)
+        sum += m->cnts[i];
+
+    m->cnts[n] = sum;
+}
+
+static void rescale_dec(PixelModel3 *m)
+{ /* Rebuild widths and starts of all entries from the current counts, then halve the counts. */
+    uint16_t cnts[256] = {0};
+    uint16_t freqs[512] = {0};
+    int b, c, e, g;
+    uint32_t a;
+
+    for (a = 1 << (0 < m->fshift ? m->fshift - 1 : 0), b = 0; b < 256; b++) /* default count for every symbol value */
+        cnts[b] = a;
+
+    for (a = 0, b = m->size; a < b; a++) /* known symbols use their real count */
+        cnts[m->symbols[a]] = m->cnts[a];
+
+    for (b = a = 0; b < 256; b++) { /* per symbol value: (width, cumulative start) */
+        freqs[2 * b] = cnts[b];
+        freqs[2 * b + 1] = a;
+        a += cnts[b];
+    }
+
+    if (m->fshift > 0)
+        m->fshift--;
+
+    a = 256 - m->size << (0 < m->fshift ? m->fshift - 1 : 0); /* (256 - size) << shift, using the reduced fshift */
+    for (b = 0, c = m->size; b < c; b++) {
+        m->cnts[b] -= m->cnts[b] >> 1; /* halve, rounding up */
+        a = a + m->cnts[b];
+        e = m->symbols[b];
+        g = freqs[2 * e + 1];
+        m->freqs[2 * b] = freqs[2 * e];
+        m->freqs[2 * b + 1] = g;
+    }
+    m->cnts[m->length] = a; /* new total */
+}
+
+static int update_model5_to_6(PixelModel3 *m, uint8_t value)
+{ /* Replace m by a type-6 model (length 32) holding the old entries plus value; returns 0. */
+    PixelModel3 n = {0};
+    int c, d, e, f, g, k, q, p;
+
+    n.type = 6;
+    n.length = 32;
+
+    for (c = m->size, d = 256 - c, e = 0; e < c; e++) /* d = (256 - size) + sum of all frequencies */
+        d = d + m->freqs[e];
+
+    for (e = 0; d <= 2048; e++) /* e = number of doublings that lift d above 2048 */
+        d <<= 1;
+
+    for (q = d = 0, g = q = 0; g < c; g++) { /* d: cumulative position, q: first value after the previous entry */
+        p = m->symbols[g];
+        d = d + (p - q); /* values skipped between entries take one unit each */
+        q = m->freqs[g];
+        k = q << e;
+        n.freqs[2 * g] = k;          /* scaled width */
+        n.freqs[2 * g + 1] = d << e; /* scaled start */
+        n.cnts[g] = k - (k >> 1);
+        n.symbols[g] = p;
+        d += q;
+        q = p + 1;
+    }
+
+    n.fshift = e;
+    e = 1 << n.fshift; /* width of the entry added for value */
+    d = 0;
+    if (value > 0) { /* locate the start of value from the closest smaller known symbol */
+        d = -1;
+        for (p = f = g = 0; p < c; p++) {
+            k = n.symbols[p];
+            if (k > d && k < value) {
+                d = k;
+                g = n.freqs[2 * p];
+                f = n.freqs[2 * p + 1];
+            }
+        }
+        d = 0 < g ? f + g + (value - d - 1 << n.fshift) : value << n.fshift;
+    }
+    n.freqs[2 * c] = e; /* append value as entry c */
+    n.freqs[2 * c + 1] = d;
+    n.cnts[c] = e - (e >> 1);
+    n.symbols[c] = value;
+    n.size = c + 1;
+    e = 25 << n.fshift; /* same increment that incr_cntdec() applies */
+    n.cnts[c] += e;
+    n.cnts[32] += e; /* index 32 == n.length, where the total is kept */
+    if (n.cnts[32] + e > 4096)
+        rescale_dec(&n);
+
+    calc_sum(&n);
+    for (c = 0, e = n.size - 1; c < e; c++) { /* order the entries by decreasing width */
+        for (g = c + 1, f = n.size; g < f; g++) {
+            if (q = n.freqs[2 * g], k = n.freqs[2 * c], q > k) {
+                int l = n.freqs[2 * c + 1];
+                int h = n.freqs[2 * g + 1];
+                n.freqs[2 * c] = q;
+                n.freqs[2 * c + 1] = h;
+                n.freqs[2 * g] = k;
+                n.freqs[2 * g + 1] = l;
+                FFSWAP(uint16_t, n.cnts[c], n.cnts[g]);
+                FFSWAP(uint8_t, n.symbols[c], n.symbols[g]);
+            }
+        }
+    }
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static void grow_dec(PixelModel3 *m)
+{   /* Double length, carrying the value stored at cnts[length] to the new position. */
+    const int old_len = m->length;
+    const int new_len = 2 * old_len;
+
+    m->cnts[new_len] = m->cnts[old_len];
+    m->length = new_len;
+}
+
+static int add_dec(PixelModel3 *m, int sym, int f1, int f2)
+{   /* Append sym with freqs words f1 and f2; returns its index, or -1 when no room is left. */
+    const int slot = m->size;
+
+    if (m->size >= 40 || m->size >= m->length)
+        return -1;
+
+    m->symbols[slot]       = sym;
+    m->freqs[2 * slot]     = f1;
+    m->freqs[2 * slot + 1] = f2;
+    /* the count starts at f1 halved, rounded up */
+    m->cnts[slot]          = f1 - (f1 >> 1);
+    m->size                = slot + 1;
+
+    return slot;
+}
+
+static void incr_cntdec(PixelModel3 *m, int a)
+{   /* Credit entry a with 25 << fshift; move it one place forward if it now outranks its neighbour. */
+    const int len = m->length;
+    const int inc = 25 << m->fshift;
+
+    m->cnts[a]   += inc;
+    m->cnts[len] += inc;
+
+    if (a > 0 && m->cnts[a] > m->cnts[a - 1]) {
+        const int prev = a - 1;
+        const int freq_a  = m->freqs[2 * a];
+        const int start_a = m->freqs[2 * a + 1];
+
+        /* exchange the complete records: both freqs words, count, symbol */
+        m->freqs[2 * a]        = m->freqs[2 * prev];
+        m->freqs[2 * a + 1]    = m->freqs[2 * prev + 1];
+        m->freqs[2 * prev]     = freq_a;
+        m->freqs[2 * prev + 1] = start_a;
+        FFSWAP(uint16_t, m->cnts[a], m->cnts[prev]);
+        FFSWAP(uint8_t, m->symbols[a], m->symbols[prev]);
+    }
+    if (m->cnts[len] + inc > 4096)
+        rescale_dec(m);
+}
+
+static int decode_adaptive6(PixelModel3 *m, uint32_t code, uint32_t *value,
+                            uint16_t *a, uint16_t *b)
+{ /* Map code to a symbol of m; outputs *value, width *a, start *b. Returns 0 once length is 64 and no entry can be added. */
+    int c, d, e, f, g, q;
+
+    for (c = 0, d = 0, e = 0, f = 0, g = 0, q = m->size; g < q; g++) {
+        uint32_t p = m->freqs[2 * g + 1]; /* start of entry g */
+
+        if (p <= code) {
+            uint32_t k = m->freqs[2 * g]; /* width of entry g */
+
+            if (p + k > code) { /* code lies inside entry g */
+                *value = m->symbols[g];
+                *a = k;
+                *b = p;
+                incr_cntdec(m, g);
+                return 1;
+            }
+
+            if (p >= d) { /* remember the entry with the largest start not above code */
+                c = k;
+                d = p;
+                e = m->symbols[g];
+            }
+        }
+    }
+
+    g = 1 << m->fshift; /* width of a symbol that has no entry yet */
+    q = f = 0;
+
+    if (c > 0) { /* count whole widths past the end of the remembered entry */
+        f = code - (d + c) >> m->fshift;
+        q = f + e + 1;
+        f = d + c + (f << m->fshift);
+    } else { /* no entry starts at or below code */
+        q = code >> m->fshift;
+        f = q << m->fshift;
+    }
+
+    *a = g;
+    *b = f;
+    *value = q;
+
+    c = add_dec(m, q, g, f);
+    if (c < 0) { /* no room: double the table once, give up when length is already 64 */
+        if (m->length == 64)
+            return 0;
+        grow_dec(m);
+        c = add_dec(m, q, g, f); /* NOTE(review): a second failure would pass -1 to incr_cntdec(); looks unreachable when length starts at 32 - confirm */
+    }
+
+    incr_cntdec(m, c);
+    return 1;
+}
+
+static int cmpbytes(const void *p1, const void *p2)
+{   /* Three-way comparison of the bytes p1 and p2 point to; used as the AV_QSORT() comparator. */
+    const uint8_t *lhs = p1, *rhs = p2;
+    const int left = *lhs, right = *rhs;
+    return FFDIFFSIGN(left, right);
+}
+
+static int update_model1_to_2(PixelModel3 *m, uint32_t val)
+{
+    /* Rebuild *m as a type 2 model: the old symbols followed by val. */
+    PixelModel3 next = {0};
+    const int count = m->size;
+
+    memcpy(next.symbols, m->symbols, count);
+    next.symbols[count] = val;
+    next.size = count + 1;
+    next.type = 2;
+
+    /* All remaining fields stay zeroed. */
+    memcpy(m, &next, sizeof(next));
+
+    return 0;
+}
+
+static int update_model1_to_4(PixelModel3 *m, uint32_t val) /* rebuild *m as a type 4 model; always returns 0 */
+{
+    PixelModel3 n = {0};
+    int size, i;
+
+    size = m->size;
+    n.type = 4;
+    n.size = size;
+    for (i = 0; i < n.size; i++) {
+        n.symbols[i] = m->symbols[i];
+    }
+    AV_QSORT(n.symbols, size, uint8_t, cmpbytes); /* order the copied symbols with cmpbytes */
+    for (i = 0; i < n.size; i++) {
+        if (val == n.symbols[i]) { /* the symbol that triggered the conversion gets weight 100, all others 50 */
+            n.freqs[i] = 100;
+            n.maxpos = i;
+        } else {
+            n.freqs[i] = 50;
+        }
+    }
+
+    memcpy(m, &n, sizeof(n)); /* replace the old model wholesale */
+
+    return 0;
+}
+
+static int update_model1_to_5(PixelModel3 *m, uint32_t val) /* rebuild *m as a type 5 model; always returns 0 */
+{
+    PixelModel3 n = {0};
+    int i, size, freqs;
+    uint32_t a;
+
+    size = m->size;
+    n.size = size;
+    for (i = 0; i < size; i++) {
+        n.symbols[i] = m->symbols[i];
+    }
+    AV_QSORT(n.symbols, size, uint8_t, cmpbytes); /* order the copied symbols with cmpbytes */
+    size = n.size;
+    for (i = 0; i < size; i++) {
+        if (val == n.symbols[i]) { /* the repeated symbol starts with weight 100, all others with 50 */
+            n.freqs[i] = 100;
+            n.maxpos = i;
+        } else {
+            n.freqs[i] = 50;
+        }
+    }
+    a = 256 - size;
+    for (i = 0; i < size; i++, a += freqs) /* n.freqs[i] is added to a through the increment clause */
+        freqs = n.freqs[i];
+    n.type = 5;
+    n.cntsum = a; /* (256 - size) + sum of all weights, the same formula calc_sum5 uses */
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static int decode_static1(PixelModel3 *m, uint32_t val) /* type 1 model: record raw byte val, converting the model when needed */
+{
+    uint32_t size;
+
+    size = m->size;
+    for (int i = 0; i < size; i++) {
+        if (val == m->symbols[i]) { /* val was seen before: convert to type 4 or type 5 depending on the symbol count */
+            if (size <= 4)
+                return update_model1_to_4(m, val);
+            else
+                return update_model1_to_5(m, val);
+        }
+    }
+
+    if (size >= 14) /* val is new and 14 symbols are already stored: convert to type 2 */
+        return update_model1_to_2(m, val);
+
+    m->symbols[size] = val; /* otherwise append the new symbol */
+    m->size++;
+    return 0;
+}
+
+static int update_model2_to_6(PixelModel3 *m, uint8_t value, int a4) /* rebuild *m as a type 6 model with length a4; always returns 0 */
+{
+    PixelModel3 n = {0};
+    int c, d, e, f, g, q;
+
+    n.type = 6;
+    n.length = a4;
+
+    memset(n.symbols, 1u, a4); /* the first a4 symbol bytes start as 1; the loop below overwrites the first c of them */
+
+    c = m->size;
+    d = 256 - c + (64 * c + 64); /* equals (256 - c) + 64 * (c + 1) */
+    for (e = 0; d <= 2048; e++) { /* e = number of doublings needed to bring d above 2048 */
+        d <<= 1;
+    }
+
+    g = q = 0;
+    AV_QSORT(m->symbols, c, uint8_t, cmpbytes); /* sorts the source model's symbols in place */
+    for (f = d = 0; f < c; f++) { /* d is reused from here on: index of the entry equal to value */
+        int p = f;
+        int k = m->symbols[p];
+        int l;
+        g = g + (k - q); /* advance the start by one unit per byte value between the previous symbol and k */
+
+        if (k == value) { /* q is reused as this entry's weight: 128 for value, 64 otherwise */
+            d = p;
+            q = 128;
+        } else {
+            q = 64;
+        }
+        l = q << e;
+        n.freqs[2 * p] = l; /* width of entry p */
+        n.freqs[2 * p + 1] = g << e; /* start of entry p */
+        n.symbols[p] = k;
+        n.cnts[p] = l - (l >> 1); /* half of the width, rounded up */
+        g += q;
+        q = k + 1; /* q now holds the byte value following k */
+    }
+    n.size = c;
+    n.fshift = e;
+    calc_sum(&n);
+
+    if (d > 0) { /* exchange entry d with entry 0: freqs pair, count and symbol */
+        c = n.freqs[0];
+        e = n.freqs[1];
+        g = n.freqs[2 * d + 1];
+        n.freqs[0] = n.freqs[2 * d];
+        n.freqs[1] = g;
+        n.freqs[2 * d] = c;
+        n.freqs[2 * d + 1] = e;
+        FFSWAP(uint16_t, n.cnts[0], n.cnts[d]);
+        FFSWAP(uint8_t, n.symbols[0], n.symbols[d]);
+    }
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static int update_model2_to_3(PixelModel3 *m, uint32_t val)
+{
+    /* Rebuild *m as a type 3 model: the old symbols followed by val. */
+    PixelModel3 next = {0};
+    const uint32_t count = m->size;
+
+    memcpy(next.symbols, m->symbols, count);
+    next.symbols[count] = val;
+    next.size = count + 1;
+    next.type = 3;
+
+    /* Only type, size and symbols are populated; */
+    /* every other field of the new model stays zeroed. */
+    memcpy(m, &next, sizeof(next));
+
+    return 0;
+}
+
+static int decode_static2(PixelModel3 *m, uint32_t val) /* type 2 model: record raw byte val, converting the model when needed */
+{
+    uint32_t size;
+
+    size = m->size;
+    for (int i = 0; i < size; i++) {
+        if (val == m->symbols[i]) { /* repeated symbol: convert to type 6 */
+            int a;
+
+            if (m->size <= 32) /* the type 6 length is chosen from the current symbol count */
+                a = 32;
+            else
+                a = 64;
+            return update_model2_to_6(m, val, a);
+        }
+    }
+
+    if (size >= 64) /* val is new and 64 symbols are already stored: convert to type 3 */
+        return update_model2_to_3(m, val);
+
+    m->symbols[size] = val; /* otherwise append the new symbol */
+    m->size++;
+
+    return 0;
+}
+
+static int update_model3_to_7(PixelModel3 *m, uint8_t value) /* rebuild *m as a type 7 model covering all 256 byte values; always returns 0 */
+{
+    PixelModel3 n = {0};
+    int c, d, e, f, g, q;
+
+    n.type = 7;
+
+    for (c = 0; c < 256; c++) { /* every byte value starts with frequency 1 and count 1 */
+        d = c;
+        n.freqs[d] = 1;
+        n.cnts[d] = 1;
+    }
+
+    for (c = m->size, d = (4096 - (256 - c)) / (c + 1) | 0, e = d - (d >> 1), g = 0; g < c;) { /* d: share per stored symbol ("| 0" has no effect); e: d halved, rounded up */
+        q = g++;
+        q = m->symbols[q];
+        n.freqs[q] = d;
+        n.cnts[q] = e;
+    }
+    n.freqs[value] += d; /* value receives one extra share */
+    n.cnts[value] += 16;
+    for (d = c = n.cntsum = 0; 256 > d; d++) { /* c accumulates the cumulative frequency */
+        e = d;
+        n.cntsum += n.cnts[e];
+        n.freqs1[e] = c;
+        for (g = n.freqs[e], q = c + 128 - 1 >> 7, f = (c + g - 1 >> 7) + 1; q < f; q++) { /* '+'/'-' bind tighter than '>>'; fills the 128-wide dectab buckets reached by symbol e */
+            n.dectab[q] = e;
+        }
+        c += g;
+    }
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static int decode_static3(PixelModel3 *m, uint32_t val)
+{
+    const uint32_t count = m->size;
+    uint32_t pos = 0;
+
+    /* A symbol that is already listed converts the model to type 7. */
+    while (pos < count && m->symbols[pos] != val)
+        pos++;
+    if (pos < count)
+        return update_model3_to_7(m, val);
+
+    /* Otherwise append it, unless all 256 slots are in use. */
+    if (count < 256)
+        m->symbols[m->size++] = val;
+    return 0;
+}
+
+static void sync_code3(GetByteContext *gb, RangeCoder *rc)
+{
+    /* Bump the call counter; on reaching 0x20000 reset it and reload code. */
+    if (++rc->code1 != 0x20000)
+        return;
+    rc->code1 = 0;
+    rc->code  = bytestream2_get_le32(gb);
+}
+
+static int decode_value3(SCPRContext *s, uint32_t max, uint32_t *cntsum,
+                         uint16_t *freqs1, uint16_t *freqs2,
+                         uint16_t *cnts, uint8_t *dectable,
+                         uint32_t *value) /* decode one symbol in 0..max into *value; 0 on success, AVERROR_INVALIDDATA otherwise */
+{
+    GetByteContext *gb = &s->gb;
+    RangeCoder *rc = &s->rc;
+    uint32_t r, y, a, b, e, g, q;
+
+    r = dectable[(rc->code & 0xFFFu) >> 7]; /* the low 12 bits of code select one of 32 buckets holding the first candidate */
+    if (r < max) {
+        while (freqs2[r + 1] <= (rc->code & 0xFFF)) { /* advance while the next symbol's start is not above code */
+            if (++r >= max)
+                break;
+        }
+    }
+
+    if (r > max) /* r == max is still accepted */
+        return AVERROR_INVALIDDATA;
+
+    cnts[r] += 16;
+    a = freqs1[r]; /* frequency of r */
+    b = freqs2[r]; /* cumulative start of r */
+    *cntsum += 16;
+    if (*cntsum + 16 > 4096) { /* the next increment would exceed 4096: rebuild all tables from the counts */
+        *cntsum = 0;
+        for (int c = 0, i = 0; i < max + 1; i++) {
+            e = cnts[i];
+            freqs2[i] = c;
+            freqs1[i] = e;
+            g = (c + 127) >> 7; /* first bucket that begins at or after symbol i's start */
+            c += e;
+            q = ((c - 1) >> 7) + 1; /* one past the bucket containing symbol i's last position */
+            if (q > g) {
+                for (int j = 0; j < q - g; j++)
+                    dectable[j + g] = i;
+            }
+            y = e - (e >> 1); /* halve the count, rounding up */
+            cnts[i] = y;
+            *cntsum += y;
+        }
+    }
+
+    decode3(gb, rc, a, b); /* uses the frequency and start captured before any rebuild */
+    sync_code3(gb, rc);
+
+    *value = r;
+
+    return 0;
+}
+
+static void calc_sum5(PixelModel3 *m)
+{
+    const int used = m->size; /* number of freqs[] entries that are summed */
+    uint32_t total = 256 - used; /* the sum starts at 256 - size */
+
+    for (int idx = 0; idx < used; idx++)
+        total += m->freqs[idx];
+    m->cntsum = total;
+}
+
+static int update_model4_to_5(PixelModel3 *m, uint32_t value) /* rebuild *m as a type 5 model with value inserted; always returns 0 */
+{
+    PixelModel3 n = {0};
+    int c, e, g, totfr;
+
+    n.type = 5;
+
+    for (c = 0, e = 0; c < m->size && m->symbols[c] < value; c++) { /* copy the leading entries whose symbol is below value */
+        n.symbols[c] = m->symbols[c];
+        e += n.freqs[c] = m->freqs[c]; /* e sums the copied weights */
+    }
+
+    g = c;
+    n.symbols[g] = value; /* insert the new symbol at this position with weight 50 */
+    e += n.freqs[g++] = 50;
+    for (; c < m->size; g++, c++) { /* copy the remaining entries one slot further along */
+        n.symbols[g] = m->symbols[c];
+        e += n.freqs[g] = m->freqs[c];
+    }
+    n.size = m->size + 1;
+    if (e > 4096)
+        rescale(&n, &totfr); /* totfr is passed to rescale but never read in this function */
+
+    calc_sum5(&n);
+
+    memcpy(m, &n, sizeof(n));
+
+    return 0;
+}
+
+static int decode_unit3(SCPRContext *s, PixelModel3 *m, uint32_t code, uint32_t *value) /* decode one value with model m; 0 on success, negative AVERROR on failure */
+{
+    GetByteContext *gb = &s->gb;
+    RangeCoder *rc = &s->rc;
+    uint16_t a = 0, b = 0;
+    uint32_t param;
+    int type;
+
+    type = m->type;
+    switch (type) {
+    case 0: /* fresh model: read a raw byte and start a one-symbol type 1 list */
+        *value = bytestream2_get_byte(&s->gb);
+        m->type = 1;
+        m->size = 1;
+        m->symbols[0] = *value;
+        sync_code3(gb, rc);
+        break;
+    case 1: /* types 1-3: the value is a raw byte; the static model only records it */
+        *value = bytestream2_get_byte(&s->gb);
+        decode_static1(m, *value);
+        sync_code3(gb, rc);
+        break;
+    case 2:
+        *value = bytestream2_get_byte(&s->gb);
+        decode_static2(m, *value);
+        sync_code3(gb, rc);
+        break;
+    case 3:
+        *value = bytestream2_get_byte(&s->gb);
+        decode_static3(m, *value);
+        sync_code3(gb, rc);
+        break;
+    case 4: /* types 4-6: adaptive lookup; a 0 result from the lookup triggers a model conversion */
+        param = m->freqs[0] + m->freqs[1] + m->freqs[2] + m->freqs[3] + 256 - m->size;
+        if (!decode_adaptive45(m, code, value, &a, &b, &param, 4))
+            update_model4_to_5(m, *value);
+        decode3(gb, rc, a, b);
+        sync_code3(gb, rc);
+        break;
+    case 5:
+        if (!decode_adaptive45(m, code, value, &a, &b, &m->cntsum, 16))
+            update_model5_to_6(m, *value); /* NOTE(review): return value ignored - confirm it cannot fail */
+        decode3(gb, rc, a, b);
+        sync_code3(gb, rc);
+        break;
+    case 6:
+        if (!decode_adaptive6(m, code, value, &a, &b)) {
+            update_model6_to_7(m); /* NOTE(review): return value ignored - confirm it cannot fail */
+        }
+        decode3(gb, rc, a, b);
+        sync_code3(gb, rc);
+        break;
+    case 7: /* full 256-symbol table: handled entirely by decode_value3, bypassing the range check below */
+        return decode_value3(s, 255, &m->cntsum,
+                             m->freqs, m->freqs1,
+                             m->cnts, m->dectab, value);
+    }
+
+    if (*value > 255) /* a decoded value must fit in one byte */
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+static int decode_units3(SCPRContext * s, uint32_t *red,
+                         uint32_t *green, uint32_t *blue,
+                         int *cx, int *cx1) /* decode red, green, blue in that order; 0 on success, negative AVERROR on failure */
+{
+    RangeCoder *rc = &s->rc;
+    int ret;
+
+    ret = decode_unit3(s, &s->pixel_model3[0][*cx + *cx1], rc->code & 0xFFF, red); /* the model is selected by the context index *cx + *cx1 */
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0; /* previous context moves into bits 6..11 */
+    *cx = *red >> 2; /* top 6 bits of the component just decoded become bits 0..5 */
+
+    ret = decode_unit3(s, &s->pixel_model3[1][*cx + *cx1], rc->code & 0xFFF, green);
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0;
+    *cx = *green >> 2;
+
+    ret = decode_unit3(s, &s->pixel_model3[2][*cx + *cx1], rc->code & 0xFFF, blue);
+    if (ret < 0)
+        return ret;
+
+    *cx1 = (*cx << 6) & 0xFC0; /* the context is left updated for the caller's next pixel */
+    *cx = *blue >> 2;
+
+    return 0;
+}
+
+static void init_rangecoder3(RangeCoder *rc, GetByteContext *gb)
+{
+    rc->code1 = 0; /* restart the counter that sync_code3 increments */
+    rc->code  = bytestream2_get_le32(gb); /* load the initial 32-bit little-endian code word */
+}
+
+static int decompress_i3(AVCodecContext *avctx, uint32_t *dst, int linesize) /* decode an intra frame into dst; 0 on success, negative AVERROR on failure */
+{
+    SCPRContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    RangeCoder *rc = &s->rc;
+    int cx = 0, cx1 = 0, k = 0;
+    int run, off, y = 0, x = 0, ret;
+    uint32_t backstep = linesize - avctx->width;
+    uint32_t clr = 0, lx, ly, ptype, r, g, b;
+
+    bytestream2_skip(gb, 1); /* one leading byte is skipped before the range coder is primed */
+    init_rangecoder3(rc, gb);
+    reinit_tables3(s);
+
+    while (k < avctx->width + 1) { /* phase 1: literal colour + run pairs until more than width pixels are covered */
+        ret = decode_units3(s, &r, &g, &b, &cx, &cx1);
+        if (ret < 0)
+            return ret;
+        ret = decode_value3(s, 255, &s->run_model3[0].cntsum,
+                            s->run_model3[0].freqs[0],
+                            s->run_model3[0].freqs[1],
+                            s->run_model3[0].cnts,
+                            s->run_model3[0].dectab, &run);
+        if (ret < 0)
+            return ret;
+        if (run <= 0)
+            return AVERROR_INVALIDDATA;
+
+        clr = (b << 16) + (g << 8) + r; /* b in bits 16..23, g in bits 8..15, r in bits 0..7 */
+        k += run;
+        while (run-- > 0) {
+            if (y >= avctx->height) /* the run would write past the last row */
+                return AVERROR_INVALIDDATA;
+
+            dst[y * linesize + x] = clr; /* linesize is counted in dst elements */
+            lx = x; /* last written position, handed to decode_run_i below */
+            ly = y;
+            x++;
+            if (x >= avctx->width) {
+                x = 0;
+                y++;
+            }
+        }
+    }
+    off = -linesize - 1; /* element offset one row up and one column left */
+    ptype = 0;
+
+    while (x < avctx->width && y < avctx->height) { /* phase 2: (operation, run) pairs until the frame is full */
+        ret = decode_value3(s, 5, &s->op_model3[ptype].cntsum, /* the op model is selected by the previous ptype */
+                            s->op_model3[ptype].freqs[0],
+                            s->op_model3[ptype].freqs[1],
+                            s->op_model3[ptype].cnts,
+                            s->op_model3[ptype].dectab, &ptype);
+        if (ret < 0)
+            return ret;
+        if (ptype == 0) { /* operation 0 carries a new literal colour */
+            ret = decode_units3(s, &r, &g, &b, &cx, &cx1);
+            if (ret < 0)
+                return ret;
+            clr = (b << 16) + (g << 8) + r;
+        }
+        if (ptype > 5)
+            return AVERROR_INVALIDDATA;
+        ret = decode_value3(s, 255, &s->run_model3[ptype].cntsum,
+                            s->run_model3[ptype].freqs[0],
+                            s->run_model3[ptype].freqs[1],
+                            s->run_model3[ptype].cnts,
+                            s->run_model3[ptype].dectab, &run);
+        if (ret < 0)
+            return ret;
+        if (run <= 0)
+            return AVERROR_INVALIDDATA;
+
+        ret = decode_run_i(avctx, ptype, run, &x, &y, clr,
+                           dst, linesize, &lx, &ly,
+                           backstep, off, &cx, &cx1);
+        if (ret < 0) /* check the call's result; run is already known to be > 0 here, so testing it dropped every error */
+            return ret;
+    }
+
+    return 0;
+}
+
+static int decompress_p3(AVCodecContext *avctx,
+                         uint32_t *dst, int linesize,
+                         uint32_t *prev, int plinesize) /* decode an inter frame into dst using prev; 0 or 1 on success, negative AVERROR on failure */
+{
+    SCPRContext *s = avctx->priv_data;
+    GetByteContext *gb = &s->gb;
+    int ret, temp, min, max, x, y, cx = 0, cx1 = 0;
+    int backstep = linesize - avctx->width;
+    int mvx = 0, mvy = 0;
+
+    if (bytestream2_get_byte(gb) == 0) /* leading byte 0: nothing is decoded; presumably "frame unchanged" - confirm with the caller */
+        return 1;
+    init_rangecoder3(&s->rc, gb);
+
+    ret  = decode_value3(s, 255, &s->range_model3.cntsum, /* min and max are each sent as two 8-bit symbols, low part first */
+                         s->range_model3.freqs[0],
+                         s->range_model3.freqs[1],
+                         s->range_model3.cnts,
+                         s->range_model3.dectab, &min);
+    ret |= decode_value3(s, 255, &s->range_model3.cntsum,
+                         s->range_model3.freqs[0],
+                         s->range_model3.freqs[1],
+                         s->range_model3.cnts,
+                         s->range_model3.dectab, &temp);
+    min += temp << 8;
+    ret |= decode_value3(s, 255, &s->range_model3.cntsum,
+                         s->range_model3.freqs[0],
+                         s->range_model3.freqs[1],
+                         s->range_model3.cnts,
+                         s->range_model3.dectab, &max);
+    ret |= decode_value3(s, 255, &s->range_model3.cntsum,
+                         s->range_model3.freqs[0],
+                         s->range_model3.freqs[1],
+                         s->range_model3.cnts,
+                         s->range_model3.dectab, &temp);
+    if (ret < 0) /* the four results were OR-ed together; any failure makes the combination negative */
+        return ret;
+
+    max += temp << 8;
+    if (min > max || min >= s->nbcount)
+        return AVERROR_INVALIDDATA;
+
+    memset(s->blocks, 0, sizeof(*s->blocks) * s->nbcount); /* blocks outside [min, max] keep value 0 and are skipped below */
+
+    while (min <= max) { /* read (fill, count) pairs and write fill into count consecutive block slots */
+        int fill, count;
+
+        ret  = decode_value3(s, 4, &s->fill_model3.cntsum,
+                             s->fill_model3.freqs[0],
+                             s->fill_model3.freqs[1],
+                             s->fill_model3.cnts,
+                             s->fill_model3.dectab, &fill);
+        ret |= decode_value3(s, 255, &s->count_model3.cntsum,
+                             s->count_model3.freqs[0],
+                             s->count_model3.freqs[1],
+                             s->count_model3.cnts,
+                             s->count_model3.dectab, &count);
+        if (ret < 0)
+            return ret;
+        if (count <= 0)
+            return AVERROR_INVALIDDATA;
+
+        while (min < s->nbcount && count-- > 0) { /* never writes beyond the nbcount block slots */
+            s->blocks[min++] = fill;
+        }
+    }
+
+    for (y = 0; y < s->nby; y++) { /* walk the grid of 16x16 blocks */
+        for (x = 0; x < s->nbx; x++) {
+            int sy1 = 0, sy2 = 16, sx1 = 0, sx2 = 16; /* default sub-rectangle: the whole block */
+
+            if (s->blocks[y * s->nbx + x] == 0) /* block value 0: nothing to decode for this block */
+                continue;
+
+            if (((s->blocks[y * s->nbx + x] + 1) & 1) > 0) { /* true for even block values: an explicit sub-rectangle follows */
+                ret  = decode_value3(s, 15, &s->sxy_model3[0].cntsum,
+                                     s->sxy_model3[0].freqs[0],
+                                     s->sxy_model3[0].freqs[1],
+                                     s->sxy_model3[0].cnts,
+                                     s->sxy_model3[0].dectab, &sx1);
+                ret |= decode_value3(s, 15, &s->sxy_model3[1].cntsum,
+                                     s->sxy_model3[1].freqs[0],
+                                     s->sxy_model3[1].freqs[1],
+                                     s->sxy_model3[1].cnts,
+                                     s->sxy_model3[1].dectab, &sy1);
+                ret |= decode_value3(s, 15, &s->sxy_model3[2].cntsum,
+                                     s->sxy_model3[2].freqs[0],
+                                     s->sxy_model3[2].freqs[1],
+                                     s->sxy_model3[2].cnts,
+                                     s->sxy_model3[2].dectab, &sx2);
+                ret |= decode_value3(s, 15, &s->sxy_model3[3].cntsum,
+                                     s->sxy_model3[3].freqs[0],
+                                     s->sxy_model3[3].freqs[1],
+                                     s->sxy_model3[3].cnts,
+                                     s->sxy_model3[3].dectab, &sy2);
+                if (ret < 0)
+                    return ret;
+
+                sx2++; /* decoded end coordinates are inclusive; make them exclusive */
+                sy2++;
+            }
+            if (((s->blocks[y * s->nbx + x] + 3) & 2) > 0) { /* true for block values 3 and 4: copy from prev with a motion vector */
+                int i, a, b, c, j, by = y * 16, bx = x * 16;
+                uint32_t code;
+
+                a = s->rc.code & 0xFFF;
+                c = 1;
+
+                if (a < 0x800) /* c = 0 when the low 12 bits are in the lower half: a new vector follows */
+                    c = 0;
+                b = 2048;
+                if (!c)
+                    b = 0;
+
+                code = a + ((s->rc.code >> 1) & 0xFFFFF800) - b; /* consume the one-bit decision from the coder state */
+                while (code < 0x800000 && bytestream2_get_bytes_left(gb) > 0) /* refill byte-wise while input remains */
+                    code = bytestream2_get_byteu(gb) | (code << 8);
+                s->rc.code = code;
+
+                sync_code3(gb, &s->rc);
+
+                if (!c) { /* otherwise mvx/mvy keep the values from the previous motion block (initially 0) */
+                    ret  = decode_value3(s, 511, &s->mv_model3[0].cntsum,
+                                         s->mv_model3[0].freqs[0],
+                                         s->mv_model3[0].freqs[1],
+                                         s->mv_model3[0].cnts,
+                                         s->mv_model3[0].dectab, &mvx);
+                    ret |= decode_value3(s, 511, &s->mv_model3[1].cntsum,
+                                         s->mv_model3[1].freqs[0],
+                                         s->mv_model3[1].freqs[1],
+                                         s->mv_model3[1].cnts,
+                                         s->mv_model3[1].dectab, &mvy);
+                    if (ret < 0)
+                        return ret;
+
+                    mvx -= 256; /* symbols 0..511 map to displacements -256..255 */
+                    mvy -= 256;
+                }
+
+                if (by + mvy + sy1 < 0 || bx + mvx + sx1 < 0 || /* the source rectangle's top-left corner must lie inside the frame */
+                    by + mvy + sy1 >= avctx->height || bx + mvx + sx1 >= avctx->width)
+                    return AVERROR_INVALIDDATA;
+
+                for (i = 0; i < sy2 - sy1 && (by + sy1 + i) < avctx->height && (by + mvy + sy1 + i) < avctx->height; i++) { /* rows are clipped for both destination and source */
+                    for (j = 0; j < sx2 - sx1 && (bx + sx1 + j) < avctx->width && (bx + mvx + sx1 + j) < avctx->width; j++) { /* columns likewise */
+                        dst[(by + i + sy1) * linesize + bx + sx1 + j] = prev[(by + mvy + sy1 + i) * plinesize + bx + sx1 + mvx + j];
+                    }
+                }
+            } else { /* block values 1 and 2: pixels are coded as (operation, run) pairs */
+                int run, bx = x * 16 + sx1, by = y * 16 + sy1;
+                uint32_t clr, ptype = 0, r, g, b; /* NOTE(review): clr is uninitialised if the first operation is not 0 - confirm decode_run_p never reads it then */
+
+                for (; by < y * 16 + sy2 && by < avctx->height;) { /* decode_run_p advances bx/by */
+                    ret = decode_value3(s, 5, &s->op_model3[ptype].cntsum,
+                                        s->op_model3[ptype].freqs[0],
+                                        s->op_model3[ptype].freqs[1],
+                                        s->op_model3[ptype].cnts,
+                                        s->op_model3[ptype].dectab, &ptype);
+                    if (ret < 0)
+                        return ret;
+                    if (ptype == 0) { /* operation 0 carries a new literal colour */
+                        ret = decode_units3(s, &r, &g, &b, &cx, &cx1);
+                        if (ret < 0)
+                            return ret;
+
+                        clr = (b << 16) + (g << 8) + r;
+                    }
+                    if (ptype > 5)
+                        return AVERROR_INVALIDDATA;
+                    ret = decode_value3(s, 255, &s->run_model3[ptype].cntsum,
+                                        s->run_model3[ptype].freqs[0],
+                                        s->run_model3[ptype].freqs[1],
+                                        s->run_model3[ptype].cnts,
+                                        s->run_model3[ptype].dectab, &run);
+                    if (ret < 0)
+                        return ret;
+                    if (run <= 0)
+                        return AVERROR_INVALIDDATA;
+
+                    ret = decode_run_p(avctx, ptype, run, x, y, clr,
+                                       dst, prev, linesize, plinesize, &bx, &by,
+                                       backstep, sx1, sx2, &cx, &cx1);
+                    if (ret < 0)
+                        return ret;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/libavcodec/scpr3.h b/libavcodec/scpr3.h
new file mode 100644
index 0000000000000..92ad968882e11
--- /dev/null
+++ b/libavcodec/scpr3.h
@@ -0,0 +1,82 @@
+/*
+ * ScreenPressor version 3 decoder
+ *
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SCPR3_H
+#define AVCODEC_SCPR3_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "internal.h"
+
+typedef struct PixelModel3 { /* adaptive model for one pixel-component context */
+    uint8_t    type; /* 0..7; decode_unit3 switches on this value */
+    uint8_t    length; /* type 6: set from update_model2_to_6's a4 argument (32 or 64 from decode_static2) */
+    uint8_t    maxpos; /* index of the weight-100 symbol recorded when entering type 4 or 5 */
+    uint8_t    fshift; /* type 6: shift applied to widths and count increments */
+    uint16_t   size; /* number of symbols currently stored */
+    uint32_t   cntsum; /* running sum maintained by the type 5 and type 7 code paths */
+    uint8_t    symbols[256];
+    uint16_t   freqs[256]; /* type 6 stores pairs here: [2*i] width, [2*i + 1] start */
+    uint16_t   freqs1[256]; /* type 7: cumulative starts */
+    uint16_t   cnts[256];
+    uint8_t    dectab[32]; /* type 7: first-candidate lookup indexed by (code & 0xFFF) >> 7 */
+} PixelModel3;
+
+typedef struct FillModel3 { /* 5-symbol table set for decode_value3 (called with max = 4) */
+    uint32_t   cntsum; /* sum of cnts[], compared against 4096 to trigger a rebuild */
+    uint16_t   freqs[2][5]; /* [0] is passed as freqs1 (frequency), [1] as freqs2 (cumulative start) */
+    uint16_t   cnts[5];
+    uint8_t    dectab[32]; /* first-candidate lookup indexed by (code & 0xFFF) >> 7 */
+} FillModel3;
+
+typedef struct OpModel3 { /* 6-symbol table set for decode_value3 (called with max = 5) */
+    uint32_t   cntsum; /* sum of cnts[], compared against 4096 to trigger a rebuild */
+    uint16_t   freqs[2][6]; /* [0] is passed as freqs1 (frequency), [1] as freqs2 (cumulative start) */
+    uint16_t   cnts[6];
+    uint8_t    dectab[32]; /* first-candidate lookup indexed by (code & 0xFFF) >> 7 */
+} OpModel3;
+
+typedef struct RunModel3 { /* 256-symbol table set for decode_value3 (called with max = 255) */
+    uint32_t   cntsum; /* sum of cnts[], compared against 4096 to trigger a rebuild */
+    uint16_t   freqs[2][256]; /* [0] is passed as freqs1 (frequency), [1] as freqs2 (cumulative start) */
+    uint16_t   cnts[256];
+    uint8_t    dectab[32]; /* first-candidate lookup indexed by (code & 0xFFF) >> 7 */
+} RunModel3;
+
+typedef struct SxyModel3 { /* 16-symbol table set for decode_value3 (called with max = 15) */
+    uint32_t   cntsum; /* sum of cnts[], compared against 4096 to trigger a rebuild */
+    uint16_t   freqs[2][16]; /* [0] is passed as freqs1 (frequency), [1] as freqs2 (cumulative start) */
+    uint16_t   cnts[16];
+    uint8_t    dectab[32]; /* first-candidate lookup indexed by (code & 0xFFF) >> 7 */
+} SxyModel3;
+
+typedef struct MVModel3 { /* 512-symbol table set for decode_value3 (called with max = 511) */
+    uint32_t   cntsum; /* sum of cnts[], compared against 4096 to trigger a rebuild */
+    uint16_t   freqs[2][512]; /* [0] is passed as freqs1 (frequency), [1] as freqs2 (cumulative start) */
+    uint16_t   cnts[512];
+    uint8_t    dectab[32]; /* 8-bit entries: symbol indices above 255 are truncated when stored; decode_value3 scans forward from the stored value */
+} MVModel3;
+
+#endif /* AVCODEC_SCPR3_H */
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index 4b45e6d6dc73a..4134af74cfe6d 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -382,7 +382,7 @@ static int decode_subframe_lpc(ShortenContext *s, int command, int channel,
     /* subtract offset from previous samples to use in prediction */
     if (command == FN_QLPC && coffset)
         for (i = -pred_order; i < 0; i++)
-            s->decoded[channel][i] -= coffset;
+            s->decoded[channel][i] -= (unsigned)coffset;
 
     /* decode residual and do LPC prediction */
     init_sum = pred_order ? (command == FN_QLPC ? s->lpcqoffset : 0) : coffset;
@@ -397,7 +397,7 @@ static int decode_subframe_lpc(ShortenContext *s, int command, int channel,
     /* add offset to current samples */
     if (command == FN_QLPC && coffset)
         for (i = 0; i < s->blocksize; i++)
-            s->decoded[channel][i] += coffset;
+            s->decoded[channel][i] += (unsigned)coffset;
 
     return 0;
 }
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 78b29c0fe3e03..3b2e73653819e 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -236,7 +236,7 @@ void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
     }
 }
 
-void ff_prores_idct(int16_t *block, const int16_t *qmat)
+void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
 {
     int i;
 
@@ -251,3 +251,19 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
         idctSparseCol_extrashift_10(block + i);
     }
 }
+
+void ff_prores_idct_12(int16_t *block, const int16_t *qmat) /* scale the 64 coefficients by qmat, then run the 12-bit row and column IDCT passes in place */
+{
+    int i;
+
+    for (i = 0; i < 64; i++) /* multiply every coefficient by its qmat entry */
+        block[i] *= qmat[i];
+
+    for (i = 0; i < 8; i++) /* row pass over the 8 rows */
+        idctRowCondDC_int16_12bit(block + i*8, 0);
+
+    for (i = 0; i < 8; i++) { /* column pass over the 8 columns */
+        block[i] += 8192; /* added to the first element of each column before its pass; NOTE(review): presumably an output bias - confirm */
+        idctSparseCol_int16_12bit(block + i);
+    }
+}
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 39df2308caa5a..20578b33477b1 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -52,7 +52,8 @@ void ff_simple_idct_int16_12bit(int16_t *block);
  * and scales by a factor of 2 more between the two IDCTs to account
  * for larger scale of input coefficients.
  */
-void ff_prores_idct(int16_t *block, const int16_t *qmat);
+void ff_prores_idct_10(int16_t *block, const int16_t *qmat);
+void ff_prores_idct_12(int16_t *block, const int16_t *qmat);
 
 void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 
diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h
index 6b97a7185f5fe..329e9bb5be6fc 100644
--- a/libavcodec/sinewin.h
+++ b/libavcodec/sinewin.h
@@ -38,6 +38,9 @@
 #define SINETABLE(size) \
     SINETABLE_CONST DECLARE_ALIGNED(32, INTFLOAT, AAC_RENAME(ff_sine_##size))[size]
 
+#define SINETABLE120960(size) \
+    DECLARE_ALIGNED(32, INTFLOAT, AAC_RENAME(ff_sine_##size))[size]
+
 /**
  * Generate a sine window.
  * @param   window  pointer to half window
@@ -52,11 +55,11 @@ void AAC_RENAME(ff_init_ff_sine_windows)(int index);
 
 extern SINETABLE(  32);
 extern SINETABLE(  64);
-extern SINETABLE( 120);
+extern SINETABLE120960(120);
 extern SINETABLE( 128);
 extern SINETABLE( 256);
 extern SINETABLE( 512);
-extern SINETABLE( 960);
+extern SINETABLE120960(960);
 extern SINETABLE(1024);
 extern SINETABLE(2048);
 extern SINETABLE(4096);
diff --git a/libavcodec/sinewin_tablegen.h b/libavcodec/sinewin_tablegen.h
index 0fa3561abce8b..dc52234ed034a 100644
--- a/libavcodec/sinewin_tablegen.h
+++ b/libavcodec/sinewin_tablegen.h
@@ -32,8 +32,8 @@
 #include "libavutil/common.h"
 
 #if !USE_FIXED
-SINETABLE( 120);
-SINETABLE( 960);
+SINETABLE120960(120);
+SINETABLE120960(960);
 #endif
 #if !CONFIG_HARDCODED_TABLES
 SINETABLE(  32);
diff --git a/libavcodec/sinewin_tablegen_template.c b/libavcodec/sinewin_tablegen_template.c
index 43ce1ba82e838..b8eb407bd8952 100644
--- a/libavcodec/sinewin_tablegen_template.c
+++ b/libavcodec/sinewin_tablegen_template.c
@@ -33,6 +33,8 @@
 #define SINETABLE_CONST
 #define SINETABLE(size) \
     INTFLOAT AAC_RENAME(ff_sine_##size)[size]
+#define SINETABLE120960(size) \
+    INTFLOAT AAC_RENAME(ff_sine_##size)[size]
 #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
 #include "sinewin_tablegen.h"
 #include "tableprint.h"
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 61a658fa4401a..df1729a08338d 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1899,7 +1899,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
-    pkt->size = ff_rac_terminate(c);
+    pkt->size = ff_rac_terminate(c, 0);
     if (s->current_picture->key_frame)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
diff --git a/libavcodec/sonic.c b/libavcodec/sonic.c
index 2e3ca79fdd1c8..34d2952e69ce5 100644
--- a/libavcodec/sonic.c
+++ b/libavcodec/sonic.c
@@ -842,7 +842,7 @@ static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
 //    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
 
-    avpkt->size = ff_rac_terminate(&c);
+    avpkt->size = ff_rac_terminate(&c, 0);
     *got_packet_ptr = 1;
     return 0;
 
diff --git a/libavcodec/tests/.gitignore b/libavcodec/tests/.gitignore
index 73945a7c82e44..56ddb2cbeb07b 100644
--- a/libavcodec/tests/.gitignore
+++ b/libavcodec/tests/.gitignore
@@ -2,6 +2,7 @@
 /avpacket
 /cabac
 /celp_math
+/codec_desc
 /dct
 /fft
 /fft-fixed
diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c
index e8fa4a3cc1a40..2ca8039c01ae2 100644
--- a/libavcodec/tests/dct.c
+++ b/libavcodec/tests/dct.c
@@ -73,7 +73,7 @@ static void ff_prores_idct_wrap(int16_t *dst){
     for(i=0; i<64; i++){
         qmat[i]=4;
     }
-    ff_prores_idct(dst, qmat);
+    ff_prores_idct_10(dst, qmat);
     for(i=0; i<64; i++) {
          dst[i] -= 512;
     }
diff --git a/libavcodec/tests/h264_levels.c b/libavcodec/tests/h264_levels.c
index 794517eb6ca4b..0e00f05af6c6a 100644
--- a/libavcodec/tests/h264_levels.c
+++ b/libavcodec/tests/h264_levels.c
@@ -102,7 +102,7 @@ static const struct {
     // Check level 1b.
     {  32 * 1200,  66, 10 },
     {  32 * 1500, 100, 10 },
-    {  96 * 1200,  66, 10 },
+    {  96 * 1200,  66, 11 },
     {  96 * 1500, 100,  9 },
     { 144 * 1200,  66, 11 },
     { 144 * 1500, 100, 11 },
diff --git a/libavcodec/tests/rangecoder.c b/libavcodec/tests/rangecoder.c
index 2da5c0ce33ade..d6cf9ec3807d5 100644
--- a/libavcodec/tests/rangecoder.c
+++ b/libavcodec/tests/rangecoder.c
@@ -24,41 +24,53 @@
 
 #include "libavcodec/rangecoder.h"
 
-#define SIZE 10240
+#define SIZE 1240
 
 int main(void)
 {
     RangeCoder c;
-    uint8_t b[9 * SIZE];
+    uint8_t b[9 * SIZE] = {0};
     uint8_t r[9 * SIZE];
-    int i;
+    int i, p, actual_length, version;
     uint8_t state[10];
     AVLFG prng;
 
     av_lfg_init(&prng, 1);
+    for (version = 0; version < 2; version++) {
+        for (p = 0; p< 1024; p++) {
+            ff_init_range_encoder(&c, b, SIZE);
+            ff_build_rac_states(&c, (1LL << 32) / 20, 128 + 64 + 32 + 16);
 
-    ff_init_range_encoder(&c, b, SIZE);
-    ff_build_rac_states(&c, (1LL << 32) / 20, 128 + 64 + 32 + 16);
+            memset(state, 128, sizeof(state));
 
-    memset(state, 128, sizeof(state));
+            for (i = 0; i < SIZE; i++)
+                r[i] = av_lfg_get(&prng) % 7;
 
-    for (i = 0; i < SIZE; i++)
-        r[i] = av_lfg_get(&prng) % 7;
+            for (i = 0; i < SIZE; i++)
+                put_rac(&c, state, r[i] & 1);
 
-    for (i = 0; i < SIZE; i++)
-        put_rac(&c, state, r[i] & 1);
+            actual_length = ff_rac_terminate(&c, version);
 
-    ff_rac_terminate(&c);
+            ff_init_range_decoder(&c, b, version ? SIZE : actual_length);
 
-    ff_init_range_decoder(&c, b, SIZE);
+            memset(state, 128, sizeof(state));
 
-    memset(state, 128, sizeof(state));
+            for (i = 0; i < SIZE; i++)
+                if ((r[i] & 1) != get_rac(&c, state)) {
+                    av_log(NULL, AV_LOG_ERROR, "rac failure at %d pass %d version %d\n", i, p, version);
+                    return 1;
+                }
 
-    for (i = 0; i < SIZE; i++)
-        if ((r[i] & 1) != get_rac(&c, state)) {
-            av_log(NULL, AV_LOG_ERROR, "rac failure at %d\n", i);
-            return 1;
+            if (ff_rac_check_termination(&c, version) < 0) {
+                av_log(NULL, AV_LOG_ERROR, "rac failure at termination pass %d version %d\n", p, version);
+                return 1;
+            }
+            if (c.bytestream - c.bytestream_start - actual_length != version) {
+                av_log(NULL, AV_LOG_ERROR, "rac failure at pass %d version %d\n", p, version);
+                return 1;
+            }
         }
+    }
 
     return 0;
 }
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index b537ec06a5f0c..112f5b52f472b 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -37,6 +37,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "faxcompr.h"
@@ -46,11 +47,15 @@
 #include "tiff.h"
 #include "tiff_data.h"
 #include "thread.h"
+#include "get_bits.h"
 
 typedef struct TiffContext {
+    AVClass *class;
     AVCodecContext *avctx;
     GetByteContext gb;
 
+    int get_subimage;
+
     int width, height;
     unsigned int bpp, bppcount;
     uint32_t palette[256];
@@ -65,6 +70,12 @@ typedef struct TiffContext {
     int fill_order;
     uint32_t res[4];
 
+    int is_bayer;
+    uint8_t pattern[4];
+    unsigned white_level;
+
+    uint32_t sub_ifd;
+
     int strips, rps, sstype;
     int sot;
     int stripsizesoff, stripsize, stripoff, strippos;
@@ -236,7 +247,8 @@ static int add_metadata(int count, int type,
     };
 }
 
-static void av_always_inline horizontal_fill(unsigned int bpp, uint8_t* dst,
+static void av_always_inline horizontal_fill(TiffContext *s,
+                                             unsigned int bpp, uint8_t* dst,
                                              int usePtr, const uint8_t *src,
                                              uint8_t c, int width, int offset)
 {
@@ -267,6 +279,15 @@ static void av_always_inline horizontal_fill(unsigned int bpp, uint8_t* dst,
             dst[(width+offset)*2+0] = (usePtr ? src[width] : c) >> 4;
         }
         break;
+    case 12: {
+                 uint16_t *dst16 = (uint16_t *)dst;
+                 GetBitContext gb;
+                 init_get_bits8(&gb, src, width);
+                 for (int i = 0; i < s->width; i++) {
+                     dst16[i] = get_bits(&gb, 12) << 4;
+                 }
+             }
+        break;
     default:
         if (usePtr) {
             memcpy(dst + offset, src, width);
@@ -289,6 +310,19 @@ static int deinvert_buffer(TiffContext *s, const uint8_t *src, int size)
     return 0;
 }
 
+/* Unpack s->width samples of bpp bits each from src into 16-bit row lnum of plane 0. */
+static void unpack_gray(TiffContext *s, AVFrame *p,
+                       const uint8_t *src, int lnum, int width, int bpp)
+{
+    uint8_t *row = p->data[0] + lnum * p->linesize[0];
+    uint16_t *out = (uint16_t *)row;
+    GetBitContext bits;
+
+    init_get_bits8(&bits, src, width); /* width: size of the packed source row in bytes */
+    for (int x = 0; x < s->width; x++)
+        out[x] = get_bits(&bits, bpp);
+}
+
 static void unpack_yuv(TiffContext *s, AVFrame *p,
                        const uint8_t *src, int lnum)
 {
@@ -368,7 +402,7 @@ static int tiff_unpack_zlib(TiffContext *s, AVFrame *p, uint8_t *dst, int stride
     src = zbuf;
     for (line = 0; line < lines; line++) {
         if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
-            horizontal_fill(s->bpp, dst, 1, src, 0, width, 0);
+            horizontal_fill(s, s->bpp, dst, 1, src, 0, width, 0);
         } else {
             memcpy(dst, src, width);
         }
@@ -433,7 +467,7 @@ static int tiff_unpack_lzma(TiffContext *s, AVFrame *p, uint8_t *dst, int stride
     src = buf;
     for (line = 0; line < lines; line++) {
         if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
-            horizontal_fill(s->bpp, dst, 1, src, 0, width, 0);
+            horizontal_fill(s, s->bpp, dst, 1, src, 0, width, 0);
         } else {
             memcpy(dst, src, width);
         }
@@ -476,7 +510,7 @@ static int tiff_unpack_fax(TiffContext *s, uint8_t *dst, int stride,
                           s->compr, s->fax_opts);
     if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
         for (line = 0; line < lines; line++) {
-            horizontal_fill(s->bpp, dst, 1, dst, 0, width, 0);
+            horizontal_fill(s, s->bpp, dst, 1, dst, 0, width, 0);
             dst += stride;
         }
     return ret;
@@ -516,6 +550,18 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
         av_assert0(width <= bytes_per_row);
         av_assert0(s->bpp == 24);
     }
+    if (s->is_bayer) {
+        width = (s->bpp * s->width + 7) >> 3;
+    }
+    if (p->format == AV_PIX_FMT_GRAY12) {
+        av_fast_padded_malloc(&s->yuv_line, &s->yuv_line_size, width);
+        if (s->yuv_line == NULL) {
+            av_log(s->avctx, AV_LOG_ERROR, "Not enough memory\n");
+            return AVERROR(ENOMEM);
+        }
+        dst = s->yuv_line;
+        stride = 0;
+    }
 
     if (s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE) {
 #if CONFIG_ZLIB
@@ -559,10 +605,12 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
                 return AVERROR_INVALIDDATA;
             }
             if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
-                horizontal_fill(s->bpp, dst, 1, dst, 0, width, 0);
+                horizontal_fill(s, s->bpp, dst, 1, dst, 0, width, 0);
             if (is_yuv) {
                 unpack_yuv(s, p, dst, strip_start + line);
                 line += s->subsampling[1] - 1;
+            } else if (p->format == AV_PIX_FMT_GRAY12) {
+                unpack_gray(s, p, dst, strip_start + line, width, s->bpp);
             }
             dst += stride;
         }
@@ -571,7 +619,7 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
     if (s->compr == TIFF_CCITT_RLE ||
         s->compr == TIFF_G3        ||
         s->compr == TIFF_G4) {
-        if (is_yuv)
+        if (is_yuv || p->format == AV_PIX_FMT_GRAY12)
             return AVERROR_INVALIDDATA;
 
         return tiff_unpack_fax(s, dst, stride, src, size, width, lines);
@@ -595,7 +643,7 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
                 return AVERROR_INVALIDDATA;
 
             if (!s->fill_order) {
-                horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                horizontal_fill(s, s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 || s->is_bayer),
                                 dst, 1, src, 0, width, 0);
             } else {
                 int i;
@@ -619,7 +667,7 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
                                "Copy went out of bounds\n");
                         return AVERROR_INVALIDDATA;
                     }
-                    horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                    horizontal_fill(s, s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
                                     dst, 1, src, 0, code, pixels);
                     src    += code;
                     pixels += code;
@@ -631,7 +679,7 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
                         return AVERROR_INVALIDDATA;
                     }
                     c = *src++;
-                    horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                    horizontal_fill(s, s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
                                     dst, 0, NULL, c, code, pixels);
                     pixels += code;
                 }
@@ -646,6 +694,8 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
         if (is_yuv) {
             unpack_yuv(s, p, dst, strip_start + line);
             line += s->subsampling[1] - 1;
+        } else if (p->format == AV_PIX_FMT_GRAY12) {
+            unpack_gray(s, p, dst, strip_start + line, width, s->bpp);
         }
         dst += stride;
     }
@@ -665,7 +715,7 @@ static int init_image(TiffContext *s, ThreadFrame *frame)
         return AVERROR_INVALIDDATA;
     }
 
-    switch (s->planar * 1000 + s->bpp * 10 + s->bppcount) {
+    switch (s->planar * 1000 + s->bpp * 10 + s->bppcount + s->is_bayer * 10000) {
     case 11:
         if (!s->palette_is_set) {
             s->avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
@@ -681,6 +731,69 @@ static int init_image(TiffContext *s, ThreadFrame *frame)
     case 81:
         s->avctx->pix_fmt = s->palette_is_set ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_GRAY8;
         break;
+    case 121:
+        s->avctx->pix_fmt = AV_PIX_FMT_GRAY12;
+        break;
+    case 10081:
+        switch (AV_RL32(s->pattern)) {
+        case 0x02010100:
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_RGGB8;
+            break;
+        case 0x00010102:
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_BGGR8;
+            break;
+        case 0x01000201:
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GBRG8;
+            break;
+        case 0x01020001:
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GRBG8;
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Unsupported Bayer pattern: 0x%X\n",
+                   AV_RL32(s->pattern));
+            return AVERROR_PATCHWELCOME;
+        }
+        break;
+    case 10121:
+        switch (AV_RL32(s->pattern)) {
+        case 0x02010100:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_RGGB16LE : AV_PIX_FMT_BAYER_RGGB16BE;
+            break;
+        case 0x00010102:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_BGGR16LE : AV_PIX_FMT_BAYER_BGGR16BE;
+            break;
+        case 0x01000201:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GBRG16LE : AV_PIX_FMT_BAYER_GBRG16BE;
+            break;
+        case 0x01020001:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GRBG16LE : AV_PIX_FMT_BAYER_GRBG16BE;
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Unsupported Bayer pattern: 0x%X\n",
+                   AV_RL32(s->pattern));
+            return AVERROR_PATCHWELCOME;
+        }
+        break;
+    case 10161:
+        switch (AV_RL32(s->pattern)) {
+        case 0x02010100:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_RGGB16LE : AV_PIX_FMT_BAYER_RGGB16BE;
+            break;
+        case 0x00010102:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_BGGR16LE : AV_PIX_FMT_BAYER_BGGR16BE;
+            break;
+        case 0x01000201:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GBRG16LE : AV_PIX_FMT_BAYER_GBRG16BE;
+            break;
+        case 0x01020001:
+            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GRBG16LE : AV_PIX_FMT_BAYER_GRBG16BE;
+            break;
+        default:
+            av_log(s->avctx, AV_LOG_ERROR, "Unsupported Bayer pattern: 0x%X\n",
+                   AV_RL32(s->pattern));
+            return AVERROR_PATCHWELCOME;
+        }
+        break;
     case 243:
         if (s->photometric == TIFF_PHOTOMETRIC_YCBCR) {
             if (s->subsampling[0] == 1 && s->subsampling[1] == 1) {
@@ -712,7 +825,7 @@ static int init_image(TiffContext *s, ThreadFrame *frame)
         s->avctx->pix_fmt = s->le ? AV_PIX_FMT_YA16LE : AV_PIX_FMT_YA16BE;
         break;
     case 324:
-        s->avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        s->avctx->pix_fmt = s->photometric == TIFF_PHOTOMETRIC_SEPARATED ? AV_PIX_FMT_RGB0 : AV_PIX_FMT_RGBA;
         break;
     case 483:
         s->avctx->pix_fmt = s->le ? AV_PIX_FMT_RGB48LE  : AV_PIX_FMT_RGB48BE;
@@ -961,21 +1074,41 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
     case TIFF_PREDICTOR:
         s->predictor = value;
         break;
+    case TIFF_SUB_IFDS:
+        s->sub_ifd = value;
+        break;
+    case TIFF_WHITE_LEVEL:
+        s->white_level = value;
+        break;
+    case TIFF_CFA_PATTERN_DIM: /* only 2x2 patterns are handled: both dimensions must be 2 */
+        if (count != 2 || ff_tget(&s->gb, type, s->le) != 2 ||
+                          ff_tget(&s->gb, type, s->le) != 2) {
+            av_log(s->avctx, AV_LOG_ERROR, "CFA Pattern dimensions are not 2x2\n");
+            return AVERROR_INVALIDDATA;
+        }
+        break;
+    case TIFF_CFA_PATTERN:
+        s->is_bayer = 1;
+        s->pattern[0] = ff_tget(&s->gb, type, s->le);
+        s->pattern[1] = ff_tget(&s->gb, type, s->le);
+        s->pattern[2] = ff_tget(&s->gb, type, s->le);
+        s->pattern[3] = ff_tget(&s->gb, type, s->le);
+        break;
     case TIFF_PHOTOMETRIC:
         switch (value) {
         case TIFF_PHOTOMETRIC_WHITE_IS_ZERO:
         case TIFF_PHOTOMETRIC_BLACK_IS_ZERO:
         case TIFF_PHOTOMETRIC_RGB:
         case TIFF_PHOTOMETRIC_PALETTE:
+        case TIFF_PHOTOMETRIC_SEPARATED:
         case TIFF_PHOTOMETRIC_YCBCR:
+        case TIFF_PHOTOMETRIC_CFA:
             s->photometric = value;
             break;
         case TIFF_PHOTOMETRIC_ALPHA_MASK:
-        case TIFF_PHOTOMETRIC_SEPARATED:
         case TIFF_PHOTOMETRIC_CIE_LAB:
         case TIFF_PHOTOMETRIC_ICC_LAB:
         case TIFF_PHOTOMETRIC_ITU_LAB:
-        case TIFF_PHOTOMETRIC_CFA:
         case TIFF_PHOTOMETRIC_LOG_L:
         case TIFF_PHOTOMETRIC_LOG_LUV:
         case TIFF_PHOTOMETRIC_LINEAR_RAW:
@@ -1192,7 +1325,7 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
     default:
         if (s->avctx->err_recognition & AV_EF_EXPLODE) {
             av_log(s->avctx, AV_LOG_ERROR,
-                   "Unknown or unsupported tag %d/0X%0X\n",
+                   "Unknown or unsupported tag %d/0x%0X\n",
                    tag, tag);
             return AVERROR_INVALIDDATA;
         }
@@ -1235,10 +1368,13 @@ static int decode_frame(AVCodecContext *avctx,
     }
     s->le          = le;
     // TIFF_BPP is not a required tag and defaults to 1
+again:
     s->bppcount    = s->bpp = 1;
     s->photometric = TIFF_PHOTOMETRIC_NONE;
     s->compr       = TIFF_RAW;
     s->fill_order  = 0;
+    s->white_level = 0;
+    s->is_bayer    = 0;
     free_geotags(s);
 
     // Reset these offsets so we can tell if they were set this frame
@@ -1253,6 +1389,16 @@ static int decode_frame(AVCodecContext *avctx,
             return ret;
     }
 
+    if (s->sub_ifd && s->get_subimage) {
+        off = s->sub_ifd;
+        if (off >= UINT_MAX - 14 || avpkt->size < off + 14) {
+            av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image size\n");
+            return AVERROR_INVALIDDATA;
+        }
+        s->sub_ifd = 0;
+        goto again;
+    }
+
     for (i = 0; i<s->geotag_count; i++) {
         const char *keyname = get_geokey_name(s->geotags[i].key);
         if (!keyname) {
@@ -1304,6 +1450,7 @@ static int decode_frame(AVCodecContext *avctx,
     planes = s->planar ? s->bppcount : 1;
     for (plane = 0; plane < planes; plane++) {
         int remaining = avpkt->size;
+        int decoded_height;
         stride = p->linesize[plane];
         dst = p->data[plane];
         for (i = 0; i < s->height; i += s->rps) {
@@ -1331,6 +1478,8 @@ static int decode_frame(AVCodecContext *avctx,
                 break;
             }
         }
+        decoded_height = FFMIN(i, s->height);
+
         if (s->predictor == 2) {
             if (s->photometric == TIFF_PHOTOMETRIC_YCBCR) {
                 av_log(s->avctx, AV_LOG_ERROR, "predictor == 2 with YUV is unsupported");
@@ -1347,7 +1496,7 @@ static int decode_frame(AVCodecContext *avctx,
                 s->avctx->pix_fmt == AV_PIX_FMT_YA16LE ||
                 s->avctx->pix_fmt == AV_PIX_FMT_GBRP16LE ||
                 s->avctx->pix_fmt == AV_PIX_FMT_GBRAP16LE) {
-                for (i = 0; i < s->height; i++) {
+                for (i = 0; i < decoded_height; i++) {
                     for (j = soff; j < ssize; j += 2)
                         AV_WL16(dst + j, AV_RL16(dst + j) + AV_RL16(dst + j - soff));
                     dst += stride;
@@ -1358,13 +1507,13 @@ static int decode_frame(AVCodecContext *avctx,
                        s->avctx->pix_fmt == AV_PIX_FMT_YA16BE ||
                        s->avctx->pix_fmt == AV_PIX_FMT_GBRP16BE ||
                        s->avctx->pix_fmt == AV_PIX_FMT_GBRAP16BE) {
-                for (i = 0; i < s->height; i++) {
+                for (i = 0; i < decoded_height; i++) {
                     for (j = soff; j < ssize; j += 2)
                         AV_WB16(dst + j, AV_RB16(dst + j) + AV_RB16(dst + j - soff));
                     dst += stride;
                 }
             } else {
-                for (i = 0; i < s->height; i++) {
+                for (i = 0; i < decoded_height; i++) {
                     for (j = soff; j < ssize; j++)
                         dst[j] += dst[j - soff];
                     dst += stride;
@@ -1381,6 +1530,24 @@ static int decode_frame(AVCodecContext *avctx,
                 dst += stride;
             }
         }
+
+        if (s->photometric == TIFF_PHOTOMETRIC_SEPARATED &&
+            s->avctx->pix_fmt == AV_PIX_FMT_RGB0) {
+            dst = p->data[plane];
+            for (i = 0; i < s->height; i++) {
+                for (j = 0; j < s->width; j++) {
+                    int k =  255 - dst[4 * j + 3];
+                    int r = (255 - dst[4 * j    ]) * k;
+                    int g = (255 - dst[4 * j + 1]) * k;
+                    int b = (255 - dst[4 * j + 2]) * k;
+                    dst[4 * j    ] = r * 257 >> 16;
+                    dst[4 * j + 1] = g * 257 >> 16;
+                    dst[4 * j + 2] = b * 257 >> 16;
+                    dst[4 * j + 3] = 255;
+                }
+                dst += p->linesize[plane];
+            }
+        }
     }
 
     if (s->planar && s->bppcount > 2) {
@@ -1390,6 +1557,15 @@ static int decode_frame(AVCodecContext *avctx,
         FFSWAP(int,      p->linesize[0], p->linesize[1]);
     }
 
+    if (s->is_bayer && s->white_level && s->bpp == 16) {
+        uint16_t *dst = (uint16_t *)p->data[0];
+        for (i = 0; i < s->height; i++) {
+            for (j = 0; j < s->width; j++)
+                dst[j] = FFMIN((dst[j] / (float)s->white_level) * 65535, 65535);
+            dst += stride / 2;
+        }
+    }
+
     *got_frame = 1;
 
     return avpkt->size;
@@ -1421,11 +1597,26 @@ static av_cold int tiff_end(AVCodecContext *avctx)
     ff_lzw_decode_close(&s->lzw);
     av_freep(&s->deinvert_buf);
     s->deinvert_buf_size = 0;
+    av_freep(&s->yuv_line);
+    s->yuv_line_size = 0;
     av_freep(&s->fax_buffer);
     s->fax_buffer_size = 0;
     return 0;
 }
 
+#define OFFSET(x) offsetof(TiffContext, x) /* byte offset of an option's backing field in TiffContext */
+static const AVOption tiff_options[] = { /* private decoder options, exposed through tiff_decoder_class */
+    { "subimage", "decode subimage instead if available", OFFSET(get_subimage), AV_OPT_TYPE_BOOL, {.i64=0},  0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM },
+    { NULL }, /* end of the option list */
+};
+
+static const AVClass tiff_decoder_class = { /* referenced by ff_tiff_decoder.priv_class */
+    .class_name = "TIFF decoder",
+    .item_name  = av_default_item_name,
+    .option     = tiff_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_tiff_decoder = {
     .name           = "tiff",
     .long_name      = NULL_IF_CONFIG_SMALL("TIFF image"),
@@ -1437,4 +1628,5 @@ AVCodec ff_tiff_decoder = {
     .decode         = decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(tiff_init),
     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .priv_class     = &tiff_decoder_class,
 };
diff --git a/libavcodec/tiff.h b/libavcodec/tiff.h
index 3f692afa00da4..4b08650108883 100644
--- a/libavcodec/tiff.h
+++ b/libavcodec/tiff.h
@@ -70,18 +70,22 @@ enum TiffTags {
     TIFF_TILE_LENGTH        = 0x143,
     TIFF_TILE_OFFSETS       = 0x144,
     TIFF_TILE_BYTE_COUNTS   = 0x145,
+    TIFF_SUB_IFDS           = 0x14A,
     TIFF_EXTRASAMPLES       = 0x152,
     TIFF_YCBCR_COEFFICIENTS = 0x211,
     TIFF_YCBCR_SUBSAMPLING  = 0x212,
     TIFF_YCBCR_POSITIONING  = 0x213,
     TIFF_REFERENCE_BW       = 0x214,
+    TIFF_CFA_PATTERN_DIM    = 0x828D,
+    TIFF_CFA_PATTERN        = 0x828E,
     TIFF_COPYRIGHT          = 0x8298,
     TIFF_MODEL_TIEPOINT     = 0x8482,
     TIFF_MODEL_PIXEL_SCALE  = 0x830E,
     TIFF_MODEL_TRANSFORMATION= 0x8480,
     TIFF_GEO_KEY_DIRECTORY  = 0x87AF,
     TIFF_GEO_DOUBLE_PARAMS  = 0x87B0,
-    TIFF_GEO_ASCII_PARAMS   = 0x87B1
+    TIFF_GEO_ASCII_PARAMS   = 0x87B1,
+    TIFF_WHITE_LEVEL        = 0xC61D,
 };
 
 /** list of TIFF compression types */
diff --git a/libavcodec/trace_headers_bsf.c b/libavcodec/trace_headers_bsf.c
index 8322229d4c6d0..3ec78fe822dbc 100644
--- a/libavcodec/trace_headers_bsf.c
+++ b/libavcodec/trace_headers_bsf.c
@@ -28,6 +28,7 @@
 
 typedef struct TraceHeadersContext {
     CodedBitstreamContext *cbc;
+    CodedBitstreamFragment fragment; /* reused for extradata and every packet; freed in trace_headers_close() */
 } TraceHeadersContext;
 
 
@@ -44,13 +45,13 @@ static int trace_headers_init(AVBSFContext *bsf)
     ctx->cbc->trace_level  = AV_LOG_INFO;
 
     if (bsf->par_in->extradata) {
-        CodedBitstreamFragment ps;
+        CodedBitstreamFragment *frag = &ctx->fragment;
 
         av_log(bsf, AV_LOG_INFO, "Extradata\n");
 
-        err = ff_cbs_read_extradata(ctx->cbc, &ps, bsf->par_in);
+        err = ff_cbs_read_extradata(ctx->cbc, frag, bsf->par_in);
 
-        ff_cbs_fragment_uninit(ctx->cbc, &ps);
+        ff_cbs_fragment_reset(ctx->cbc, frag);
     }
 
     return err;
@@ -60,13 +61,14 @@ static void trace_headers_close(AVBSFContext *bsf)
 {
     TraceHeadersContext *ctx = bsf->priv_data;
 
+    ff_cbs_fragment_free(ctx->cbc, &ctx->fragment);
     ff_cbs_close(&ctx->cbc);
 }
 
 static int trace_headers(AVBSFContext *bsf, AVPacket *pkt)
 {
     TraceHeadersContext *ctx = bsf->priv_data;
-    CodedBitstreamFragment au;
+    CodedBitstreamFragment *frag = &ctx->fragment;
     char tmp[256] = { 0 };
     int err;
 
@@ -92,9 +94,9 @@ static int trace_headers(AVBSFContext *bsf, AVPacket *pkt)
 
     av_log(bsf, AV_LOG_INFO, "Packet: %d bytes%s.\n", pkt->size, tmp);
 
-    err = ff_cbs_read_packet(ctx->cbc, &au, pkt);
+    err = ff_cbs_read_packet(ctx->cbc, frag, pkt);
 
-    ff_cbs_fragment_uninit(ctx->cbc, &au);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
 
     if (err < 0)
         av_packet_unref(pkt);
diff --git a/libavcodec/truehd_core_bsf.c b/libavcodec/truehd_core_bsf.c
new file mode 100644
index 0000000000000..be021af8e83c6
--- /dev/null
+++ b/libavcodec/truehd_core_bsf.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "bsf.h"
+#include "get_bits.h"
+#include "mlp_parser.h"
+#include "mlp.h"
+
+typedef struct AccessUnit {
+    uint8_t bits[4];    /* the four flag bits that precede each substream offset */
+    uint16_t offset;    /* 12-bit directory value scaled by 2 */
+    uint16_t optional;  /* extra 16-bit word, only read when bits[0] is set */
+} AccessUnit;
+
+typedef struct TrueHDCoreContext {
+    const AVClass *class;
+
+    MLPHeaderInfo hdr;  /* last parsed major sync; kept across packets */
+} TrueHDCoreContext;
+
+/**
+ * Reduce one TrueHD access unit to at most its first three substreams.
+ *
+ * If dropping the remaining substreams shrinks the access unit, a new packet
+ * is allocated and filled with a rewritten access unit header, the (patched)
+ * major sync when present, the retained substream directory entries and the
+ * payload; otherwise the input packet is moved to the output unchanged.
+ *
+ * @param ctx bitstream filter context; priv_data caches the last major sync
+ * @param out packet that receives the filtered access unit
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int truehd_core_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    TrueHDCoreContext *s = ctx->priv_data;
+    GetBitContext gbc;
+    AccessUnit units[MAX_SUBSTREAMS];
+    AVPacket *in;
+    int ret, i, size, last_offset = 0;
+    int in_size, out_size;
+    int have_header = 0;
+    int substream_bits = 0;
+    int start, end;
+    uint16_t dts;
+
+    ret = ff_bsf_get_packet(ctx, &in);
+    if (ret < 0)
+        return ret;
+
+    /* Too short to hold the 4-byte access unit header: fail instead of returning 0 with no output. */
+    if (in->size < 4) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ret = init_get_bits(&gbc, in->data, 32);
+    if (ret < 0)
+        goto fail;
+
+    /* Access unit header: 4 check bits, 12-bit length in 16-bit words, 16-bit timestamp. */
+    skip_bits(&gbc, 4);
+    in_size = get_bits(&gbc, 12) * 2;
+    if (in_size < 4 || in_size > in->size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    out_size = in_size;
+    dts = get_bits(&gbc, 16);
+
+    ret = init_get_bits8(&gbc, in->data + 4, in->size - 4);
+    if (ret < 0)
+        goto fail;
+
+    if (show_bits_long(&gbc, 32) == 0xf8726fba) {
+        if ((ret = ff_mlp_read_major_sync(ctx, &s->hdr, &gbc)) != 0)
+            goto fail;
+        have_header = 1;
+    }
+
+    if (s->hdr.num_substreams > MAX_SUBSTREAMS) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    start = get_bits_count(&gbc);
+    for (i = 0; i < s->hdr.num_substreams; i++) {
+        for (int j = 0; j < 4; j++)
+            units[i].bits[j] = get_bits1(&gbc);
+
+        units[i].offset = get_bits(&gbc, 12) * 2;
+        if (i < FFMIN(s->hdr.num_substreams, 3)) {
+            last_offset = units[i].offset;
+            substream_bits += 16;
+        }
+
+        if (units[i].bits[0]) {
+            units[i].optional = get_bits(&gbc, 16);
+            if (i < FFMIN(s->hdr.num_substreams, 3))
+                substream_bits += 16;
+        }
+    }
+    end = get_bits_count(&gbc);
+
+    /* Header bytes parsed so far + 4-byte AU header + offset of the last retained substream. */
+    size = ((end + 7) >> 3) + 4 + last_offset;
+    if (size >= 0 && size <= in->size)
+        out_size = size;
+    if (out_size < in_size) {
+        int bpos = 0, reduce = (end - start - substream_bits) >> 4;
+        uint16_t parity_nibble = 0;
+        uint16_t auheader;
+
+        ret = av_new_packet(out, out_size);
+        if (ret < 0)
+            goto fail;
+
+        AV_WB16(out->data + 2, dts);
+        parity_nibble = dts;
+        out->size -= reduce * 2;
+        parity_nibble ^= out->size / 2;
+
+        if (have_header) {
+            memcpy(out->data + 4, in->data + 4, 28);
+            out->data[16 + 4] = (out->data[16 + 4] & 0x0f) | (FFMIN(s->hdr.num_substreams, 3) << 4);
+            out->data[25 + 4] = out->data[25 + 4] & 0xfe;
+            out->data[26 + 4] = 0xff;
+            out->data[27 + 4] = 0xff;
+            AV_WL16(out->data + 4 + 26, ff_mlp_checksum16(out->data + 4, 26));
+        }
+
+        for (i = 0; i < FFMIN(s->hdr.num_substreams, 3); i++) {
+            uint16_t substr_hdr = 0;
+
+            substr_hdr |= (units[i].bits[0] << 15);
+            substr_hdr |= (units[i].bits[1] << 14);
+            substr_hdr |= (units[i].bits[2] << 13);
+            substr_hdr |= (units[i].bits[3] << 12);
+            substr_hdr |= (units[i].offset / 2) & 0x0FFF;
+
+            AV_WB16(out->data + have_header * 28 + 4 + bpos, substr_hdr);
+
+            parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+            parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+
+            if (units[i].bits[0]) {
+                AV_WB16(out->data + have_header * 28 + 4 + bpos, units[i].optional);
+
+                parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+                parity_nibble ^= out->data[have_header * 28 + 4 + bpos++];
+            }
+        }
+
+        parity_nibble ^= parity_nibble >> 8;
+        parity_nibble ^= parity_nibble >> 4;
+        parity_nibble &= 0xF;
+
+        memcpy(out->data + have_header * 28 + 4 + bpos,
+               in->data + 4 + (end >> 3),
+               out_size - (4 + (end >> 3)));
+        auheader  = (parity_nibble ^ 0xF) << 12;
+        auheader |= (out->size / 2) & 0x0fff;
+        AV_WB16(out->data, auheader);
+
+        ret = av_packet_copy_props(out, in);
+    } else {
+        av_packet_move_ref(out, in);
+    }
+
+fail:
+    if (ret < 0)
+        av_packet_unref(out);
+    av_packet_free(&in);
+
+    return ret;
+}
+
+static const enum AVCodecID codec_ids[] = { /* codecs this filter is registered for (see .codec_ids below) */
+    AV_CODEC_ID_TRUEHD, AV_CODEC_ID_NONE,
+};
+
+const AVBitStreamFilter ff_truehd_core_bsf = {
+    .name           = "truehd_core",
+    .priv_data_size = sizeof(TrueHDCoreContext), /* holds the cached MLPHeaderInfo used by truehd_core_filter() */
+    .filter         = truehd_core_filter,
+    .codec_ids      = codec_ids,
+};
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index 58a577f53c75a..4d27f0cbfc5e1 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -112,9 +112,13 @@ typedef struct TM2Huff {
     int *lens; ///< codelengths
 } TM2Huff;
 
+/**
+ * Recursively read a Huffman (sub)tree, storing each terminal node's code and length in huff.
+ * @returns the length of the longest code or an AVERROR code
+ */
 static int tm2_read_tree(TM2Context *ctx, uint32_t prefix, int length, TM2Huff *huff)
 {
-    int ret;
+    int ret, ret2;
     if (length > huff->max_bits) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Tree exceeded its given depth (%i)\n",
                huff->max_bits);
@@ -133,14 +137,14 @@ static int tm2_read_tree(TM2Context *ctx, uint32_t prefix, int length, TM2Huff *
         huff->bits[huff->num] = prefix;
         huff->lens[huff->num] = length;
         huff->num++;
-        return 0;
+        return length;
     } else { /* non-terminal node */
-        if ((ret = tm2_read_tree(ctx, prefix << 1, length + 1, huff)) < 0)
-            return ret;
+        if ((ret2 = tm2_read_tree(ctx, prefix << 1, length + 1, huff)) < 0)
+            return ret2;
         if ((ret = tm2_read_tree(ctx, (prefix << 1) | 1, length + 1, huff)) < 0)
             return ret;
     }
-    return 0;
+    return FFMAX(ret, ret2);
 }
 
 static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
@@ -183,6 +187,11 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
 
     res = tm2_read_tree(ctx, 0, 0, &huff);
 
+    if (res >= 0 && res != huff.max_bits) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Got less bits than expected: %i of %i\n",
+               res, huff.max_bits);
+        res = AVERROR_INVALIDDATA;
+    }
     if (huff.num != huff.max_num) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Got less codes than expected: %i of %i\n",
                huff.num, huff.max_num);
@@ -484,7 +493,7 @@ static inline void tm2_high_chroma(int *data, int stride, int *last, unsigned *C
     }
 }
 
-static inline void tm2_low_chroma(int *data, int stride, int *clast, int *CD, int *deltas, int bx)
+static inline void tm2_low_chroma(int *data, int stride, int *clast, unsigned *CD, int *deltas, int bx)
 {
     int t;
     int l;
@@ -494,8 +503,8 @@ static inline void tm2_low_chroma(int *data, int stride, int *clast, int *CD, in
         prev = clast[-3];
     else
         prev = 0;
-    t        = (CD[0] + CD[1]) >> 1;
-    l        = (prev - CD[0] - CD[1] + clast[1]) >> 1;
+    t        = (int)(CD[0] + CD[1]) >> 1;
+    l        = (int)(prev - CD[0] - CD[1] + clast[1]) >> 1;
     CD[1]    = CD[0] + CD[1] - t;
     CD[0]    = t;
     clast[0] = l;
diff --git a/libavcodec/truemotion2rt.c b/libavcodec/truemotion2rt.c
index 9df0b527bbdfb..e3ab998fda7cb 100644
--- a/libavcodec/truemotion2rt.c
+++ b/libavcodec/truemotion2rt.c
@@ -116,7 +116,7 @@ static int truemotion2rt_decode_frame(AVCodecContext *avctx, void *data,
     if (ret < 0)
         return ret;
 
-    if (avctx->width / s->hscale * avctx->height * s->delta_size > avpkt->size * 8LL * 4)
+    if ((avctx->width + s->hscale - 1)/ s->hscale * avctx->height * s->delta_size > avpkt->size * 8LL * 4)
         return AVERROR_INVALIDDATA;
 
     ret = init_get_bits8(gb, avpkt->data + ret, avpkt->size - ret);
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 285bfdbc63cb4..cc04b7347122c 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -214,6 +214,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
     case AV_PIX_FMT_YUVA422P9BE:
     case AV_PIX_FMT_YUVA422P10LE:
     case AV_PIX_FMT_YUVA422P10BE:
+    case AV_PIX_FMT_YUVA422P12LE:
+    case AV_PIX_FMT_YUVA422P12BE:
     case AV_PIX_FMT_YUVA422P16LE:
     case AV_PIX_FMT_YUVA422P16BE:
     case AV_PIX_FMT_YUV440P10LE:
@@ -234,6 +236,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
     case AV_PIX_FMT_YUVA444P9BE:
     case AV_PIX_FMT_YUVA444P10LE:
     case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA444P12LE:
+    case AV_PIX_FMT_YUVA444P12BE:
     case AV_PIX_FMT_YUVA444P16LE:
     case AV_PIX_FMT_YUVA444P16BE:
     case AV_PIX_FMT_GBRP9LE:
@@ -1397,10 +1401,8 @@ const char *avcodec_profile_name(enum AVCodecID codec_id, int profile)
 
 unsigned avcodec_version(void)
 {
-//    av_assert0(AV_CODEC_ID_V410==164);
     av_assert0(AV_CODEC_ID_PCM_S8_PLANAR==65563);
     av_assert0(AV_CODEC_ID_ADPCM_G722==69660);
-//     av_assert0(AV_CODEC_ID_BMV_AUDIO==86071);
     av_assert0(AV_CODEC_ID_SRT==94216);
     av_assert0(LIBAVCODEC_VERSION_MICRO >= 100);
 
@@ -1438,6 +1440,7 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     case AV_CODEC_ID_DSD_MSBF_PLANAR:
     case AV_CODEC_ID_PCM_ALAW:
     case AV_CODEC_ID_PCM_MULAW:
+    case AV_CODEC_ID_PCM_VIDC:
     case AV_CODEC_ID_PCM_S8:
     case AV_CODEC_ID_PCM_S8_PLANAR:
     case AV_CODEC_ID_PCM_U8:
@@ -1594,8 +1597,6 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba,
             return 256 * (frame_bytes / 64);
         if (id == AV_CODEC_ID_RA_144)
             return 160 * (frame_bytes / 20);
-        if (id == AV_CODEC_ID_G723_1)
-            return 240 * (frame_bytes / 24);
 
         if (bps > 0) {
             /* calc from frame_bytes and bits_per_coded_sample */
@@ -2204,3 +2205,22 @@ int64_t ff_guess_coded_bitrate(AVCodecContext *avctx)
 
     return bitrate;
 }
+
+/* Return val if it occurs in array_valid_values (a list terminated by
+ * INT_MAX); otherwise log at debug level and return default_value. */
+int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
+                                const int * array_valid_values, int default_value)
+{
+    const int *candidate;
+
+    /* Walk the list until the INT_MAX sentinel is reached. */
+    for (candidate = array_valid_values; *candidate != INT_MAX; candidate++) {
+        if (*candidate == val)
+            return val;
+    }
+
+    /* val is not a valid value */
+    av_log(ctx, AV_LOG_DEBUG,
+           "%s %d are not supported. Set to default value : %d\n", val_name, val, default_value);
+    return default_value;
+}
diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
index 452bf0d9bc258..0d4671beb1aa8 100644
--- a/libavcodec/v4l2_m2m.h
+++ b/libavcodec/v4l2_m2m.h
@@ -104,7 +104,7 @@ int ff_v4l2_m2m_codec_init(AVCodecContext *avctx);
 int ff_v4l2_m2m_codec_end(AVCodecContext *avctx);
 
 /**
- * Reinitializes the V4L2m2mContext when the driver cant continue processing
+ * Reinitializes the V4L2m2mContext when the driver cannot continue processing
  * with the capture parameters.
  *
  * @param[in] ctx The V4L2m2mContext instantiated by the encoder/decoder.
@@ -114,7 +114,7 @@ int ff_v4l2_m2m_codec_end(AVCodecContext *avctx);
 int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
 
 /**
- * Reinitializes the V4L2m2mContext when the driver cant continue processing
+ * Reinitializes the V4L2m2mContext when the driver cannot continue processing
  * with the  any of the current V4L2Contexts (ie, changes in output and capture).
  *
  * @param[in] ctx The V4L2m2mContext instantiated by the encoder/decoder.
diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index 2c34cdce2cf59..2dda451882614 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -158,16 +158,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_DEBUG, ".\n");
     }
 
-    av_assert0(pic->input_available && !pic->encode_issued);
+    av_assert0(!pic->encode_issued);
     for (i = 0; i < pic->nb_refs; i++) {
         av_assert0(pic->refs[i]);
-        // If we are serialised then the references must have already
-        // completed.  If not, they must have been issued but need not
-        // have completed yet.
-        if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING)
-            av_assert0(pic->refs[i]->encode_complete);
-        else
-            av_assert0(pic->refs[i]->encode_issued);
+        av_assert0(pic->refs[i]->encode_issued);
     }
 
     av_log(avctx, AV_LOG_DEBUG, "Input surface is %#x.\n", pic->input_surface);
@@ -319,16 +313,60 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
         }
     }
 
+    if (pic->nb_slices == 0)
+        pic->nb_slices = ctx->nb_slices;
     if (pic->nb_slices > 0) {
+        int rounding;
+
         pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
         if (!pic->slices) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
+
+        for (i = 0; i < pic->nb_slices; i++)
+            pic->slices[i].row_size = ctx->slice_size;
+
+        rounding = ctx->slice_block_rows - ctx->nb_slices * ctx->slice_size;
+        if (rounding > 0) {
+            // Place rounding error at top and bottom of frame.
+            av_assert0(rounding < pic->nb_slices);
+            // Some Intel drivers contain a bug where the encoder will fail
+            // if the last slice is smaller than the one before it.  Since
+            // that's straightforward to avoid here, just do so.
+            if (rounding <= 2) {
+                for (i = 0; i < rounding; i++)
+                    ++pic->slices[i].row_size;
+            } else {
+                for (i = 0; i < (rounding + 1) / 2; i++)
+                    ++pic->slices[pic->nb_slices - i - 1].row_size;
+                for (i = 0; i < rounding / 2; i++)
+                    ++pic->slices[i].row_size;
+            }
+        } else if (rounding < 0) {
+            // Trim the last slice only; it must keep at least one row.
+            av_assert0(-rounding < ctx->slice_size);
+            pic->slices[pic->nb_slices - 1].row_size += rounding;
+        }
     }
     for (i = 0; i < pic->nb_slices; i++) {
         slice = &pic->slices[i];
         slice->index = i;
+        if (i == 0) {
+            slice->row_start   = 0;
+            slice->block_start = 0;
+        } else {
+            const VAAPIEncodeSlice *prev = &pic->slices[i - 1];
+            slice->row_start   = prev->row_start   + prev->row_size;
+            slice->block_start = prev->block_start + prev->block_size;
+        }
+        slice->block_size  = slice->row_size * ctx->slice_block_cols;
+
+        av_log(avctx, AV_LOG_DEBUG, "Slice %d: %d-%d (%d rows), "
+               "%d-%d (%d blocks).\n", i, slice->row_start,
+               slice->row_start + slice->row_size - 1, slice->row_size,
+               slice->block_start, slice->block_start + slice->block_size - 1,
+               slice->block_size);
 
         if (ctx->codec->slice_params_size > 0) {
             slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
@@ -422,10 +460,7 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
 
     pic->encode_issued = 1;
 
-    if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING)
-        return vaapi_encode_wait(avctx, pic);
-    else
-        return 0;
+    return 0;
 
 fail_with_picture:
     vaEndPicture(ctx->hwctx->display, ctx->va_context);
@@ -526,14 +561,23 @@ static int vaapi_encode_discard(AVCodecContext *avctx,
     return 0;
 }
 
-static VAAPIEncodePicture *vaapi_encode_alloc(void)
+static VAAPIEncodePicture *vaapi_encode_alloc(AVCodecContext *avctx)
 {
+    VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic;
 
     pic = av_mallocz(sizeof(*pic));
     if (!pic)
         return NULL;
 
+    if (ctx->codec->picture_priv_data_size > 0) {
+        pic->priv_data = av_mallocz(ctx->codec->picture_priv_data_size);
+        if (!pic->priv_data) {
+            av_freep(&pic);
+            return NULL;
+        }
+    }
+
     pic->input_surface = VA_INVALID_ID;
     pic->recon_surface = VA_INVALID_ID;
     pic->output_buffer = VA_INVALID_ID;
@@ -573,315 +617,330 @@ static int vaapi_encode_free(AVCodecContext *avctx,
     return 0;
 }
 
-static int vaapi_encode_step(AVCodecContext *avctx,
-                             VAAPIEncodePicture *target)
+static void vaapi_encode_add_ref(AVCodecContext *avctx,
+                                 VAAPIEncodePicture *pic,
+                                 VAAPIEncodePicture *target,
+                                 int is_ref, int in_dpb, int prev)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic;
-    int i, err;
+    int refs = 0;
 
-    if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING ||
-        ctx->issue_mode == ISSUE_MODE_MINIMISE_LATENCY) {
-        // These two modes are equivalent, except that we wait for
-        // immediate completion on each operation if serialised.
-
-        if (!target) {
-            // No target, nothing to do yet.
-            return 0;
-        }
-
-        if (target->encode_complete) {
-            // Already done.
-            return 0;
-        }
-
-        pic = target;
-        for (i = 0; i < pic->nb_refs; i++) {
-            if (!pic->refs[i]->encode_complete) {
-                err = vaapi_encode_step(avctx, pic->refs[i]);
-                if (err < 0)
-                    return err;
-            }
-        }
-
-        err = vaapi_encode_issue(avctx, pic);
-        if (err < 0)
-            return err;
-
-    } else if (ctx->issue_mode == ISSUE_MODE_MAXIMISE_THROUGHPUT) {
-        int activity;
-
-        // Run through the list of all available pictures repeatedly
-        // and issue the first one found which has all dependencies
-        // available (including previously-issued but not necessarily
-        // completed pictures).
-        do {
-            activity = 0;
-            for (pic = ctx->pic_start; pic; pic = pic->next) {
-                if (!pic->input_available || pic->encode_issued)
-                    continue;
-                for (i = 0; i < pic->nb_refs; i++) {
-                    if (!pic->refs[i]->encode_issued)
-                        break;
-                }
-                if (i < pic->nb_refs)
-                    continue;
-                err = vaapi_encode_issue(avctx, pic);
-                if (err < 0)
-                    return err;
-                activity = 1;
-                // Start again from the beginning of the list,
-                // because issuing this picture may have satisfied
-                // forward dependencies of earlier ones.
-                break;
-            }
-        } while(activity);
+    if (is_ref) {
+        av_assert0(pic != target);
+        av_assert0(pic->nb_refs < MAX_PICTURE_REFERENCES);
+        pic->refs[pic->nb_refs++] = target;
+        ++refs;
+    }
 
-        // If we had a defined target for this step then it will
-        // always have been issued by now.
-        if (target) {
-            av_assert0(target->encode_issued && "broken dependencies?");
-        }
+    if (in_dpb) {
+        av_assert0(pic->nb_dpb_pics < MAX_DPB_SIZE);
+        pic->dpb[pic->nb_dpb_pics++] = target;
+        ++refs;
+    }
 
-    } else {
-        av_assert0(0);
+    if (prev) {
+        av_assert0(!pic->prev);
+        pic->prev = target;
+        ++refs;
     }
 
-    return 0;
+    target->ref_count[0] += refs;
+    target->ref_count[1] += refs;
 }
 
-static int vaapi_encode_get_next(AVCodecContext *avctx,
-                                 VAAPIEncodePicture **pic_out)
+static void vaapi_encode_remove_refs(AVCodecContext *avctx,
+                                     VAAPIEncodePicture *pic,
+                                     int level)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *start, *end, *pic;
     int i;
 
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        if (pic->next)
-            av_assert0(pic->display_order + 1 == pic->next->display_order);
-        if (pic->display_order == ctx->input_order) {
-            *pic_out = pic;
-            return 0;
-        }
+    if (pic->ref_removed[level])
+        return;
+
+    for (i = 0; i < pic->nb_refs; i++) {
+        av_assert0(pic->refs[i]);
+        --pic->refs[i]->ref_count[level];
+        av_assert0(pic->refs[i]->ref_count[level] >= 0);
     }
 
-    pic = vaapi_encode_alloc();
-    if (!pic)
-        return AVERROR(ENOMEM);
+    for (i = 0; i < pic->nb_dpb_pics; i++) {
+        av_assert0(pic->dpb[i]);
+        --pic->dpb[i]->ref_count[level];
+        av_assert0(pic->dpb[i]->ref_count[level] >= 0);
+    }
 
-    if (ctx->input_order == 0 || ctx->force_idr ||
-        ctx->gop_counter >= ctx->gop_size) {
-        pic->type = PICTURE_TYPE_IDR;
-        ctx->force_idr = 0;
-        ctx->gop_counter = 1;
-        ctx->p_counter = 0;
-    } else if (ctx->p_counter >= ctx->p_per_i) {
-        pic->type = PICTURE_TYPE_I;
-        ++ctx->gop_counter;
-        ctx->p_counter = 0;
-    } else {
-        pic->type = PICTURE_TYPE_P;
-        pic->refs[0] = ctx->pic_end;
-        pic->nb_refs = 1;
-        ++ctx->gop_counter;
-        ++ctx->p_counter;
+    av_assert0(pic->prev || pic->type == PICTURE_TYPE_IDR);
+    if (pic->prev) {
+        --pic->prev->ref_count[level];
+        av_assert0(pic->prev->ref_count[level] >= 0);
     }
-    start = end = pic;
 
-    if (pic->type != PICTURE_TYPE_IDR) {
-        // If that was not an IDR frame, add B-frames display-before and
-        // encode-after it, but not exceeding the GOP size.
+    pic->ref_removed[level] = 1;
+}
 
-        for (i = 0; i < ctx->b_per_p &&
-             ctx->gop_counter < ctx->gop_size; i++) {
-            pic = vaapi_encode_alloc();
-            if (!pic)
-                goto fail;
+static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
+                                        VAAPIEncodePicture *start,
+                                        VAAPIEncodePicture *end,
+                                        VAAPIEncodePicture *prev,
+                                        int current_depth,
+                                        VAAPIEncodePicture **last)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodePicture *pic, *next, *ref;
+    int i, len;
 
-            pic->type = PICTURE_TYPE_B;
-            pic->refs[0] = ctx->pic_end;
-            pic->refs[1] = end;
-            pic->nb_refs = 2;
+    av_assert0(start && end && start != end && start->next != end);
 
-            pic->next = start;
-            pic->display_order = ctx->input_order + ctx->b_per_p - i - 1;
-            pic->encode_order  = pic->display_order + 1;
-            start = pic;
+    // If we are at the maximum depth then encode all pictures as
+    // non-referenced B-pictures.  Also do this if there is exactly one
+    // picture left, since there will be nothing to reference it.
+    if (current_depth == ctx->max_b_depth || start->next->next == end) {
+        for (pic = start->next; pic; pic = pic->next) {
+            if (pic == end)
+                break;
+            pic->type    = PICTURE_TYPE_B;
+            pic->b_depth = current_depth;
+
+            vaapi_encode_add_ref(avctx, pic, start, 1, 1, 0);
+            vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
+            vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-            ++ctx->gop_counter;
+            for (ref = end->refs[1]; ref; ref = ref->refs[1])
+                vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
         }
-    }
+        *last = prev;
 
-    if (ctx->input_order == 0) {
-        pic->display_order = 0;
-        pic->encode_order  = 0;
+    } else {
+        // Split the current list at the midpoint with a referenced
+        // B-picture, then descend into each side separately.
+        len = 0;
+        for (pic = start->next; pic != end; pic = pic->next)
+            ++len;
+        for (pic = start->next, i = 1; 2 * i < len; pic = pic->next, i++);
 
-        ctx->pic_start = ctx->pic_end = pic;
+        pic->type    = PICTURE_TYPE_B;
+        pic->b_depth = current_depth;
 
-    } else {
-        for (i = 0, pic = start; pic; i++, pic = pic->next) {
-            pic->display_order = ctx->input_order + i;
-            if (end->type == PICTURE_TYPE_IDR)
-                pic->encode_order = ctx->input_order + i;
-            else if (pic == end)
-                pic->encode_order = ctx->input_order;
-            else
-                pic->encode_order = ctx->input_order + i + 1;
-        }
+        pic->is_reference = 1;
 
-        av_assert0(ctx->pic_end);
-        ctx->pic_end->next = start;
-        ctx->pic_end = end;
-    }
-    *pic_out = start;
+        vaapi_encode_add_ref(avctx, pic, pic,   0, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, start, 1, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-    av_log(avctx, AV_LOG_DEBUG, "Pictures:");
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        av_log(avctx, AV_LOG_DEBUG, " %s (%"PRId64"/%"PRId64")",
-               picture_type_name[pic->type],
-               pic->display_order, pic->encode_order);
-    }
-    av_log(avctx, AV_LOG_DEBUG, "\n");
+        for (ref = end->refs[1]; ref; ref = ref->refs[1])
+            vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
 
-    return 0;
+        if (i > 1)
+            vaapi_encode_set_b_pictures(avctx, start, pic, pic,
+                                        current_depth + 1, &next);
+        else
+            next = pic;
 
-fail:
-    while (start) {
-        pic = start->next;
-        vaapi_encode_free(avctx, start);
-        start = pic;
+        vaapi_encode_set_b_pictures(avctx, pic, end, next,
+                                    current_depth + 1, last);
     }
-    return AVERROR(ENOMEM);
 }
 
-static int vaapi_encode_truncate_gop(AVCodecContext *avctx)
+static int vaapi_encode_pick_next(AVCodecContext *avctx,
+                                  VAAPIEncodePicture **pic_out)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic, *last_pic, *next;
+    VAAPIEncodePicture *pic = NULL, *next, *start;
+    int i, b_counter, closed_gop_end;
 
-    av_assert0(!ctx->pic_start || ctx->pic_start->input_available);
-
-    // Find the last picture we actually have input for.
+    // If there are any B-frames already queued, the next one to encode
+    // is the earliest not-yet-issued frame for which all references are
+    // available.
     for (pic = ctx->pic_start; pic; pic = pic->next) {
-        if (!pic->input_available)
+        if (pic->encode_issued)
+            continue;
+        if (pic->type != PICTURE_TYPE_B)
+            continue;
+        for (i = 0; i < pic->nb_refs; i++) {
+            if (!pic->refs[i]->encode_issued)
+                break;
+        }
+        if (i == pic->nb_refs)
             break;
-        last_pic = pic;
     }
 
     if (pic) {
-        if (last_pic->type == PICTURE_TYPE_B) {
-            // Some fixing up is required.  Change the type of this
-            // picture to P, then modify preceding B references which
-            // point beyond it to point at it instead.
-
-            last_pic->type = PICTURE_TYPE_P;
-            last_pic->encode_order = last_pic->refs[1]->encode_order;
-
-            for (pic = ctx->pic_start; pic != last_pic; pic = pic->next) {
-                if (pic->type == PICTURE_TYPE_B &&
-                    pic->refs[1] == last_pic->refs[1])
-                    pic->refs[1] = last_pic;
-            }
+        av_log(avctx, AV_LOG_DEBUG, "Pick B-picture at depth %d to "
+               "encode next.\n", pic->b_depth);
+        *pic_out = pic;
+        return 0;
+    }
 
-            last_pic->nb_refs = 1;
-            last_pic->refs[1] = NULL;
-        } else {
-            // We can use the current structure (no references point
-            // beyond the end), but there are unused pics to discard.
+    // Find the B-per-Pth available picture to become the next picture
+    // on the top layer.
+    start = NULL;
+    b_counter = 0;
+    closed_gop_end = ctx->closed_gop ||
+                     ctx->idr_counter == ctx->gop_per_idr;
+    for (pic = ctx->pic_start; pic; pic = next) {
+        next = pic->next;
+        if (pic->encode_issued) {
+            start = pic;
+            continue;
         }
+        // If the next available picture is force-IDR, encode it to start
+        // a new GOP immediately.
+        if (pic->force_idr)
+            break;
+        if (b_counter == ctx->b_per_p)
+            break;
+        // If this picture ends a closed GOP or starts a new GOP then it
+        // needs to be in the top layer.
+        if (ctx->gop_counter + b_counter + closed_gop_end >= ctx->gop_size)
+            break;
+        // If the picture after this one is force-IDR, we need to encode
+        // this one in the top layer.
+        if (next && next->force_idr)
+            break;
+        ++b_counter;
+    }
 
-        // Discard all following pics, they will never be used.
-        for (pic = last_pic->next; pic; pic = next) {
-            next = pic->next;
-            vaapi_encode_free(avctx, pic);
-        }
+    // At the end of the stream the last picture must be in the top layer.
+    if (!pic && ctx->end_of_stream) {
+        --b_counter;
+        pic = ctx->pic_end;
+        if (pic->encode_issued)
+            return AVERROR_EOF;
+    }
 
-        last_pic->next = NULL;
-        ctx->pic_end = last_pic;
+    if (!pic) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick nothing to encode next - "
+               "need more input for reference pictures.\n");
+        return AVERROR(EAGAIN);
+    }
+    if (ctx->input_order <= ctx->decode_delay && !ctx->end_of_stream) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick nothing to encode next - "
+               "need more input for timestamps.\n");
+        return AVERROR(EAGAIN);
+    }
+
+    if (pic->force_idr) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick forced IDR-picture to "
+               "encode next.\n");
+        pic->type = PICTURE_TYPE_IDR;
+        ctx->idr_counter = 1;
+        ctx->gop_counter = 1;
+
+    } else if (ctx->gop_counter + b_counter >= ctx->gop_size) {
+        if (ctx->idr_counter == ctx->gop_per_idr) {
+            av_log(avctx, AV_LOG_DEBUG, "Pick new-GOP IDR-picture to "
+                   "encode next.\n");
+            pic->type = PICTURE_TYPE_IDR;
+            ctx->idr_counter = 1;
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "Pick new-GOP I-picture to "
+                   "encode next.\n");
+            pic->type = PICTURE_TYPE_I;
+            ++ctx->idr_counter;
+        }
+        ctx->gop_counter = 1;
 
     } else {
-        // Input is available for all pictures, so we don't need to
-        // mangle anything.
+        if (ctx->gop_counter + b_counter + closed_gop_end == ctx->gop_size) {
+            av_log(avctx, AV_LOG_DEBUG, "Pick group-end P-picture to "
+                   "encode next.\n");
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "Pick normal P-picture to "
+                   "encode next.\n");
+        }
+        pic->type = PICTURE_TYPE_P;
+        av_assert0(start);
+        ctx->gop_counter += 1 + b_counter;
     }
+    pic->is_reference = 1;
+    *pic_out = pic;
 
-    av_log(avctx, AV_LOG_DEBUG, "Pictures ending truncated GOP:");
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        av_log(avctx, AV_LOG_DEBUG, " %s (%"PRId64"/%"PRId64")",
-               picture_type_name[pic->type],
-               pic->display_order, pic->encode_order);
+    vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
+    if (pic->type != PICTURE_TYPE_IDR) {
+        vaapi_encode_add_ref(avctx, pic, start,
+                             pic->type == PICTURE_TYPE_P,
+                             b_counter > 0, 0);
+        vaapi_encode_add_ref(avctx, pic, ctx->next_prev, 0, 0, 1);
     }
-    av_log(avctx, AV_LOG_DEBUG, "\n");
+    if (ctx->next_prev)
+        --ctx->next_prev->ref_count[0];
 
+    if (b_counter > 0) {
+        vaapi_encode_set_b_pictures(avctx, start, pic, pic, 1,
+                                    &ctx->next_prev);
+    } else {
+        ctx->next_prev = pic;
+    }
+    ++ctx->next_prev->ref_count[0];
     return 0;
 }
 
 static int vaapi_encode_clear_old(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic, *old;
-    int i;
+    VAAPIEncodePicture *pic, *prev, *next;
 
-    while (ctx->pic_start != ctx->pic_end) {
-        old = ctx->pic_start;
-        if (old->encode_order > ctx->output_order)
-            break;
+    av_assert0(ctx->pic_start);
 
-        for (pic = old->next; pic; pic = pic->next) {
-            if (pic->encode_complete)
-                continue;
-            for (i = 0; i < pic->nb_refs; i++) {
-                if (pic->refs[i] == old) {
-                    // We still need this picture because it's referred to
-                    // directly by a later one, so it and all following
-                    // pictures have to stay.
-                    return 0;
-                }
-            }
-        }
+    // Remove direct references once each picture is complete.
+    for (pic = ctx->pic_start; pic; pic = pic->next) {
+        if (pic->encode_complete && pic->next)
+            vaapi_encode_remove_refs(avctx, pic, 0);
+    }
 
-        pic = ctx->pic_start;
-        ctx->pic_start = pic->next;
-        vaapi_encode_free(avctx, pic);
+    // Remove indirect references once a picture has no direct references.
+    for (pic = ctx->pic_start; pic; pic = pic->next) {
+        if (pic->encode_complete && pic->ref_count[0] == 0)
+            vaapi_encode_remove_refs(avctx, pic, 1);
+    }
+
+    // Clear out all complete pictures with no remaining references.
+    prev = NULL;
+    for (pic = ctx->pic_start; pic; pic = next) {
+        next = pic->next;
+        if (pic->encode_complete && pic->ref_count[1] == 0) {
+            av_assert0(pic->ref_removed[0] && pic->ref_removed[1]);
+            if (prev)
+                prev->next = next;
+            else
+                ctx->pic_start = next;
+            vaapi_encode_free(avctx, pic);
+        } else {
+            prev = pic;
+        }
     }
 
     return 0;
 }
 
-int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
-                     const AVFrame *input_image, int *got_packet)
+int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic;
     int err;
 
-    if (input_image) {
-        av_log(avctx, AV_LOG_DEBUG, "Encode frame: %ux%u (%"PRId64").\n",
-               input_image->width, input_image->height, input_image->pts);
+    if (frame) {
+        av_log(avctx, AV_LOG_DEBUG, "Input frame: %ux%u (%"PRId64").\n",
+               frame->width, frame->height, frame->pts);
 
-        if (input_image->pict_type == AV_PICTURE_TYPE_I) {
-            err = vaapi_encode_truncate_gop(avctx);
-            if (err < 0)
-                goto fail;
-            ctx->force_idr = 1;
-        }
-
-        err = vaapi_encode_get_next(avctx, &pic);
-        if (err) {
-            av_log(avctx, AV_LOG_ERROR, "Input setup failed: %d.\n", err);
-            return err;
-        }
+        pic = vaapi_encode_alloc(avctx);
+        if (!pic)
+            return AVERROR(ENOMEM);
 
         pic->input_image = av_frame_alloc();
         if (!pic->input_image) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
-        err = av_frame_ref(pic->input_image, input_image);
+        err = av_frame_ref(pic->input_image, frame);
         if (err < 0)
             goto fail;
-        pic->input_surface = (VASurfaceID)(uintptr_t)input_image->data[3];
-        pic->pts = input_image->pts;
+
+        if (ctx->input_order == 0)
+            pic->force_idr = 1;
+
+        pic->input_surface = (VASurfaceID)(uintptr_t)frame->data[3];
+        pic->pts = frame->pts;
 
         if (ctx->input_order == 0)
             ctx->first_pts = pic->pts;
@@ -890,72 +949,89 @@ int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
         if (ctx->output_delay > 0)
             ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
 
-        pic->input_available = 1;
+        pic->display_order = ctx->input_order;
+        ++ctx->input_order;
 
-    } else {
-        if (!ctx->end_of_stream) {
-            err = vaapi_encode_truncate_gop(avctx);
-            if (err < 0)
-                goto fail;
-            ctx->end_of_stream = 1;
+        if (ctx->pic_start) {
+            ctx->pic_end->next = pic;
+            ctx->pic_end       = pic;
+        } else {
+            ctx->pic_start     = pic;
+            ctx->pic_end       = pic;
         }
+
+    } else {
+        ctx->end_of_stream = 1;
+
+        // Fix timestamps if we hit end-of-stream before the initial decode
+        // delay has elapsed (pic_end is only set once a frame was queued).
+        if (ctx->pic_end && ctx->input_order < ctx->decode_delay)
+            ctx->dts_pts_diff = ctx->pic_end->pts - ctx->first_pts;
     }
 
-    ++ctx->input_order;
-    ++ctx->output_order;
-    av_assert0(ctx->output_order + ctx->output_delay + 1 == ctx->input_order);
+    return 0;
 
-    for (pic = ctx->pic_start; pic; pic = pic->next)
-        if (pic->encode_order == ctx->output_order)
-            break;
+fail:
+    return err;
+}
 
-    // pic can be null here if we don't have a specific target in this
-    // iteration.  We might still issue encodes if things can be overlapped,
-    // even though we don't intend to output anything.
+int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodePicture *pic;
+    int err;
 
-    err = vaapi_encode_step(avctx, pic);
-    if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
-        goto fail;
+    if (!ctx->pic_start) {
+        if (ctx->end_of_stream)
+            return AVERROR_EOF;
+        else
+            return AVERROR(EAGAIN);
     }
 
-    if (!pic) {
-        *got_packet = 0;
-    } else {
-        err = vaapi_encode_output(avctx, pic, pkt);
-        if (err < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
-            goto fail;
-        }
+    pic = NULL;
+    err = vaapi_encode_pick_next(avctx, &pic);
+    if (err < 0)
+        return err;
+    av_assert0(pic);
 
-        if (ctx->output_delay == 0) {
-            pkt->dts = pkt->pts;
-        } else if (ctx->output_order < ctx->decode_delay) {
-            if (ctx->ts_ring[ctx->output_order] < INT64_MIN + ctx->dts_pts_diff)
-                pkt->dts = INT64_MIN;
-            else
-                pkt->dts = ctx->ts_ring[ctx->output_order] - ctx->dts_pts_diff;
-        } else {
-            pkt->dts = ctx->ts_ring[(ctx->output_order - ctx->decode_delay) %
-                                    (3 * ctx->output_delay)];
-        }
+    pic->encode_order = ctx->encode_order++;
 
-        *got_packet = 1;
+    err = vaapi_encode_issue(avctx, pic);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+        return err;
     }
 
-    err = vaapi_encode_clear_old(avctx);
+    err = vaapi_encode_output(avctx, pic, pkt);
     if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "List clearing failed: %d.\n", err);
-        goto fail;
+        av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
+        return err;
     }
 
+    if (ctx->output_delay == 0) {
+        pkt->dts = pkt->pts;
+    } else if (pic->encode_order < ctx->decode_delay) {
+        if (ctx->ts_ring[pic->encode_order] < INT64_MIN + ctx->dts_pts_diff)
+            pkt->dts = INT64_MIN;
+        else
+            pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
+    } else {
+        pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
+                                (3 * ctx->output_delay)];
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
+           pkt->pts, pkt->dts);
+
+    ctx->output_order = pic->encode_order;
+    vaapi_encode_clear_old(avctx);
+
     return 0;
+}
 
-fail:
-    // Unclear what to clean up on failure.  There are probably some things we
-    // could do usefully clean up here, but for now just leave them for uninit()
-    // to do instead.
-    return err;
+int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
+                     const AVFrame *input_image, int *got_packet)
+{
+    return AVERROR(ENOSYS);
 }
 
 static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx,
@@ -1091,8 +1167,9 @@ static av_cold int vaapi_encode_profile_entrypoint(AVCodecContext *avctx)
                 break;
         }
         if (j >= n) {
-            av_log(avctx, AV_LOG_VERBOSE, "Matching profile %d is "
-                   "not supported by driver.\n", profile->va_profile);
+            av_log(avctx, AV_LOG_VERBOSE, "Compatible profile %s (%d) "
+                   "is not supported by driver.\n", profile_string,
+                   profile->va_profile);
             continue;
         }
 
@@ -1206,17 +1283,42 @@ static av_cold int vaapi_encode_profile_entrypoint(AVCodecContext *avctx)
     return err;
 }
 
+static const VAAPIEncodeRCMode vaapi_encode_rc_modes[] = {
+    // Indexed by RC_MODE_*.            Bitrate   Quality
+    // { mode, name, supported, va_mode,   | Maxrate | HRD/VBV }
+    { 0 }, // RC_MODE_AUTO placeholder     |    |    |    |
+    { RC_MODE_CQP,  "CQP",  1, VA_RC_CQP,  0,   0,   1,   0 },
+    { RC_MODE_CBR,  "CBR",  1, VA_RC_CBR,  1,   0,   0,   1 },
+    { RC_MODE_VBR,  "VBR",  1, VA_RC_VBR,  1,   1,   0,   1 },
+#if VA_CHECK_VERSION(1, 1, 0)
+    { RC_MODE_ICQ,  "ICQ",  1, VA_RC_ICQ,  0,   0,   1,   0 },
+#else
+    { RC_MODE_ICQ,  "ICQ",  0 },
+#endif
+#if VA_CHECK_VERSION(1, 3, 0)
+    { RC_MODE_QVBR, "QVBR", 1, VA_RC_QVBR, 1,   1,   1,   1 },
+    { RC_MODE_AVBR, "AVBR", 0, VA_RC_AVBR, 1,   0,   0,   0 },
+#else
+    { RC_MODE_QVBR, "QVBR", 0 },
+    { RC_MODE_AVBR, "AVBR", 0 },
+#endif
+};
+
 static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
+    uint32_t supported_va_rc_modes;
+    const VAAPIEncodeRCMode *rc_mode;
     int64_t rc_bits_per_second;
     int     rc_target_percentage;
     int     rc_window_size;
+    int     rc_quality;
     int64_t hrd_buffer_size;
     int64_t hrd_initial_buffer_fullness;
     int fr_num, fr_den;
     VAConfigAttrib rc_attr = { VAConfigAttribRateControl };
     VAStatus vas;
+    char supported_rc_modes_string[64];
 
     vas = vaGetConfigAttributes(ctx->hwctx->display,
                                 ctx->va_profile, ctx->va_entrypoint,
@@ -1226,118 +1328,213 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
                "config attribute: %d (%s).\n", vas, vaErrorStr(vas));
         return AVERROR_EXTERNAL;
     }
-
     if (rc_attr.value == VA_ATTRIB_NOT_SUPPORTED) {
         av_log(avctx, AV_LOG_VERBOSE, "Driver does not report any "
-               "supported rate control modes: assuming constant-quality.\n");
-        ctx->va_rc_mode = VA_RC_CQP;
-        return 0;
-    }
-    if (avctx->flags & AV_CODEC_FLAG_QSCALE ||
-        avctx->bit_rate <= 0) {
-        if (rc_attr.value & VA_RC_CQP) {
-            av_log(avctx, AV_LOG_VERBOSE, "Using constant-quality mode.\n");
-            ctx->va_rc_mode = VA_RC_CQP;
-            if (avctx->bit_rate > 0 || avctx->rc_max_rate > 0) {
-                av_log(avctx, AV_LOG_WARNING, "Bitrate target parameters "
-                       "ignored in constant-quality mode.\n");
+               "supported rate control modes: assuming CQP only.\n");
+        supported_va_rc_modes = VA_RC_CQP;
+        strcpy(supported_rc_modes_string, "unknown");
+    } else {
+        char *str = supported_rc_modes_string;
+        size_t len = sizeof(supported_rc_modes_string);
+        int i, first = 1, res;
+
+        supported_va_rc_modes = rc_attr.value;
+        for (i = 0; i < FF_ARRAY_ELEMS(vaapi_encode_rc_modes); i++) {
+            rc_mode = &vaapi_encode_rc_modes[i];
+            if (supported_va_rc_modes & rc_mode->va_mode) {
+                res = snprintf(str, len, "%s%s",
+                               first ? "" : ", ", rc_mode->name);
+                first = 0;
+                if (res < 0) {
+                    *str = 0;
+                    break;
+                }
+                len -= res;
+                str += res;
+                if (len == 0)
+                    break;
             }
-            return 0;
-        } else {
-            av_log(avctx, AV_LOG_ERROR, "Driver does not support "
-                   "constant-quality mode (%#x).\n", rc_attr.value);
-            return AVERROR(EINVAL);
         }
-    }
 
-    if (!(rc_attr.value & (VA_RC_CBR | VA_RC_VBR))) {
-        av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
-               "bitrate-targetted rate control modes.\n");
-        return AVERROR(EINVAL);
+        av_log(avctx, AV_LOG_DEBUG, "Driver supports RC modes %s.\n",
+               supported_rc_modes_string);
+    }
+
+    // Rate control mode selection:
+    // * If the user has set a mode explicitly with the rc_mode option,
+    //   use it and fail if it is not available.
+    // * If an explicit QP option has been set, use CQP.
+    // * If the codec is CQ-only, use CQP.
+    // * If the QSCALE avcodec option is set, use CQP.
+    // * If bitrate and quality are both set, try QVBR.
+    // * If quality is set, try ICQ, then CQP.
+    // * If bitrate and maxrate are set and have the same value, try CBR.
+    // * If a bitrate is set, try AVBR, then VBR, then CBR.
+    // * If no bitrate is set, try ICQ, then CQP.
+
+#define TRY_RC_MODE(mode, fail) do { \
+        rc_mode = &vaapi_encode_rc_modes[mode]; \
+        if (!(rc_mode->va_mode & supported_va_rc_modes)) { \
+            if (fail) { \
+                av_log(avctx, AV_LOG_ERROR, "Driver does not support %s " \
+                       "RC mode (supported modes: %s).\n", rc_mode->name, \
+                       supported_rc_modes_string); \
+                return AVERROR(EINVAL); \
+            } \
+            av_log(avctx, AV_LOG_DEBUG, "Driver does not support %s " \
+                   "RC mode.\n", rc_mode->name); \
+            rc_mode = NULL; \
+        } else { \
+            goto rc_mode_found; \
+        } \
+    } while (0)
+
+    if (ctx->explicit_rc_mode)
+        TRY_RC_MODE(ctx->explicit_rc_mode, 1);
+
+    if (ctx->explicit_qp)
+        TRY_RC_MODE(RC_MODE_CQP, 1);
+
+    if (ctx->codec->flags & FLAG_CONSTANT_QUALITY_ONLY)
+        TRY_RC_MODE(RC_MODE_CQP, 1);
+
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE)
+        TRY_RC_MODE(RC_MODE_CQP, 1);
+
+    if (avctx->bit_rate > 0 && avctx->global_quality > 0)
+        TRY_RC_MODE(RC_MODE_QVBR, 0);
+
+    if (avctx->global_quality > 0) {
+        TRY_RC_MODE(RC_MODE_ICQ, 0);
+        TRY_RC_MODE(RC_MODE_CQP, 0);
+    }
+
+    if (avctx->bit_rate > 0 && avctx->rc_max_rate == avctx->bit_rate)
+        TRY_RC_MODE(RC_MODE_CBR, 0);
+
+    if (avctx->bit_rate > 0) {
+        TRY_RC_MODE(RC_MODE_AVBR, 0);
+        TRY_RC_MODE(RC_MODE_VBR, 0);
+        TRY_RC_MODE(RC_MODE_CBR, 0);
+    } else {
+        TRY_RC_MODE(RC_MODE_ICQ, 0);
+        TRY_RC_MODE(RC_MODE_CQP, 0);
     }
 
-    if (avctx->rc_buffer_size)
-        hrd_buffer_size = avctx->rc_buffer_size;
-    else if (avctx->rc_max_rate > 0)
-        hrd_buffer_size = avctx->rc_max_rate;
-    else
-        hrd_buffer_size = avctx->bit_rate;
-    if (avctx->rc_initial_buffer_occupancy) {
-        if (avctx->rc_initial_buffer_occupancy > hrd_buffer_size) {
-            av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: "
-                   "must have initial buffer size (%d) < "
-                   "buffer size (%"PRId64").\n",
-                   avctx->rc_initial_buffer_occupancy, hrd_buffer_size);
+    av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+           "RC mode compatible with selected options "
+           "(supported modes: %s).\n", supported_rc_modes_string);
+    return AVERROR(EINVAL);
+
+rc_mode_found:
+    if (rc_mode->bitrate) {
+        if (avctx->bit_rate <= 0) {
+            av_log(avctx, AV_LOG_ERROR, "Bitrate must be set for %s "
+                   "RC mode.\n", rc_mode->name);
             return AVERROR(EINVAL);
         }
-        hrd_initial_buffer_fullness = avctx->rc_initial_buffer_occupancy;
-    } else {
-        hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
-    }
 
-    if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid bitrate settings: must have "
-               "bitrate (%"PRId64") <= maxrate (%"PRId64").\n",
-               avctx->bit_rate, avctx->rc_max_rate);
-        return AVERROR(EINVAL);
-    }
+        if (rc_mode->mode == RC_MODE_AVBR) {
+            // For maximum confusion AVBR is hacked into the existing API
+            // by overloading some of the fields with completely different
+            // meanings.
 
-    if (avctx->rc_max_rate > avctx->bit_rate) {
-        if (!(rc_attr.value & VA_RC_VBR)) {
-            av_log(avctx, AV_LOG_WARNING, "Driver does not support "
-                   "VBR mode (%#x), using CBR mode instead.\n",
-                   rc_attr.value);
-            ctx->va_rc_mode = VA_RC_CBR;
+            // Target percentage does not apply in AVBR mode.
+            rc_bits_per_second = avctx->bit_rate;
 
-            rc_bits_per_second   = avctx->bit_rate;
+            // Accuracy tolerance range for meeting the specified target
+            // bitrate.  It's very unclear how this is actually intended
+            // to work - since we do want to get the specified bitrate,
+            // set the accuracy to 100% for now.
             rc_target_percentage = 100;
-        } else {
-            ctx->va_rc_mode = VA_RC_VBR;
-
-            rc_bits_per_second   = avctx->rc_max_rate;
-            rc_target_percentage = (avctx->bit_rate * 100) /
-                                   avctx->rc_max_rate;
-        }
 
-    } else if (avctx->rc_max_rate == avctx->bit_rate) {
-        if (!(rc_attr.value & VA_RC_CBR)) {
-            av_log(avctx, AV_LOG_WARNING, "Driver does not support "
-                   "CBR mode (%#x), using VBR mode instead.\n",
-                   rc_attr.value);
-            ctx->va_rc_mode = VA_RC_VBR;
+            // Convergence period in frames.  The GOP size reflects the
+            // user's intended block size for cutting, so reusing that
+            // as the convergence period seems a reasonable default.
+            rc_window_size = avctx->gop_size > 0 ? avctx->gop_size : 60;
+
+        } else if (rc_mode->maxrate) {
+            if (avctx->rc_max_rate > 0) {
+                if (avctx->rc_max_rate < avctx->bit_rate) {
+                    av_log(avctx, AV_LOG_ERROR, "Invalid bitrate settings: "
+                           "bitrate (%"PRId64") must not be greater than "
+                           "maxrate (%"PRId64").\n", avctx->bit_rate,
+                           avctx->rc_max_rate);
+                    return AVERROR(EINVAL);
+                }
+                rc_bits_per_second   = avctx->rc_max_rate;
+                rc_target_percentage = (avctx->bit_rate * 100) /
+                                       avctx->rc_max_rate;
+            } else {
+                // We only have a target bitrate, but this mode requires
+                // that a maximum rate be supplied as well.  Since the
+                // user does not want this to be a constraint, arbitrarily
+                // pick a maximum rate of double the target rate.
+                rc_bits_per_second   = 2 * avctx->bit_rate;
+                rc_target_percentage = 50;
+            }
         } else {
-            ctx->va_rc_mode = VA_RC_CBR;
+            if (avctx->rc_max_rate > avctx->bit_rate) {
+                av_log(avctx, AV_LOG_WARNING, "Max bitrate is ignored "
+                       "in %s RC mode.\n", rc_mode->name);
+            }
+            rc_bits_per_second   = avctx->bit_rate;
+            rc_target_percentage = 100;
         }
-
-        rc_bits_per_second   = avctx->bit_rate;
+    } else {
+        rc_bits_per_second   = 0;
         rc_target_percentage = 100;
+    }
 
+    if (rc_mode->quality) {
+        if (ctx->explicit_qp) {
+            rc_quality = ctx->explicit_qp;
+        } else if (avctx->global_quality > 0) {
+            rc_quality = avctx->global_quality;
+        } else {
+            rc_quality = ctx->codec->default_quality;
+            av_log(avctx, AV_LOG_WARNING, "No quality level set; "
+                   "using default (%d).\n", rc_quality);
+        }
     } else {
-        if (rc_attr.value & VA_RC_VBR) {
-            ctx->va_rc_mode = VA_RC_VBR;
-
-            // We only have a target bitrate, but VAAPI requires that a
-            // maximum rate be supplied as well.  Since the user has
-            // offered no particular constraint, arbitrarily pick a
-            // maximum rate of double the target rate.
-            rc_bits_per_second   = 2 * avctx->bit_rate;
-            rc_target_percentage = 50;
+        rc_quality = 0;
+    }
+
+    if (rc_mode->hrd) {
+        if (avctx->rc_buffer_size)
+            hrd_buffer_size = avctx->rc_buffer_size;
+        else if (avctx->rc_max_rate > 0)
+            hrd_buffer_size = avctx->rc_max_rate;
+        else
+            hrd_buffer_size = avctx->bit_rate;
+        if (avctx->rc_initial_buffer_occupancy) {
+            if (avctx->rc_initial_buffer_occupancy > hrd_buffer_size) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: "
+                       "must have initial buffer size (%d) <= "
+                       "buffer size (%"PRId64").\n",
+                       avctx->rc_initial_buffer_occupancy, hrd_buffer_size);
+                return AVERROR(EINVAL);
+            }
+            hrd_initial_buffer_fullness = avctx->rc_initial_buffer_occupancy;
         } else {
-            ctx->va_rc_mode = VA_RC_CBR;
+            hrd_initial_buffer_fullness = hrd_buffer_size * 3 / 4;
+        }
 
-            rc_bits_per_second   = avctx->bit_rate;
-            rc_target_percentage = 100;
+        rc_window_size = (hrd_buffer_size * 1000) / rc_bits_per_second;
+    } else {
+        if (avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
+            av_log(avctx, AV_LOG_WARNING, "Buffering settings are ignored "
+                   "in %s RC mode.\n", rc_mode->name);
         }
-    }
 
-    rc_window_size = (hrd_buffer_size * 1000) / rc_bits_per_second;
+        hrd_buffer_size             = 0;
+        hrd_initial_buffer_fullness = 0;
 
-    av_log(avctx, AV_LOG_VERBOSE, "RC mode: %s, %d%% of %"PRId64" bps "
-           "over %d ms.\n", ctx->va_rc_mode == VA_RC_VBR ? "VBR" : "CBR",
-           rc_target_percentage, rc_bits_per_second, rc_window_size);
-    av_log(avctx, AV_LOG_VERBOSE, "RC buffer: %"PRId64" bits, "
-           "initial fullness %"PRId64" bits.\n",
-           hrd_buffer_size, hrd_initial_buffer_fullness);
+        if (rc_mode->mode != RC_MODE_AVBR) {
+            // Already set (with completely different meaning) for AVBR.
+            rc_window_size = 1000;
+        }
+    }
 
     if (rc_bits_per_second          > UINT32_MAX ||
         hrd_buffer_size             > UINT32_MAX ||
@@ -1347,36 +1544,70 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+    ctx->rc_mode     = rc_mode;
+    ctx->rc_quality  = rc_quality;
+    ctx->va_rc_mode  = rc_mode->va_mode;
     ctx->va_bit_rate = rc_bits_per_second;
 
-    ctx->config_attributes[ctx->nb_config_attributes++] =
-        (VAConfigAttrib) {
-        .type  = VAConfigAttribRateControl,
-        .value = ctx->va_rc_mode,
-    };
+    av_log(avctx, AV_LOG_VERBOSE, "RC mode: %s.\n", rc_mode->name);
+    if (rc_attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        // This driver does not want the RC mode attribute to be set.
+    } else {
+        ctx->config_attributes[ctx->nb_config_attributes++] =
+            (VAConfigAttrib) {
+            .type  = VAConfigAttribRateControl,
+            .value = ctx->va_rc_mode,
+        };
+    }
 
-    ctx->rc_params.misc.type = VAEncMiscParameterTypeRateControl;
-    ctx->rc_params.rc = (VAEncMiscParameterRateControl) {
-        .bits_per_second   = rc_bits_per_second,
-        .target_percentage = rc_target_percentage,
-        .window_size       = rc_window_size,
-        .initial_qp        = 0,
-        .min_qp            = (avctx->qmin > 0 ? avctx->qmin : 0),
-        .basic_unit_size   = 0,
+    if (rc_mode->quality)
+        av_log(avctx, AV_LOG_VERBOSE, "RC quality: %d.\n", rc_quality);
+
+    if (rc_mode->va_mode != VA_RC_CQP) {
+        if (rc_mode->mode == RC_MODE_AVBR) {
+            av_log(avctx, AV_LOG_VERBOSE, "RC target: %"PRId64" bps "
+                   "converging in %d frames with %d%% accuracy.\n",
+                   rc_bits_per_second, rc_window_size,
+                   rc_target_percentage);
+        } else if (rc_mode->bitrate) {
+            av_log(avctx, AV_LOG_VERBOSE, "RC target: %d%% of "
+                   "%"PRId64" bps over %d ms.\n", rc_target_percentage,
+                   rc_bits_per_second, rc_window_size);
+        }
+
+        ctx->rc_params.misc.type = VAEncMiscParameterTypeRateControl;
+        ctx->rc_params.rc = (VAEncMiscParameterRateControl) {
+            .bits_per_second    = rc_bits_per_second,
+            .target_percentage  = rc_target_percentage,
+            .window_size        = rc_window_size,
+            .initial_qp         = 0,
+            .min_qp             = (avctx->qmin > 0 ? avctx->qmin : 0),
+            .basic_unit_size    = 0,
 #if VA_CHECK_VERSION(1, 1, 0)
-        .max_qp            = (avctx->qmax > 0 ? avctx->qmax : 0),
+            .ICQ_quality_factor = av_clip(rc_quality, 1, 51),
+            .max_qp             = (avctx->qmax > 0 ? avctx->qmax : 0),
 #endif
-    };
-    vaapi_encode_add_global_param(avctx, &ctx->rc_params.misc,
-                                  sizeof(ctx->rc_params));
+#if VA_CHECK_VERSION(1, 3, 0)
+            .quality_factor     = rc_quality,
+#endif
+        };
+        vaapi_encode_add_global_param(avctx, &ctx->rc_params.misc,
+                                      sizeof(ctx->rc_params));
+    }
 
-    ctx->hrd_params.misc.type = VAEncMiscParameterTypeHRD;
-    ctx->hrd_params.hrd = (VAEncMiscParameterHRD) {
-        .initial_buffer_fullness = hrd_initial_buffer_fullness,
-        .buffer_size             = hrd_buffer_size,
-    };
-    vaapi_encode_add_global_param(avctx, &ctx->hrd_params.misc,
-                                  sizeof(ctx->hrd_params));
+    if (rc_mode->hrd) {
+        av_log(avctx, AV_LOG_VERBOSE, "RC buffer: %"PRId64" bits, "
+               "initial fullness %"PRId64" bits.\n",
+               hrd_buffer_size, hrd_initial_buffer_fullness);
+
+        ctx->hrd_params.misc.type = VAEncMiscParameterTypeHRD;
+        ctx->hrd_params.hrd = (VAEncMiscParameterHRD) {
+            .initial_buffer_fullness = hrd_initial_buffer_fullness,
+            .buffer_size             = hrd_buffer_size,
+        };
+        vaapi_encode_add_global_param(avctx, &ctx->hrd_params.misc,
+                                      sizeof(ctx->hrd_params));
+    }
 
     if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
         av_reduce(&fr_num, &fr_den,
@@ -1385,6 +1616,9 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
         av_reduce(&fr_num, &fr_den,
                   avctx->time_base.den, avctx->time_base.num, 65535);
 
+    av_log(avctx, AV_LOG_VERBOSE, "RC framerate: %d/%d (%.2f fps).\n",
+           fr_num, fr_den, (double)fr_num / fr_den);
+
     ctx->fr_params.misc.type = VAEncMiscParameterTypeFrameRate;
     ctx->fr_params.fr.framerate = (unsigned int)fr_den << 16 | fr_num;
 
@@ -1420,14 +1654,16 @@ static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
         ref_l1 = attr.value >> 16 & 0xffff;
     }
 
-    if (avctx->gop_size <= 1) {
+    if (ctx->codec->flags & FLAG_INTRA_ONLY ||
+        avctx->gop_size <= 1) {
         av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
         ctx->gop_size = 1;
     } else if (ref_l0 < 1) {
         av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
                "reference frames.\n");
         return AVERROR(EINVAL);
-    } else if (ref_l1 < 1 || avctx->max_b_frames < 1) {
+    } else if (!(ctx->codec->flags & FLAG_B_PICTURES) ||
+               ref_l1 < 1 || avctx->max_b_frames < 1) {
         av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
                "(supported references: %d / %d).\n", ref_l0, ref_l1);
         ctx->gop_size = avctx->gop_size;
@@ -1439,11 +1675,125 @@ static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
         ctx->gop_size = avctx->gop_size;
         ctx->p_per_i  = INT_MAX;
         ctx->b_per_p  = avctx->max_b_frames;
+        if (ctx->codec->flags & FLAG_B_PICTURE_REFERENCES) {
+            ctx->max_b_depth = FFMIN(ctx->desired_b_depth,
+                                     av_log2(ctx->b_per_p) + 1);
+        } else {
+            ctx->max_b_depth = 1;
+        }
+    }
+
+    if (ctx->codec->flags & FLAG_NON_IDR_KEY_PICTURES) {
+        ctx->closed_gop  = !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP);
+        ctx->gop_per_idr = ctx->idr_interval + 1;
+    } else {
+        ctx->closed_gop  = 1;
+        ctx->gop_per_idr = 1;
     }
 
     return 0;
 }
 
+// Choose the slice layout when the codec has FLAG_SLICE_CONTROL: sets
+// ctx->nb_slices and ctx->slice_size (in block rows) from avctx->slices and
+// the driver's slice attributes.  Returns 0, or a negative AVERROR code.
+static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAConfigAttrib attr[2] = { { VAConfigAttribEncMaxSlices },
+                               { VAConfigAttribEncSliceStructure } };
+    VAStatus vas;
+    uint32_t max_slices, slice_structure;
+    int req_slices;
+
+    if (!(ctx->codec->flags & FLAG_SLICE_CONTROL)) {
+        if (avctx->slices > 0)
+            av_log(avctx, AV_LOG_WARNING, "Multiple slices were requested "
+                   "but this codec does not support controlling slices.\n");
+        return 0;
+    }
+
+    ctx->slice_block_rows = (avctx->height + ctx->slice_block_height - 1) /
+                             ctx->slice_block_height;
+    ctx->slice_block_cols = (avctx->width  + ctx->slice_block_width  - 1) /
+                             ctx->slice_block_width;
+
+    if (avctx->slices <= 1) {
+        ctx->nb_slices  = 1;
+        ctx->slice_size = ctx->slice_block_rows; // one slice spans all rows
+        return 0;
+    }
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display, ctx->va_profile,
+                                ctx->va_entrypoint,
+                                attr, FF_ARRAY_ELEMS(attr));
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query slice "
+               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    max_slices      = attr[0].value;
+    slice_structure = attr[1].value;
+    if (max_slices      == VA_ATTRIB_NOT_SUPPORTED ||
+        slice_structure == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support encoding "
+               "pictures as multiple slices.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // For fixed-size slices currently we only support whole rows, making
+    // rectangular slices.  This could be extended to arbitrary runs of
+    // blocks, but since slices tend to be a conformance requirement and
+    // most cases (such as broadcast or bluray) want rectangular slices
+    // only it would need to be gated behind another option.
+    if (avctx->slices > ctx->slice_block_rows) {
+        av_log(avctx, AV_LOG_WARNING, "Not enough rows to use "
+               "configured number of slices (%d < %d); using "
+               "maximum.\n", ctx->slice_block_rows, avctx->slices);
+        req_slices = ctx->slice_block_rows;
+    } else {
+        req_slices = avctx->slices;
+    }
+    if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS ||
+        slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) {
+        ctx->nb_slices  = req_slices;
+        ctx->slice_size = ctx->slice_block_rows / ctx->nb_slices;
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) {
+        int k; // block rows per slice, doubled until the test below passes
+        for (k = 1;; k *= 2) {
+            if (2 * k * (req_slices - 1) + 1 >= ctx->slice_block_rows)
+                break;
+        }
+        ctx->nb_slices  = (ctx->slice_block_rows + k - 1) / k;
+        ctx->slice_size = k;
+#if VA_CHECK_VERSION(1, 0, 0)
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS) {
+        ctx->nb_slices  = ctx->slice_block_rows;
+        ctx->slice_size = 1;
+#endif
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any usable "
+               "slice structure modes (%#x).\n", slice_structure);
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->nb_slices > avctx->slices)
+        av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to "
+               "%d (from %d) due to driver constraints on slice "
+               "structure.\n", ctx->nb_slices, avctx->slices);
+    if (ctx->nb_slices > max_slices) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support "
+               "encoding with %d slices (max %"PRIu32").\n",
+               ctx->nb_slices, max_slices);
+        return AVERROR(EINVAL);
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d slices "
+           "(default size %d block rows).\n",
+           ctx->nb_slices, ctx->slice_size);
+    return 0;
+}
+
 static av_cold int vaapi_encode_init_packed_headers(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
@@ -1673,9 +2023,6 @@ static av_cold int vaapi_encode_create_recon_frames(AVCodecContext *avctx)
     ctx->recon_frames->sw_format = recon_format;
     ctx->recon_frames->width     = ctx->surface_width;
     ctx->recon_frames->height    = ctx->surface_height;
-    // At most three IDR/I/P frames and two runs of B frames can be in
-    // flight at any one time.
-    ctx->recon_frames->initial_pool_size = 3 + 2 * ctx->b_per_p;
 
     err = av_hwframe_ctx_init(ctx->recon_frames_ref);
     if (err < 0) {
@@ -1734,6 +2081,10 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
+    err = vaapi_encode_init_slice_structure(avctx);
+    if (err < 0)
+        goto fail;
+
     err = vaapi_encode_init_packed_headers(avctx);
     if (err < 0)
         goto fail;
@@ -1787,10 +2138,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
             goto fail;
     }
 
-    ctx->input_order  = 0;
     ctx->output_delay = ctx->b_per_p;
-    ctx->decode_delay = 1;
-    ctx->output_order = - ctx->output_delay - 1;
+    ctx->decode_delay = ctx->max_b_depth;
 
     if (ctx->codec->sequence_params_size > 0) {
         ctx->codec_sequence_params =
@@ -1818,10 +2167,6 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
         }
     }
 
-    // This should be configurable somehow.  (Needs testing on a machine
-    // where it actually overlaps properly, though.)
-    ctx->issue_mode = ISSUE_MODE_MAXIMISE_THROUGHPUT;
-
     if (ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE &&
         ctx->codec->write_sequence_header &&
         avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 091889f9ae83c..44a8db566e051 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -38,6 +38,7 @@ struct VAAPIEncodePicture;
 enum {
     MAX_CONFIG_ATTRIBUTES  = 4,
     MAX_GLOBAL_PARAMS      = 4,
+    MAX_DPB_SIZE           = 16,
     MAX_PICTURE_REFERENCES = 2,
     MAX_REORDER_DELAY      = 16,
     MAX_PARAM_BUFFER_SIZE  = 1024,
@@ -52,6 +53,10 @@ enum {
 
 typedef struct VAAPIEncodeSlice {
     int             index;
+    int             row_start;
+    int             row_size;
+    int             block_start;
+    int             block_size;
     void           *priv_data;
     void           *codec_slice_params;
 } VAAPIEncodeSlice;
@@ -62,9 +67,10 @@ typedef struct VAAPIEncodePicture {
     int64_t         display_order;
     int64_t         encode_order;
     int64_t         pts;
+    int             force_idr;
 
     int             type;
-    int             input_available;
+    int             b_depth;
     int             encode_issued;
     int             encode_complete;
 
@@ -83,8 +89,26 @@ typedef struct VAAPIEncodePicture {
     void           *priv_data;
     void           *codec_picture_params;
 
-    int          nb_refs;
+    // Whether this picture is a reference picture.
+    int             is_reference;
+
+    // The contents of the DPB after this picture has been decoded.
+    // This will contain the picture itself if it is a reference picture,
+    // but not if it isn't.
+    int                     nb_dpb_pics;
+    struct VAAPIEncodePicture *dpb[MAX_DPB_SIZE];
+    // The reference pictures used in decoding this picture.  If they are
+    // used by later pictures they will also appear in the DPB.
+    int                     nb_refs;
     struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
+    // The previous reference picture in encode order.  Must be in at least
+    // one of the reference list and DPB list.
+    struct VAAPIEncodePicture *prev;
+    // Reference count for other pictures referring to this one through
+    // the above pointers, directly from incomplete pictures and indirectly
+    // through completed pictures.
+    int             ref_count[2];
+    int             ref_removed[2];
 
     int          nb_slices;
     VAAPIEncodeSlice *slices;
@@ -105,6 +129,36 @@ typedef struct VAAPIEncodeProfile {
     VAProfile va_profile;
 } VAAPIEncodeProfile;
 
+enum { // Rate control modes; the values index vaapi_encode_rc_modes[].
+    RC_MODE_AUTO, // 0: no explicit mode, one is picked from the options
+    RC_MODE_CQP,  // uses a quality value only
+    RC_MODE_CBR,  // uses bitrate, with HRD/VBV
+    RC_MODE_VBR,  // uses bitrate and maxrate, with HRD/VBV
+    RC_MODE_ICQ,  // uses a quality value only
+    RC_MODE_QVBR, // uses bitrate, maxrate and quality, with HRD/VBV
+    RC_MODE_AVBR, // uses bitrate only
+    RC_MODE_MAX = RC_MODE_AVBR, // alias of the last mode
+};
+
+typedef struct VAAPIEncodeRCMode { // One entry of the RC mode table.
+    // Mode from above enum (RC_MODE_*).
+    int mode;
+    // Short name used in log messages (e.g. "CQP").
+    const char *name;
+    // Supported in the compile-time VAAPI version.
+    int supported;
+    // VA mode bit (VA_RC_*), matched against the driver's RC mode mask.
+    uint32_t va_mode;
+    // Uses bitrate parameters (a bitrate must then be set).
+    int bitrate;
+    // Supports maxrate distinct from bitrate.
+    int maxrate;
+    // Uses quality value (explicit QP, global_quality or codec default).
+    int quality;
+    // Supports HRD/VBV parameters.
+    int hrd;
+} VAAPIEncodeRCMode;
+
 typedef struct VAAPIEncodeContext {
     const AVClass *class;
 
@@ -116,6 +170,20 @@ typedef struct VAAPIEncodeContext {
     // Use low power encoding mode.
     int             low_power;
 
+    // Number of I frames between IDR frames.
+    int             idr_interval;
+
+    // Desired B frame reference depth.
+    int             desired_b_depth;
+
+    // Explicitly set RC mode (otherwise attempt to pick from
+    // available modes).
+    int             explicit_rc_mode;
+
+    // Explicitly-set QP, for use with the "qp" options.
+    // (Forces CQP mode when set, overriding everything else.)
+    int             explicit_qp;
+
     // Desired packed headers.
     unsigned int    desired_packed_headers;
 
@@ -125,12 +193,22 @@ typedef struct VAAPIEncodeContext {
     int             surface_width;
     int             surface_height;
 
+    // The block size for slice calculations.
+    int             slice_block_width;
+    int             slice_block_height;
+
     // Everything above this point must be set before calling
     // ff_vaapi_encode_init().
 
     // Chosen encoding profile details.
     const VAAPIEncodeProfile *profile;
 
+    // Chosen rate control mode details.
+    const VAAPIEncodeRCMode *rc_mode;
+    // RC quality level - meaning depends on codec and RC mode.
+    // In CQP mode this sets the fixed quantiser value.
+    int             rc_quality;
+
     // Encoding profile (VAProfile*).
     VAProfile       va_profile;
     // Encoding entrypoint (VAEntryoint*).
@@ -199,51 +277,82 @@ typedef struct VAAPIEncodeContext {
 
     // Current encoding window, in display (input) order.
     VAAPIEncodePicture *pic_start, *pic_end;
+    // The next picture to use as the previous reference picture in
+    // encoding order.
+    VAAPIEncodePicture *next_prev;
 
     // Next input order index (display order).
     int64_t         input_order;
     // Number of frames that output is behind input.
     int64_t         output_delay;
+    // Next encode order index.
+    int64_t         encode_order;
     // Number of frames decode output will need to be delayed.
     int64_t         decode_delay;
-    // Next output order index (encode order).
+    // Next output order index (in encode order).
     int64_t         output_order;
 
-    enum {
-        // All encode operations are done independently (synchronise
-        // immediately after every operation).
-        ISSUE_MODE_SERIALISE_EVERYTHING = 0,
-        // Overlap as many operations as possible.
-        ISSUE_MODE_MAXIMISE_THROUGHPUT,
-        // Overlap operations only when satisfying parallel dependencies.
-        ISSUE_MODE_MINIMISE_LATENCY,
-    } issue_mode;
-
     // Timestamp handling.
     int64_t         first_pts;
     int64_t         dts_pts_diff;
     int64_t         ts_ring[MAX_REORDER_DELAY * 3];
 
+    // Slice structure.
+    int slice_block_rows;
+    int slice_block_cols;
+    int nb_slices;
+    int slice_size;
+
     // Frame type decision.
     int gop_size;
+    int closed_gop;
+    int gop_per_idr;
     int p_per_i;
+    int max_b_depth;
     int b_per_p;
     int force_idr;
+    int idr_counter;
     int gop_counter;
-    int p_counter;
     int end_of_stream;
 } VAAPIEncodeContext;
 
+enum { // Codec feature flags, combined with | in VAAPIEncodeType.flags.
+    // Codec supports controlling the subdivision of pictures into slices.
+    FLAG_SLICE_CONTROL         = 1 << 0,
+    // Codec only supports constant quality (no rate control).
+    FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
+    // Codec is intra-only.
+    FLAG_INTRA_ONLY            = 1 << 2,
+    // Codec supports B-pictures.
+    FLAG_B_PICTURES            = 1 << 3,
+    // Codec supports referencing B-pictures.
+    FLAG_B_PICTURE_REFERENCES  = 1 << 4,
+    // Codec supports non-IDR key pictures (that is, key pictures do
+    // not necessarily empty the DPB).
+    FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
+};
+
 typedef struct VAAPIEncodeType {
     // List of supported profiles and corresponding VAAPI profiles.
     // (Must end with FF_PROFILE_UNKNOWN.)
     const VAAPIEncodeProfile *profiles;
 
+    // Codec feature flags.
+    int flags;
+
+    // Default quality for this codec - used as quantiser or RC quality
+    // factor depending on RC mode.
+    int default_quality;
+
     // Perform any extra codec-specific configuration after the
     // codec context is initialised (set up the private data and
     // add any necessary global parameters).
     int (*configure)(AVCodecContext *avctx);
 
+    // The size of any private data structure associated with each
+    // picture (can be zero if not required).
+    size_t picture_priv_data_size;
+
     // The size of the parameter structures:
     // sizeof(VAEnc{type}ParameterBuffer{codec}).
     size_t sequence_params_size;
@@ -299,6 +408,9 @@ typedef struct VAAPIEncodeType {
 int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
                      const AVFrame *input_image, int *got_packet);
 
+int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame);
+int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_vaapi_encode_init(AVCodecContext *avctx);
 int ff_vaapi_encode_close(AVCodecContext *avctx);
 
@@ -308,7 +420,32 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
       "Use low-power encoding mode (only available on some platforms; " \
       "may not support all encoding features)", \
       OFFSET(common.low_power), AV_OPT_TYPE_BOOL, \
-      { .i64 = 0 }, 0, 1, FLAGS }
+      { .i64 = 0 }, 0, 1, FLAGS }, \
+    { "idr_interval", \
+      "Distance (in I-frames) between IDR frames", \
+      OFFSET(common.idr_interval), AV_OPT_TYPE_INT, \
+      { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
+    { "b_depth", \
+      "Maximum B-frame reference depth", \
+      OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
+      { .i64 = 1 }, 1, INT_MAX, FLAGS }
+
+#define VAAPI_ENCODE_RC_MODE(name, desc) /* one named value of the rc_mode option */ \
+    { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
+      0, 0, FLAGS, "rc_mode" }
+#define VAAPI_ENCODE_RC_OPTIONS /* the rc_mode option followed by its named values */ \
+    { "rc_mode",\
+      "Set rate control mode", \
+      OFFSET(common.explicit_rc_mode), AV_OPT_TYPE_INT, \
+      { .i64 = RC_MODE_AUTO }, RC_MODE_AUTO, RC_MODE_MAX, FLAGS, "rc_mode" }, \
+    { "auto", "Choose mode automatically based on other parameters", \
+      0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_AUTO }, 0, 0, FLAGS, "rc_mode" }, \
+    VAAPI_ENCODE_RC_MODE(CQP,  "Constant-quality"), \
+    VAAPI_ENCODE_RC_MODE(CBR,  "Constant-bitrate"), \
+    VAAPI_ENCODE_RC_MODE(VBR,  "Variable-bitrate"), \
+    VAAPI_ENCODE_RC_MODE(ICQ,  "Intelligent constant-quality"), \
+    VAAPI_ENCODE_RC_MODE(QVBR, "Quality-defined variable-bitrate"), \
+    VAAPI_ENCODE_RC_MODE(AVBR, "Average variable-bitrate")
 
 
 #endif /* AVCODEC_VAAPI_ENCODE_H */
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index 8feae0d42fb10..91be33f99fce7 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -47,6 +47,20 @@ static const uint8_t vaapi_encode_h264_sei_identifier_uuid[16] = {
     0x96, 0x75, 0x19, 0xd4, 0x1f, 0xea, 0xa9, 0x4d,
 };
 
+typedef struct VAAPIEncodeH264Picture { // Per-picture H.264 state (pic->priv_data).
+    int frame_num;          // 0 at IDR, else previous + prev->is_reference (unwrapped).
+    int pic_order_cnt;      // display_order - last_idr_frame.
+
+    int64_t last_idr_frame; // display_order of the most recent IDR picture.
+    uint16_t idr_pic_id;    // Advances by one at each IDR; copied to the slice header.
+
+    int primary_pic_type;   // Value written to the access unit delimiter.
+    int slice_type;         // Value written to the slice header.
+
+    int cpb_delay;          // encode_order - last_idr_frame; doubled for timing SEI.
+    int dpb_delay;          // Output delay in pictures; doubled for timing SEI.
+} VAAPIEncodeH264Picture;
+
 typedef struct VAAPIEncodeH264Context {
     VAAPIEncodeContext common;
 
@@ -67,18 +81,7 @@ typedef struct VAAPIEncodeH264Context {
     int fixed_qp_p;
     int fixed_qp_b;
 
-    // Stream state.
-    int frame_num;
-    int pic_order_cnt;
-    int next_frame_num;
-    int64_t last_idr_frame;
-    int64_t idr_pic_count;
-
-    int primary_pic_type;
-    int slice_type;
-
-    int cpb_delay;
-    int dpb_delay;
+    int dpb_frames;
 
     // Writer structures.
     CodedBitstreamContext *cbc;
@@ -171,7 +174,7 @@ static int vaapi_encode_h264_write_sequence_header(AVCodecContext *avctx,
 
     err = vaapi_encode_h264_write_access_unit(avctx, data, data_len, au);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -197,7 +200,7 @@ static int vaapi_encode_h264_write_slice_header(AVCodecContext *avctx,
 
     err = vaapi_encode_h264_write_access_unit(avctx, data, data_len, au);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -261,7 +264,7 @@ static int vaapi_encode_h264_write_extra_header(AVCodecContext *avctx,
         if (err < 0)
             goto fail;
 
-        ff_cbs_fragment_uninit(priv->cbc, au);
+        ff_cbs_fragment_reset(priv->cbc, au);
 
         *type = VAEncPackedHeaderRawData;
         return 0;
@@ -283,7 +286,7 @@ static int vaapi_encode_h264_write_extra_header(AVCodecContext *avctx,
     }
 
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -295,10 +298,6 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
     H264RawPPS                        *pps = &priv->raw_pps;
     VAEncSequenceParameterBufferH264 *vseq = ctx->codec_sequence_params;
     VAEncPictureParameterBufferH264  *vpic = ctx->codec_picture_params;
-    int dpb_frames;
-
-    memset(&priv->current_access_unit, 0,
-           sizeof(priv->current_access_unit));
 
     memset(sps, 0, sizeof(*sps));
     memset(pps, 0, sizeof(*pps));
@@ -322,9 +321,9 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
     }
 
     if (ctx->gop_size == 1)
-        dpb_frames = 0;
+        priv->dpb_frames = 0;
     else
-        dpb_frames = 1 + (ctx->b_per_p > 0);
+        priv->dpb_frames = 1 + ctx->max_b_depth;
 
     if (avctx->level != FF_LEVEL_UNKNOWN) {
         sps->level_idc = avctx->level;
@@ -335,7 +334,7 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
                                     avctx->bit_rate,
                                     priv->mb_width  * 16,
                                     priv->mb_height * 16,
-                                    dpb_frames);
+                                    priv->dpb_frames);
         if (level) {
             av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
             if (level->constraint_set3_flag)
@@ -353,10 +352,9 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
 
     sps->log2_max_frame_num_minus4 = 4;
     sps->pic_order_cnt_type        = 0;
-    sps->log2_max_pic_order_cnt_lsb_minus4 =
-        av_clip(av_log2(ctx->b_per_p + 1) - 2, 0, 12);
+    sps->log2_max_pic_order_cnt_lsb_minus4 = 4;
 
-    sps->max_num_ref_frames = dpb_frames;
+    sps->max_num_ref_frames = priv->dpb_frames;
 
     sps->pic_width_in_mbs_minus1        = priv->mb_width  - 1;
     sps->pic_height_in_map_units_minus1 = priv->mb_height - 1;
@@ -389,18 +387,20 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
             {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
             { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
         };
-        int i;
+        int num, den, i;
+        av_reduce(&num, &den, avctx->sample_aspect_ratio.num,
+                  avctx->sample_aspect_ratio.den, 65535);
         for (i = 0; i < FF_ARRAY_ELEMS(sar_idc); i++) {
-            if (avctx->sample_aspect_ratio.num == sar_idc[i].num &&
-                avctx->sample_aspect_ratio.den == sar_idc[i].den) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den) {
                 sps->vui.aspect_ratio_idc = i;
                 break;
             }
         }
         if (i >= FF_ARRAY_ELEMS(sar_idc)) {
             sps->vui.aspect_ratio_idc = 255;
-            sps->vui.sar_width  = avctx->sample_aspect_ratio.num;
-            sps->vui.sar_height = avctx->sample_aspect_ratio.den;
+            sps->vui.sar_width  = num;
+            sps->vui.sar_height = den;
         }
         sps->vui.aspect_ratio_info_present_flag = 1;
     }
@@ -493,8 +493,8 @@ static int vaapi_encode_h264_init_sequence_params(AVCodecContext *avctx)
     sps->vui.motion_vectors_over_pic_boundaries_flag = 1;
     sps->vui.log2_max_mv_length_horizontal = 15;
     sps->vui.log2_max_mv_length_vertical   = 15;
-    sps->vui.max_num_reorder_frames        = (ctx->b_per_p > 0);
-    sps->vui.max_dec_frame_buffering       = sps->max_num_ref_frames;
+    sps->vui.max_num_reorder_frames        = ctx->max_b_depth;
+    sps->vui.max_dec_frame_buffering       = ctx->max_b_depth + 1;
 
     pps->nal_unit_header.nal_ref_idc = 3;
     pps->nal_unit_header.nal_unit_type = H264_NAL_PPS;
@@ -615,46 +615,43 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
 {
     VAAPIEncodeContext               *ctx = avctx->priv_data;
     VAAPIEncodeH264Context          *priv = avctx->priv_data;
-    H264RawSPS                       *sps = &priv->raw_sps;
+    VAAPIEncodeH264Picture          *hpic = pic->priv_data;
+    VAAPIEncodePicture              *prev = pic->prev;
+    VAAPIEncodeH264Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
     int i;
 
-    memset(&priv->current_access_unit, 0,
-           sizeof(priv->current_access_unit));
-
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
-        priv->frame_num      = 0;
-        priv->next_frame_num = 1;
-        priv->cpb_delay      = 0;
-        priv->last_idr_frame = pic->display_order;
-        ++priv->idr_pic_count;
-
-        priv->slice_type       = 7;
-        priv->primary_pic_type = 0;
+
+        hpic->frame_num      = 0;
+        hpic->last_idr_frame = pic->display_order;
+        hpic->idr_pic_id     = hprev ? hprev->idr_pic_id + 1 : 0;
+
+        hpic->primary_pic_type = 0;
+        hpic->slice_type       = 7;
     } else {
-        priv->frame_num      = priv->next_frame_num;
+        av_assert0(prev);
 
-        if (pic->type != PICTURE_TYPE_B) {
-            // Reference picture, so frame_num advances.
-            priv->next_frame_num = (priv->frame_num + 1) &
-                ((1 << (4 + sps->log2_max_frame_num_minus4)) - 1);
-        }
-        ++priv->cpb_delay;
+        hpic->frame_num = hprev->frame_num + prev->is_reference;
+
+        hpic->last_idr_frame = hprev->last_idr_frame;
+        hpic->idr_pic_id     = hprev->idr_pic_id;
 
         if (pic->type == PICTURE_TYPE_I) {
-            priv->slice_type       = 7;
-            priv->primary_pic_type = 0;
+            hpic->slice_type       = 7;
+            hpic->primary_pic_type = 0;
         } else if (pic->type == PICTURE_TYPE_P) {
-            priv->slice_type       = 5;
-            priv->primary_pic_type = 1;
+            hpic->slice_type       = 5;
+            hpic->primary_pic_type = 1;
         } else {
-            priv->slice_type       = 6;
-            priv->primary_pic_type = 2;
+            hpic->slice_type       = 6;
+            hpic->primary_pic_type = 2;
         }
     }
-    priv->pic_order_cnt = pic->display_order - priv->last_idr_frame;
-    priv->dpb_delay     = pic->display_order - pic->encode_order + 1;
+    hpic->pic_order_cnt = pic->display_order - hpic->last_idr_frame;
+    hpic->dpb_delay     = pic->display_order - pic->encode_order + ctx->max_b_depth;
+    hpic->cpb_delay     = pic->encode_order - hpic->last_idr_frame;
 
     if (priv->aud) {
         priv->aud_needed = 1;
@@ -662,7 +659,7 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
             .nal_unit_header = {
                 .nal_unit_type = H264_NAL_AUD,
             },
-            .primary_pic_type  = priv->primary_pic_type,
+            .primary_pic_type  = hpic->primary_pic_type,
         };
     } else {
         priv->aud_needed = 0;
@@ -679,8 +676,8 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
 
     if (priv->sei & SEI_TIMING) {
         priv->sei_pic_timing = (H264RawSEIPicTiming) {
-            .cpb_removal_delay = 2 * priv->cpb_delay,
-            .dpb_output_delay  = 2 * priv->dpb_delay,
+            .cpb_removal_delay = 2 * hpic->cpb_delay,
+            .dpb_output_delay  = 2 * hpic->dpb_delay,
         };
 
         priv->sei_needed |= SEI_TIMING;
@@ -698,25 +695,25 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
 
     vpic->CurrPic = (VAPictureH264) {
         .picture_id          = pic->recon_surface,
-        .frame_idx           = priv->frame_num,
+        .frame_idx           = hpic->frame_num,
         .flags               = 0,
-        .TopFieldOrderCnt    = priv->pic_order_cnt,
-        .BottomFieldOrderCnt = priv->pic_order_cnt,
+        .TopFieldOrderCnt    = hpic->pic_order_cnt,
+        .BottomFieldOrderCnt = hpic->pic_order_cnt,
     };
 
     for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture *ref = pic->refs[i];
-        unsigned int frame_num = (ref->encode_order - priv->last_idr_frame) &
-            ((1 << (4 + sps->log2_max_frame_num_minus4)) - 1);
-        unsigned int pic_order_cnt = ref->display_order - priv->last_idr_frame;
+        VAAPIEncodePicture      *ref = pic->refs[i];
+        VAAPIEncodeH264Picture *href;
 
         av_assert0(ref && ref->encode_order < pic->encode_order);
+        href = ref->priv_data;
+
         vpic->ReferenceFrames[i] = (VAPictureH264) {
             .picture_id          = ref->recon_surface,
-            .frame_idx           = frame_num,
+            .frame_idx           = href->frame_num,
             .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
-            .TopFieldOrderCnt    = pic_order_cnt,
-            .BottomFieldOrderCnt = pic_order_cnt,
+            .TopFieldOrderCnt    = href->pic_order_cnt,
+            .BottomFieldOrderCnt = href->pic_order_cnt,
         };
     }
     for (; i < FF_ARRAY_ELEMS(vpic->ReferenceFrames); i++) {
@@ -728,46 +725,142 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
 
     vpic->coded_buf = pic->output_buffer;
 
-    vpic->frame_num = priv->frame_num;
+    vpic->frame_num = hpic->frame_num;
 
     vpic->pic_fields.bits.idr_pic_flag       = (pic->type == PICTURE_TYPE_IDR);
     vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B);
 
-    pic->nb_slices = 1;
-
     return 0;
 }
 
+static void vaapi_encode_h264_default_ref_pic_list(AVCodecContext *avctx,
+                                                   VAAPIEncodePicture *pic,
+                                                   VAAPIEncodePicture **rpl0,
+                                                   VAAPIEncodePicture **rpl1,
+                                                   int *rpl_size)
+{
+    // Build the default (pre-modification) RefPicList0/RefPicList1 of pic
+    // from the DPB of pic->prev; *rpl_size receives the entry count of each.
+    VAAPIEncodePicture *prev = pic->prev;
+    VAAPIEncodeH264Picture *hp = pic->priv_data, *hn, *hc;
+    int i, j, n = 0;
+
+    av_assert0(prev);
+
+    for (i = 0; i < prev->nb_dpb_pics; i++) {
+        hn = prev->dpb[i]->priv_data;
+        av_assert0(hn->frame_num < hp->frame_num);
+        // P: insertion-sort RefPicList0 by descending frame_num.
+        if (pic->type == PICTURE_TYPE_P) {
+            for (j = n; j > 0; j--) {
+                hc = rpl0[j - 1]->priv_data;
+                av_assert0(hc->frame_num != hn->frame_num);
+                if (hc->frame_num > hn->frame_num)
+                    break;
+                rpl0[j] = rpl0[j - 1];
+            }
+            rpl0[j] = prev->dpb[i];
+        } else if (pic->type == PICTURE_TYPE_B) {
+            // RefPicList0: earlier POCs descending, then later POCs ascending.
+            for (j = n; j > 0; j--) {
+                hc = rpl0[j - 1]->priv_data;
+                av_assert0(hc->pic_order_cnt != hp->pic_order_cnt);
+                if (hc->pic_order_cnt < hp->pic_order_cnt) {
+                    if (hn->pic_order_cnt > hp->pic_order_cnt ||
+                        hn->pic_order_cnt < hc->pic_order_cnt)
+                        break;
+                } else {
+                    if (hn->pic_order_cnt > hc->pic_order_cnt)
+                        break;
+                }
+                rpl0[j] = rpl0[j - 1];
+            }
+            rpl0[j] = prev->dpb[i];
+            // RefPicList1: later POCs ascending, then earlier POCs descending.
+            for (j = n; j > 0; j--) {
+                hc = rpl1[j - 1]->priv_data;
+                av_assert0(hc->pic_order_cnt != hp->pic_order_cnt);
+                if (hc->pic_order_cnt > hp->pic_order_cnt) {
+                    if (hn->pic_order_cnt < hp->pic_order_cnt ||
+                        hn->pic_order_cnt > hc->pic_order_cnt)
+                        break;
+                } else {
+                    if (hn->pic_order_cnt < hc->pic_order_cnt)
+                        break;
+                }
+                rpl1[j] = rpl1[j - 1];
+            }
+            rpl1[j] = prev->dpb[i];
+        }
+
+        ++n;
+    }
+
+    if (pic->type == PICTURE_TYPE_B) {
+        // Identical lists swap rpl1[0]/rpl1[1]; needs n > 1 as rpl1[1] is unset.
+        for (i = 0; i < n; i++)
+            if (rpl0[i] != rpl1[i])
+                break;
+        if (n > 1 && i == n)
+            FFSWAP(VAAPIEncodePicture*, rpl1[0], rpl1[1]);
+    }
+
+    if (pic->type == PICTURE_TYPE_P ||
+        pic->type == PICTURE_TYPE_B) {
+        av_log(avctx, AV_LOG_DEBUG, "Default RefPicList0 for fn=%d/poc=%d:",
+               hp->frame_num, hp->pic_order_cnt);
+        for (i = 0; i < n; i++) {
+            hn = rpl0[i]->priv_data;
+            av_log(avctx, AV_LOG_DEBUG, "  fn=%d/poc=%d",
+                   hn->frame_num, hn->pic_order_cnt);
+        }
+        av_log(avctx, AV_LOG_DEBUG, "\n");
+    }
+    if (pic->type == PICTURE_TYPE_B) {
+        av_log(avctx, AV_LOG_DEBUG, "Default RefPicList1 for fn=%d/poc=%d:",
+               hp->frame_num, hp->pic_order_cnt);
+        for (i = 0; i < n; i++) {
+            hn = rpl1[i]->priv_data;
+            av_log(avctx, AV_LOG_DEBUG, "  fn=%d/poc=%d",
+                   hn->frame_num, hn->pic_order_cnt);
+        }
+        av_log(avctx, AV_LOG_DEBUG, "\n");
+    }
+
+    *rpl_size = n;
+}
+
 static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
                                                VAAPIEncodePicture *pic,
                                                VAAPIEncodeSlice *slice)
 {
     VAAPIEncodeH264Context          *priv = avctx->priv_data;
+    VAAPIEncodeH264Picture          *hpic = pic->priv_data;
+    VAAPIEncodePicture              *prev = pic->prev;
     H264RawSPS                       *sps = &priv->raw_sps;
     H264RawPPS                       *pps = &priv->raw_pps;
     H264RawSliceHeader                *sh = &priv->raw_slice.header;
     VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
     VAEncSliceParameterBufferH264 *vslice = slice->codec_slice_params;
-    int i;
+    int i, j;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         sh->nal_unit_header.nal_unit_type = H264_NAL_IDR_SLICE;
         sh->nal_unit_header.nal_ref_idc   = 3;
     } else {
         sh->nal_unit_header.nal_unit_type = H264_NAL_SLICE;
-        sh->nal_unit_header.nal_ref_idc   = pic->type != PICTURE_TYPE_B;
+        sh->nal_unit_header.nal_ref_idc   = pic->is_reference;
     }
 
-    // Only one slice per frame.
-    sh->first_mb_in_slice = 0;
-    sh->slice_type        = priv->slice_type;
+    sh->first_mb_in_slice = slice->block_start;
+    sh->slice_type        = hpic->slice_type;
 
     sh->pic_parameter_set_id = pps->pic_parameter_set_id;
 
-    sh->frame_num  = priv->frame_num;
-    sh->idr_pic_id = priv->idr_pic_count;
-
-    sh->pic_order_cnt_lsb = priv->pic_order_cnt &
+    sh->frame_num = hpic->frame_num &
+        ((1 << (4 + sps->log2_max_frame_num_minus4)) - 1);
+    sh->idr_pic_id = hpic->idr_pic_id;
+    sh->pic_order_cnt_lsb = hpic->pic_order_cnt &
         ((1 << (4 + sps->log2_max_pic_order_cnt_lsb_minus4)) - 1);
 
     sh->direct_spatial_mv_pred_flag = 1;
@@ -779,9 +872,149 @@ static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
     else
         sh->slice_qp_delta = priv->fixed_qp_idr - (pps->pic_init_qp_minus26 + 26);
 
+    if (pic->is_reference && pic->type != PICTURE_TYPE_IDR) {
+        VAAPIEncodePicture *discard_list[MAX_DPB_SIZE];
+        int discard = 0, keep = 0;
+
+        // Discard everything which is in the DPB of the previous frame but
+        // not in the DPB of this one.
+        for (i = 0; i < prev->nb_dpb_pics; i++) {
+            for (j = 0; j < pic->nb_dpb_pics; j++) {
+                if (prev->dpb[i] == pic->dpb[j])
+                    break;
+            }
+            if (j == pic->nb_dpb_pics) {
+                discard_list[discard] = prev->dpb[i];
+                ++discard;
+            } else {
+                ++keep;
+            }
+        }
+        av_assert0(keep <= priv->dpb_frames);
+
+        if (discard == 0) {
+            sh->adaptive_ref_pic_marking_mode_flag = 0;
+        } else {
+            sh->adaptive_ref_pic_marking_mode_flag = 1;
+            for (i = 0; i < discard; i++) {
+                VAAPIEncodeH264Picture *old = discard_list[i]->priv_data;
+                av_assert0(old->frame_num < hpic->frame_num);
+                sh->mmco[i].memory_management_control_operation = 1;
+                sh->mmco[i].difference_of_pic_nums_minus1 =
+                    hpic->frame_num - old->frame_num - 1;
+            }
+            sh->mmco[i].memory_management_control_operation = 0;
+        }
+    }
+
+    // If the intended references are not the first entries of RefPicListN
+    // by default, use ref-pic-list-modification to move them there.
+    if (pic->type == PICTURE_TYPE_P || pic->type == PICTURE_TYPE_B) {
+        VAAPIEncodePicture *def_l0[MAX_DPB_SIZE], *def_l1[MAX_DPB_SIZE];
+        VAAPIEncodeH264Picture *href;
+        int n;
+
+        vaapi_encode_h264_default_ref_pic_list(avctx, pic,
+                                               def_l0, def_l1, &n);
+
+        if (pic->type == PICTURE_TYPE_P) {
+            int need_rplm = 0;
+            for (i = 0; i < pic->nb_refs; i++) {
+                av_assert0(pic->refs[i]);
+                if (pic->refs[i] != def_l0[i])
+                    need_rplm = 1;
+            }
+
+            sh->ref_pic_list_modification_flag_l0 = need_rplm;
+            if (need_rplm) {
+                int pic_num = hpic->frame_num;
+                for (i = 0; i < pic->nb_refs; i++) {
+                    href = pic->refs[i]->priv_data;
+                    av_assert0(href->frame_num != pic_num);
+                    if (href->frame_num < pic_num) {
+                        sh->rplm_l0[i].modification_of_pic_nums_idc = 0;
+                        sh->rplm_l0[i].abs_diff_pic_num_minus1 =
+                            pic_num - href->frame_num - 1;
+                    } else {
+                        sh->rplm_l0[i].modification_of_pic_nums_idc = 1;
+                        sh->rplm_l0[i].abs_diff_pic_num_minus1 =
+                            href->frame_num - pic_num - 1;
+                    }
+                    pic_num = href->frame_num;
+                }
+                sh->rplm_l0[i].modification_of_pic_nums_idc = 3;
+            }
+
+        } else {
+            int need_rplm_l0 = 0, need_rplm_l1 = 0;
+            int n0 = 0, n1 = 0;
+            for (i = 0; i < pic->nb_refs; i++) {
+                av_assert0(pic->refs[i]);
+                href = pic->refs[i]->priv_data;
+                av_assert0(href->pic_order_cnt != hpic->pic_order_cnt);
+                if (href->pic_order_cnt < hpic->pic_order_cnt) {
+                    if (pic->refs[i] != def_l0[n0])
+                        need_rplm_l0 = 1;
+                    ++n0;
+                } else {
+                    if (pic->refs[i] != def_l1[n1])
+                        need_rplm_l1 = 1;
+                    ++n1;
+                }
+            }
 
-    vslice->macroblock_address = sh->first_mb_in_slice;
-    vslice->num_macroblocks    = priv->mb_width * priv->mb_height;
+            sh->ref_pic_list_modification_flag_l0 = need_rplm_l0;
+            if (need_rplm_l0) {
+                int pic_num = hpic->frame_num;
+                for (i = j = 0; i < pic->nb_refs; i++) {
+                    href = pic->refs[i]->priv_data;
+                    if (href->pic_order_cnt > hpic->pic_order_cnt)
+                        continue;
+                    av_assert0(href->frame_num != pic_num);
+                    if (href->frame_num < pic_num) {
+                        sh->rplm_l0[j].modification_of_pic_nums_idc = 0;
+                        sh->rplm_l0[j].abs_diff_pic_num_minus1 =
+                            pic_num - href->frame_num - 1;
+                    } else {
+                        sh->rplm_l0[j].modification_of_pic_nums_idc = 1;
+                        sh->rplm_l0[j].abs_diff_pic_num_minus1 =
+                            href->frame_num - pic_num - 1;
+                    }
+                    pic_num = href->frame_num;
+                    ++j;
+                }
+                av_assert0(j == n0);
+                sh->rplm_l0[j].modification_of_pic_nums_idc = 3;
+            }
+
+            sh->ref_pic_list_modification_flag_l1 = need_rplm_l1;
+            if (need_rplm_l1) {
+                int pic_num = hpic->frame_num;
+                for (i = j = 0; i < pic->nb_refs; i++) {
+                    href = pic->refs[i]->priv_data;
+                    if (href->pic_order_cnt < hpic->pic_order_cnt)
+                        continue;
+                    av_assert0(href->frame_num != pic_num);
+                    if (href->frame_num < pic_num) {
+                        sh->rplm_l1[j].modification_of_pic_nums_idc = 0;
+                        sh->rplm_l1[j].abs_diff_pic_num_minus1 =
+                            pic_num - href->frame_num - 1;
+                    } else {
+                        sh->rplm_l1[j].modification_of_pic_nums_idc = 1;
+                        sh->rplm_l1[j].abs_diff_pic_num_minus1 =
+                            href->frame_num - pic_num - 1;
+                    }
+                    pic_num = href->frame_num;
+                    ++j;
+                }
+                av_assert0(j == n1);
+                sh->rplm_l1[j].modification_of_pic_nums_idc = 3;
+            }
+        }
+    }
+
+    vslice->macroblock_address = slice->block_start;
+    vslice->num_macroblocks    = slice->block_size;
 
     vslice->macroblock_info = VA_INVALID_ID;
 
@@ -832,33 +1065,34 @@ static av_cold int vaapi_encode_h264_configure(AVCodecContext *avctx)
     priv->mb_height = FFALIGN(avctx->height, 16) / 16;
 
     if (ctx->va_rc_mode == VA_RC_CQP) {
-        priv->fixed_qp_p = priv->qp;
+        priv->fixed_qp_p = av_clip(ctx->rc_quality, 1, 51);
         if (avctx->i_quant_factor > 0.0)
-            priv->fixed_qp_idr = (int)((priv->fixed_qp_p * avctx->i_quant_factor +
-                                        avctx->i_quant_offset) + 0.5);
+            priv->fixed_qp_idr =
+                av_clip((avctx->i_quant_factor * priv->fixed_qp_p +
+                         avctx->i_quant_offset) + 0.5, 1, 51);
         else
             priv->fixed_qp_idr = priv->fixed_qp_p;
         if (avctx->b_quant_factor > 0.0)
-            priv->fixed_qp_b = (int)((priv->fixed_qp_p * avctx->b_quant_factor +
-                                      avctx->b_quant_offset) + 0.5);
+            priv->fixed_qp_b =
+                av_clip((avctx->b_quant_factor * priv->fixed_qp_p +
+                         avctx->b_quant_offset) + 0.5, 1, 51);
         else
             priv->fixed_qp_b = priv->fixed_qp_p;
 
-        priv->sei &= ~SEI_TIMING;
-
         av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = "
                "%d / %d / %d for IDR- / P- / B-frames.\n",
                priv->fixed_qp_idr, priv->fixed_qp_p, priv->fixed_qp_b);
 
-    } else if (ctx->va_rc_mode == VA_RC_CBR ||
-               ctx->va_rc_mode == VA_RC_VBR) {
+    } else {
         // These still need to be  set for pic_init_qp/slice_qp_delta.
         priv->fixed_qp_idr = 26;
         priv->fixed_qp_p   = 26;
         priv->fixed_qp_b   = 26;
+    }
 
-    } else {
-        av_assert0(0 && "Invalid RC mode.");
+    if (!ctx->rc_mode->hrd) {
+        // Timing SEI requires a mode respecting HRD parameters.
+        priv->sei &= ~SEI_TIMING;
     }
 
     if (priv->sei & SEI_IDENTIFIER) {
@@ -903,8 +1137,17 @@ static const VAAPIEncodeProfile vaapi_encode_h264_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_h264 = {
     .profiles              = vaapi_encode_h264_profiles,
 
+    .flags                 = FLAG_SLICE_CONTROL |
+                             FLAG_B_PICTURES |
+                             FLAG_B_PICTURE_REFERENCES |
+                             FLAG_NON_IDR_KEY_PICTURES,
+
+    .default_quality       = 20,
+
     .configure             = &vaapi_encode_h264_configure,
 
+    .picture_priv_data_size = sizeof(VAAPIEncodeH264Picture),
+
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferH264),
     .init_sequence_params  = &vaapi_encode_h264_init_sequence_params,
 
@@ -978,6 +1221,11 @@ static av_cold int vaapi_encode_h264_init(AVCodecContext *avctx)
     ctx->surface_width  = FFALIGN(avctx->width,  16);
     ctx->surface_height = FFALIGN(avctx->height, 16);
 
+    ctx->slice_block_height = ctx->slice_block_width = 16;
+
+    if (priv->qp > 0)
+        ctx->explicit_qp = priv->qp;
+
     return ff_vaapi_encode_init(avctx);
 }
 
@@ -985,6 +1233,7 @@ static av_cold int vaapi_encode_h264_close(AVCodecContext *avctx)
 {
     VAAPIEncodeH264Context *priv = avctx->priv_data;
 
+    ff_cbs_fragment_free(priv->cbc, &priv->current_access_unit);
     ff_cbs_close(&priv->cbc);
     av_freep(&priv->sei_identifier_string);
 
@@ -995,9 +1244,10 @@ static av_cold int vaapi_encode_h264_close(AVCodecContext *avctx)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_h264_options[] = {
     VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
 
     { "qp", "Constant QP (for P-frames; scaled by qfactor/qoffset for I/B)",
-      OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 20 }, 0, 52, FLAGS },
+      OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, FLAGS },
     { "quality", "Set encode quality (trades off against speed, higher is faster)",
       OFFSET(quality), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
     { "coder", "Entropy coder type",
@@ -1092,7 +1342,8 @@ AVCodec ff_h264_vaapi_encoder = {
     .id             = AV_CODEC_ID_H264,
     .priv_data_size = sizeof(VAAPIEncodeH264Context),
     .init           = &vaapi_encode_h264_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h264_close,
     .priv_class     = &vaapi_encode_h264_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 10312fbd60a39..758bd40a37bd6 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -42,6 +42,16 @@ enum {
     SEI_CONTENT_LIGHT_LEVEL     = 0x10,
 };
 
+typedef struct VAAPIEncodeH265Picture { // Per-picture H.265 state, stored in VAAPIEncodePicture.priv_data.
+    int pic_order_cnt;      // display_order minus last_idr_frame; also read back for reference pictures.
+
+    int64_t last_idr_frame; // display_order of the latest IDR: set on IDR, otherwise copied from pic->prev.
+
+    int slice_nal_unit;     // HEVC_NAL_* type written to the slice NAL header and vpic->nal_unit_type.
+    int slice_type;         // HEVC_SLICE_I / HEVC_SLICE_P / HEVC_SLICE_B, copied into the slice header.
+    int pic_type;           // Value for the AUD pic_type field: 0 for I/IDR, 1 for P, 2 for B.
+} VAAPIEncodeH265Picture;
+
 typedef struct VAAPIEncodeH265Context {
     VAAPIEncodeContext common;
 
@@ -54,21 +64,10 @@ typedef struct VAAPIEncodeH265Context {
     int sei;
 
     // Derived settings.
-    unsigned int ctu_width;
-    unsigned int ctu_height;
-
     int fixed_qp_idr;
     int fixed_qp_p;
     int fixed_qp_b;
 
-    // Stream state.
-    int64_t last_idr_frame;
-    int pic_order_cnt;
-
-    int slice_nal_unit;
-    int slice_type;
-    int pic_type;
-
     // Writer structures.
     H265RawAUD   raw_aud;
     H265RawVPS   raw_vps;
@@ -160,7 +159,7 @@ static int vaapi_encode_h265_write_sequence_header(AVCodecContext *avctx,
 
     err = vaapi_encode_h265_write_access_unit(avctx, data, data_len, au);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -186,7 +185,7 @@ static int vaapi_encode_h265_write_slice_header(AVCodecContext *avctx,
 
     err = vaapi_encode_h265_write_access_unit(avctx, data, data_len, au);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -243,7 +242,7 @@ static int vaapi_encode_h265_write_extra_header(AVCodecContext *avctx,
         if (err < 0)
             goto fail;
 
-        ff_cbs_fragment_uninit(priv->cbc, au);
+        ff_cbs_fragment_reset(priv->cbc, au);
 
         *type = VAEncPackedHeaderRawData;
         return 0;
@@ -252,7 +251,7 @@ static int vaapi_encode_h265_write_extra_header(AVCodecContext *avctx,
     }
 
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, au);
+    ff_cbs_fragment_reset(priv->cbc, au);
     return err;
 }
 
@@ -271,9 +270,6 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
     int chroma_format, bit_depth;
     int i;
 
-    memset(&priv->current_access_unit, 0,
-           sizeof(priv->current_access_unit));
-
     memset(vps, 0, sizeof(*vps));
     memset(sps, 0, sizeof(*sps));
     memset(pps, 0, sizeof(*pps));
@@ -349,7 +345,8 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 
         level = ff_h265_guess_level(ptl, avctx->bit_rate,
                                     ctx->surface_width, ctx->surface_height,
-                                    1, 1, 1, (ctx->b_per_p > 0) + 1);
+                                    ctx->nb_slices, 1, 1,
+                                    (ctx->b_per_p > 0) + 1);
         if (level) {
             av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name);
             ptl->general_level_idc = level->level_idc;
@@ -363,8 +360,8 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
     }
 
     vps->vps_sub_layer_ordering_info_present_flag = 0;
-    vps->vps_max_dec_pic_buffering_minus1[0]      = (ctx->b_per_p > 0) + 1;
-    vps->vps_max_num_reorder_pics[0]              = (ctx->b_per_p > 0);
+    vps->vps_max_dec_pic_buffering_minus1[0]      = ctx->max_b_depth + 1;
+    vps->vps_max_num_reorder_pics[0]              = ctx->max_b_depth;
     vps->vps_max_latency_increase_plus1[0]        = 0;
 
     vps->vps_max_layer_id             = 0;
@@ -474,18 +471,20 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
             {  80, 33 }, {  18, 11 }, {  15, 11 }, {  64, 33 },
             { 160, 99 }, {   4,  3 }, {   3,  2 }, {   2,  1 },
         };
-        int i;
+        int num, den, i;
+        av_reduce(&num, &den, avctx->sample_aspect_ratio.num,
+                  avctx->sample_aspect_ratio.den, 65535);
         for (i = 0; i < FF_ARRAY_ELEMS(sar_idc); i++) {
-            if (avctx->sample_aspect_ratio.num == sar_idc[i].num &&
-                avctx->sample_aspect_ratio.den == sar_idc[i].den) {
+            if (num == sar_idc[i].num &&
+                den == sar_idc[i].den) {
                 vui->aspect_ratio_idc = i;
                 break;
             }
         }
         if (i >= FF_ARRAY_ELEMS(sar_idc)) {
             vui->aspect_ratio_idc = 255;
-            vui->sar_width  = avctx->sample_aspect_ratio.num;
-            vui->sar_height = avctx->sample_aspect_ratio.den;
+            vui->sar_width  = num;
+            vui->sar_height = den;
         }
         vui->aspect_ratio_info_present_flag = 1;
     }
@@ -673,41 +672,54 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
 static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
                                                  VAAPIEncodePicture *pic)
 {
+    VAAPIEncodeContext               *ctx = avctx->priv_data;
     VAAPIEncodeH265Context          *priv = avctx->priv_data;
+    VAAPIEncodeH265Picture          *hpic = pic->priv_data;
+    VAAPIEncodePicture              *prev = pic->prev;
+    VAAPIEncodeH265Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferHEVC *vpic = pic->codec_picture_params;
     int i;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
 
-        priv->last_idr_frame = pic->display_order;
+        hpic->last_idr_frame = pic->display_order;
 
-        priv->slice_nal_unit = HEVC_NAL_IDR_W_RADL;
-        priv->slice_type     = HEVC_SLICE_I;
-        priv->pic_type       = 0;
+        hpic->slice_nal_unit = HEVC_NAL_IDR_W_RADL;
+        hpic->slice_type     = HEVC_SLICE_I;
+        hpic->pic_type       = 0;
     } else {
-        av_assert0(pic->encode_order > priv->last_idr_frame);
+        av_assert0(prev);
+        hpic->last_idr_frame = hprev->last_idr_frame;
 
         if (pic->type == PICTURE_TYPE_I) {
-            priv->slice_nal_unit = HEVC_NAL_CRA_NUT;
-            priv->slice_type     = HEVC_SLICE_I;
-            priv->pic_type       = 0;
+            hpic->slice_nal_unit = HEVC_NAL_CRA_NUT;
+            hpic->slice_type     = HEVC_SLICE_I;
+            hpic->pic_type       = 0;
         } else if (pic->type == PICTURE_TYPE_P) {
             av_assert0(pic->refs[0]);
-            priv->slice_nal_unit = HEVC_NAL_TRAIL_R;
-            priv->slice_type     = HEVC_SLICE_P;
-            priv->pic_type       = 1;
+            hpic->slice_nal_unit = HEVC_NAL_TRAIL_R;
+            hpic->slice_type     = HEVC_SLICE_P;
+            hpic->pic_type       = 1;
         } else {
+            VAAPIEncodePicture *irap_ref;
             av_assert0(pic->refs[0] && pic->refs[1]);
-            if (pic->refs[1]->type == PICTURE_TYPE_I)
-                priv->slice_nal_unit = HEVC_NAL_RASL_N;
-            else
-                priv->slice_nal_unit = HEVC_NAL_TRAIL_N;
-            priv->slice_type = HEVC_SLICE_B;
-            priv->pic_type   = 2;
+            for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1]) {
+                if (irap_ref->type == PICTURE_TYPE_I)
+                    break;
+            }
+            if (pic->b_depth == ctx->max_b_depth) {
+                hpic->slice_nal_unit = irap_ref ? HEVC_NAL_RASL_N
+                                                : HEVC_NAL_TRAIL_N;
+            } else {
+                hpic->slice_nal_unit = irap_ref ? HEVC_NAL_RASL_R
+                                                : HEVC_NAL_TRAIL_R;
+            }
+            hpic->slice_type = HEVC_SLICE_B;
+            hpic->pic_type   = 2;
         }
     }
-    priv->pic_order_cnt = pic->display_order - priv->last_idr_frame;
+    hpic->pic_order_cnt = pic->display_order - hpic->last_idr_frame;
 
     if (priv->aud) {
         priv->aud_needed = 1;
@@ -717,7 +729,7 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
                 .nuh_layer_id          = 0,
                 .nuh_temporal_id_plus1 = 1,
             },
-            .pic_type = priv->pic_type,
+            .pic_type = hpic->pic_type,
         };
     } else {
         priv->aud_needed = 0;
@@ -797,17 +809,20 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
 
     vpic->decoded_curr_pic = (VAPictureHEVC) {
         .picture_id    = pic->recon_surface,
-        .pic_order_cnt = priv->pic_order_cnt,
+        .pic_order_cnt = hpic->pic_order_cnt,
         .flags         = 0,
     };
 
     for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture *ref = pic->refs[i];
+        VAAPIEncodePicture      *ref = pic->refs[i];
+        VAAPIEncodeH265Picture *href;
+
         av_assert0(ref && ref->encode_order < pic->encode_order);
+        href = ref->priv_data;
 
         vpic->reference_frames[i] = (VAPictureHEVC) {
             .picture_id    = ref->recon_surface,
-            .pic_order_cnt = ref->display_order - priv->last_idr_frame,
+            .pic_order_cnt = href->pic_order_cnt,
             .flags = (ref->display_order < pic->display_order ?
                       VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
                      (ref->display_order > pic->display_order ?
@@ -823,7 +838,7 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
 
     vpic->coded_buf = pic->output_buffer;
 
-    vpic->nal_unit_type = priv->slice_nal_unit;
+    vpic->nal_unit_type = hpic->slice_nal_unit;
 
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
@@ -850,8 +865,6 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
         av_assert0(0 && "invalid picture type");
     }
 
-    pic->nb_slices = 1;
-
     return 0;
 }
 
@@ -859,8 +872,8 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
                                                VAAPIEncodePicture *pic,
                                                VAAPIEncodeSlice *slice)
 {
-    VAAPIEncodeContext                *ctx = avctx->priv_data;
     VAAPIEncodeH265Context           *priv = avctx->priv_data;
+    VAAPIEncodeH265Picture           *hpic = pic->priv_data;
     const H265RawSPS                  *sps = &priv->raw_sps;
     const H265RawPPS                  *pps = &priv->raw_pps;
     H265RawSliceHeader                 *sh = &priv->raw_slice.header;
@@ -869,77 +882,95 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
     int i;
 
     sh->nal_unit_header = (H265RawNALUnitHeader) {
-        .nal_unit_type         = priv->slice_nal_unit,
+        .nal_unit_type         = hpic->slice_nal_unit,
         .nuh_layer_id          = 0,
         .nuh_temporal_id_plus1 = 1,
     };
 
     sh->slice_pic_parameter_set_id      = pps->pps_pic_parameter_set_id;
 
-    // Currently we only support one slice per frame.
-    sh->first_slice_segment_in_pic_flag = 1;
-    sh->slice_segment_address           = 0;
+    sh->first_slice_segment_in_pic_flag = slice->index == 0;
+    sh->slice_segment_address           = slice->block_start;
 
-    sh->slice_type = priv->slice_type;
+    sh->slice_type = hpic->slice_type;
 
-    sh->slice_pic_order_cnt_lsb = priv->pic_order_cnt &
+    sh->slice_pic_order_cnt_lsb = hpic->pic_order_cnt &
         (1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4)) - 1;
 
     if (pic->type != PICTURE_TYPE_IDR) {
         H265RawSTRefPicSet *rps;
-        VAAPIEncodePicture *st;
-        int used;
+        const VAAPIEncodeH265Picture *strp;
+        int rps_poc[MAX_DPB_SIZE];
+        int rps_used[MAX_DPB_SIZE];
+        int i, j, poc, rps_pics;
 
         sh->short_term_ref_pic_set_sps_flag = 0;
 
         rps = &sh->short_term_ref_pic_set;
         memset(rps, 0, sizeof(*rps));
 
-        for (st = ctx->pic_start; st; st = st->next) {
-            if (st->encode_order >= pic->encode_order) {
-                // Not yet in DPB.
+        rps_pics = 0;
+        for (i = 0; i < pic->nb_refs; i++) {
+            strp = pic->refs[i]->priv_data;
+            rps_poc[rps_pics]  = strp->pic_order_cnt;
+            rps_used[rps_pics] = 1;
+            ++rps_pics;
+        }
+        for (i = 0; i < pic->nb_dpb_pics; i++) {
+            if (pic->dpb[i] == pic)
                 continue;
+            for (j = 0; j < pic->nb_refs; j++) {
+                if (pic->dpb[i] == pic->refs[j])
+                    break;
             }
-            used = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                if (pic->refs[i] == st)
-                    used = 1;
-            }
-            if (!used) {
-                // Usually each picture always uses all of the others in the
-                // DPB as references.  The one case we have to treat here is
-                // a non-IDR IRAP picture, which may need to hold unused
-                // references across itself to be used for the decoding of
-                // following RASL pictures.  This looks for such an RASL
-                // picture, and keeps the reference if there is one.
-                VAAPIEncodePicture *rp;
-                for (rp = ctx->pic_start; rp; rp = rp->next) {
-                    if (rp->encode_order < pic->encode_order)
-                        continue;
-                    if (rp->type != PICTURE_TYPE_B)
-                        continue;
-                    if (rp->refs[0] == st && rp->refs[1] == pic)
-                        break;
-                }
-                if (!rp)
-                    continue;
-            }
-            // This only works for one instance of each (delta_poc_sN_minus1
-            // is relative to the previous frame in the list, not relative to
-            // the current frame directly).
-            if (st->display_order < pic->display_order) {
-                rps->delta_poc_s0_minus1[rps->num_negative_pics] =
-                    pic->display_order - st->display_order - 1;
-                rps->used_by_curr_pic_s0_flag[rps->num_negative_pics] = used;
-                ++rps->num_negative_pics;
-            } else {
-                rps->delta_poc_s1_minus1[rps->num_positive_pics] =
-                    st->display_order - pic->display_order - 1;
-                rps->used_by_curr_pic_s1_flag[rps->num_positive_pics] = used;
-                ++rps->num_positive_pics;
+            if (j < pic->nb_refs)
+                continue;
+            strp = pic->dpb[i]->priv_data;
+            rps_poc[rps_pics]  = strp->pic_order_cnt;
+            rps_used[rps_pics] = 0;
+            ++rps_pics;
+        }
+
+        for (i = 1; i < rps_pics; i++) {
+            for (j = i; j > 0; j--) {
+                if (rps_poc[j] > rps_poc[j - 1])
+                    break;
+                av_assert0(rps_poc[j] != rps_poc[j - 1]);
+                FFSWAP(int, rps_poc[j],  rps_poc[j - 1]);
+                FFSWAP(int, rps_used[j], rps_used[j - 1]);
             }
         }
 
+        av_log(avctx, AV_LOG_DEBUG, "RPS for POC %d:",
+               hpic->pic_order_cnt);
+        for (i = 0; i < rps_pics; i++) {
+            av_log(avctx, AV_LOG_DEBUG, " (%d,%d)",
+                   rps_poc[i], rps_used[i]);
+        }
+        av_log(avctx, AV_LOG_DEBUG, "\n");
+
+        for (i = 0; i < rps_pics; i++) {
+            av_assert0(rps_poc[i] != hpic->pic_order_cnt);
+            if (rps_poc[i] > hpic->pic_order_cnt)
+                break;
+        }
+
+        rps->num_negative_pics = i;
+        poc = hpic->pic_order_cnt;
+        for (j = i - 1; j >= 0; j--) {
+            rps->delta_poc_s0_minus1[i - 1 - j] = poc - rps_poc[j] - 1;
+            rps->used_by_curr_pic_s0_flag[i - 1 - j] = rps_used[j];
+            poc = rps_poc[j];
+        }
+
+        rps->num_positive_pics = rps_pics - i;
+        poc = hpic->pic_order_cnt;
+        for (j = i; j < rps_pics; j++) {
+            rps->delta_poc_s1_minus1[j - i] = rps_poc[j] - poc - 1;
+            rps->used_by_curr_pic_s1_flag[j - i] = rps_used[j];
+            poc = rps_poc[j];
+        }
+
         sh->num_long_term_sps  = 0;
         sh->num_long_term_pics = 0;
 
@@ -968,7 +999,7 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
 
     *vslice = (VAEncSliceParameterBufferHEVC) {
         .slice_segment_address = sh->slice_segment_address,
-        .num_ctu_in_slice      = priv->ctu_width * priv->ctu_height,
+        .num_ctu_in_slice      = slice->block_size,
 
         .slice_type                 = sh->slice_type,
         .slice_pic_parameter_set_id = sh->slice_pic_parameter_set_id,
@@ -989,7 +1020,7 @@ static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
         .slice_tc_offset_div2   = sh->slice_tc_offset_div2,
 
         .slice_fields.bits = {
-            .last_slice_of_pic_flag       = 1,
+            .last_slice_of_pic_flag       = slice->index == pic->nb_slices - 1,
             .dependent_slice_segment_flag = sh->dependent_slice_segment_flag,
             .colour_plane_id              = sh->colour_plane_id,
             .slice_temporal_mvp_enabled_flag =
@@ -1041,23 +1072,22 @@ static av_cold int vaapi_encode_h265_configure(AVCodecContext *avctx)
     if (err < 0)
         return err;
 
-    priv->ctu_width     = FFALIGN(ctx->surface_width,  32) / 32;
-    priv->ctu_height    = FFALIGN(ctx->surface_height, 32) / 32;
-
-    av_log(avctx, AV_LOG_VERBOSE, "Input %ux%u -> Surface %ux%u -> CTU %ux%u.\n",
-           avctx->width, avctx->height, ctx->surface_width,
-           ctx->surface_height, priv->ctu_width, priv->ctu_height);
-
     if (ctx->va_rc_mode == VA_RC_CQP) {
-        priv->fixed_qp_p = priv->qp;
+        // Note that VAAPI only supports positive QP values - the range is
+        // therefore always bounded below by 1, even in 10-bit mode where
+        // it should go down to -12.
+
+        priv->fixed_qp_p = av_clip(ctx->rc_quality, 1, 51);
         if (avctx->i_quant_factor > 0.0)
-            priv->fixed_qp_idr = (int)((priv->fixed_qp_p * avctx->i_quant_factor +
-                                        avctx->i_quant_offset) + 0.5);
+            priv->fixed_qp_idr =
+                av_clip((avctx->i_quant_factor * priv->fixed_qp_p +
+                         avctx->i_quant_offset) + 0.5, 1, 51);
         else
             priv->fixed_qp_idr = priv->fixed_qp_p;
         if (avctx->b_quant_factor > 0.0)
-            priv->fixed_qp_b = (int)((priv->fixed_qp_p * avctx->b_quant_factor +
-                                      avctx->b_quant_offset) + 0.5);
+            priv->fixed_qp_b =
+                av_clip((avctx->b_quant_factor * priv->fixed_qp_p +
+                         avctx->b_quant_offset) + 0.5, 1, 51);
         else
             priv->fixed_qp_b = priv->fixed_qp_p;
 
@@ -1065,15 +1095,11 @@ static av_cold int vaapi_encode_h265_configure(AVCodecContext *avctx)
                "%d / %d / %d for IDR- / P- / B-frames.\n",
                priv->fixed_qp_idr, priv->fixed_qp_p, priv->fixed_qp_b);
 
-    } else if (ctx->va_rc_mode == VA_RC_CBR ||
-               ctx->va_rc_mode == VA_RC_VBR) {
-        // These still need to be  set for pic_init_qp/slice_qp_delta.
+    } else {
+        // These still need to be set for init_qp/slice_qp_delta.
         priv->fixed_qp_idr = 30;
         priv->fixed_qp_p   = 30;
         priv->fixed_qp_b   = 30;
-
-    } else {
-        av_assert0(0 && "Invalid RC mode.");
     }
 
     return 0;
@@ -1092,8 +1118,17 @@ static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_h265 = {
     .profiles              = vaapi_encode_h265_profiles,
 
+    .flags                 = FLAG_SLICE_CONTROL |
+                             FLAG_B_PICTURES |
+                             FLAG_B_PICTURE_REFERENCES |
+                             FLAG_NON_IDR_KEY_PICTURES,
+
+    .default_quality       = 25,
+
     .configure             = &vaapi_encode_h265_configure,
 
+    .picture_priv_data_size = sizeof(VAAPIEncodeH265Picture),
+
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferHEVC),
     .init_sequence_params  = &vaapi_encode_h265_init_sequence_params,
 
@@ -1138,6 +1173,12 @@ static av_cold int vaapi_encode_h265_init(AVCodecContext *avctx)
     ctx->surface_width  = FFALIGN(avctx->width,  16);
     ctx->surface_height = FFALIGN(avctx->height, 16);
 
+    // CTU size is currently hard-coded to 32.
+    ctx->slice_block_width = ctx->slice_block_height = 32;
+
+    if (priv->qp > 0)
+        ctx->explicit_qp = priv->qp;
+
     return ff_vaapi_encode_init(avctx);
 }
 
@@ -1145,6 +1186,7 @@ static av_cold int vaapi_encode_h265_close(AVCodecContext *avctx)
 {
     VAAPIEncodeH265Context *priv = avctx->priv_data;
 
+    ff_cbs_fragment_free(priv->cbc, &priv->current_access_unit);
     ff_cbs_close(&priv->cbc);
 
     return ff_vaapi_encode_close(avctx);
@@ -1154,9 +1196,10 @@ static av_cold int vaapi_encode_h265_close(AVCodecContext *avctx)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_h265_options[] = {
     VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
 
     { "qp", "Constant QP (for P-frames; scaled by qfactor/qoffset for I/B)",
-      OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, 52, FLAGS },
+      OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, FLAGS },
 
     { "aud", "Include AUD",
       OFFSET(aud), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
@@ -1242,7 +1285,8 @@ AVCodec ff_hevc_vaapi_encoder = {
     .id             = AV_CODEC_ID_HEVC,
     .priv_data_size = sizeof(VAAPIEncodeH265Context),
     .init           = &vaapi_encode_h265_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h265_close,
     .priv_class     = &vaapi_encode_h265_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_mjpeg.c b/libavcodec/vaapi_encode_mjpeg.c
index fe8439ce88000..4dcdc3d16bb0a 100644
--- a/libavcodec/vaapi_encode_mjpeg.c
+++ b/libavcodec/vaapi_encode_mjpeg.c
@@ -142,7 +142,7 @@ static int vaapi_encode_mjpeg_write_image_header(AVCodecContext *avctx,
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, frag);
+    ff_cbs_fragment_reset(priv->cbc, frag);
     return err;
 }
 
@@ -230,6 +230,8 @@ static int vaapi_encode_mjpeg_init_picture_params(AVCodecContext *avctx,
     const uint8_t *components;
     int t, i, quant_scale, len;
 
+    av_assert0(pic->type == PICTURE_TYPE_IDR);
+
     desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
     av_assert0(desc);
     if (desc->flags & AV_PIX_FMT_FLAG_RGB)
@@ -436,7 +438,7 @@ static av_cold int vaapi_encode_mjpeg_configure(AVCodecContext *avctx)
     VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
     int err;
 
-    priv->quality = avctx->global_quality;
+    priv->quality = ctx->rc_quality;
     if (priv->quality < 1 || priv->quality > 100) {
         av_log(avctx, AV_LOG_ERROR, "Invalid quality value %d "
                "(must be 1-100).\n", priv->quality);
@@ -476,8 +478,13 @@ static const VAAPIEncodeProfile vaapi_encode_mjpeg_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_mjpeg = {
     .profiles              = vaapi_encode_mjpeg_profiles,
 
+    .flags                 = FLAG_CONSTANT_QUALITY_ONLY |
+                             FLAG_INTRA_ONLY,
+
     .configure             = &vaapi_encode_mjpeg_configure,
 
+    .default_quality       = 80,
+
     .picture_params_size   = sizeof(VAEncPictureParameterBufferJPEG),
     .init_picture_params   = &vaapi_encode_mjpeg_init_picture_params,
 
@@ -510,6 +517,7 @@ static av_cold int vaapi_encode_mjpeg_close(AVCodecContext *avctx)
 {
     VAAPIEncodeMJPEGContext *priv = avctx->priv_data;
 
+    ff_cbs_fragment_free(priv->cbc, &priv->current_fragment);
     ff_cbs_close(&priv->cbc);
 
     return ff_vaapi_encode_close(avctx);
@@ -531,9 +539,7 @@ static const AVOption vaapi_encode_mjpeg_options[] = {
 };
 
 static const AVCodecDefault vaapi_encode_mjpeg_defaults[] = {
-    { "global_quality", "80" },
     { "b",              "0"  },
-    { "g",              "1"  },
     { NULL },
 };
 
@@ -551,7 +557,8 @@ AVCodec ff_mjpeg_vaapi_encoder = {
     .id             = AV_CODEC_ID_MJPEG,
     .priv_data_size = sizeof(VAAPIEncodeMJPEGContext),
     .init           = &vaapi_encode_mjpeg_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mjpeg_close,
     .priv_class     = &vaapi_encode_mjpeg_class,
     .capabilities   = AV_CODEC_CAP_HARDWARE |
diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index 1377eeb9bbd9c..fb1ef71fdc15a 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c
@@ -35,9 +35,6 @@ typedef struct VAAPIEncodeMPEG2Context {
     int level;
 
     // Derived settings.
-    int mb_width;
-    int mb_height;
-
     int quant_i;
     int quant_p;
     int quant_b;
@@ -138,7 +135,7 @@ static int vaapi_encode_mpeg2_write_sequence_header(AVCodecContext *avctx,
 
     err = vaapi_encode_mpeg2_write_fragment(avctx, data, data_len, frag);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, frag);
+    ff_cbs_fragment_reset(priv->cbc, frag);
     return 0;
 }
 
@@ -162,7 +159,7 @@ static int vaapi_encode_mpeg2_write_picture_header(AVCodecContext *avctx,
 
     err = vaapi_encode_mpeg2_write_fragment(avctx, data, data_len, frag);
 fail:
-    ff_cbs_fragment_uninit(priv->cbc, frag);
+    ff_cbs_fragment_reset(priv->cbc, frag);
     return 0;
 }
 
@@ -316,7 +313,8 @@ static int vaapi_encode_mpeg2_init_sequence_params(AVCodecContext *avctx)
 
     goph->group_start_code = MPEG2_START_GROUP;
 
-    goph->time_code   = 0;
+    // Marker bit in the middle of time_code.
+    goph->time_code   = 1 << 12;
     goph->closed_gop  = 1;
     goph->broken_link = 0;
 
@@ -477,8 +475,6 @@ static int vaapi_encode_mpeg2_init_picture_params(AVCodecContext *avctx,
     vpic->f_code[1][0]       = pce->f_code[1][0];
     vpic->f_code[1][1]       = pce->f_code[1][1];
 
-    pic->nb_slices = priv->mb_height;
-
     return 0;
 }
 
@@ -490,8 +486,8 @@ static int vaapi_encode_mpeg2_init_slice_params(AVCodecContext *avctx,
     VAEncSliceParameterBufferMPEG2   *vslice = slice->codec_slice_params;
     int qp;
 
-    vslice->macroblock_address = priv->mb_width * slice->index;
-    vslice->num_macroblocks    = priv->mb_width;
+    vslice->macroblock_address = slice->block_start;
+    vslice->num_macroblocks    = slice->block_size;
 
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
@@ -525,23 +521,18 @@ static av_cold int vaapi_encode_mpeg2_configure(AVCodecContext *avctx)
     if (err < 0)
         return err;
 
-    priv->mb_width  = FFALIGN(avctx->width,  16) / 16;
-    priv->mb_height = FFALIGN(avctx->height, 16) / 16;
-
     if (ctx->va_rc_mode == VA_RC_CQP) {
-        priv->quant_p = av_clip(avctx->global_quality, 1, 31);
+        priv->quant_p = av_clip(ctx->rc_quality, 1, 31);
         if (avctx->i_quant_factor > 0.0)
-            priv->quant_i = av_clip((avctx->global_quality *
-                                     avctx->i_quant_factor +
-                                     avctx->i_quant_offset) + 0.5,
-                                    1, 31);
+            priv->quant_i =
+                av_clip((avctx->i_quant_factor * priv->quant_p +
+                         avctx->i_quant_offset) + 0.5, 1, 31);
         else
             priv->quant_i = priv->quant_p;
         if (avctx->b_quant_factor > 0.0)
-            priv->quant_b = av_clip((avctx->global_quality *
-                                     avctx->b_quant_factor +
-                                     avctx->b_quant_offset) + 0.5,
-                                    1, 31);
+            priv->quant_b =
+                av_clip((avctx->b_quant_factor * priv->quant_p +
+                         avctx->b_quant_offset) + 0.5, 1, 31);
         else
             priv->quant_b = priv->quant_p;
 
@@ -550,9 +541,17 @@ static av_cold int vaapi_encode_mpeg2_configure(AVCodecContext *avctx)
                priv->quant_i, priv->quant_p, priv->quant_b);
 
     } else {
-        av_assert0(0 && "Invalid RC mode.");
+        priv->quant_i = 16;
+        priv->quant_p = 16;
+        priv->quant_b = 16;
     }
 
+    ctx->slice_block_rows = FFALIGN(avctx->height, 16) / 16;
+    ctx->slice_block_cols = FFALIGN(avctx->width,  16) / 16;
+
+    ctx->nb_slices  = ctx->slice_block_rows;
+    ctx->slice_size = 1;
+
     return 0;
 }
 
@@ -565,8 +564,12 @@ static const VAAPIEncodeProfile vaapi_encode_mpeg2_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_mpeg2 = {
     .profiles              = vaapi_encode_mpeg2_profiles,
 
+    .flags                 = FLAG_B_PICTURES,
+
     .configure             = &vaapi_encode_mpeg2_configure,
 
+    .default_quality       = 10,
+
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferMPEG2),
     .init_sequence_params  = &vaapi_encode_mpeg2_init_sequence_params,
 
@@ -628,6 +631,7 @@ static av_cold int vaapi_encode_mpeg2_close(AVCodecContext *avctx)
 {
     VAAPIEncodeMPEG2Context *priv = avctx->priv_data;
 
+    ff_cbs_fragment_free(priv->cbc, &priv->current_fragment);
     ff_cbs_close(&priv->cbc);
 
     return ff_vaapi_encode_close(avctx);
@@ -637,6 +641,7 @@ static av_cold int vaapi_encode_mpeg2_close(AVCodecContext *avctx)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_mpeg2_options[] = {
     VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
 
     { "profile", "Set profile (in profile_and_level_indication)",
       OFFSET(profile), AV_OPT_TYPE_INT,
@@ -671,7 +676,6 @@ static const AVCodecDefault vaapi_encode_mpeg2_defaults[] = {
     { "i_qoffset",      "0"   },
     { "b_qfactor",      "6/5" },
     { "b_qoffset",      "0"   },
-    { "global_quality", "10"  },
     { "qmin",           "-1"  },
     { "qmax",           "-1"  },
     { NULL },
@@ -691,7 +695,8 @@ AVCodec ff_mpeg2_vaapi_encoder = {
     .id             = AV_CODEC_ID_MPEG2VIDEO,
     .priv_data_size = sizeof(VAAPIEncodeMPEG2Context),
     .init           = &vaapi_encode_mpeg2_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mpeg2_close,
     .priv_class     = &vaapi_encode_mpeg2_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index 697b465787205..ddbe4c9075633 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c
@@ -161,14 +161,15 @@ static int vaapi_encode_vp8_write_quant_table(AVCodecContext *avctx,
 
 static av_cold int vaapi_encode_vp8_configure(AVCodecContext *avctx)
 {
+    VAAPIEncodeContext     *ctx = avctx->priv_data;
     VAAPIEncodeVP8Context *priv = avctx->priv_data;
 
-    priv->q_index_p = av_clip(avctx->global_quality, 0, VP8_MAX_QUANT);
+    priv->q_index_p = av_clip(ctx->rc_quality, 0, VP8_MAX_QUANT);
     if (avctx->i_quant_factor > 0.0)
-        priv->q_index_i = av_clip((avctx->global_quality *
-                                   avctx->i_quant_factor +
-                                   avctx->i_quant_offset) + 0.5,
-                                  0, VP8_MAX_QUANT);
+        priv->q_index_i =
+            av_clip((avctx->i_quant_factor * priv->q_index_p  +
+                     avctx->i_quant_offset) + 0.5,
+                    0, VP8_MAX_QUANT);
     else
         priv->q_index_i = priv->q_index_p;
 
@@ -185,6 +186,8 @@ static const VAAPIEncodeType vaapi_encode_type_vp8 = {
 
     .configure             = &vaapi_encode_vp8_configure,
 
+    .default_quality       = 40,
+
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferVP8),
     .init_sequence_params  = &vaapi_encode_vp8_init_sequence_params,
 
@@ -215,6 +218,8 @@ static av_cold int vaapi_encode_vp8_init(AVCodecContext *avctx)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_vp8_options[] = {
     VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
+
     { "loop_filter_level", "Loop filter level",
       OFFSET(loop_filter_level), AV_OPT_TYPE_INT, { .i64 = 16 }, 0, 63, FLAGS },
     { "loop_filter_sharpness", "Loop filter sharpness",
@@ -226,7 +231,6 @@ static const AVCodecDefault vaapi_encode_vp8_defaults[] = {
     { "b",              "0"   },
     { "bf",             "0"   },
     { "g",              "120" },
-    { "global_quality", "40"  },
     { "qmin",           "-1"  },
     { "qmax",           "-1"  },
     { NULL },
@@ -246,7 +250,8 @@ AVCodec ff_vp8_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP8,
     .priv_data_size = sizeof(VAAPIEncodeVP8Context),
     .init           = &vaapi_encode_vp8_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp8_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index 39bc868f3a6e2..f89fd0d07a27a 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c
@@ -32,6 +32,10 @@
 #define VP9_MAX_QUANT 255
 
 
+typedef struct VAAPIEncodeVP9Picture { // per-picture private data for the VP9 encoder
+    int slot; // index into reference_frames[] holding this picture; set to 8 for an unreferenced B frame
+} VAAPIEncodeVP9Picture;
+
 typedef struct VAAPIEncodeVP9Context {
     VAAPIEncodeContext common;
 
@@ -43,22 +47,9 @@ typedef struct VAAPIEncodeVP9Context {
     int q_idx_idr;
     int q_idx_p;
     int q_idx_b;
-
-    // Stream state.
-
-    // Reference direction for B-like frames:
-    // 0 - most recent P/IDR frame is last.
-    // 1 - most recent P frame is golden.
-    int last_ref_dir;
 } VAAPIEncodeVP9Context;
 
 
-#define vseq_var(name)     vseq->name, name
-#define vseq_field(name)   vseq->seq_fields.bits.name, name
-#define vpic_var(name)     vpic->name, name
-#define vpic_field(name)   vpic->pic_fields.bits.name, name
-
-
 static int vaapi_encode_vp9_init_sequence_params(AVCodecContext *avctx)
 {
     VAAPIEncodeContext               *ctx = avctx->priv_data;
@@ -88,6 +79,7 @@ static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
 {
     VAAPIEncodeContext              *ctx = avctx->priv_data;
     VAAPIEncodeVP9Context          *priv = avctx->priv_data;
+    VAAPIEncodeVP9Picture          *hpic = pic->priv_data;
     VAEncPictureParameterBufferVP9 *vpic = pic->codec_picture_params;
     int i;
 
@@ -98,65 +90,71 @@ static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
     case PICTURE_TYPE_IDR:
         av_assert0(pic->nb_refs == 0);
         vpic->ref_flags.bits.force_kf = 1;
-        vpic->refresh_frame_flags = 0x01;
-        priv->last_ref_dir = 0;
+        vpic->refresh_frame_flags = 0xff;
+        hpic->slot = 0;
         break;
     case PICTURE_TYPE_P:
         av_assert0(pic->nb_refs == 1);
-        if (ctx->b_per_p > 0) {
-            if (priv->last_ref_dir) {
-                vpic->ref_flags.bits.ref_frame_ctrl_l0  = 2;
-                vpic->ref_flags.bits.ref_gf_idx         = 1;
-                vpic->ref_flags.bits.ref_gf_sign_bias   = 1;
-                vpic->refresh_frame_flags = 0x01;
+        {
+            VAAPIEncodeVP9Picture *href = pic->refs[0]->priv_data;
+            av_assert0(href->slot == 0 || href->slot == 1);
+
+            if (ctx->max_b_depth > 0) {
+                hpic->slot = !href->slot;
+                vpic->refresh_frame_flags = 1 << hpic->slot | 0xfc;
             } else {
-                vpic->ref_flags.bits.ref_frame_ctrl_l0  = 1;
-                vpic->ref_flags.bits.ref_last_idx       = 0;
-                vpic->ref_flags.bits.ref_last_sign_bias = 1;
-                vpic->refresh_frame_flags = 0x02;
+                hpic->slot = 0;
+                vpic->refresh_frame_flags = 0xff;
             }
-        } else {
             vpic->ref_flags.bits.ref_frame_ctrl_l0  = 1;
-            vpic->ref_flags.bits.ref_last_idx       = 0;
+            vpic->ref_flags.bits.ref_last_idx       = href->slot;
             vpic->ref_flags.bits.ref_last_sign_bias = 1;
-            vpic->refresh_frame_flags = 0x01;
         }
         break;
     case PICTURE_TYPE_B:
         av_assert0(pic->nb_refs == 2);
-        if (priv->last_ref_dir) {
+        {
+            VAAPIEncodeVP9Picture *href0 = pic->refs[0]->priv_data,
+                                  *href1 = pic->refs[1]->priv_data;
+            av_assert0(href0->slot < pic->b_depth + 1 &&
+                       href1->slot < pic->b_depth + 1);
+
+            if (pic->b_depth == ctx->max_b_depth) {
+                // Unreferenced frame.
+                vpic->refresh_frame_flags = 0x00;
+                hpic->slot = 8;
+            } else {
+                vpic->refresh_frame_flags = 0xfe << pic->b_depth & 0xff;
+                hpic->slot = 1 + pic->b_depth;
+            }
             vpic->ref_flags.bits.ref_frame_ctrl_l0  = 1;
             vpic->ref_flags.bits.ref_frame_ctrl_l1  = 2;
-            vpic->ref_flags.bits.ref_last_idx       = 0;
+            vpic->ref_flags.bits.ref_last_idx       = href0->slot;
             vpic->ref_flags.bits.ref_last_sign_bias = 1;
-            vpic->ref_flags.bits.ref_gf_idx         = 1;
+            vpic->ref_flags.bits.ref_gf_idx         = href1->slot;
             vpic->ref_flags.bits.ref_gf_sign_bias   = 0;
-        } else {
-            vpic->ref_flags.bits.ref_frame_ctrl_l0  = 2;
-            vpic->ref_flags.bits.ref_frame_ctrl_l1  = 1;
-            vpic->ref_flags.bits.ref_last_idx       = 0;
-            vpic->ref_flags.bits.ref_last_sign_bias = 0;
-            vpic->ref_flags.bits.ref_gf_idx         = 1;
-            vpic->ref_flags.bits.ref_gf_sign_bias   = 1;
         }
-        vpic->refresh_frame_flags = 0x00;
         break;
     default:
         av_assert0(0 && "invalid picture type");
     }
+    if (vpic->refresh_frame_flags == 0x00) {
+        av_log(avctx, AV_LOG_DEBUG, "Pic %"PRId64" not stored.\n",
+               pic->display_order);
+    } else {
+        av_log(avctx, AV_LOG_DEBUG, "Pic %"PRId64" stored in slot %d.\n",
+               pic->display_order, hpic->slot);
+    }
 
     for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
         vpic->reference_frames[i] = VA_INVALID_SURFACE;
-    if (pic->type == PICTURE_TYPE_P) {
-        av_assert0(pic->refs[0]);
-        vpic->reference_frames[priv->last_ref_dir] =
-            pic->refs[0]->recon_surface;
-    } else if (pic->type == PICTURE_TYPE_B) {
-        av_assert0(pic->refs[0] && pic->refs[1]);
-        vpic->reference_frames[!priv->last_ref_dir] =
-            pic->refs[0]->recon_surface;
-        vpic->reference_frames[priv->last_ref_dir] =
-            pic->refs[1]->recon_surface;
+
+    for (i = 0; i < pic->nb_refs; i++) {
+        VAAPIEncodePicture *ref_pic = pic->refs[i];
+        int slot;
+        slot = ((VAAPIEncodeVP9Picture*)ref_pic->priv_data)->slot;
+        av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
+        vpic->reference_frames[slot] = ref_pic->recon_surface;
     }
 
     vpic->pic_flags.bits.frame_type = (pic->type != PICTURE_TYPE_IDR);
@@ -175,31 +173,34 @@ static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
     vpic->filter_level    = priv->loop_filter_level;
     vpic->sharpness_level = priv->loop_filter_sharpness;
 
-    if (ctx->b_per_p > 0 && pic->type == PICTURE_TYPE_P)
-        priv->last_ref_dir = !priv->last_ref_dir;
-
     return 0;
 }
 
 static av_cold int vaapi_encode_vp9_configure(AVCodecContext *avctx)
 {
+    VAAPIEncodeContext     *ctx = avctx->priv_data; // same object as priv: "common" is its first member
     VAAPIEncodeVP9Context *priv = avctx->priv_data;
 
-    priv->q_idx_p = av_clip(avctx->global_quality, 0, VP9_MAX_QUANT);
-    if (avctx->i_quant_factor > 0.0)
-        priv->q_idx_idr = av_clip((avctx->global_quality *
-                                   avctx->i_quant_factor +
-                                   avctx->i_quant_offset) + 0.5,
-                                  0, VP9_MAX_QUANT);
-    else
-        priv->q_idx_idr = priv->q_idx_p;
-    if (avctx->b_quant_factor > 0.0)
-        priv->q_idx_b = av_clip((avctx->global_quality *
-                                 avctx->b_quant_factor +
-                                 avctx->b_quant_offset) + 0.5,
-                                0, VP9_MAX_QUANT);
-    else
-        priv->q_idx_b = priv->q_idx_p;
+    if (ctx->rc_mode->quality) { // derive all three quantiser indices from rc_quality
+        priv->q_idx_p = av_clip(ctx->rc_quality, 0, VP9_MAX_QUANT);
+        if (avctx->i_quant_factor > 0.0) // IDR index: factor * P index + offset, + 0.5, then clamped
+            priv->q_idx_idr =
+                av_clip((avctx->i_quant_factor * priv->q_idx_p  +
+                         avctx->i_quant_offset) + 0.5,
+                        0, VP9_MAX_QUANT);
+        else // non-positive factor: reuse the P index unchanged
+            priv->q_idx_idr = priv->q_idx_p;
+        if (avctx->b_quant_factor > 0.0) // B index: same formula with the b_quant_* pair
+            priv->q_idx_b =
+                av_clip((avctx->b_quant_factor * priv->q_idx_p  +
+                         avctx->b_quant_offset) + 0.5,
+                        0, VP9_MAX_QUANT);
+        else // non-positive factor: reuse the P index unchanged
+            priv->q_idx_b = priv->q_idx_p;
+    } else {
+        // rc_mode->quality is unset: use one arbitrary index for IDR, P and B alike.
+        priv->q_idx_idr = priv->q_idx_p = priv->q_idx_b = 100;
+    }
 
     return 0;
 }
@@ -213,6 +214,13 @@ static const VAAPIEncodeProfile vaapi_encode_vp9_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_vp9 = {
     .profiles              = vaapi_encode_vp9_profiles,
 
+    .flags                 = FLAG_B_PICTURES |
+                             FLAG_B_PICTURE_REFERENCES,
+
+    .default_quality       = 100,
+
+    .picture_priv_data_size = sizeof(VAAPIEncodeVP9Picture),
+
     .configure             = &vaapi_encode_vp9_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferVP9),
@@ -244,6 +252,8 @@ static av_cold int vaapi_encode_vp9_init(AVCodecContext *avctx)
 #define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 static const AVOption vaapi_encode_vp9_options[] = {
     VAAPI_ENCODE_COMMON_OPTIONS,
+    VAAPI_ENCODE_RC_OPTIONS,
+
     { "loop_filter_level", "Loop filter level",
       OFFSET(loop_filter_level), AV_OPT_TYPE_INT, { .i64 = 16 }, 0, 63, FLAGS },
     { "loop_filter_sharpness", "Loop filter sharpness",
@@ -255,7 +265,6 @@ static const AVCodecDefault vaapi_encode_vp9_defaults[] = {
     { "b",              "0"   },
     { "bf",             "0"   },
     { "g",              "250" },
-    { "global_quality", "100" },
     { "qmin",           "-1"  },
     { "qmax",           "-1"  },
     { NULL },
@@ -275,7 +284,8 @@ AVCodec ff_vp9_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP9,
     .priv_data_size = sizeof(VAAPIEncodeVP9Context),
     .init           = &vaapi_encode_vp9_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp9_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 3581d87b57b15..e102b931d84a5 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -379,11 +379,6 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo
     } else {
         v->res_rtm_flag = get_bits1(gb); //reserved
     }
-    if (!v->res_rtm_flag) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Old WMV3 version detected, some frames may be decoded incorrectly\n");
-        //return -1;
-    }
     //TODO: figure out what they mean (always 0x402F)
     if (!v->res_fasttx)
         skip_bits(gb, 16);
diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c
index bb54947f559b0..493ffde61104c 100644
--- a/libavcodec/vc1_parser.c
+++ b/libavcodec/vc1_parser.c
@@ -67,7 +67,7 @@ static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
     int ret;
     vpc->v.s.avctx = avctx;
     vpc->v.parse_only = 1;
-    init_get_bits(&gb, buf, buf_size * 8);
+    init_get_bits8(&gb, buf, buf_size);
     switch (vpc->prev_start_code) {
     case VC1_CODE_SEQHDR & 0xFF:
         ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
diff --git a/libavcodec/vc1_pred.c b/libavcodec/vc1_pred.c
index de736ec7750d7..9e29b44a1f0a8 100644
--- a/libavcodec/vc1_pred.c
+++ b/libavcodec/vc1_pred.c
@@ -262,18 +262,23 @@ void ff_vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
         return;
     }
 
-    C = s->current_picture.motion_val[dir][xy -    1 + v->blocks_off];
-    A = s->current_picture.motion_val[dir][xy - wrap + v->blocks_off];
+    a_valid = !s->first_slice_line || (n == 2 || n == 3);
+    b_valid = a_valid;
+    c_valid = s->mb_x || (n == 1 || n == 3);
     if (mv1) {
         if (v->field_mode && mixedmv_pic)
             off = (s->mb_x == (s->mb_width - 1)) ? -2 : 2;
         else
             off = (s->mb_x == (s->mb_width - 1)) ? -1 : 2;
+        b_valid = b_valid && s->mb_width > 1;
     } else {
         //in 4-MV mode different blocks have different B predictor position
         switch (n) {
         case 0:
-            off = (s->mb_x > 0) ? -1 : 1;
+            if (v->res_rtm_flag)
+                off = s->mb_x ? -1 : 1;
+            else
+                off = s->mb_x ? -1 : 2 * s->mb_width - wrap - 1;
             break;
         case 1:
             off = (s->mb_x == (s->mb_width - 1)) ? -1 : 1;
@@ -284,12 +289,10 @@ void ff_vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
         case 3:
             off = -1;
         }
+        if (v->field_mode && s->mb_width == 1)
+            b_valid = b_valid && c_valid;
     }
-    B = s->current_picture.motion_val[dir][xy - wrap + off + v->blocks_off];
 
-    a_valid = !s->first_slice_line || (n == 2 || n == 3);
-    b_valid = a_valid && (s->mb_width > 1);
-    c_valid = s->mb_x || (n == 1 || n == 3);
     if (v->field_mode) {
         a_valid = a_valid && !is_intra[xy - wrap];
         b_valid = b_valid && !is_intra[xy - wrap + off];
@@ -297,6 +300,7 @@ void ff_vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
     }
 
     if (a_valid) {
+        A = s->current_picture.motion_val[dir][xy - wrap + v->blocks_off];
         a_f = v->mv_f[dir][xy - wrap + v->blocks_off];
         num_oppfield  += a_f;
         num_samefield += 1 - a_f;
@@ -307,6 +311,7 @@ void ff_vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
         a_f = 0;
     }
     if (b_valid) {
+        B = s->current_picture.motion_val[dir][xy - wrap + off + v->blocks_off];
         b_f = v->mv_f[dir][xy - wrap + off + v->blocks_off];
         num_oppfield  += b_f;
         num_samefield += 1 - b_f;
@@ -317,6 +322,7 @@ void ff_vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
         b_f = 0;
     }
     if (c_valid) {
+        C = s->current_picture.motion_val[dir][xy - 1 + v->blocks_off];
         c_f = v->mv_f[dir][xy - 1 + v->blocks_off];
         num_oppfield  += c_f;
         num_samefield += 1 - c_f;
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 1b2ec989cda47..167f06d7aebbf 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -208,8 +208,12 @@ int ff_vdpau_common_init(AVCodecContext *avctx, VdpDecoderProfile profile,
         return vdpau_error(status);
     if (avctx->codec_id == AV_CODEC_ID_HEVC && strncmp(info_string, "NVIDIA ", 7) == 0 &&
         !(avctx->hwaccel_flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH)) {
-        av_log(avctx, AV_LOG_VERBOSE, "HEVC with NVIDIA VDPAU drivers is buggy, skipping.\n");
-        return AVERROR(ENOTSUP);
+        int driver_version = 0;
+        sscanf(info_string, "NVIDIA VDPAU Driver Shared Library  %d", &driver_version);
+        if (driver_version < 410) {
+            av_log(avctx, AV_LOG_VERBOSE, "HEVC with NVIDIA VDPAU drivers is buggy, skipping.\n");
+            return AVERROR(ENOTSUP);
+        }
     }
 
     status = vdctx->get_proc_address(vdctx->device,
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 0d9a28f432778..309e4111cbea8 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,8 +28,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  33
-#define LIBAVCODEC_VERSION_MICRO 101
+#define LIBAVCODEC_VERSION_MINOR  47
+#define LIBAVCODEC_VERSION_MICRO 103
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -132,6 +132,9 @@
 #ifndef FF_API_NEXT
 #define FF_API_NEXT              (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
+#ifndef FF_API_UNSANITIZED_BITRATES
+#define FF_API_UNSANITIZED_BITRATES (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
 
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index ac45e23c16feb..da7236f1002c7 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -903,6 +903,11 @@ static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
 
+    frame->crop_right = 0;
+    frame->crop_left = 0;
+    frame->crop_top = 0;
+    frame->crop_bottom = 0;
+
     if (vtctx->reconfig_needed == true) {
         vtctx->reconfig_needed = false;
         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox decoder needs reconfig, restarting..\n");
@@ -969,6 +974,14 @@ static int videotoolbox_hevc_end_frame(AVCodecContext *avctx)
     HEVCContext *h = avctx->priv_data;
     AVFrame *frame = h->ref->frame;
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-    int ret = videotoolbox_common_end_frame(avctx, frame);
+    int ret;
+
+    /* Zero every crop field of the output frame before the common end-of-frame path runs. */
+    h->output_frame->crop_right = 0;
+    h->output_frame->crop_left = 0;
+    h->output_frame->crop_top = 0;
+    h->output_frame->crop_bottom = 0;
+
+    ret = videotoolbox_common_end_frame(avctx, frame);
     vtctx->bitstream_size = 0;
     return ret;
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 0e6da89abbcad..b248c90413961 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -223,6 +223,10 @@ typedef struct Vp3DecodeContext {
      * which of the fragments are coded */
     int *coded_fragment_list[3];
 
+    int *kf_coded_fragment_list;
+    int *nkf_coded_fragment_list;
+    int num_kf_coded_fragment[3];
+
     VLC dc_vlc[16];
     VLC ac_vlc_1[16];
     VLC ac_vlc_2[16];
@@ -271,7 +275,8 @@ static av_cold void free_tables(AVCodecContext *avctx)
 
     av_freep(&s->superblock_coding);
     av_freep(&s->all_fragments);
-    av_freep(&s->coded_fragment_list[0]);
+    av_freep(&s->nkf_coded_fragment_list);
+    av_freep(&s->kf_coded_fragment_list);
     av_freep(&s->dct_tokens_base);
     av_freep(&s->superblock_fragments);
     av_freep(&s->macroblock_coding);
@@ -411,27 +416,7 @@ static void init_dequantizer(Vp3DecodeContext *s, int qpi)
  */
 static void init_loop_filter(Vp3DecodeContext *s)
 {
-    int *bounding_values = s->bounding_values_array + 127;
-    int filter_limit;
-    int x;
-    int value;
-
-    filter_limit = s->filter_limit_values[s->qps[0]];
-    av_assert0(filter_limit < 128U);
-
-    /* set up the bounding values */
-    memset(s->bounding_values_array, 0, 256 * sizeof(int));
-    for (x = 0; x < filter_limit; x++) {
-        bounding_values[-x] = -x;
-        bounding_values[x] = x;
-    }
-    for (x = value = filter_limit; x < 128 && value; x++, value--) {
-        bounding_values[ x] =  value;
-        bounding_values[-x] = -value;
-    }
-    if (value)
-        bounding_values[128] = value;
-    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202;
+    ff_vp3dsp_set_bounding_values(s->bounding_values_array, s->filter_limit_values[s->qps[0]]);
 }
 
 /*
@@ -538,44 +523,65 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
     s->total_num_coded_frags = 0;
     memset(s->macroblock_coding, MODE_COPY, s->macroblock_count);
 
+    s->coded_fragment_list[0] = s->keyframe ? s->kf_coded_fragment_list
+                                            : s->nkf_coded_fragment_list;
+
     for (plane = 0; plane < 3; plane++) {
         int sb_start = superblock_starts[plane];
         int sb_end   = sb_start + (plane ? s->c_superblock_count
                                          : s->y_superblock_count);
         int num_coded_frags = 0;
 
-        for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {
-            if (s->keyframe == 0 && get_bits_left(gb) < plane0_num_coded_frags >> 2) {
-                return AVERROR_INVALIDDATA;
-            }
-            /* iterate through all 16 fragments in a superblock */
-            for (j = 0; j < 16; j++) {
-                /* if the fragment is in bounds, check its coding status */
-                current_fragment = s->superblock_fragments[i * 16 + j];
-                if (current_fragment != -1) {
-                    int coded = s->superblock_coding[i];
-
-                    if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
-                        /* fragment may or may not be coded; this is the case
-                         * that cares about the fragment coding runs */
-                        if (current_run-- == 0) {
-                            bit        ^= 1;
-                            current_run = get_vlc2(gb, s->fragment_run_length_vlc.table, 5, 2);
+        if (s->keyframe) {
+            if (s->num_kf_coded_fragment[plane] == -1) {
+                for (i = sb_start; i < sb_end; i++) {
+                    /* iterate through all 16 fragments in a superblock */
+                    for (j = 0; j < 16; j++) {
+                        /* if the fragment is in bounds, check its coding status */
+                        current_fragment = s->superblock_fragments[i * 16 + j];
+                        if (current_fragment != -1) {
+                            s->coded_fragment_list[plane][num_coded_frags++] =
+                                current_fragment;
                         }
-                        coded = bit;
                     }
+                }
+                s->num_kf_coded_fragment[plane] = num_coded_frags;
+            } else
+                num_coded_frags = s->num_kf_coded_fragment[plane];
+        } else {
+            for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {
+                if (get_bits_left(gb) < plane0_num_coded_frags >> 2) {
+                    return AVERROR_INVALIDDATA;
+                }
+                /* iterate through all 16 fragments in a superblock */
+                for (j = 0; j < 16; j++) {
+                    /* if the fragment is in bounds, check its coding status */
+                    current_fragment = s->superblock_fragments[i * 16 + j];
+                    if (current_fragment != -1) {
+                        int coded = s->superblock_coding[i];
+
+                        if (coded == SB_PARTIALLY_CODED) {
+                            /* fragment may or may not be coded; this is the case
+                             * that cares about the fragment coding runs */
+                            if (current_run-- == 0) {
+                                bit        ^= 1;
+                                current_run = get_vlc2(gb, s->fragment_run_length_vlc.table, 5, 2);
+                            }
+                            coded = bit;
+                        }
 
-                    if (coded) {
-                        /* default mode; actual mode will be decoded in
-                         * the next phase */
-                        s->all_fragments[current_fragment].coding_method =
-                            MODE_INTER_NO_MV;
-                        s->coded_fragment_list[plane][num_coded_frags++] =
-                            current_fragment;
-                    } else {
-                        /* not coded; copy this fragment from the prior frame */
-                        s->all_fragments[current_fragment].coding_method =
-                            MODE_COPY;
+                        if (coded) {
+                            /* default mode; actual mode will be decoded in
+                             * the next phase */
+                            s->all_fragments[current_fragment].coding_method =
+                                MODE_INTER_NO_MV;
+                            s->coded_fragment_list[plane][num_coded_frags++] =
+                                current_fragment;
+                        } else {
+                            /* not coded; copy this fragment from the prior frame */
+                            s->all_fragments[current_fragment].coding_method =
+                                MODE_COPY;
+                        }
                     }
                 }
             }
@@ -1691,7 +1697,9 @@ static av_cold int allocate_tables(AVCodecContext *avctx)
     s->superblock_coding = av_mallocz(s->superblock_count);
     s->all_fragments     = av_mallocz_array(s->fragment_count, sizeof(Vp3Fragment));
 
-    s->coded_fragment_list[0] = av_mallocz_array(s->fragment_count, sizeof(int));
+    s-> kf_coded_fragment_list = av_mallocz_array(s->fragment_count, sizeof(int));
+    s->nkf_coded_fragment_list = av_mallocz_array(s->fragment_count, sizeof(int));
+    memset(s-> num_kf_coded_fragment, -1, sizeof(s-> num_kf_coded_fragment));
 
     s->dct_tokens_base = av_mallocz_array(s->fragment_count,
                                           64 * sizeof(*s->dct_tokens_base));
@@ -1703,7 +1711,8 @@ static av_cold int allocate_tables(AVCodecContext *avctx)
     s->macroblock_coding    = av_mallocz(s->macroblock_count + 1);
 
     if (!s->superblock_coding    || !s->all_fragments          ||
-        !s->dct_tokens_base      || !s->coded_fragment_list[0] ||
+        !s->dct_tokens_base      || !s->kf_coded_fragment_list ||
+        !s->nkf_coded_fragment_list ||
         !s->superblock_fragments || !s->macroblock_coding      ||
         !s->motion_val[0]        || !s->motion_val[1]) {
         vp3_decode_end(avctx);
@@ -1932,6 +1941,7 @@ static int update_frames(AVCodecContext *avctx)
     return ret;
 }
 
+#if HAVE_THREADS
 static int ref_frame(Vp3DecodeContext *s, ThreadFrame *dst, ThreadFrame *src)
 {
     ff_thread_release_buffer(s->avctx, dst);
@@ -1950,7 +1960,6 @@ static int ref_frames(Vp3DecodeContext *dst, Vp3DecodeContext *src)
     return 0;
 }
 
-#if HAVE_THREADS
 static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     Vp3DecodeContext *s = dst->priv_data, *s1 = src->priv_data;
@@ -2257,6 +2266,8 @@ static int vp3_init_thread_copy(AVCodecContext *avctx)
     s->superblock_coding      = NULL;
     s->all_fragments          = NULL;
     s->coded_fragment_list[0] = NULL;
+    s-> kf_coded_fragment_list= NULL;
+    s->nkf_coded_fragment_list= NULL;
     s->dct_tokens_base        = NULL;
     s->superblock_fragments   = NULL;
     s->macroblock_coding      = NULL;
diff --git a/libavcodec/vp3data.h b/libavcodec/vp3data.h
index 3884bca878766..c82b1b3a8642a 100644
--- a/libavcodec/vp3data.h
+++ b/libavcodec/vp3data.h
@@ -73,7 +73,7 @@ static const uint8_t vp31_dc_scale_factor[64] = {
      20,  10,  10,  10,  10,  10,  10,  10
 };
 
-static const uint32_t vp31_ac_scale_factor[64] = {
+static const uint16_t vp31_ac_scale_factor[64] = {
     500, 450, 400, 370, 340, 310, 285, 265,
     245, 225, 210, 195, 185, 180, 170, 160,
     150, 145, 135, 130, 125, 115, 110, 107,
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index fdaa292d3717b..ac4c57441c434 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -27,6 +27,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
 
 #include "avcodec.h"
 #include "rnd_avg.h"
@@ -194,6 +195,158 @@ static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
     }
 }
 
+static av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride,
+                                    int16_t *input, int type)
+{
+    int16_t *ip = input;
+    /* Reduced 8x8 IDCT reading only elements 0..3 of each vector; type 1 stores, else adds to dst. */
+    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
+    int Ed, Gd, Add, Bdd, Fd, Hd;
+
+    int i;
+
+    /* First pass, in place: 4 vectors with element stride 8, using inputs 0..3 only */
+    for (i = 0; i < 4; i++) {
+        /* Skip the vector entirely when all four inputs are zero */
+        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) {
+            A =  M(xC1S7, ip[1 * 8]);
+            B =  M(xC7S1, ip[1 * 8]);
+            C =  M(xC3S5, ip[3 * 8]);
+            D = -M(xC5S3, ip[3 * 8]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, ip[0 * 8]);
+            F = E;
+
+            G = M(xC2S6, ip[2 * 8]);
+            H = M(xC6S2, ip[2 * 8]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+
+            /* Write all eight outputs back over the vector that was just read */
+            ip[0 * 8] = Gd + Cd;
+            ip[7 * 8] = Gd - Cd;
+
+            ip[1 * 8] = Add + Hd;
+            ip[2 * 8] = Add - Hd;
+
+            ip[3 * 8] = Ed + Dd;
+            ip[4 * 8] = Ed - Dd;
+
+            ip[5 * 8] = Fd + Bdd;
+            ip[6 * 8] = Fd - Bdd;
+
+        }
+
+        ip += 1; /* next stride-8 vector */
+    }
+
+    ip = input; /* rewind for the second pass */
+    /* Second pass: 8 contiguous vectors (ip += 8), each written to dst[0..7 * stride] (dst++) */
+    for (i = 0; i < 8; i++) {
+        /* Check for non-zero values (bitwise or faster than ||) */
+        if (ip[0] | ip[1] | ip[2] | ip[3]) {
+            A =  M(xC1S7, ip[1]);
+            B =  M(xC7S1, ip[1]);
+            C =  M(xC3S5, ip[3]);
+            D = -M(xC5S3, ip[3]);
+
+            Ad = M(xC4S4, (A - C));
+            Bd = M(xC4S4, (B - D));
+
+            Cd = A + C;
+            Dd = B + D;
+
+            E = M(xC4S4, ip[0]);
+            if (type == 1)
+                E += 16 * 128; /* becomes an offset of 128 after the >> 4 below */
+            F = E;
+
+            G = M(xC2S6, ip[2]);
+            H = M(xC6S2, ip[2]);
+
+            Ed = E - G;
+            Gd = E + G;
+
+            Add = F + Ad;
+            Bdd = Bd - H;
+
+            Fd = F - Ad;
+            Hd = Bd + H;
+
+            Gd += 8; /* rounding term for the >> 4; each output contains exactly one of these four */
+            Add += 8;
+            Ed += 8;
+            Fd += 8;
+
+            /* Store to dst: type 1 overwrites the pixels, any other type adds to them. */
+            if (type == 1) {
+                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
+                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
+
+                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
+                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
+
+                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
+                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
+
+                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
+                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
+            } else {
+                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
+                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
+
+                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
+                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
+
+                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
+                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
+
+                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
+                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
+            }
+        } else {
+            if (type == 1) { /* all-zero vector: type 1 writes a flat 128, other types leave dst untouched */
+                dst[0*stride] =
+                dst[1*stride] =
+                dst[2*stride] =
+                dst[3*stride] =
+                dst[4*stride] =
+                dst[5*stride] =
+                dst[6*stride] =
+                dst[7*stride] = 128;
+            }
+        }
+
+        ip += 8; /* next contiguous vector */
+        dst++;
+    }
+}
+
+void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block)
+{
+    idct10(dest, stride, block, 1); /* type 1: the result overwrites dest */
+    memset(block, 0, sizeof(*block) * 64); /* clear all 64 coefficients once they are consumed */
+}
+
+void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block)
+{
+    idct10(dest, stride, block, 2); /* any type other than 1: the result is added to dest */
+    memset(block, 0, sizeof(*block) * 64); /* clear all 64 coefficients once they are consumed */
+}
+
 static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                            int16_t *block /* align 16 */)
 {
@@ -227,14 +380,14 @@ static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
     block[0] = 0;
 }
 
-static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
-                                int *bounding_values)
+static av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
+                                                 int *bounding_values, int count)
 {
     unsigned char *end;
     int filter_value;
     const ptrdiff_t nstride = -stride;
 
-    for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
+    for (end = first_pixel + count; first_pixel < end; first_pixel++) {
         filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
                        (first_pixel[0] - first_pixel[nstride]) * 3;
         filter_value = bounding_values[(filter_value + 4) >> 3];
@@ -244,13 +397,13 @@ static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
     }
 }
 
-static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
-                                int *bounding_values)
+static av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
+                                                 int *bounding_values, int count)
 {
     unsigned char *end;
     int filter_value;
 
-    for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) {
+    for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) {
         filter_value = (first_pixel[-2] - first_pixel[1]) +
                        (first_pixel[ 0] - first_pixel[-1]) * 3;
         filter_value = bounding_values[(filter_value + 4) >> 3];
@@ -260,6 +413,18 @@ static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
     }
 }
 
+#define LOOP_FILTER(prefix, suffix, dim, count) /* fixed-count wrapper around vp3_<dim>_loop_filter_c() */ \
+void prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \
+                                int *bounding_values) \
+{ \
+    vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \
+}
+
+static LOOP_FILTER(vp3,_c, v, 8)  /* defines static vp3_v_loop_filter_8_c() */
+static LOOP_FILTER(vp3,_c, h, 8)  /* defines static vp3_h_loop_filter_8_c() */
+LOOP_FILTER(ff_vp3dsp, , v, 12)   /* defines ff_vp3dsp_v_loop_filter_12() */
+LOOP_FILTER(ff_vp3dsp, , h, 12)   /* defines ff_vp3dsp_h_loop_filter_12() */
+
 static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                  const uint8_t *src2, ptrdiff_t stride, int h)
 {
@@ -284,8 +449,8 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
     c->idct_put      = vp3_idct_put_c;
     c->idct_add      = vp3_idct_add_c;
     c->idct_dc_add   = vp3_idct_dc_add_c;
-    c->v_loop_filter = vp3_v_loop_filter_c;
-    c->h_loop_filter = vp3_h_loop_filter_c;
+    c->v_loop_filter = vp3_v_loop_filter_8_c;
+    c->h_loop_filter = vp3_h_loop_filter_8_c;
 
     if (ARCH_ARM)
         ff_vp3dsp_init_arm(c, flags);
@@ -293,4 +458,37 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
         ff_vp3dsp_init_ppc(c, flags);
     if (ARCH_X86)
         ff_vp3dsp_init_x86(c, flags);
+    if (ARCH_MIPS)
+        ff_vp3dsp_init_mips(c, flags);
+}
+
+/*
+ * Initialize the loop filter bounding values table for the given filter limit.
+ * It only needs to be called again when filter_limit changes.
+ *
+ * bounding_values_array must hold at least 256 + 2 ints: entries 0..255 form
+ * the lookup table, entries 256 and 257 receive filter_limit * 0x02020202.
+ * filter_limit must be in the range 0..127; this is checked with av_assert0().
+ */
+void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit)
+{
+    int *bounding_values = bounding_values_array + 127; /* index 0 is the centre of the table */
+    int x;
+    int value;
+
+    av_assert0(filter_limit < 128U); /* unsigned compare also rejects negative limits */
+
+    /* set up the bounding values */
+    memset(bounding_values_array, 0, 256 * sizeof(int));
+    for (x = 0; x < filter_limit; x++) {
+        bounding_values[-x] = -x;
+        bounding_values[x] = x;
+    }
+    for (x = value = filter_limit; x < 128 && value; x++, value--) {
+        bounding_values[ x] =  value;
+        bounding_values[-x] = -value;
+    }
+    if (value)
+        bounding_values[128] = value;
+    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202; /* array entries 256 and 257; NOTE(review): presumably read by SIMD loop filters - confirm */
 }
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
index 2fdad162caabd..32b2cad0ef27b 100644
--- a/libavcodec/vp3dsp.h
+++ b/libavcodec/vp3dsp.h
@@ -45,9 +45,18 @@ typedef struct VP3DSPContext {
     void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
 } VP3DSPContext;
 
+void ff_vp3dsp_v_loop_filter_12(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);
+void ff_vp3dsp_h_loop_filter_12(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values);
+
+void ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+
 void ff_vp3dsp_init(VP3DSPContext *c, int flags);
 void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags);
 void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags);
 void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_mips(VP3DSPContext *c, int flags);
+
+void ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit); /* parameter name matches the definition */
 
 #endif /* AVCODEC_VP3DSP_H */
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index cb08cec33f503..49988b8b7685f 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -252,6 +252,7 @@ static int vp5_parse_coeff(VP56Context *s)
             for (i=coeff_idx; i<=ctx_last; i++)
                 s->coeff_ctx[ff_vp56_b6to4[b]][i] = 5;
         s->above_blocks[s->above_block_idx[b]].not_null_dc = s->coeff_ctx[ff_vp56_b6to4[b]][0];
+        s->idct_selector[b] = 63;
     }
     return 0;
 }
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index b69fe6c176d1c..72fea3780e79c 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -33,6 +33,8 @@
 
 void ff_vp56_init_dequant(VP56Context *s, int quantizer)
 {
+    if (s->quantizer != quantizer)
+        ff_vp3dsp_set_bounding_values(s->bounding_values_array, ff_vp56_filter_threshold[quantizer]);
     s->quantizer = quantizer;
     s->dequant_dc = ff_vp56_dc_dequant[quantizer] << 2;
     s->dequant_ac = ff_vp56_ac_dequant[quantizer] << 2;
@@ -196,12 +198,8 @@ static void vp56_decode_4mv(VP56Context *s, int row, int col)
     s->macroblocks[row * s->mb_width + col].mv = s->mv[3];
 
     /* chroma vectors are average luma vectors */
-    if (s->avctx->codec->id == AV_CODEC_ID_VP5) {
-        s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2);
-        s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
-    } else {
-        s->mv[4] = s->mv[5] = (VP56mv) {mv.x/4, mv.y/4};
-    }
+    s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2);
+    s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
 }
 
 static VP56mb vp56_decode_mv(VP56Context *s, int row, int col)
@@ -324,9 +322,17 @@ static void vp56_add_predictors_dc(VP56Context *s, VP56Frame ref_frame)
 static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
                                 ptrdiff_t stride, int dx, int dy)
 {
+    if (s->avctx->codec->id == AV_CODEC_ID_VP5) { /* VP5 keeps the threshold-based vp56dsp edge filters */
     int t = ff_vp56_filter_threshold[s->quantizer];
     if (dx)  s->vp56dsp.edge_filter_hor(yuv +         10-dx , stride, t);
     if (dy)  s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
+    } else { /* every other codec id uses the 12-pixel VP3 loop filters */
+        int * bounding_values = s->bounding_values_array + 127; /* same +127 offset the table is built with */
+        if (dx) /* horizontal offset present: filter along the column 10-dx */
+            ff_vp3dsp_h_loop_filter_12(yuv +         10-dx, stride, bounding_values);
+        if (dy) /* vertical offset present: filter along the row 10-dy */
+            ff_vp3dsp_v_loop_filter_12(yuv + stride*(10-dy), stride, bounding_values);
+    }
 }
 
 static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
@@ -400,6 +406,24 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
     }
 }
 
+static void vp56_idct_put(VP56Context *s, uint8_t * dest, ptrdiff_t stride, int16_t *block, int selector)
+{   /* dispatch on the block's IDCT selector */
+    if (selector <= 10 && selector != 1)
+        ff_vp3dsp_idct10_put(dest, stride, block);  /* reduced "idct10" variant */
+    else
+        s->vp3dsp.idct_put(dest, stride, block);    /* selector == 1 or > 10: regular IDCT */
+}
+
+static void vp56_idct_add(VP56Context *s, uint8_t * dest, ptrdiff_t stride, int16_t *block, int selector)
+{   /* dispatch on the block's IDCT selector, smallest range first */
+    if (selector <= 1)
+        s->vp3dsp.idct_dc_add(dest, stride, block); /* selector <= 1: DC-only add */
+    else if (selector <= 10)
+        ff_vp3dsp_idct10_add(dest, stride, block);  /* selector 2..10: reduced "idct10" variant */
+    else
+        s->vp3dsp.idct_add(dest, stride, block);    /* selector > 10: regular IDCT */
+}
+
 static av_always_inline void vp56_render_mb(VP56Context *s, int row, int col, int is_alpha, VP56mb mb_type)
 {
     int b, ab, b_max, plane, off;
@@ -420,8 +444,8 @@ static av_always_inline void vp56_render_mb(VP56Context *s, int row, int col, in
         case VP56_MB_INTRA:
             for (b=0; b<b_max; b++) {
                 plane = ff_vp56_b2p[b+ab];
-                s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b],
-                                s->stride[plane], s->block_coeff[b]);
+                vp56_idct_put(s, frame_current->data[plane] + s->block_offset[b],
+                                s->stride[plane], s->block_coeff[b], s->idct_selector[b]);
             }
             break;
 
@@ -433,8 +457,8 @@ static av_always_inline void vp56_render_mb(VP56Context *s, int row, int col, in
                 s->hdsp.put_pixels_tab[1][0](frame_current->data[plane] + off,
                                              frame_ref->data[plane] + off,
                                              s->stride[plane], 8);
-                s->vp3dsp.idct_add(frame_current->data[plane] + off,
-                                s->stride[plane], s->block_coeff[b]);
+                vp56_idct_add(s, frame_current->data[plane] + off,
+                              s->stride[plane], s->block_coeff[b], s->idct_selector[b]);
             }
             break;
 
@@ -451,8 +475,8 @@ static av_always_inline void vp56_render_mb(VP56Context *s, int row, int col, in
                 plane = ff_vp56_b2p[b+ab];
                 vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane],
                         16*col+x_off, 16*row+y_off);
-                s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b],
-                                s->stride[plane], s->block_coeff[b]);
+                vp56_idct_add(s, frame_current->data[plane] + s->block_offset[b],
+                              s->stride[plane], s->block_coeff[b], s->idct_selector[b]);
             }
             break;
     }
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index b8dda9e73a549..84b2f6c94bbfa 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -105,6 +105,7 @@ typedef struct VP56Macroblock {
 typedef struct VP56Model {
     uint8_t coeff_reorder[64];       /* used in vp6 only */
     uint8_t coeff_index_to_pos[64];  /* used in vp6 only */
+    uint8_t coeff_index_to_idct_selector[64]; /* used in vp6 only */
     uint8_t vector_sig[2];           /* delta sign */
     uint8_t vector_dct[2];           /* delta coding types */
     uint8_t vector_pdi[2][2];        /* predefined delta init */
@@ -157,6 +158,7 @@ struct vp56_context {
     VP56mb mb_type;
     VP56Macroblock *macroblocks;
     DECLARE_ALIGNED(16, int16_t, block_coeff)[6][64];
+    int idct_selector[6];
 
     /* motion vectors */
     VP56mv mv[6];  /* vectors for each block in MB */
@@ -170,6 +172,7 @@ struct vp56_context {
     int filter_mode;
     int max_vector_length;
     int sample_variance_threshold;
+    DECLARE_ALIGNED(8, int, bounding_values_array)[256 + 2]; /* +2: ff_vp3dsp_set_bounding_values() also writes entries 256 and 257 */
 
     uint8_t coeff_ctx[4][64];              /* used in vp5 only */
     uint8_t coeff_ctx_last[4];             /* used in vp5 only */
@@ -227,6 +230,14 @@ int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 extern const uint8_t ff_vp56_norm_shift[256];
 int ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size);
 
+/**
+ * Return 1 if the range coder (vp5689) has reached the end of the stream, 0 otherwise.
+ */
+static av_always_inline int vpX_rac_is_end(VP56RangeCoder *c)
+{
+    return c->end <= c->buffer && c->bits >= 0; /* buffer pointer at or past end, and bits non-negative */
+}
+
 static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
 {
     int shift = ff_vp56_norm_shift[c->high];
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp56dsp.c
index 9f299dc60f2d7..e8d93d66809a6 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp56dsp.c
@@ -72,27 +72,8 @@ av_cold void ff_vp5dsp_init(VP56DSPContext *s)
 #endif /* CONFIG_VP5_DECODER */
 
 #if CONFIG_VP6_DECODER
-static int vp6_adjust(int v, int t)
-{
-    int V = v, s = v >> 31;
-    V ^= s;
-    V -= s;
-    if (V-t-1 >= (unsigned)(t-1))
-        return v;
-    V = 2*t - V;
-    V += s;
-    V ^= s;
-    return V;
-}
-
-VP56_EDGE_FILTER(vp6, hor, 1, stride)
-VP56_EDGE_FILTER(vp6, ver, stride, 1)
-
 av_cold void ff_vp6dsp_init(VP56DSPContext *s)
 {
-    s->edge_filter_hor = vp6_edge_filter_hor;
-    s->edge_filter_ver = vp6_edge_filter_ver;
-
     s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
 
     if (ARCH_ARM)
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 645fc5c690e03..977fcb7076465 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -194,6 +194,18 @@ static void vp6_coeff_order_table_init(VP56Context *s)
         for (pos=1; pos<64; pos++)
             if (s->modelp->coeff_reorder[pos] == i)
                 s->modelp->coeff_index_to_pos[idx++] = pos;
+
+    for (idx = 0; idx < 64; idx++) {
+        int max = 0;
+        for (i = 0; i <= idx; i++) {
+            int v = s->modelp->coeff_index_to_pos[i];
+            if (v > max)
+                max = v;
+        }
+        if (s->sub_version > 6)
+            max++;
+        s->modelp->coeff_index_to_idct_selector[idx] = max;
+    }
 }
 
 static void vp6_default_models_init(VP56Context *s)
@@ -446,6 +458,7 @@ static int vp6_parse_coeff_huffman(VP56Context *s)
             cg = FFMIN(vp6_coeff_groups[coeff_idx], 3);
             vlc_coeff = &s->ract_vlc[pt][ct][cg];
         }
+        s->idct_selector[b] = model->coeff_index_to_idct_selector[FFMIN(coeff_idx, 63)];
     }
     return 0;
 }
@@ -527,6 +540,7 @@ static int vp6_parse_coeff(VP56Context *s)
 
         s->left_block[ff_vp56_b6to4[b]].not_null_dc =
         s->above_blocks[s->above_block_idx[b]].not_null_dc = !!s->block_coeff[b][0];
+        s->idct_selector[b] = model->coeff_index_to_idct_selector[FFMIN(coeff_idx, 63)];
     }
     return 0;
 }
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index a06692c47676b..ba79e5fdabe73 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -2268,7 +2268,7 @@ void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
 
 #define MARGIN (16 << 2)
 static av_always_inline
-void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
+int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                     VP8Frame *prev_frame, int is_vp7)
 {
     VP8Context *s = avctx->priv_data;
@@ -2285,6 +2285,10 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
 
         s->mv_bounds.mv_min.x = -MARGIN;
         s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+
+        if (vpX_rac_is_end(&s->c)) {
+            return AVERROR_INVALIDDATA;
+        }
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
             if (mb_y == 0)
                 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
@@ -2298,18 +2302,19 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
         s->mv_bounds.mv_min.y -= 64;
         s->mv_bounds.mv_max.y -= 64;
     }
+    return 0;
 }
 
-static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                    VP8Frame *prev_frame)
 {
-    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
+    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
 }
 
-static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                    VP8Frame *prev_frame)
 {
-    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
+    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
 }
 
 #if HAVE_THREADS
@@ -2744,9 +2749,11 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 !s->segmentation.update_map)
                 ff_thread_await_progress(&prev_frame->tf, 1, 0);
             if (is_vp7)
-                vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
             else
-                vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+            if (ret < 0)
+                goto err;
         }
 
         if (avctx->active_thread_type == FF_THREAD_FRAME)
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index fed5c67a90c18..30452bf5f85f0 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -679,6 +679,8 @@ av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
         ff_vp78dsp_init_ppc(dsp);
     if (ARCH_X86)
         ff_vp78dsp_init_x86(dsp);
+    if (ARCH_AARCH64)
+        ff_vp78dsp_init_aarch64(dsp);
 }
 
 #if CONFIG_VP7_DECODER
@@ -739,5 +741,7 @@ av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
         ff_vp8dsp_init_x86(dsp);
     if (ARCH_MIPS)
         ff_vp8dsp_init_mips(dsp);
+    if (ARCH_AARCH64)
+        ff_vp8dsp_init_aarch64(dsp);
 }
 #endif /* CONFIG_VP8_DECODER */
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index eaae4aed6d9ab..cfe1524b0b38d 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -91,11 +91,13 @@ void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
 void ff_vp7dsp_init(VP8DSPContext *c);
 
 void ff_vp78dsp_init(VP8DSPContext *c);
+void ff_vp78dsp_init_aarch64(VP8DSPContext *c);
 void ff_vp78dsp_init_arm(VP8DSPContext *c);
 void ff_vp78dsp_init_ppc(VP8DSPContext *c);
 void ff_vp78dsp_init_x86(VP8DSPContext *c);
 
 void ff_vp8dsp_init(VP8DSPContext *c);
+void ff_vp8dsp_init_aarch64(VP8DSPContext *c);
 void ff_vp8dsp_init_arm(VP8DSPContext *c);
 void ff_vp8dsp_init_x86(VP8DSPContext *c);
 void ff_vp8dsp_init_mips(VP8DSPContext *c);
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index b1178c9c0cb0c..acf3ffc9e7304 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1306,6 +1306,9 @@ static int decode_tiles(AVCodecContext *avctx,
                         decode_sb_mem(td, row, col, lflvl_ptr,
                                       yoff2, uvoff2, BL_64X64);
                     } else {
+                        if (vpX_rac_is_end(td->c)) {
+                            return AVERROR_INVALIDDATA;
+                        }
                         decode_sb(td, row, col, lflvl_ptr,
                                   yoff2, uvoff2, BL_64X64);
                     }
diff --git a/libavcodec/vp9_metadata_bsf.c b/libavcodec/vp9_metadata_bsf.c
index be010edc3f483..b79f08af6c37c 100644
--- a/libavcodec/vp9_metadata_bsf.c
+++ b/libavcodec/vp9_metadata_bsf.c
@@ -86,7 +86,7 @@ static int vp9_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 
     err = 0;
 fail:
-    ff_cbs_fragment_uninit(ctx->cbc, frag);
+    ff_cbs_fragment_reset(ctx->cbc, frag);
 
     if (err < 0)
         av_packet_unref(out);
@@ -105,6 +105,8 @@ static int vp9_metadata_init(AVBSFContext *bsf)
 static void vp9_metadata_close(AVBSFContext *bsf)
 {
     VP9MetadataContext *ctx = bsf->priv_data;
+
+    ff_cbs_fragment_free(ctx->cbc, &ctx->fragment);
     ff_cbs_close(&ctx->cbc);
 }
 
diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
index 9531f34a32531..c957a75667b57 100644
--- a/libavcodec/vp9_parser.c
+++ b/libavcodec/vp9_parser.c
@@ -36,12 +36,16 @@ static int parse(AVCodecParserContext *ctx,
     *out_data = data;
     *out_size = size;
 
-    if ((res = init_get_bits8(&gb, data, size)) < 0)
+    if (!size || (res = init_get_bits8(&gb, data, size)) < 0)
         return size; // parsers can't return errors
     get_bits(&gb, 2); // frame marker
     profile  = get_bits1(&gb);
     profile |= get_bits1(&gb) << 1;
     if (profile == 3) profile += get_bits1(&gb);
+    if (profile > 3)
+        return size;
+
+    avctx->profile = profile;
 
     if (get_bits1(&gb)) {
         keyframe = 0;
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 8306ec020f545..d0242809fe02c 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -940,13 +940,23 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
             case 3:
                 chmask = bytestream2_get_le32(&gb);
                 break;
-            case 5:
+            case 4:
                 size = bytestream2_get_byte(&gb);
-                if (avctx->channels != size)
+                chan  |= (bytestream2_get_byte(&gb) & 0xF) << 8;
+                chan  += 1;
+                if (avctx->channels != chan)
                     av_log(avctx, AV_LOG_WARNING, "%i channels signalled"
-                           " instead of %i.\n", size, avctx->channels);
+                           " instead of %i.\n", chan, avctx->channels);
+                chmask = bytestream2_get_le24(&gb);
+                break;
+            case 5:
+                size = bytestream2_get_byte(&gb);
                 chan  |= (bytestream2_get_byte(&gb) & 0xF) << 8;
-                chmask = bytestream2_get_le16(&gb);
+                chan  += 1;
+                if (avctx->channels != chan)
+                    av_log(avctx, AV_LOG_WARNING, "%i channels signalled"
+                           " instead of %i.\n", chan, avctx->channels);
+                chmask = bytestream2_get_le32(&gb);
                 break;
             default:
                 av_log(avctx, AV_LOG_ERROR, "Invalid channel info size %d\n",
diff --git a/libavcodec/wcmv.c b/libavcodec/wcmv.c
index ebd5ef66f4af9..2988c15b23acf 100644
--- a/libavcodec/wcmv.c
+++ b/libavcodec/wcmv.c
@@ -56,21 +56,13 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     bytestream2_init(&gb, avpkt->data, avpkt->size);
+    blocks = bytestream2_get_le16(&gb);
+    if (!blocks)
+        return avpkt->size;
 
     if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
 
-    if (s->prev_frame->data[0]) {
-        ret = av_frame_copy(frame, s->prev_frame);
-        if (ret < 0)
-            return ret;
-    } else {
-        ptrdiff_t linesize[4] = { frame->linesize[0], 0, 0, 0 };
-        av_image_fill_black(frame->data, linesize, avctx->pix_fmt, 0,
-                            avctx->width, avctx->height);
-    }
-
-    blocks = bytestream2_get_le16(&gb);
     if (blocks > 5) {
         GetByteContext bgb;
         int x = 0, size;
@@ -162,6 +154,16 @@ static int decode_frame(AVCodecContext *avctx,
         bytestream2_seek(&gb, 2, SEEK_SET);
     }
 
+    if (s->prev_frame->data[0]) {
+        ret = av_frame_copy(frame, s->prev_frame);
+        if (ret < 0)
+            return ret;
+    } else {
+        ptrdiff_t linesize[4] = { frame->linesize[0], 0, 0, 0 };
+        av_image_fill_black(frame->data, linesize, avctx->pix_fmt, 0,
+                            avctx->width, avctx->height);
+    }
+
     for (int block = 0; block < blocks; block++) {
         int x, y, w, h;
 
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index 9439bfa771c0f..d0fa974c80bf6 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -210,6 +210,7 @@ typedef struct WMAProDecodeCtx {
     int              subframe_offset;               ///< subframe offset in the bit reservoir
     uint8_t          packet_loss;                   ///< set in case of bitstream error
     uint8_t          packet_done;                   ///< set when a packet is fully decoded
+    uint8_t          eof_done;                      ///< set when EOF reached and extra subframe is written (XMA1/2)
 
     /* frame decode state */
     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
@@ -1609,7 +1610,34 @@ static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s,
 
     *got_frame_ptr = 0;
 
-    if (s->packet_done || s->packet_loss) {
+    if (!buf_size) {
+        AVFrame *frame = data;
+        int i;
+
+        /** Must output remaining samples after stream end. WMAPRO 5.1 created
+         * by XWMA encoder don't though (maybe only 1/2ch streams need it). */
+        s->packet_done = 0;
+        if (s->eof_done)
+            return 0;
+
+        /** clean output buffer and copy last IMDCT samples */
+        for (i = 0; i < s->nb_channels; i++) {
+            memset(frame->extended_data[i], 0,
+            s->samples_per_frame * sizeof(*s->channel[i].out));
+
+            memcpy(frame->extended_data[i], s->channel[i].out,
+                   s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
+        }
+
+        /* TODO: XMA should output 128 samples only (instead of 512) and WMAPRO
+         * maybe 768 (with 2048), XMA needs changes in multi-stream handling though. */
+
+        s->eof_done = 1;
+        s->packet_done = 1;
+        *got_frame_ptr = 1;
+        return 0;
+    }
+    else if (s->packet_done || s->packet_loss) {
         s->packet_done = 0;
 
         /** sanity check for the buffer length */
@@ -1922,6 +1950,7 @@ static void flush(WMAProDecodeCtx *s)
                sizeof(*s->channel[i].out));
     s->packet_loss = 1;
     s->skip_packets = 0;
+    s->eof_done = 0;
 }
 
 
@@ -1976,7 +2005,7 @@ AVCodec ff_xma1_decoder = {
     .init           = xma_decode_init,
     .close          = xma_decode_end,
     .decode         = xma_decode_packet,
-    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
@@ -1991,7 +2020,7 @@ AVCodec ff_xma2_decoder = {
     .close          = xma_decode_end,
     .decode         = xma_decode_packet,
     .flush          = xma_flush,
-    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 444e303b0df16..68bb65986e38d 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -1906,7 +1906,7 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
      * in a single "muxer" packet, so we artificially emulate that by
      * capping the packet size at ctx->block_align. */
     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
-    init_get_bits(&s->gb, avpkt->data, size << 3);
+    init_get_bits8(&s->gb, avpkt->data, size);
 
     /* size == ctx->block_align is used to indicate whether we are dealing with
      * a new packet or a packet of which we already read the packet header
diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c
index 4f97d9227ce1e..92daa1639e5e9 100644
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c
@@ -181,6 +181,14 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext *s)
             }
 
             s->dc_table_index = get_bits1(&s->gb);
+
+            // At least one bit per macroblock is required in a valid frame;
+            // we discard frames much smaller than this. Frames smaller than 1/8 of the
+            // smallest "black/skip" frame generally contain little recoverable content
+            // while at the same time having the highest computational requirements
+            // per byte.
+            if (get_bits_left(&s->gb) * 8LL < (s->width+15)/16 * ((s->height+15)/16))
+                return AVERROR_INVALIDDATA;
         }
         s->inter_intra_pred = 0;
         s->no_rounding      = 1;
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 39f65c151f75b..08eb7ead44d7a 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -101,13 +101,13 @@ void ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40],
 
 #define LF_FUNC(DIR, TYPE, DEPTH, OPT)                                        \
 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
-                                                               int stride,    \
+                                                               ptrdiff_t stride, \
                                                                int alpha,     \
                                                                int beta,      \
                                                                int8_t *tc0);
 #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
-                                                               int stride,    \
+                                                               ptrdiff_t stride, \
                                                                int alpha,     \
                                                                int beta);
 
diff --git a/libavcodec/x86/proresdsp_init.c b/libavcodec/x86/proresdsp_init.c
index 8ca4d4d9b3c44..bde79ab8c078c 100644
--- a/libavcodec/x86/proresdsp_init.c
+++ b/libavcodec/x86/proresdsp_init.c
@@ -35,14 +35,16 @@ av_cold void ff_proresdsp_init_x86(ProresDSPContext *dsp, AVCodecContext *avctx)
 #if ARCH_X86_64
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        dsp->idct_permutation_type = FF_IDCT_PERM_TRANSPOSE;
-        dsp->idct_put = ff_prores_idct_put_10_sse2;
-    }
+    if (avctx->bits_per_raw_sample == 10){
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            dsp->idct_permutation_type = FF_IDCT_PERM_TRANSPOSE;
+            dsp->idct_put = ff_prores_idct_put_10_sse2;
+        }
 
-    if (EXTERNAL_AVX(cpu_flags)) {
-        dsp->idct_permutation_type = FF_IDCT_PERM_TRANSPOSE;
-        dsp->idct_put = ff_prores_idct_put_10_avx;
+        if (EXTERNAL_AVX(cpu_flags)) {
+            dsp->idct_permutation_type = FF_IDCT_PERM_TRANSPOSE;
+            dsp->idct_put = ff_prores_idct_put_10_avx;
+        }
     }
 #endif /* ARCH_X86_64 */
 }
diff --git a/libavcodec/xfacedec.c b/libavcodec/xfacedec.c
index d045cb6ef4ef1..ab4c0823f5686 100644
--- a/libavcodec/xfacedec.c
+++ b/libavcodec/xfacedec.c
@@ -123,7 +123,7 @@ static int xface_decode_frame(AVCodecContext *avctx,
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
 
-    for (i = 0, k = 0; avpkt->data[i] && i < avpkt->size; i++) {
+    for (i = 0, k = 0; i < avpkt->size && avpkt->data[i]; i++) {
         c = avpkt->data[i];
 
         /* ignore invalid digits */
diff --git a/libavcodec/xpmdec.c b/libavcodec/xpmdec.c
index 03172e4aadab4..43dd9bc7e7ea9 100644
--- a/libavcodec/xpmdec.c
+++ b/libavcodec/xpmdec.c
@@ -26,6 +26,10 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#define MIN_ELEMENT ' '
+#define MAX_ELEMENT 0xfe
+#define NB_ELEMENTS (MAX_ELEMENT - MIN_ELEMENT + 1)
+
 typedef struct XPMContext {
     uint32_t  *pixels;
     int        pixels_size;
@@ -290,10 +294,10 @@ static int ascii2index(const uint8_t *cpixel, int cpp)
     int n = 0, m = 1, i;
 
     for (i = 0; i < cpp; i++) {
-        if (*p < ' ' || *p > '~')
+        if (*p < MIN_ELEMENT || *p > MAX_ELEMENT)
             return AVERROR_INVALIDDATA;
-        n += (*p++ - ' ') * m;
-        m *= 95;
+        n += (*p++ - MIN_ELEMENT) * m;
+        m *= NB_ELEMENTS;
     }
     return n;
 }
@@ -346,7 +350,7 @@ static int xpm_decode_frame(AVCodecContext *avctx, void *data,
 
     size = 1;
     for (i = 0; i < cpp; i++)
-        size *= 95;
+        size *= NB_ELEMENTS;
 
     if (ncolors <= 0 || ncolors > size) {
         av_log(avctx, AV_LOG_ERROR, "invalid number of colors: %d\n", ncolors);
diff --git a/libavcodec/zmbv.c b/libavcodec/zmbv.c
index 79e0892070412..e07009d0fbdf2 100644
--- a/libavcodec/zmbv.c
+++ b/libavcodec/zmbv.c
@@ -525,9 +525,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-
     if (c->comp == 0) { // uncompressed data
         if (c->decomp_size < len) {
             av_log(avctx, AV_LOG_ERROR, "Buffer too small\n");
@@ -553,6 +550,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         av_log(avctx, AV_LOG_ERROR, "decompressed size %d is incorrect, expected %d\n", c->decomp_len, expected_size);
         return AVERROR_INVALIDDATA;
     }
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
     if (c->flags & ZMBV_KEYFRAME) {
         frame->key_frame = 1;
         frame->pict_type = AV_PICTURE_TYPE_I;
diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 4d9147657d259..98029de5f6696 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -34,34 +34,53 @@
 
 #include <zlib.h>
 
+/* Frame header flags */
 #define ZMBV_KEYFRAME 1
 #define ZMBV_DELTAPAL 2
 
+/* Motion block width/height (maximum allowed value is 255)
+ * Note: histogram datatype in block_cmp() must be big enough to hold values
+ * up to (4 * ZMBV_BLOCK * ZMBV_BLOCK)
+ */
 #define ZMBV_BLOCK 16
 
+/* Keyframe header format values */
+enum ZmbvFormat {
+    ZMBV_FMT_NONE  = 0,
+    ZMBV_FMT_1BPP  = 1,
+    ZMBV_FMT_2BPP  = 2,
+    ZMBV_FMT_4BPP  = 3,
+    ZMBV_FMT_8BPP  = 4,
+    ZMBV_FMT_15BPP = 5,
+    ZMBV_FMT_16BPP = 6,
+    ZMBV_FMT_24BPP = 7,
+    ZMBV_FMT_32BPP = 8
+};
+
 /**
  * Encoder context
  */
 typedef struct ZmbvEncContext {
     AVCodecContext *avctx;
 
-    int range;
+    int lrange, urange;
     uint8_t *comp_buf, *work_buf;
     uint8_t pal[768];
     uint32_t pal2[256]; //for quick comparisons
-    uint8_t *prev;
+    uint8_t *prev, *prev_buf;
     int pstride;
     int comp_size;
     int keyint, curfrm;
+    int bypp;
+    enum ZmbvFormat fmt;
     z_stream zstream;
 
-    int score_tab[256];
+    int score_tab[ZMBV_BLOCK * ZMBV_BLOCK * 4 + 1];
 } ZmbvEncContext;
 
 
 /** Block comparing function
  * XXX should be optimized and moved to DSPContext
- * TODO handle out of edge ME
  */
 static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
                             uint8_t *src2, int stride2, int bw, int bh,
@@ -69,20 +88,27 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
 {
     int sum = 0;
     int i, j;
-    uint8_t histogram[256] = {0};
+    uint16_t histogram[256] = {0};
+    int bw_bytes = bw * c->bypp;
 
-    *xored = 0;
+    /* Build frequency histogram of byte values for src[] ^ src2[] */
     for(j = 0; j < bh; j++){
-        for(i = 0; i < bw; i++){
+        for(i = 0; i < bw_bytes; i++){
             int t = src[i] ^ src2[i];
             histogram[t]++;
-            *xored |= t;
         }
         src += stride;
         src2 += stride2;
     }
 
-    for(i = 1; i < 256; i++)
+    /* If not all the xored values were 0, then the blocks are different */
+    *xored = (histogram[0] < bw_bytes * bh);
+
+    /* Exit early if blocks are equal */
+    if (!*xored) return 0;
+
+    /* Sum the entropy of all values */
+    for(i = 0; i < 256; i++)
         sum += c->score_tab[histogram[i]];
 
     return sum;
@@ -94,23 +120,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride,
 static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev,
                    int pstride, int x, int y, int *mx, int *my, int *xored)
 {
-    int dx, dy, tx, ty, tv, bv, bw, bh;
+    int dx, dy, txored, tv, bv, bw, bh;
+    int mx0, my0;
 
-    *mx = *my = 0;
+    mx0 = *mx;
+    my0 = *my;
     bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x);
     bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y);
+
+    /* Try (0,0) */
     bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored);
+    *mx = *my = 0;
     if(!bv) return 0;
-    for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){
-        for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){
-            if(tx == x && ty == y) continue; // we already tested this block
-            dx = tx - x;
-            dy = ty - y;
-            tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, xored);
+
+    /* Try previous block's MV (if not 0,0) */
+    if (mx0 || my0){
+        tv = block_cmp(c, src, sstride, prev + mx0 * c->bypp + my0 * pstride, pstride, bw, bh, &txored);
+        if(tv < bv){
+            bv = tv;
+            *mx = mx0;
+            *my = my0;
+            *xored = txored;
+            if(!bv) return 0;
+        }
+    }
+
+    /* Try other MVs from top-to-bottom, left-to-right */
+    for(dy = -c->lrange; dy <= c->urange; dy++){
+        for(dx = -c->lrange; dx <= c->urange; dx++){
+            if(!dx && !dy) continue; // we already tested this block
+            if(dx == mx0 && dy == my0) continue; // this one too
+            tv = block_cmp(c, src, sstride, prev + dx * c->bypp + dy * pstride, pstride, bw, bh, &txored);
             if(tv < bv){
                  bv = tv;
                  *mx = dx;
                  *my = dy;
+                 *xored = txored;
                  if(!bv) return 0;
              }
          }
@@ -141,9 +186,10 @@ FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = keyframe;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
-    chpal = !keyframe && memcmp(p->data[1], c->pal2, 1024);
 
-    palptr = (uint32_t*)p->data[1];
+    palptr = (avctx->pix_fmt == AV_PIX_FMT_PAL8) ? (uint32_t *)p->data[1] : NULL;
+    chpal = !keyframe && palptr && memcmp(palptr, c->pal2, 1024);
+
     src = p->data[0];
     prev = c->prev;
     if(chpal){
@@ -157,25 +203,27 @@ FF_ENABLE_DEPRECATION_WARNINGS
             c->pal[i * 3 + 1] = tpal[1];
             c->pal[i * 3 + 2] = tpal[2];
         }
-        memcpy(c->pal2, p->data[1], 1024);
+        memcpy(c->pal2, palptr, 1024);
     }
     if(keyframe){
-        for(i = 0; i < 256; i++){
-            AV_WB24(c->pal+(i*3), palptr[i]);
+        if (palptr){
+            for(i = 0; i < 256; i++){
+                AV_WB24(c->pal+(i*3), palptr[i]);
+            }
+            memcpy(c->work_buf, c->pal, 768);
+            memcpy(c->pal2, palptr, 1024);
+            work_size = 768;
         }
-        memcpy(c->work_buf, c->pal, 768);
-        memcpy(c->pal2, p->data[1], 1024);
-        work_size = 768;
         for(i = 0; i < avctx->height; i++){
-            memcpy(c->work_buf + work_size, src, avctx->width);
+            memcpy(c->work_buf + work_size, src, avctx->width * c->bypp);
             src += p->linesize[0];
-            work_size += avctx->width;
+            work_size += avctx->width * c->bypp;
         }
     }else{
         int x, y, bh2, bw2, xored;
         uint8_t *tsrc, *tprev;
         uint8_t *mv;
-        int mx, my;
+        int mx = 0, my = 0;
 
         bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
         bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK;
@@ -188,16 +236,16 @@ FF_ENABLE_DEPRECATION_WARNINGS
             for(x = 0; x < avctx->width; x += ZMBV_BLOCK, mv += 2) {
                 bw2 = FFMIN(avctx->width - x, ZMBV_BLOCK);
 
-                tsrc = src + x;
-                tprev = prev + x;
+                tsrc = src + x * c->bypp;
+                tprev = prev + x * c->bypp;
 
                 zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored);
                 mv[0] = (mx << 1) | !!xored;
                 mv[1] = my << 1;
-                tprev += mx + my * c->pstride;
+                tprev += mx * c->bypp + my * c->pstride;
                 if(xored){
                     for(j = 0; j < bh2; j++){
-                        for(i = 0; i < bw2; i++)
+                        for(i = 0; i < bw2 * c->bypp; i++)
                             c->work_buf[work_size++] = tsrc[i] ^ tprev[i];
                         tsrc += p->linesize[0];
                         tprev += c->pstride;
@@ -212,7 +260,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     src = p->data[0];
     prev = c->prev;
     for(i = 0; i < avctx->height; i++){
-        memcpy(prev, src, avctx->width);
+        memcpy(prev, src, avctx->width * c->bypp);
         prev += c->pstride;
         src += p->linesize[0];
     }
@@ -243,7 +291,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         *buf++ = 0; // hi ver
         *buf++ = 1; // lo ver
         *buf++ = 1; // comp
-        *buf++ = 4; // format - 8bpp
+        *buf++ = c->fmt; // format
         *buf++ = ZMBV_BLOCK; // block width
         *buf++ = ZMBV_BLOCK; // block height
     }
@@ -263,7 +311,7 @@ static av_cold int encode_end(AVCodecContext *avctx)
     av_freep(&c->work_buf);
 
     deflateEnd(&c->zstream);
-    av_freep(&c->prev);
+    av_freep(&c->prev_buf);
 
     return 0;
 }
@@ -277,17 +325,48 @@ static av_cold int encode_init(AVCodecContext *avctx)
     int zret; // Zlib return code
     int i;
     int lvl = 9;
+    int prev_size, prev_offset;
+
+    switch (avctx->pix_fmt) { /* pick the ZMBV header format code and bytes per pixel */
+    case AV_PIX_FMT_PAL8:
+        c->fmt = ZMBV_FMT_8BPP;
+        c->bypp = 1;
+        break;
+    case AV_PIX_FMT_RGB555LE:
+        c->fmt = ZMBV_FMT_15BPP;
+        c->bypp = 2;
+        break;
+    case AV_PIX_FMT_RGB565LE:
+        c->fmt = ZMBV_FMT_16BPP;
+        c->bypp = 2;
+        break;
+    case AV_PIX_FMT_BGR0:
+        c->fmt = ZMBV_FMT_32BPP;
+        c->bypp = 4;
+        break;
+    default: /* init cannot continue, so report this as an error like the other failures */
+        av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
+        return AVERROR(EINVAL);
+    }
 
-    for(i=1; i<256; i++)
-        c->score_tab[i] = -i * log2(i / (double)(ZMBV_BLOCK * ZMBV_BLOCK)) * 256;
+    /* Entropy-based score tables for comparing blocks.
+     * Suitable for blocks up to (ZMBV_BLOCK * ZMBV_BLOCK * bypp) bytes.
+     * Scores are nonnegative, lower is better.
+     */
+    for(i = 1; i <= ZMBV_BLOCK * ZMBV_BLOCK * c->bypp; i++)
+        c->score_tab[i] = -i * log2(i / (double)(ZMBV_BLOCK * ZMBV_BLOCK * c->bypp)) * 256;
 
     c->avctx = avctx;
 
     c->curfrm = 0;
     c->keyint = avctx->keyint_min;
-    c->range = 8;
-    if(avctx->me_range > 0)
-        c->range = FFMIN(avctx->me_range, 127);
+
+    /* Motion estimation range: maximum distance is -64..63 */
+    c->lrange = c->urange = 8;
+    if(avctx->me_range > 0){
+        c->lrange = FFMIN(avctx->me_range, 64);
+        c->urange = FFMIN(avctx->me_range, 63);
+    }
 
     if(avctx->compression_level >= 0)
         lvl = avctx->compression_level;
@@ -298,7 +377,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     // Needed if zlib unused or init aborted before deflateInit
     memset(&c->zstream, 0, sizeof(z_stream));
-    c->comp_size = avctx->width * avctx->height + 1024 +
+    c->comp_size = avctx->width * c->bypp * avctx->height + 1024 +
         ((avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK) * ((avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK) * 2 + 4;
     if (!(c->work_buf = av_malloc(c->comp_size))) {
         av_log(avctx, AV_LOG_ERROR, "Can't allocate work buffer.\n");
@@ -313,11 +392,23 @@ static av_cold int encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n");
         return AVERROR(ENOMEM);
     }
-    c->pstride = FFALIGN(avctx->width, 16);
-    if (!(c->prev = av_malloc(c->pstride * avctx->height))) {
+
+    /* Allocate prev buffer - pad around the image to allow out-of-edge ME:
+     * - The image should be padded with `lrange` rows before and `urange` rows
+     *   after.
+     * - The stride should be padded with `lrange` pixels, then rounded up to a
+     *   multiple of 16 bytes.
+     * - The first row is preceded by `lrange` pixels (`lrange * bypp` bytes),
+     *   aligned up to 16 bytes; prev_size and prev_offset must both use this.
+     */
+    c->pstride = FFALIGN((avctx->width + c->lrange) * c->bypp, 16);
+    prev_size = FFALIGN(c->lrange * c->bypp, 16) + c->pstride * (c->lrange + avctx->height + c->urange);
+    prev_offset = FFALIGN(c->lrange * c->bypp, 16) + c->pstride * c->lrange;
+    if (!(c->prev_buf = av_mallocz(prev_size))) {
         av_log(avctx, AV_LOG_ERROR, "Can't allocate picture.\n");
         return AVERROR(ENOMEM);
     }
+    c->prev = c->prev_buf + prev_offset;
 
     c->zstream.zalloc = Z_NULL;
     c->zstream.zfree = Z_NULL;
@@ -340,5 +431,9 @@ AVCodec ff_zmbv_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
-    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_PAL8, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_PAL8,
+                                                     AV_PIX_FMT_RGB555LE,
+                                                     AV_PIX_FMT_RGB565LE,
+                                                     AV_PIX_FMT_BGR0,
+                                                     AV_PIX_FMT_NONE },
 };
diff --git a/libavdevice/decklink_common.cpp b/libavdevice/decklink_common.cpp
index b88d6c6219a8d..d3cc1eb3d16f1 100644
--- a/libavdevice/decklink_common.cpp
+++ b/libavdevice/decklink_common.cpp
@@ -77,7 +77,7 @@ static IDeckLinkIterator *decklink_create_iterator(AVFormatContext *avctx)
     return iter;
 }
 
-int decklink_get_attr_string(IDeckLink *dl, BMDDeckLinkAttributeID cfg_id, const char **s)
+static int decklink_get_attr_string(IDeckLink *dl, BMDDeckLinkAttributeID cfg_id, const char **s)
 {
     DECKLINK_STR tmp;
     HRESULT hr;
@@ -171,6 +171,11 @@ int ff_decklink_set_configs(AVFormatContext *avctx,
         if (ret < 0)
             return ret;
     }
+    if (direction == DIRECTION_OUT && cctx->timing_offset != INT_MIN) {
+        res = ctx->cfg->SetInt(bmdDeckLinkConfigReferenceInputTimingOffset, cctx->timing_offset);
+        if (res != S_OK)
+            av_log(avctx, AV_LOG_WARNING, "Setting timing offset failed.\n");
+    }
     return 0;
 }
 
diff --git a/libavdevice/decklink_common_c.h b/libavdevice/decklink_common_c.h
index 8e3bbeb7df79c..ca85ec2504c5d 100644
--- a/libavdevice/decklink_common_c.h
+++ b/libavdevice/decklink_common_c.h
@@ -57,6 +57,7 @@ struct decklink_cctx {
     int64_t queue_size;
     int copyts;
     int64_t timestamp_align;
+    int timing_offset;
 };
 
 #endif /* AVDEVICE_DECKLINK_COMMON_C_H */
diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index deb8f787ee66c..9de8fa0c9da1a 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp
@@ -1167,14 +1167,14 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
         break;
     case bmdFormat8BitARGB:
         st->codecpar->codec_id    = AV_CODEC_ID_RAWVIDEO;
-        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);
         st->codecpar->format      = AV_PIX_FMT_0RGB;
+        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);
         st->codecpar->bit_rate    = av_rescale(ctx->bmd_width * ctx->bmd_height * 32, st->time_base.den, st->time_base.num);
         break;
     case bmdFormat8BitBGRA:
         st->codecpar->codec_id    = AV_CODEC_ID_RAWVIDEO;
-        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);
         st->codecpar->format      = AV_PIX_FMT_BGR0;
+        st->codecpar->codec_tag   = avcodec_pix_fmt_to_codec_tag((enum AVPixelFormat)st->codecpar->format);
         st->codecpar->bit_rate    = av_rescale(ctx->bmd_width * ctx->bmd_height * 32, st->time_base.den, st->time_base.num);
         break;
     case bmdFormat10BitRGB:
diff --git a/libavdevice/decklink_enc_c.c b/libavdevice/decklink_enc_c.c
index 6169078159c57..63cbd39ecd59f 100644
--- a/libavdevice/decklink_enc_c.c
+++ b/libavdevice/decklink_enc_c.c
@@ -35,6 +35,8 @@ static const AVOption options[] = {
     { "unset"       ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 0   }, 0, 0, ENC, "duplex_mode"},
     { "half"        ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 1   }, 0, 0, ENC, "duplex_mode"},
     { "full"        ,  NULL                     , 0                   , AV_OPT_TYPE_CONST , { .i64 = 2   }, 0, 0, ENC, "duplex_mode"},
+    { "timing_offset", "genlock timing pixel offset", OFFSET(timing_offset), AV_OPT_TYPE_INT,   { .i64 = INT_MIN }, INT_MIN, INT_MAX, ENC, "timing_offset"},
+    { "unset"       ,  NULL                     , 0                        , AV_OPT_TYPE_CONST, { .i64 = INT_MIN },       0,       0, ENC, "timing_offset"},
     { NULL },
 };
 
diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index 25481580af390..d7f5bd7069ac6 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c
@@ -278,12 +278,12 @@ dshow_cycle_devices(AVFormatContext *avctx, ICreateDevEnum *devenum,
                     goto fail1;
                 }
                 *device_unique_name = unique_name;
+                unique_name = NULL;
                 // success, loop will end now
             }
         } else {
             av_log(avctx, AV_LOG_INFO, " \"%s\"\n", friendly_name);
             av_log(avctx, AV_LOG_INFO, "    Alternative name \"%s\"\n", unique_name);
-            av_free(unique_name);
         }
 
 fail1:
@@ -291,7 +291,8 @@ dshow_cycle_devices(AVFormatContext *avctx, ICreateDevEnum *devenum,
             IMalloc_Free(co_malloc, olestr);
         if (bind_ctx)
             IBindCtx_Release(bind_ctx);
-        av_free(friendly_name);
+        av_freep(&friendly_name);
+        av_freep(&unique_name);
         if (bag)
             IPropertyBag_Release(bag);
         IMoniker_Release(m);
@@ -941,6 +942,8 @@ dshow_add_device(AVFormatContext *avctx,
     AVStream *st;
     int ret = AVERROR(EIO);
 
+    type.pbFormat = NULL;
+
     st = avformat_new_stream(avctx, NULL);
     if (!st) {
         ret = AVERROR(ENOMEM);
@@ -989,7 +992,8 @@ dshow_add_device(AVFormatContext *avctx,
             if (par->codec_id == AV_CODEC_ID_NONE) {
                 av_log(avctx, AV_LOG_ERROR, "Unknown compression type. "
                                  "Please report type 0x%X.\n", (int) bih->biCompression);
-                return AVERROR_PATCHWELCOME;
+                ret = AVERROR_PATCHWELCOME;
+                goto error;
             }
             par->bits_per_coded_sample = bih->biBitCount;
         } else {
@@ -1030,6 +1034,8 @@ dshow_add_device(AVFormatContext *avctx,
     ret = 0;
 
 error:
+    if (type.pbFormat)
+        CoTaskMemFree(type.pbFormat);
     return ret;
 }
 
diff --git a/libavdevice/dshow_pin.c b/libavdevice/dshow_pin.c
index 664246da92aba..53b1c9150d94a 100644
--- a/libavdevice/dshow_pin.c
+++ b/libavdevice/dshow_pin.c
@@ -249,8 +249,20 @@ libAVPin_Setup(libAVPin *this, libAVFilter *filter)
 
     return 1;
 }
+
+static void
+libAVPin_Free(libAVPin *this)
+{
+    if (!this)
+        return;
+    av_freep(&this->imemvtbl);
+    if (this->type.pbFormat) {
+        CoTaskMemFree(this->type.pbFormat);
+        this->type.pbFormat = NULL;
+    }
+}
 DECLARE_CREATE(libAVPin, libAVPin_Setup(this, filter), libAVFilter *filter)
-DECLARE_DESTROY(libAVPin, nothing)
+DECLARE_DESTROY(libAVPin, libAVPin_Free)
 
 /*****************************************************************************
  * libAVMemInputPin
diff --git a/libavdevice/gdigrab.c b/libavdevice/gdigrab.c
index ab08c11788481..b226bd0831495 100644
--- a/libavdevice/gdigrab.c
+++ b/libavdevice/gdigrab.c
@@ -277,14 +277,20 @@ gdigrab_read_header(AVFormatContext *s1)
     }
     bpp = GetDeviceCaps(source_hdc, BITSPIXEL);
 
+    horzres = GetDeviceCaps(source_hdc, HORZRES);
+    vertres = GetDeviceCaps(source_hdc, VERTRES);
+    desktophorzres = GetDeviceCaps(source_hdc, DESKTOPHORZRES);
+    desktopvertres = GetDeviceCaps(source_hdc, DESKTOPVERTRES);
+
     if (hwnd) {
         GetClientRect(hwnd, &virtual_rect);
+        /* window -- get the right height and width for scaling DPI */
+        virtual_rect.left   = virtual_rect.left   * desktophorzres / horzres;
+        virtual_rect.right  = virtual_rect.right  * desktophorzres / horzres;
+        virtual_rect.top    = virtual_rect.top    * desktopvertres / vertres;
+        virtual_rect.bottom = virtual_rect.bottom * desktopvertres / vertres;
     } else {
         /* desktop -- get the right height and width for scaling DPI */
-        horzres = GetDeviceCaps(source_hdc, HORZRES);
-        vertres = GetDeviceCaps(source_hdc, VERTRES);
-        desktophorzres = GetDeviceCaps(source_hdc, DESKTOPHORZRES);
-        desktopvertres = GetDeviceCaps(source_hdc, DESKTOPVERTRES);
         virtual_rect.left = GetSystemMetrics(SM_XVIRTUALSCREEN);
         virtual_rect.top = GetSystemMetrics(SM_YVIRTUALSCREEN);
         virtual_rect.right = (virtual_rect.left + GetSystemMetrics(SM_CXVIRTUALSCREEN)) * desktophorzres / horzres;
@@ -473,25 +479,26 @@ static void paint_mouse_pointer(AVFormatContext *s1, struct gdigrab *gdigrab)
             goto icon_error;
         }
 
-        pos.x = ci.ptScreenPos.x - clip_rect.left - info.xHotspot;
-        pos.y = ci.ptScreenPos.y - clip_rect.top - info.yHotspot;
-
         if (hwnd) {
             RECT rect;
 
             if (GetWindowRect(hwnd, &rect)) {
-                pos.x -= rect.left;
-                pos.y -= rect.top;
+                pos.x = ci.ptScreenPos.x - clip_rect.left - info.xHotspot - rect.left;
+                pos.y = ci.ptScreenPos.y - clip_rect.top - info.yHotspot - rect.top;
+
+                // scale by the DPI ratio so the cursor lands in the right place on HiDPI screens
+                pos.x = pos.x * desktophorzres / horzres;
+                pos.y = pos.y * desktopvertres / vertres;
             } else {
                 CURSOR_ERROR("Couldn't get window rectangle");
                 goto icon_error;
             }
+        } else {
+            // scale the screen position by the DPI ratio so the cursor lands in the right place on HiDPI screens
+            pos.x = ci.ptScreenPos.x * desktophorzres / horzres - clip_rect.left - info.xHotspot;
+            pos.y = ci.ptScreenPos.y * desktopvertres / vertres - clip_rect.top - info.yHotspot;
         }
 
-        //that would keep the correct location of mouse with hidpi screens
-        pos.x = pos.x * desktophorzres / horzres;
-        pos.y = pos.y * desktopvertres / vertres;
-
         av_log(s1, AV_LOG_DEBUG, "Cursor pos (%li,%li) -> (%li,%li)\n",
                 ci.ptScreenPos.x, ci.ptScreenPos.y, pos.x, pos.y);
 
diff --git a/libavdevice/iec61883.c b/libavdevice/iec61883.c
index dcf755392675a..cafafb2672b70 100644
--- a/libavdevice/iec61883.c
+++ b/libavdevice/iec61883.c
@@ -23,7 +23,7 @@
  * libiec61883 interface
  */
 
-#include <sys/poll.h>
+#include <poll.h>
 #include <libraw1394/raw1394.h>
 #include <libavc1394/avc1394.h>
 #include <libavc1394/rom1394.h>
diff --git a/libavdevice/libndi_newtek_dec.c b/libavdevice/libndi_newtek_dec.c
index 4fb719770e57c..d2d5648c4bbac 100644
--- a/libavdevice/libndi_newtek_dec.c
+++ b/libavdevice/libndi_newtek_dec.c
@@ -33,6 +33,7 @@ struct NDIContext {
     int find_sources;
     int64_t wait_sources;
     int allow_video_fields;
+    char *extra_ips;
 
     /* Runtime */
     NDIlib_recv_create_t *recv;
@@ -99,7 +100,7 @@ static int ndi_find_sources(AVFormatContext *avctx, const char *name, NDIlib_sou
     struct NDIContext *ctx = avctx->priv_data;
     const NDIlib_source_t *ndi_srcs = NULL;
     const NDIlib_find_create_t find_create_desc = { .show_local_sources = true,
-        .p_groups = NULL, .p_extra_ips = NULL };
+        .p_groups = NULL, .p_extra_ips = ctx->extra_ips };
 
     if (!ctx->ndi_find)
         ctx->ndi_find = NDIlib_find_create2(&find_create_desc);
@@ -317,6 +318,7 @@ static const AVOption options[] = {
     { "find_sources", "Find available sources"  , OFFSET(find_sources), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, DEC },
     { "wait_sources", "Time to wait until the number of online sources have changed"  , OFFSET(wait_sources), AV_OPT_TYPE_DURATION, { .i64 = 1000000 }, 100000, 20000000, DEC },
     { "allow_video_fields", "When this flag is FALSE, all video that you receive will be progressive"  , OFFSET(allow_video_fields), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, DEC },
+    { "extra_ips", "List of comma separated ip addresses to scan for remote sources",       OFFSET(extra_ips), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC },
     { NULL },
 };
 
diff --git a/libavdevice/sdl2.c b/libavdevice/sdl2.c
index da5143078eef8..d6fc74a66c765 100644
--- a/libavdevice/sdl2.c
+++ b/libavdevice/sdl2.c
@@ -40,6 +40,7 @@ typedef struct {
     SDL_Renderer *renderer;
     char *window_title;
     int window_width, window_height;  /**< size of the window */
+    int window_x, window_y;           /**< position of the window */
     int window_fullscreen;
     int window_borderless;
     int enable_quit_action;
@@ -155,8 +156,6 @@ static int sdl2_write_trailer(AVFormatContext *s)
     return 0;
 }
 
-#define SDL_BASE_FLAGS (SDL_SWSURFACE|SDL_WINDOW_RESIZABLE)
-
 static int sdl2_write_header(AVFormatContext *s)
 {
     SDLContext *sdl = s->priv_data;
@@ -196,8 +195,9 @@ static int sdl2_write_header(AVFormatContext *s)
     }
 
     /* resize texture to width and height from the codec context information */
-    flags = SDL_BASE_FLAGS | (sdl->window_fullscreen ? SDL_WINDOW_FULLSCREEN : 0) |
-                             (sdl->window_borderless ? SDL_WINDOW_BORDERLESS : 0);
+    flags = SDL_WINDOW_HIDDEN |
+            (sdl->window_fullscreen ? SDL_WINDOW_FULLSCREEN : 0) |
+            (sdl->window_borderless ? SDL_WINDOW_BORDERLESS : SDL_WINDOW_RESIZABLE);
 
     /* initialization */
     if (!sdl->inited){
@@ -216,6 +216,8 @@ static int sdl2_write_header(AVFormatContext *s)
     }
 
     SDL_SetWindowTitle(sdl->window, sdl->window_title);
+    SDL_SetWindowPosition(sdl->window, sdl->window_x, sdl->window_y);
+    SDL_ShowWindow(sdl->window);
 
     sdl->texture = SDL_CreateTexture(sdl->renderer, sdl->texture_fmt, SDL_TEXTUREACCESS_STREAMING,
                                      codecpar->width, codecpar->height);
@@ -337,6 +339,8 @@ static int sdl2_write_packet(AVFormatContext *s, AVPacket *pkt)
 static const AVOption options[] = {
     { "window_title",      "set SDL window title",       OFFSET(window_title), AV_OPT_TYPE_STRING,     { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_size",       "set SDL window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_x",          "set SDL window x position",  OFFSET(window_x),     AV_OPT_TYPE_INT,        { .i64 = SDL_WINDOWPOS_CENTERED }, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+    { "window_y",          "set SDL window y position",  OFFSET(window_y),     AV_OPT_TYPE_INT,        { .i64 = SDL_WINDOWPOS_CENTERED }, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_fullscreen", "set SDL window fullscreen",  OFFSET(window_fullscreen), AV_OPT_TYPE_BOOL,  { .i64 = 0 },    0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_borderless", "set SDL window border off",  OFFSET(window_borderless), AV_OPT_TYPE_BOOL,  { .i64 = 0 },    0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "window_enable_quit", "set if quit action is available", OFFSET(enable_quit_action), AV_OPT_TYPE_INT, {.i64=1},   0, 1, AV_OPT_FLAG_ENCODING_PARAM },
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 10a0ff0dd611c..1b9c6e760b419 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -95,7 +95,11 @@ struct video_data {
     int (*open_f)(const char *file, int oflag, ...);
     int (*close_f)(int fd);
     int (*dup_f)(int fd);
+#ifdef __GLIBC__
     int (*ioctl_f)(int fd, unsigned long int request, ...);
+#else
+    int (*ioctl_f)(int fd, int request, ...);
+#endif
     ssize_t (*read_f)(int fd, void *buffer, size_t n);
     void *(*mmap_f)(void *start, size_t length, int prot, int flags, int fd, int64_t offset);
     int (*munmap_f)(void *_start, size_t length);
diff --git a/libavdevice/version.h b/libavdevice/version.h
index e6ee009cc4554..e6ae2c44fc7fa 100644
--- a/libavdevice/version.h
+++ b/libavdevice/version.h
@@ -28,8 +28,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVDEVICE_VERSION_MAJOR  58
-#define LIBAVDEVICE_VERSION_MINOR   4
-#define LIBAVDEVICE_VERSION_MICRO 105
+#define LIBAVDEVICE_VERSION_MINOR   6
+#define LIBAVDEVICE_VERSION_MICRO 101
 
 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
                                                LIBAVDEVICE_VERSION_MINOR, \
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 62cc2f561f250..fef6ec5c55f45 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -28,6 +28,7 @@ OBJS-$(HAVE_THREADS)                         += pthread.o
 OBJS-$(CONFIG_QSVVPP)                        += qsvvpp.o
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn_backend_tf.o
 OBJS-$(CONFIG_DNN)                           += dnn_interface.o dnn_backend_native.o $(DNN-OBJS-yes)
+OBJS-$(CONFIG_SCENE_SAD)                     += scene_sad.o
 
 # audio filters
 OBJS-$(CONFIG_ABENCH_FILTER)                 += f_bench.o
@@ -62,6 +63,7 @@ OBJS-$(CONFIG_AMETADATA_FILTER)              += f_metadata.o
 OBJS-$(CONFIG_AMIX_FILTER)                   += af_amix.o
 OBJS-$(CONFIG_AMULTIPLY_FILTER)              += af_amultiply.o
 OBJS-$(CONFIG_ANEQUALIZER_FILTER)            += af_anequalizer.o
+OBJS-$(CONFIG_ANLMDN_FILTER)                 += af_anlmdn.o
 OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
 OBJS-$(CONFIG_APAD_FILTER)                   += af_apad.o
 OBJS-$(CONFIG_APERMS_FILTER)                 += f_perms.o
@@ -141,6 +143,7 @@ OBJS-$(CONFIG_ANOISESRC_FILTER)              += asrc_anoisesrc.o
 OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o
 OBJS-$(CONFIG_FLITE_FILTER)                  += asrc_flite.o
 OBJS-$(CONFIG_HILBERT_FILTER)                += asrc_hilbert.o
+OBJS-$(CONFIG_SINC_FILTER)                   += asrc_sinc.o
 OBJS-$(CONFIG_SINE_FILTER)                   += asrc_sine.o
 
 OBJS-$(CONFIG_ANULLSINK_FILTER)              += asink_anullsink.o
@@ -164,8 +167,10 @@ OBJS-$(CONFIG_BM3D_FILTER)                   += vf_bm3d.o
 OBJS-$(CONFIG_BOXBLUR_FILTER)                += vf_boxblur.o boxblur.o
 OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
                                                 opencl/avgblur.o boxblur.o
-OBJS-$(CONFIG_BWDIF_FILTER)                  += vf_bwdif.o
+OBJS-$(CONFIG_BWDIF_FILTER)                  += vf_bwdif.o yadif_common.o
+OBJS-$(CONFIG_CHROMAHOLD_FILTER)             += vf_chromakey.o
 OBJS-$(CONFIG_CHROMAKEY_FILTER)              += vf_chromakey.o
+OBJS-$(CONFIG_CHROMASHIFT_FILTER)            += vf_chromashift.o
 OBJS-$(CONFIG_CIESCOPE_FILTER)               += vf_ciescope.o
 OBJS-$(CONFIG_CODECVIEW_FILTER)              += vf_codecview.o
 OBJS-$(CONFIG_COLORBALANCE_FILTER)           += vf_colorbalance.o
@@ -191,6 +196,7 @@ OBJS-$(CONFIG_DEBAND_FILTER)                 += vf_deband.o
 OBJS-$(CONFIG_DEBLOCK_FILTER)                += vf_deblock.o
 OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
 OBJS-$(CONFIG_DECONVOLVE_FILTER)             += vf_convolve.o framesync.o
+OBJS-$(CONFIG_DEDOT_FILTER)                  += vf_dedot.o
 OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
 OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_deinterlace_qsv.o
@@ -233,11 +239,13 @@ OBJS-$(CONFIG_FPS_FILTER)                    += vf_fps.o
 OBJS-$(CONFIG_FRAMEPACK_FILTER)              += vf_framepack.o
 OBJS-$(CONFIG_FRAMERATE_FILTER)              += vf_framerate.o
 OBJS-$(CONFIG_FRAMESTEP_FILTER)              += vf_framestep.o
+OBJS-$(CONFIG_FREEZEDETECT_FILTER)           += vf_freezedetect.o
 OBJS-$(CONFIG_FREI0R_FILTER)                 += vf_frei0r.o
 OBJS-$(CONFIG_FSPP_FILTER)                   += vf_fspp.o
 OBJS-$(CONFIG_GBLUR_FILTER)                  += vf_gblur.o
 OBJS-$(CONFIG_GEQ_FILTER)                    += vf_geq.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += vf_gradfun.o
+OBJS-$(CONFIG_GRAPHMONITOR_FILTER)           += f_graphmonitor.o
 OBJS-$(CONFIG_GREYEDGE_FILTER)               += vf_colorconstancy.o
 OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o framesync.o
 OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
@@ -272,6 +280,7 @@ OBJS-$(CONFIG_LUTRGB_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_LUTYUV_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_MASKEDCLAMP_FILTER)            += vf_maskedclamp.o framesync.o
 OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += vf_maskedmerge.o framesync.o
+OBJS-$(CONFIG_MASKFUN_FILTER)                += vf_maskfun.o
 OBJS-$(CONFIG_MCDEINT_FILTER)                += vf_mcdeint.o
 OBJS-$(CONFIG_MERGEPLANES_FILTER)            += vf_mergeplanes.o framesync.o
 OBJS-$(CONFIG_MESTIMATE_FILTER)              += vf_mestimate.o motion_estimation.o
@@ -324,6 +333,7 @@ OBJS-$(CONFIG_REMOVEGRAIN_FILTER)            += vf_removegrain.o
 OBJS-$(CONFIG_REMOVELOGO_FILTER)             += bbox.o lswsutils.o lavfutils.o vf_removelogo.o
 OBJS-$(CONFIG_REPEATFIELDS_FILTER)           += vf_repeatfields.o
 OBJS-$(CONFIG_REVERSE_FILTER)                += f_reverse.o
+OBJS-$(CONFIG_RGBASHIFT_FILTER)              += vf_chromashift.o
 OBJS-$(CONFIG_ROBERTS_FILTER)                += vf_convolution.o
 OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
@@ -340,7 +350,8 @@ OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
 OBJS-$(CONFIG_SENDCMD_FILTER)                += f_sendcmd.o
 OBJS-$(CONFIG_SEPARATEFIELDS_FILTER)         += vf_separatefields.o
 OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
-OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setfield.o
+OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setparams.o
+OBJS-$(CONFIG_SETPARAMS_FILTER)              += vf_setparams.o
 OBJS-$(CONFIG_SETPTS_FILTER)                 += setpts.o
 OBJS-$(CONFIG_SETRANGE_FILTER)               += vf_setparams.o
 OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
@@ -379,8 +390,11 @@ OBJS-$(CONFIG_TMIX_FILTER)                   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o colorspace.o
 OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace.o opencl.o \
                                                 opencl/tonemap.o opencl/colorspace_common.o
+OBJS-$(CONFIG_TPAD_FILTER)                   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o
+OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)       += vf_transpose_opencl.o opencl.o opencl/transpose.o
+OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER)        += vf_transpose_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
@@ -391,6 +405,7 @@ OBJS-$(CONFIG_VAGUEDENOISER_FILTER)          += vf_vaguedenoiser.o
 OBJS-$(CONFIG_VECTORSCOPE_FILTER)            += vf_vectorscope.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VFRDET_FILTER)                 += vf_vfrdet.o
+OBJS-$(CONFIG_VIBRANCE_FILTER)               += vf_vibrance.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
@@ -401,7 +416,10 @@ OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
 OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
 OBJS-$(CONFIG_WEAVE_FILTER)                  += vf_weave.o
 OBJS-$(CONFIG_XBR_FILTER)                    += vf_xbr.o
-OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o
+OBJS-$(CONFIG_XSTACK_FILTER)                 += vf_stack.o framesync.o
+OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o yadif_common.o
+OBJS-$(CONFIG_YADIF_CUDA_FILTER)             += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
+                                                yadif_common.o
 OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
 OBJS-$(CONFIG_ZSCALE_FILTER)                 += vf_zscale.o
@@ -432,6 +450,7 @@ OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
 # multimedia filters
 OBJS-$(CONFIG_ABITSCOPE_FILTER)              += avf_abitscope.o
 OBJS-$(CONFIG_ADRAWGRAPH_FILTER)             += f_drawgraph.o
+OBJS-$(CONFIG_AGRAPHMONITOR_FILTER)          += f_graphmonitor.o
 OBJS-$(CONFIG_AHISTOGRAM_FILTER)             += avf_ahistogram.o
 OBJS-$(CONFIG_APHASEMETER_FILTER)            += avf_aphasemeter.o
 OBJS-$(CONFIG_AVECTORSCOPE_FILTER)           += avf_avectorscope.o
diff --git a/libavfilter/af_acrossover.c b/libavfilter/af_acrossover.c
index 9acf3f14e45e5..3ccc4d72f6015 100644
--- a/libavfilter/af_acrossover.c
+++ b/libavfilter/af_acrossover.c
@@ -26,6 +26,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
+#include "libavutil/eval.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 
@@ -95,8 +96,7 @@ static av_cold int init(AVFilterContext *ctx)
 
         p = NULL;
 
-        ret = sscanf(arg, "%f", &freq);
-
+        av_sscanf(arg, "%f", &freq);
         if (freq <= 0) {
             av_log(ctx, AV_LOG_ERROR, "Frequency %f must be positive number.\n", freq);
             return AVERROR(EINVAL);
diff --git a/libavfilter/af_adelay.c b/libavfilter/af_adelay.c
index 46c3d51040fb9..eb97039566d71 100644
--- a/libavfilter/af_adelay.c
+++ b/libavfilter/af_adelay.c
@@ -19,10 +19,12 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "libavutil/samplefmt.h"
 #include "avfilter.h"
 #include "audio.h"
+#include "filters.h"
 #include "internal.h"
 
 typedef struct ChanDelay {
@@ -38,8 +40,10 @@ typedef struct AudioDelayContext {
     ChanDelay *chandelay;
     int nb_delays;
     int block_align;
-    unsigned max_delay;
+    int64_t padding;
+    int64_t max_delay;
     int64_t next_pts;
+    int eof;
 
     void (*delay_channel)(ChanDelay *d, int nb_samples,
                           const uint8_t *src, uint8_t *dst);
@@ -137,7 +141,7 @@ static int config_input(AVFilterLink *inlink)
     p = s->delays;
     for (i = 0; i < s->nb_delays; i++) {
         ChanDelay *d = &s->chandelay[i];
-        float delay;
+        float delay, div;
         char type = 0;
         int ret;
 
@@ -146,10 +150,11 @@ static int config_input(AVFilterLink *inlink)
 
         p = NULL;
 
-        ret = sscanf(arg, "%d%c", &d->delay, &type);
+        ret = av_sscanf(arg, "%d%c", &d->delay, &type);
         if (ret != 2 || type != 'S') {
-            sscanf(arg, "%f", &delay);
-            d->delay = delay * inlink->sample_rate / 1000.0;
+            div = type == 's' ? 1.0 : 1000.0;
+            av_sscanf(arg, "%f", &delay);
+            d->delay = delay * inlink->sample_rate / div;
         }
 
         if (d->delay < 0) {
@@ -158,6 +163,21 @@ static int config_input(AVFilterLink *inlink)
         }
     }
 
+    s->padding = s->chandelay[0].delay;
+    for (i = 1; i < s->nb_delays; i++) {
+        ChanDelay *d = &s->chandelay[i];
+
+        s->padding = FFMIN(s->padding, d->delay);
+    }
+
+    if (s->padding) {
+        for (i = 0; i < s->nb_delays; i++) {
+            ChanDelay *d = &s->chandelay[i];
+
+            d->delay -= s->padding;
+        }
+    }
+
     for (i = 0; i < s->nb_delays; i++) {
         ChanDelay *d = &s->chandelay[i];
 
@@ -210,26 +230,30 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
             s->delay_channel(d, frame->nb_samples, src, dst);
     }
 
-    s->next_pts = frame->pts + av_rescale_q(frame->nb_samples, (AVRational){1, inlink->sample_rate}, inlink->time_base);
+    out_frame->pts = s->next_pts;
+    s->next_pts += av_rescale_q(frame->nb_samples, (AVRational){1, inlink->sample_rate}, inlink->time_base);
     av_frame_free(&frame);
     return ff_filter_frame(ctx->outputs[0], out_frame);
 }
 
-static int request_frame(AVFilterLink *outlink)
+static int activate(AVFilterContext *ctx) /* activate callback; takes over the work of the removed request_frame() */
 {
-    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
     AudioDelayContext *s = ctx->priv;
-    int ret;
+    AVFrame *frame = NULL;
+    int ret, status;
+    int64_t pts;
 
-    ret = ff_request_frame(ctx->inputs[0]);
-    if (ret == AVERROR_EOF && !ctx->is_disabled && s->max_delay) {
-        int nb_samples = FFMIN(s->max_delay, 2048);
-        AVFrame *frame;
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); /* NOTE(review): macro from filters.h, not shown here; presumably returns early once the output link has a status */
+
+    if (s->padding) { /* padding is the smallest configured delay (set in config_input); emit it as silence before consuming any input */
+        int nb_samples = FFMIN(s->padding, 2048); /* at most 2048 samples of silence per call */
 
         frame = ff_get_audio_buffer(outlink, nb_samples);
         if (!frame)
             return AVERROR(ENOMEM);
-        s->max_delay -= nb_samples;
+        s->padding -= nb_samples; /* count down the silence still owed */
 
         av_samples_set_silence(frame->extended_data, 0,
                                frame->nb_samples,
@@ -240,10 +264,47 @@ static int request_frame(AVFilterLink *outlink)
         if (s->next_pts != AV_NOPTS_VALUE)
             s->next_pts += av_rescale_q(nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
 
-        ret = filter_frame(ctx->inputs[0], frame);
+        return ff_filter_frame(outlink, frame); /* sent straight downstream, bypassing filter_frame() */
+    }
+
+    ret = ff_inlink_consume_frame(inlink, &frame); /* negative is an error, positive means a frame was handed over */
+    if (ret < 0)
+        return ret;
+
+    if (ret > 0)
+        return filter_frame(inlink, frame); /* one input frame processed per call */
+
+    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { /* no frame taken: check whether the input reported a status */
+        if (status == AVERROR_EOF)
+            s->eof = 1; /* remembered across calls so the tail below keeps being flushed */
     }
 
-    return ret;
+    if (s->eof && s->max_delay) { /* after EOF: feed silence through filter_frame() until max_delay samples have been sent */
+        int nb_samples = FFMIN(s->max_delay, 2048); /* again capped at 2048 samples per call */
+
+        frame = ff_get_audio_buffer(outlink, nb_samples);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        s->max_delay -= nb_samples; /* remaining tail length */
+
+        av_samples_set_silence(frame->extended_data, 0,
+                               frame->nb_samples,
+                               outlink->channels,
+                               frame->format);
+
+        frame->pts = s->next_pts;
+        return filter_frame(inlink, frame); /* unlike the leading padding, this silence goes through filter_frame() */
+    }
+
+    if (s->eof && s->max_delay == 0) { /* tail fully flushed */
+        ff_outlink_set_status(outlink, AVERROR_EOF, s->next_pts); /* EOF is stamped with the running next_pts */
+        return 0;
+    }
+
+    if (!s->eof)
+        FF_FILTER_FORWARD_WANTED(outlink, inlink); /* NOTE(review): macro not shown; presumably requests input when the output wants a frame, then returns */
+
+    return FFERROR_NOT_READY;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -262,16 +323,14 @@ static const AVFilterPad adelay_inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
         .config_props = config_input,
-        .filter_frame = filter_frame,
     },
     { NULL }
 };
 
 static const AVFilterPad adelay_outputs[] = {
     {
-        .name          = "default",
-        .request_frame = request_frame,
-        .type          = AVMEDIA_TYPE_AUDIO,
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO, /* no .request_frame any more: the filter sets .activate instead */
     },
     { NULL }
 };
@@ -282,6 +341,7 @@ AVFilter ff_af_adelay = {
     .query_formats = query_formats,
     .priv_size     = sizeof(AudioDelayContext),
     .priv_class    = &adelay_class,
+    .activate      = activate,
     .uninit        = uninit,
     .inputs        = adelay_inputs,
     .outputs       = adelay_outputs,
diff --git a/libavfilter/af_aecho.c b/libavfilter/af_aecho.c
index b9ac18d3a462b..876a149df4b10 100644
--- a/libavfilter/af_aecho.c
+++ b/libavfilter/af_aecho.c
@@ -78,7 +78,7 @@ static void fill_items(char *item_str, int *nb_items, float *items)
         char *tstr = av_strtok(p, "|", &saveptr);
         p = NULL;
         if (tstr)
-            new_nb_items += sscanf(tstr, "%f", &items[new_nb_items]) == 1;
+            new_nb_items += av_sscanf(tstr, "%f", &items[new_nb_items]) == 1;
     }
 
     *nb_items = new_nb_items;
diff --git a/libavfilter/af_afade.c b/libavfilter/af_afade.c
index 9aab644039cdc..195fb65ab519d 100644
--- a/libavfilter/af_afade.c
+++ b/libavfilter/af_afade.c
@@ -23,10 +23,6 @@
  * fade audio filter
  */
 
-#define FF_INTERNAL_FIELDS 1
-#include "framequeue.h"
-
-#include "libavutil/audio_fifo.h"
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
@@ -43,9 +39,7 @@ typedef struct AudioFadeContext {
     int64_t start_time;
     int overlap;
     int cf0_eof;
-    int prev_size;
     int crossfade_is_over;
-    AVAudioFifo *fifo[2];
     int64_t pts;
 
     void (*fade_samples)(uint8_t **dst, uint8_t * const *src,
@@ -57,7 +51,7 @@ typedef struct AudioFadeContext {
                               int curve0, int curve1);
 } AudioFadeContext;
 
-enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, LOSI, NB_CURVES };
+enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, LOSI, NONE, NB_CURVES }; /* NONE ("nofade" option): fade_gain() returns a constant 1.0 */
 
 #define OFFSET(x) offsetof(AudioFadeContext, x)
 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@@ -159,6 +153,9 @@ static double fade_gain(int curve, int64_t index, int64_t range)
                    gain = (A - B) / (C - B);
                }
         break;
+    case NONE:
+        gain = 1.0;
+        break;
     }
 
     return gain;
@@ -245,8 +242,8 @@ static const AVOption afade_options[] = {
     { "ns",           "set number of samples for fade duration",     OFFSET(nb_samples),   AV_OPT_TYPE_INT64,  {.i64 = 44100}, 1, INT64_MAX, FLAGS },
     { "start_time",   "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "st",           "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
-    { "duration",     "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
-    { "d",            "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+    { "duration",     "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
+    { "d",            "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "curve",        "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { "c",            "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { "tri",          "linear slope",                                0,                    AV_OPT_TYPE_CONST,  {.i64 = TRI  }, 0, 0, FLAGS, "curve" },
@@ -266,6 +263,7 @@ static const AVOption afade_options[] = {
     { "dese",         "double-exponential seat",                     0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
     { "desi",         "double-exponential sigmoid",                  0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
     { "losi",         "logistic sigmoid",                            0,                    AV_OPT_TYPE_CONST,  {.i64 = LOSI }, 0, 0, FLAGS, "curve" },
+    { "nofade",       "no fade; keep audio as-is",                   0,                    AV_OPT_TYPE_CONST,  {.i64 = NONE }, 0, 0, FLAGS, "curve" },
     { NULL }
 };
 
@@ -386,6 +384,7 @@ static const AVOption acrossfade_options[] = {
     {     "dese",     "double-exponential seat",                       0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
     {     "desi",     "double-exponential sigmoid",                    0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
     {     "losi",     "logistic sigmoid",                              0,                    AV_OPT_TYPE_CONST,  {.i64 = LOSI }, 0, 0, FLAGS, "curve" },
+    {     "nofade",   "no fade; keep audio as-is",                     0,                    AV_OPT_TYPE_CONST,  {.i64 = NONE }, 0, 0, FLAGS, "curve" },
     { "curve2",       "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { "c2",           "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
     { NULL }
@@ -472,8 +471,8 @@ static int activate(AVFilterContext *ctx)
         return ff_filter_frame(outlink, in);
     }
 
-    if (ff_framequeue_queued_samples(&ctx->inputs[0]->fifo) > s->nb_samples) {
-        nb_samples = ff_framequeue_queued_samples(&ctx->inputs[0]->fifo) - s->nb_samples;
+    if (ff_inlink_queued_samples(ctx->inputs[0]) > s->nb_samples) {
+        nb_samples = ff_inlink_queued_samples(ctx->inputs[0]) - s->nb_samples;
         if (nb_samples > 0) {
             ret = ff_inlink_consume_samples(ctx->inputs[0], nb_samples, nb_samples, &in);
             if (ret < 0) {
@@ -484,7 +483,7 @@ static int activate(AVFilterContext *ctx)
         s->pts += av_rescale_q(in->nb_samples,
             (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
         return ff_filter_frame(outlink, in);
-    } else if (ff_framequeue_queued_samples(&ctx->inputs[1]->fifo) >= s->nb_samples) {
+    } else if (ff_inlink_queued_samples(ctx->inputs[1]) >= s->nb_samples) {
         if (s->overlap) {
             out = ff_get_audio_buffer(outlink, s->nb_samples);
             if (!out)
@@ -554,10 +553,10 @@ static int activate(AVFilterContext *ctx)
             return ff_filter_frame(outlink, out);
         }
     } else if (ff_outlink_frame_wanted(ctx->outputs[0])) {
-        if (!s->cf0_eof && ctx->inputs[0]->status_in) {
+        if (!s->cf0_eof && ff_outlink_get_status(ctx->inputs[0])) {
             s->cf0_eof = 1;
         }
-        if (ctx->inputs[1]->status_in) {
+        if (ff_outlink_get_status(ctx->inputs[1])) {
             ff_outlink_set_status(ctx->outputs[0], AVERROR_EOF, AV_NOPTS_VALUE);
             return 0;
         }
diff --git a/libavfilter/af_afftdn.c b/libavfilter/af_afftdn.c
index fbcb0f18d5e56..9619aadbee0c8 100644
--- a/libavfilter/af_afftdn.c
+++ b/libavfilter/af_afftdn.c
@@ -28,6 +28,7 @@
 #include "avfilter.h"
 #include "audio.h"
 #include "formats.h"
+#include "filters.h"
 
 #define C       (M_LN10 * 0.1)
 #define RATIO    0.98
@@ -563,7 +564,7 @@ static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
 
         p = NULL;
 
-        ret = sscanf(arg, "%d", &band_noise[i]);
+        ret = av_sscanf(arg, "%d", &band_noise[i]);
         if (ret != 1) {
             av_log(s, AV_LOG_ERROR, "Custom band noise must be integer.\n");
             break;
@@ -1153,7 +1154,7 @@ static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
     }
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+static int output_frame(AVFilterLink *inlink) /* processes one window_length block from s->fifo and sends sample_advance samples downstream */
 {
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
@@ -1162,117 +1163,145 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     ThreadData td;
     int ret = 0;
 
-    if (s->pts == AV_NOPTS_VALUE)
-        s->pts = frame->pts;
+    in = ff_get_audio_buffer(outlink, s->window_length); /* scratch frame holding one full window */
+    if (!in)
+        return AVERROR(ENOMEM);
 
-    ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
-    av_frame_free(&frame);
+    ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length); /* peek only: the FIFO is drained by sample_advance further down */
     if (ret < 0)
-        return ret;
+        goto end; /* "end" frees the window frame */
 
-    while (av_audio_fifo_size(s->fifo) >= s->window_length) {
-        if (!in) {
-            in = ff_get_audio_buffer(outlink, s->window_length);
-            if (!in)
-                return AVERROR(ENOMEM);
-        }
+    if (s->track_noise) { /* noise tracking enabled: recompute every channel's profile for this block */
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
+            double levels[15];
 
-        ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, s->window_length);
-        if (ret < 0)
-            break;
+            get_auto_noise_levels(s, dnch, levels);
+            set_noise_profile(s, dnch, levels, 0);
+        }
 
-        if (s->track_noise) {
-            for (int ch = 0; ch < inlink->channels; ch++) {
-                DeNoiseChannel *dnch = &s->dnch[ch];
-                double levels[15];
+        if (s->noise_floor != s->last_noise_floor) /* only re-run set_parameters() when the floor changed */
+            set_parameters(s);
+    }
 
-                get_auto_noise_levels(s, dnch, levels);
-                set_noise_profile(s, dnch, levels, 0);
-            }
+    if (s->sample_noise_start) { /* flag raised by process_command() on "start" */
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
 
-            if (s->noise_floor != s->last_noise_floor)
-                set_parameters(s);
+            init_sample_noise(dnch);
         }
+        s->sample_noise_start = 0; /* one-shot request consumed */
+        s->sample_noise = 1; /* sampling stays active until an end request arrives */
+    }
 
-        if (s->sample_noise_start) {
-            for (int ch = 0; ch < inlink->channels; ch++) {
-                DeNoiseChannel *dnch = &s->dnch[ch];
+    if (s->sample_noise) { /* sampling active: hand this window to the sampler for every channel */
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
 
-                init_sample_noise(dnch);
-            }
-            s->sample_noise_start = 0;
-            s->sample_noise = 1;
+            sample_noise_block(s, dnch, in, ch);
         }
+    }
 
-        if (s->sample_noise) {
-            for (int ch = 0; ch < inlink->channels; ch++) {
-                DeNoiseChannel *dnch = &s->dnch[ch];
+    if (s->sample_noise_end) { /* flag raised by process_command() when sampling is ended */
+        for (int ch = 0; ch < inlink->channels; ch++) {
+            DeNoiseChannel *dnch = &s->dnch[ch];
+            double sample_noise[15];
 
-                sample_noise_block(s, dnch, in, ch);
-            }
+            finish_sample_noise(s, dnch, sample_noise);
+            set_noise_profile(s, dnch, sample_noise, 1); /* profile is set from the values finish_sample_noise() filled in */
+            set_band_parameters(s, dnch);
         }
+        s->sample_noise = 0; /* sampling is over */
+        s->sample_noise_end = 0;
+    }
 
-        if (s->sample_noise_end) {
-            for (int ch = 0; ch < inlink->channels; ch++) {
-                DeNoiseChannel *dnch = &s->dnch[ch];
-                double sample_noise[15];
+    s->block_count++;
+    td.in = in;
+    ctx->internal->execute(ctx, filter_channel, &td, NULL,
+                           FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx))); /* job count = min(channels, threads) */
 
-                finish_sample_noise(s, dnch, sample_noise);
-                set_noise_profile(s, dnch, sample_noise, 1);
-                set_band_parameters(s, dnch);
-            }
-            s->sample_noise = 0;
-            s->sample_noise_end = 0;
-        }
-
-        s->block_count++;
-        td.in = in;
-        ctx->internal->execute(ctx, filter_channel, &td, NULL,
-                               FFMIN(outlink->channels, ff_filter_get_nb_threads(ctx)));
+    out = ff_get_audio_buffer(outlink, s->sample_advance); /* each call outputs sample_advance samples */
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
 
-        out = ff_get_audio_buffer(outlink, s->sample_advance);
-        if (!out) {
-            ret = AVERROR(ENOMEM);
+    for (int ch = 0; ch < inlink->channels; ch++) {
+        DeNoiseChannel *dnch = &s->dnch[ch];
+        double *src = dnch->out_samples;
+        float *orig = (float *)in->extended_data[ch];
+        float *dst = (float *)out->extended_data[ch];
+
+        switch (s->output_mode) {
+        case IN_MODE: /* copy the input samples through unchanged */
+            for (int m = 0; m < s->sample_advance; m++)
+                dst[m] = orig[m];
+            break;
+        case OUT_MODE: /* output the channel's out_samples */
+            for (int m = 0; m < s->sample_advance; m++)
+                dst[m] = src[m];
             break;
+        case NOISE_MODE: /* output input minus out_samples */
+            for (int m = 0; m < s->sample_advance; m++)
+                dst[m] = orig[m] - src[m];
+            break;
+        default:
+            av_frame_free(&out); /* unknown mode: release the output frame before bailing out */
+            ret = AVERROR_BUG;
+            goto end;
         }
+        memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src)); /* shift out_samples left by sample_advance */
+        memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src)); /* and zero the vacated tail */
+    }
 
-        for (int ch = 0; ch < inlink->channels; ch++) {
-            DeNoiseChannel *dnch = &s->dnch[ch];
-            double *src = dnch->out_samples;
-            float *orig = (float *)in->extended_data[ch];
-            float *dst = (float *)out->extended_data[ch];
-
-            switch (s->output_mode) {
-            case IN_MODE:
-                for (int m = 0; m < s->sample_advance; m++)
-                    dst[m] = orig[m];
-                break;
-            case OUT_MODE:
-                for (int m = 0; m < s->sample_advance; m++)
-                    dst[m] = src[m];
-                break;
-            case NOISE_MODE:
-                for (int m = 0; m < s->sample_advance; m++)
-                    dst[m] = orig[m] - src[m];
-                break;
-            default:
-                return AVERROR_BUG;
-            }
-            memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
-            memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
-        }
+    av_audio_fifo_drain(s->fifo, s->sample_advance); /* drop only sample_advance samples; the rest of the window stays queued */
+
+    out->pts = s->pts;
+    ret = ff_filter_frame(outlink, out);
+    if (ret < 0)
+        goto end;
+    s->pts += s->sample_advance; /* advanced only after the frame was sent successfully */
+end:
+    av_frame_free(&in); /* every path after the allocation of "in" passes through here */
 
-        av_audio_fifo_drain(s->fifo, s->sample_advance);
+    return ret;
+}
 
-        out->pts = s->pts;
-        ret = ff_filter_frame(outlink, out);
+static int activate(AVFilterContext *ctx) /* activate callback: queues input in s->fifo and emits at most one block per call via output_frame() */
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioFFTDeNoiseContext *s = ctx->priv;
+    AVFrame *frame = NULL;
+    int ret;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); /* NOTE(review): macro from filters.h, not shown here; presumably returns early once the output link has a status */
+
+    ret = ff_inlink_consume_frame(inlink, &frame); /* negative is an error, positive means a frame was handed over */
+    if (ret < 0)
+        return ret;
+
+    if (ret > 0) {
+        if (s->pts == AV_NOPTS_VALUE)
+            s->pts = frame->pts; /* output timestamps start from the first frame's pts */
+
+        ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+        av_frame_free(&frame); /* the frame is released right after its samples are queued, on success or failure */
         if (ret < 0)
-            break;
-        s->pts += s->sample_advance;
+            return ret;
     }
-    av_frame_free(&in);
 
-    return ret;
+    if (av_audio_fifo_size(s->fifo) >= s->window_length) /* a full window is queued */
+        return output_frame(inlink); /* one block per activation, not a loop */
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink); /* NOTE(review): macro not shown; presumably passes an input status such as EOF on to the output and returns */
+    if (ff_outlink_frame_wanted(outlink) &&
+        av_audio_fifo_size(s->fifo) < s->window_length) {
+        ff_inlink_request_frame(inlink); /* output wants data but less than a window is queued: ask for more input */
+        return 0;
+    }
+
+    return FFERROR_NOT_READY;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -1352,7 +1381,8 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
         if (!strcmp(args, "start")) {
             s->sample_noise_start = 1;
             s->sample_noise_end = 0;
-        } else if (!strcmp(args, "end")) {
+        } else if (!strcmp(args, "end") ||
+                   !strcmp(args, "stop")) {
             s->sample_noise_start = 0;
             s->sample_noise_end = 1;
         }
@@ -1360,7 +1390,7 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
                !strcmp(cmd, "noise_reduction")) {
         float nr;
 
-        if (sscanf(args, "%f", &nr) == 1) {
+        if (av_sscanf(args, "%f", &nr) == 1) {
             s->noise_reduction = av_clipf(nr, 0.01, 97);
             need_reset = 1;
         }
@@ -1368,7 +1398,7 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
                !strcmp(cmd, "noise_floor")) {
         float nf;
 
-        if (sscanf(args, "%f", &nf) == 1) {
+        if (av_sscanf(args, "%f", &nf) == 1) {
             s->noise_floor = av_clipf(nf, -80, -20);
             need_reset = 1;
         }
@@ -1393,7 +1423,6 @@ static const AVFilterPad inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
         .config_props = config_input,
     },
     { NULL }
@@ -1413,6 +1442,7 @@ AVFilter ff_af_afftdn = {
     .query_formats   = query_formats,
     .priv_size       = sizeof(AudioFFTDeNoiseContext),
     .priv_class      = &afftdn_class,
+    .activate        = activate,
     .uninit          = uninit,
     .inputs          = inputs,
     .outputs         = outputs,
diff --git a/libavfilter/af_afftfilt.c b/libavfilter/af_afftfilt.c
index 7f28e1f77bcdf..8518f08dc5eb5 100644
--- a/libavfilter/af_afftfilt.c
+++ b/libavfilter/af_afftfilt.c
@@ -36,6 +36,7 @@ typedef struct AFFTFiltContext {
 
     FFTContext *fft, *ifft;
     FFTComplex **fft_data;
+    FFTComplex **fft_temp;
     int nb_exprs;
     int window_size;
     AVExpr **real;
@@ -51,15 +52,15 @@ typedef struct AFFTFiltContext {
     float *window_func_lut;
 } AFFTFiltContext;
 
-static const char *const var_names[] = {            "sr",     "b",       "nb",        "ch",        "chs",   "pts",        NULL };
-enum                                   { VAR_SAMPLE_RATE, VAR_BIN, VAR_NBBINS, VAR_CHANNEL, VAR_CHANNELS, VAR_PTS, VAR_VARS_NB };
+static const char *const var_names[] = {            "sr",     "b",       "nb",        "ch",        "chs",   "pts",     "re",     "im", NULL };
+enum                                   { VAR_SAMPLE_RATE, VAR_BIN, VAR_NBBINS, VAR_CHANNEL, VAR_CHANNELS, VAR_PTS, VAR_REAL, VAR_IMAG, VAR_VARS_NB };
 
 #define OFFSET(x) offsetof(AFFTFiltContext, x)
 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
 static const AVOption afftfilt_options[] = {
-    { "real", "set channels real expressions",       OFFSET(real_str), AV_OPT_TYPE_STRING, {.str = "1" }, 0, 0, A },
-    { "imag",  "set channels imaginary expressions", OFFSET(img_str),  AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, A },
+    { "real", "set channels real expressions",       OFFSET(real_str), AV_OPT_TYPE_STRING, {.str = "re" }, 0, 0, A },
+    { "imag", "set channels imaginary expressions",  OFFSET(img_str),  AV_OPT_TYPE_STRING, {.str = "im" }, 0, 0, A },
     { "win_size", "set window size", OFFSET(fft_bits), AV_OPT_TYPE_INT, {.i64=12}, 4, 17, A, "fft" },
         { "w16",    0, 0, AV_OPT_TYPE_CONST, {.i64=4},  0, 0, A, "fft" },
         { "w32",    0, 0, AV_OPT_TYPE_CONST, {.i64=5},  0, 0, A, "fft" },
@@ -81,13 +82,56 @@ static const AVOption afftfilt_options[] = {
         { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
         { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
         { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, A, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, A, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, A, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, A, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, A, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, A, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, A, "win_func" },
         { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, A, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, A, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, A, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, A, "win_func" },
+        { "tukey",    "Tukey",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_TUKEY},    0, 0, A, "win_func" },
+        { "dolph",    "Dolph-Chebyshev",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_DOLPH},    0, 0, A, "win_func" },
+        { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, A, "win_func" },
+        { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, A, "win_func" },
+        { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, A, "win_func" },
+        { "bohman",   "Bohman",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BOHMAN},   0, 0, A, "win_func" },
     { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0,  1, A },
     { NULL },
 };
 
 AVFILTER_DEFINE_CLASS(afftfilt);
 
+static inline double getreal(void *priv, double x, double ch)
+{
+    /* Expression helper: real part of bin x of channel ch; both arguments are clipped to range. */
+    const AFFTFiltContext *afft = priv;
+    const int chan = av_clip(ch, 0, afft->nb_exprs - 1);
+    const int bin  = av_clip(x,  0, afft->window_size / 2);
+    const FFTComplex *spectrum = afft->fft_data[chan];
+
+    return spectrum[bin].re;
+}
+
+static inline double getimag(void *priv, double x, double ch)
+{
+    /* Expression helper: imaginary part of bin x of channel ch; both arguments are clipped to range. */
+    const AFFTFiltContext *afft = priv;
+    const int chan = av_clip(ch, 0, afft->nb_exprs - 1);
+    const int bin  = av_clip(x,  0, afft->window_size / 2);
+    const FFTComplex *spectrum = afft->fft_data[chan];
+
+    return spectrum[bin].im;
+}
+
+static double realf(void *priv, double x, double ch) { return getreal(priv, x, ch); }
+static double imagf(void *priv, double x, double ch) { return getimag(priv, x, ch); }
+/* Two-argument expression functions for av_expr_parse(): parallel NULL-terminated tables, file-local and read-only. */
+static const char *const func2_names[]    = { "real", "imag", NULL };
+static double (* const func2[])(void *, double, double) = {  realf,  imagf, NULL };
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -98,6 +142,7 @@ static int config_input(AVFilterLink *inlink)
     char *args;
     const char *last_expr = "1";
 
+    s->pts  = AV_NOPTS_VALUE;
     s->fft  = av_fft_init(s->fft_bits, 0);
     s->ifft = av_fft_init(s->fft_bits, 1);
     if (!s->fft || !s->ifft)
@@ -109,12 +154,22 @@ static int config_input(AVFilterLink *inlink)
     if (!s->fft_data)
         return AVERROR(ENOMEM);
 
+    s->fft_temp = av_calloc(inlink->channels, sizeof(*s->fft_temp));
+    if (!s->fft_temp)
+        return AVERROR(ENOMEM);
+
     for (ch = 0; ch < inlink->channels; ch++) {
         s->fft_data[ch] = av_calloc(s->window_size, sizeof(**s->fft_data));
         if (!s->fft_data[ch])
             return AVERROR(ENOMEM);
     }
 
+    for (ch = 0; ch < inlink->channels; ch++) {
+        s->fft_temp[ch] = av_calloc(s->window_size, sizeof(**s->fft_temp));
+        if (!s->fft_temp[ch])
+            return AVERROR(ENOMEM);
+    }
+
     s->real = av_calloc(inlink->channels, sizeof(*s->real));
     if (!s->real)
         return AVERROR(ENOMEM);
@@ -131,7 +186,7 @@ static int config_input(AVFilterLink *inlink)
         char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
 
         ret = av_expr_parse(&s->real[ch], arg ? arg : last_expr, var_names,
-                            NULL, NULL, NULL, NULL, 0, ctx);
+                            NULL, NULL, func2_names, func2, 0, ctx);
         if (ret < 0)
             break;
         if (arg)
@@ -149,7 +204,7 @@ static int config_input(AVFilterLink *inlink)
         char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
 
         ret = av_expr_parse(&s->imag[ch], arg ? arg : last_expr, var_names,
-                            NULL, NULL, NULL, NULL, 0, ctx);
+                            NULL, NULL, func2_names, func2, 0, ctx);
         if (ret < 0)
             break;
         if (arg)
@@ -197,6 +252,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     int ch, n, ret, i, j, k;
     int start = s->start, end = s->end;
 
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = frame->pts;
+
     ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
     av_frame_free(&frame);
     if (ret < 0)
@@ -235,43 +293,49 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
         for (ch = 0; ch < inlink->channels; ch++) {
             FFTComplex *fft_data = s->fft_data[ch];
-            float *buf = (float *)s->buffer->extended_data[ch];
-            int x;
-
-            values[VAR_CHANNEL] = ch;
 
             av_fft_permute(s->fft, fft_data);
             av_fft_calc(s->fft, fft_data);
+        }
+
+        for (ch = 0; ch < inlink->channels; ch++) {
+            FFTComplex *fft_data = s->fft_data[ch];
+            FFTComplex *fft_temp = s->fft_temp[ch];
+            float *buf = (float *)s->buffer->extended_data[ch];
+            int x;
+            values[VAR_CHANNEL] = ch;
 
-            for (n = 0; n < window_size / 2; n++) {
+            for (n = 0; n <= window_size / 2; n++) {
                 float fr, fi;
 
                 values[VAR_BIN] = n;
+                values[VAR_REAL] = fft_data[n].re;
+                values[VAR_IMAG] = fft_data[n].im;
 
                 fr = av_expr_eval(s->real[ch], values, s);
                 fi = av_expr_eval(s->imag[ch], values, s);
 
-                fft_data[n].re *= fr;
-                fft_data[n].im *= fi;
+                fft_temp[n].re = fr;
+                fft_temp[n].im = fi;
             }
 
             for (n = window_size / 2 + 1, x = window_size / 2 - 1; n < window_size; n++, x--) {
-                fft_data[n].re =  fft_data[x].re;
-                fft_data[n].im = -fft_data[x].im;
+                fft_temp[n].re =  fft_temp[x].re;
+                fft_temp[n].im = -fft_temp[x].im;
             }
 
-            av_fft_permute(s->ifft, fft_data);
-            av_fft_calc(s->ifft, fft_data);
+            av_fft_permute(s->ifft, fft_temp);
+            av_fft_calc(s->ifft, fft_temp);
 
             start = s->start;
             end = s->end;
             k = end;
             for (i = 0, j = start; j < k && i < window_size; i++, j++) {
-                buf[j] += s->fft_data[ch][i].re * f;
+                buf[j] += s->fft_temp[ch][i].re * f;
             }
 
             for (; i < window_size; i++, j++) {
-                buf[j] = s->fft_data[ch][i].re * f;
+                buf[j] = s->fft_temp[ch][i].re * f;
             }
 
             start += s->hop_size;
@@ -362,8 +426,11 @@ static av_cold void uninit(AVFilterContext *ctx)
     for (i = 0; i < s->nb_exprs; i++) {
         if (s->fft_data)
             av_freep(&s->fft_data[i]);
+        if (s->fft_temp)
+            av_freep(&s->fft_temp[i]);
     }
     av_freep(&s->fft_data);
+    av_freep(&s->fft_temp);
 
     for (i = 0; i < s->nb_exprs; i++) {
         av_expr_free(s->real[i]);
diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c
index 244da3ab4cff3..31919f62e9c93 100644
--- a/libavfilter/af_afir.c
+++ b/libavfilter/af_afir.c
@@ -56,61 +56,107 @@ static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t le
     sum[2 * n] += t[2 * n] * c[2 * n];
 }
 
-static int fir_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
+static int fir_quantum(AVFilterContext *ctx, AVFrame *out, int ch, int offset)
 {
     AudioFIRContext *s = ctx->priv;
-    const float *src = (const float *)s->in[0]->extended_data[ch];
-    int index1 = (s->index + 1) % 3;
-    int index2 = (s->index + 2) % 3;
-    float *sum = s->sum[ch];
-    AVFrame *out = arg;
-    float *block;
-    float *dst;
+    const float *in = (const float *)s->in[0]->extended_data[ch] + offset;
+    float *block, *buf, *ptr = (float *)out->extended_data[ch] + offset;
+    const int nb_samples = FFMIN(s->min_part_size, out->nb_samples - offset);
     int n, i, j;
 
-    memset(sum, 0, sizeof(*sum) * s->fft_length);
-    block = s->block[ch] + s->part_index * s->block_size;
-    memset(block, 0, sizeof(*block) * s->fft_length);
+    for (int segment = 0; segment < s->nb_segments; segment++) {
+        AudioFIRSegment *seg = &s->seg[segment];
+        float *src = (float *)seg->input->extended_data[ch];
+        float *dst = (float *)seg->output->extended_data[ch];
+        float *sum = (float *)seg->sum->extended_data[ch];
 
-    s->fdsp->vector_fmul_scalar(block + s->part_size, src, s->dry_gain, FFALIGN(s->nb_samples, 4));
-    emms_c();
+        s->fdsp->vector_fmul_scalar(src + seg->input_offset, in, s->dry_gain, FFALIGN(nb_samples, 4));
+        emms_c();
+
+        seg->output_offset[ch] += s->min_part_size;
+        if (seg->output_offset[ch] == seg->part_size) {
+            seg->output_offset[ch] = 0;
+        } else {
+            memmove(src, src + s->min_part_size, (seg->input_size - s->min_part_size) * sizeof(*src));
 
-    av_rdft_calc(s->rdft[ch], block);
-    block[2 * s->part_size] = block[1];
-    block[1] = 0;
+            dst += seg->output_offset[ch];
+            for (n = 0; n < nb_samples; n++) {
+                ptr[n] += dst[n];
+            }
+            continue;
+        }
 
-    j = s->part_index;
+        memset(sum, 0, sizeof(*sum) * seg->fft_length);
+        block = (float *)seg->block->extended_data[ch] + seg->part_index[ch] * seg->block_size;
+        memset(block + seg->part_size, 0, sizeof(*block) * (seg->fft_length - seg->part_size));
 
-    for (i = 0; i < s->nb_partitions; i++) {
-        const int coffset = i * s->coeff_size;
-        const FFTComplex *coeff = s->coeff[ch * !s->one2many] + coffset;
+        memcpy(block, src, sizeof(*src) * seg->part_size);
 
-        block = s->block[ch] + j * s->block_size;
-        s->fcmul_add(sum, block, (const float *)coeff, s->part_size);
+        av_rdft_calc(seg->rdft[ch], block);
+        block[2 * seg->part_size] = block[1];
+        block[1] = 0;
 
-        if (j == 0)
-            j = s->nb_partitions;
-        j--;
-    }
+        j = seg->part_index[ch];
+
+        for (i = 0; i < seg->nb_partitions; i++) {
+            const int coffset = j * seg->coeff_size;
+            const float *block = (const float *)seg->block->extended_data[ch] + i * seg->block_size;
+            const FFTComplex *coeff = (const FFTComplex *)seg->coeff->extended_data[ch * !s->one2many] + coffset;
+
+            s->afirdsp.fcmul_add(sum, block, (const float *)coeff, seg->part_size);
+
+            if (j == 0)
+                j = seg->nb_partitions;
+            j--;
+        }
+
+        sum[1] = sum[2 * seg->part_size];
+        av_rdft_calc(seg->irdft[ch], sum);
+
+        buf = (float *)seg->buffer->extended_data[ch];
+        for (n = 0; n < seg->part_size; n++) {
+            buf[n] += sum[n];
+        }
+
+        memcpy(dst, buf, seg->part_size * sizeof(*dst));
 
-    sum[1] = sum[2 * s->part_size];
-    av_rdft_calc(s->irdft[ch], sum);
+        buf = (float *)seg->buffer->extended_data[ch];
+        memcpy(buf, sum + seg->part_size, seg->part_size * sizeof(*buf));
 
-    dst = (float *)s->buffer->extended_data[ch] + index1 * s->part_size;
-    for (n = 0; n < s->part_size; n++) {
-        dst[n] += sum[n];
+        seg->part_index[ch] = (seg->part_index[ch] + 1) % seg->nb_partitions;
+
+        memmove(src, src + s->min_part_size, (seg->input_size - s->min_part_size) * sizeof(*src));
+
+        for (n = 0; n < nb_samples; n++) {
+            ptr[n] += dst[n];
+        }
     }
 
-    dst = (float *)s->buffer->extended_data[ch] + index2 * s->part_size;
+    s->fdsp->vector_fmul_scalar(ptr, ptr, s->wet_gain, FFALIGN(nb_samples, 4));
+    emms_c();
+
+    return 0;
+}
+
+static int fir_channel(AVFilterContext *ctx, AVFrame *out, int ch)
+{
+    AudioFIRContext *s = ctx->priv;
 
-    memcpy(dst, sum + s->part_size, s->part_size * sizeof(*dst));
+    for (int offset = 0; offset < out->nb_samples; offset += s->min_part_size) {
+        fir_quantum(ctx, out, ch, offset);
+    }
 
-    dst = (float *)s->buffer->extended_data[ch] + s->index * s->part_size;
+    return 0;
+}
 
-    if (out) {
-        float *ptr = (float *)out->extended_data[ch];
-        s->fdsp->vector_fmul_scalar(ptr, dst, s->wet_gain, FFALIGN(out->nb_samples, 4));
-        emms_c();
+static int fir_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    AVFrame *out = arg;
+    const int start = (out->channels * jobnr) / nb_jobs;
+    const int end = (out->channels * (jobnr+1)) / nb_jobs;
+
+    for (int ch = start; ch < end; ch++) {
+        fir_channel(ctx, out, ch);
     }
 
     return 0;
@@ -120,43 +166,27 @@ static int fir_frame(AudioFIRContext *s, AVFrame *in, AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     AVFrame *out = NULL;
-    int ret;
 
-    s->nb_samples = in->nb_samples;
-
-    if (!s->want_skip) {
-        out = ff_get_audio_buffer(outlink, s->nb_samples);
-        if (!out)
-            return AVERROR(ENOMEM);
+    out = ff_get_audio_buffer(outlink, in->nb_samples);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
     }
 
     if (s->pts == AV_NOPTS_VALUE)
         s->pts = in->pts;
     s->in[0] = in;
-    ctx->internal->execute(ctx, fir_channel, out, NULL, outlink->channels);
+    ctx->internal->execute(ctx, fir_channels, out, NULL, FFMIN(outlink->channels,
+                                                               ff_filter_get_nb_threads(ctx)));
 
-    s->part_index = (s->part_index + 1) % s->nb_partitions;
-
-    if (!s->want_skip) {
-        out->pts = s->pts;
-        if (s->pts != AV_NOPTS_VALUE)
-            s->pts += av_rescale_q(out->nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
-    }
-
-    s->index++;
-    if (s->index == 3)
-        s->index = 0;
+    out->pts = s->pts;
+    if (s->pts != AV_NOPTS_VALUE)
+        s->pts += av_rescale_q(out->nb_samples, (AVRational){1, outlink->sample_rate}, outlink->time_base);
 
     av_frame_free(&in);
+    s->in[0] = NULL;
 
-    if (s->want_skip == 1) {
-        s->want_skip = 0;
-        ret = 0;
-    } else {
-        ret = ff_filter_frame(outlink, out);
-    }
-
-    return ret;
+    return ff_filter_frame(outlink, out);
 }
 
 static void drawtext(AVFrame *pic, int x, int y, const char *txt, uint32_t color)
@@ -211,8 +241,9 @@ static void draw_line(AVFrame *out, int x0, int y0, int x1, int y1, uint32_t col
 static void draw_response(AVFilterContext *ctx, AVFrame *out)
 {
     AudioFIRContext *s = ctx->priv;
-    float *mag, *phase, min = FLT_MAX, max = FLT_MIN;
-    int prev_ymag = -1, prev_yphase = -1;
+    float *mag, *phase, *delay, min = FLT_MAX, max = FLT_MIN;
+    float min_delay = FLT_MAX, max_delay = FLT_MIN;
+    int prev_ymag = -1, prev_yphase = -1, prev_ydelay = -1;
     char text[32];
     int channel, i, x;
 
@@ -220,44 +251,56 @@ static void draw_response(AVFilterContext *ctx, AVFrame *out)
 
     phase = av_malloc_array(s->w, sizeof(*phase));
     mag = av_malloc_array(s->w, sizeof(*mag));
-    if (!mag || !phase)
+    delay = av_malloc_array(s->w, sizeof(*delay));
+    if (!mag || !phase || !delay)
         goto end;
 
     channel = av_clip(s->ir_channel, 0, s->in[1]->channels - 1);
     for (i = 0; i < s->w; i++) {
         const float *src = (const float *)s->in[1]->extended_data[channel];
         double w = i * M_PI / (s->w - 1);
-        double real = 0.;
-        double imag = 0.;
+        double div, real_num = 0., imag_num = 0., real = 0., imag = 0.;
 
         for (x = 0; x < s->nb_taps; x++) {
             real += cos(-x * w) * src[x];
             imag += sin(-x * w) * src[x];
+            real_num += cos(-x * w) * src[x] * x;
+            imag_num += sin(-x * w) * src[x] * x;
         }
 
         mag[i] = hypot(real, imag);
         phase[i] = atan2(imag, real);
+        div = real * real + imag * imag;
+        delay[i] = (real_num * real + imag_num * imag) / div;
         min = fminf(min, mag[i]);
         max = fmaxf(max, mag[i]);
+        min_delay = fminf(min_delay, delay[i]);
+        max_delay = fmaxf(max_delay, delay[i]);
     }
 
     for (i = 0; i < s->w; i++) {
         int ymag = mag[i] / max * (s->h - 1);
+        int ydelay = (delay[i] - min_delay) / (max_delay - min_delay) * (s->h - 1);
         int yphase = (0.5 * (1. + phase[i] / M_PI)) * (s->h - 1);
 
         ymag = s->h - 1 - av_clip(ymag, 0, s->h - 1);
         yphase = s->h - 1 - av_clip(yphase, 0, s->h - 1);
+        ydelay = s->h - 1 - av_clip(ydelay, 0, s->h - 1);
 
         if (prev_ymag < 0)
             prev_ymag = ymag;
         if (prev_yphase < 0)
             prev_yphase = yphase;
+        if (prev_ydelay < 0)
+            prev_ydelay = ydelay;
 
         draw_line(out, i,   ymag, FFMAX(i - 1, 0),   prev_ymag, 0xFFFF00FF);
         draw_line(out, i, yphase, FFMAX(i - 1, 0), prev_yphase, 0xFF00FF00);
+        draw_line(out, i, ydelay, FFMAX(i - 1, 0), prev_ydelay, 0xFF00FFFF);
 
         prev_ymag   = ymag;
         prev_yphase = yphase;
+        prev_ydelay = ydelay;
     }
 
     if (s->w > 400 && s->h > 100) {
@@ -268,61 +311,98 @@ static void draw_response(AVFilterContext *ctx, AVFrame *out)
         drawtext(out, 2, 12, "Min Magnitude:", 0xDDDDDDDD);
         snprintf(text, sizeof(text), "%.2f", min);
         drawtext(out, 15 * 8 + 2, 12, text, 0xDDDDDDDD);
+
+        drawtext(out, 2, 22, "Max Delay:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", max_delay);
+        drawtext(out, 11 * 8 + 2, 22, text, 0xDDDDDDDD);
+
+        drawtext(out, 2, 32, "Min Delay:", 0xDDDDDDDD);
+        snprintf(text, sizeof(text), "%.2f", min_delay);
+        drawtext(out, 11 * 8 + 2, 32, text, 0xDDDDDDDD);
     }
 
 end:
+    av_free(delay);
     av_free(phase);
     av_free(mag);
 }
 
+static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg,
+                        int offset, int nb_partitions, int part_size)
+{
+    AudioFIRContext *s = ctx->priv;
+    const int channels = ctx->inputs[0]->channels, fft_bits = av_log2(2 * part_size);
+    /* Set up one segment; any failure returns ENOMEM and leaves earlier allocations in seg. */
+    seg->rdft  = av_calloc(channels, sizeof(*seg->rdft));
+    seg->irdft = av_calloc(channels, sizeof(*seg->irdft));
+    if (!(seg->rdft && seg->irdft))
+        return AVERROR(ENOMEM);
+    /* Sizes and offsets of this segment, all derived from the arguments. */
+    seg->part_size     = part_size;
+    seg->nb_partitions = nb_partitions;
+    seg->input_offset  = offset;
+    seg->input_size    = offset + s->min_part_size;
+    seg->fft_length    = 2 * part_size + 1;
+    seg->block_size    = FFALIGN(seg->fft_length, 32);
+    seg->coeff_size    = FFALIGN(part_size + 1, 32);
+    /* Per-channel partition index and output offset, both starting at zero. */
+    seg->part_index    = av_calloc(channels, sizeof(*seg->part_index));
+    seg->output_offset = av_calloc(channels, sizeof(*seg->output_offset));
+    if (!(seg->part_index && seg->output_offset))
+        return AVERROR(ENOMEM);
+    /* One forward and one inverse real transform of av_log2(2 * part_size) bits per channel. */
+    for (int ch = 0; ch < channels; ch++) {
+        seg->rdft[ch]  = av_rdft_init(fft_bits, DFT_R2C);
+        seg->irdft[ch] = av_rdft_init(fft_bits, IDFT_C2R);
+        if (!(seg->rdft[ch] && seg->irdft[ch]))
+            return AVERROR(ENOMEM);
+    }
+    /* Work frames are requested on the first input link, the coefficient frame on the second. */
+    seg->sum    = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length);
+    seg->block  = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size);
+    seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
+    seg->coeff  = ff_get_audio_buffer(ctx->inputs[1], seg->nb_partitions * seg->coeff_size * 2);
+    seg->input  = ff_get_audio_buffer(ctx->inputs[0], seg->input_size);
+    seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size);
+    if (!(seg->sum && seg->block && seg->buffer && seg->coeff && seg->input && seg->output))
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
 static int convert_coeffs(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
-    int ret, i, ch, n, N;
+    int left, offset = 0, part_size, max_part_size;
+    int ret, i, ch, n;
     float power = 0;
 
     s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1]);
     if (s->nb_taps <= 0)
         return AVERROR(EINVAL);
 
-    for (n = 4; (1 << n) < s->nb_taps; n++);
-    N = FFMIN(n, 16);
-    s->ir_length = 1 << n;
-    s->fft_length = (1 << (N + 1)) + 1;
-    s->part_size = 1 << (N - 1);
-    s->block_size = FFALIGN(s->fft_length, 32);
-    s->coeff_size = FFALIGN(s->part_size + 1, 32);
-    s->nb_partitions = (s->nb_taps + s->part_size - 1) / s->part_size;
-    s->nb_coeffs = s->ir_length + s->nb_partitions;
-
-    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
-        s->sum[ch] = av_calloc(s->fft_length, sizeof(**s->sum));
-        if (!s->sum[ch])
-            return AVERROR(ENOMEM);
+    if (s->minp > s->maxp) {
+        s->maxp = s->minp;
     }
 
-    for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
-        s->coeff[ch] = av_calloc(s->nb_partitions * s->coeff_size, sizeof(**s->coeff));
-        if (!s->coeff[ch])
-            return AVERROR(ENOMEM);
-    }
+    left = s->nb_taps;
+    part_size = 1 << av_log2(s->minp);
+    max_part_size = 1 << av_log2(s->maxp);
 
-    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
-        s->block[ch] = av_calloc(s->nb_partitions * s->block_size, sizeof(**s->block));
-        if (!s->block[ch])
-            return AVERROR(ENOMEM);
-    }
+    s->min_part_size = part_size;
 
-    for (ch = 0; ch < ctx->inputs[0]->channels; ch++) {
-        s->rdft[ch]  = av_rdft_init(N, DFT_R2C);
-        s->irdft[ch] = av_rdft_init(N, IDFT_C2R);
-        if (!s->rdft[ch] || !s->irdft[ch])
-            return AVERROR(ENOMEM);
-    }
+    for (i = 0; left > 0; i++) {
+        int step = part_size == max_part_size ? INT_MAX : 1 + (i == 0);
+        int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size);
 
-    s->buffer = ff_get_audio_buffer(ctx->inputs[0], s->part_size * 3);
-    if (!s->buffer)
-        return AVERROR(ENOMEM);
+        s->nb_segments = i + 1;
+        ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size);
+        if (ret < 0)
+            return ret;
+        offset += nb_partitions * part_size;
+        left -= nb_partitions * part_size;
+        part_size *= 2;
+        part_size = FFMIN(part_size, max_part_size);
+    }
 
     ret = ff_inlink_consume_samples(ctx->inputs[1], s->nb_taps, s->nb_taps, &s->in[1]);
     if (ret < 0)
@@ -337,7 +417,7 @@ static int convert_coeffs(AVFilterContext *ctx)
 
     switch (s->gtype) {
     case -1:
-        /* nothinkg to do */
+        /* nothing to do */
         break;
     case 0:
         for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
@@ -378,44 +458,59 @@ static int convert_coeffs(AVFilterContext *ctx)
         s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(s->nb_taps, 4));
     }
 
+    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
+    av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments);
+
     for (ch = 0; ch < ctx->inputs[1]->channels; ch++) {
         float *time = (float *)s->in[1]->extended_data[!s->one2many * ch];
-        float *block = s->block[ch];
-        FFTComplex *coeff = s->coeff[ch];
+        int toffset = 0;
 
         for (i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++)
             time[i] = 0;
 
-        for (i = 0; i < s->nb_partitions; i++) {
-            const float scale = 1.f / s->part_size;
-            const int toffset = i * s->part_size;
-            const int coffset = i * s->coeff_size;
-            const int boffset = s->part_size;
-            const int remaining = s->nb_taps - (i * s->part_size);
-            const int size = remaining >= s->part_size ? s->part_size : remaining;
+        av_log(ctx, AV_LOG_DEBUG, "channel: %d\n", ch);
 
-            memset(block, 0, sizeof(*block) * s->fft_length);
-            memcpy(block + boffset, time + toffset, size * sizeof(*block));
+        for (int segment = 0; segment < s->nb_segments; segment++) {
+            AudioFIRSegment *seg = &s->seg[segment];
+            float *block = (float *)seg->block->extended_data[ch];
+            FFTComplex *coeff = (FFTComplex *)seg->coeff->extended_data[ch];
 
-            av_rdft_calc(s->rdft[0], block);
+            av_log(ctx, AV_LOG_DEBUG, "segment: %d\n", segment);
 
-            coeff[coffset].re = block[0] * scale;
-            coeff[coffset].im = 0;
-            for (n = 1; n < s->part_size; n++) {
-                coeff[coffset + n].re = block[2 * n] * scale;
-                coeff[coffset + n].im = block[2 * n + 1] * scale;
+            for (i = 0; i < seg->nb_partitions; i++) {
+                const float scale = 1.f / seg->part_size;
+                const int coffset = i * seg->coeff_size;
+                const int remaining = s->nb_taps - toffset;
+                const int size = remaining >= seg->part_size ? seg->part_size : remaining;
+
+                memset(block, 0, sizeof(*block) * seg->fft_length);
+                memcpy(block, time + toffset, size * sizeof(*block));
+
+                av_rdft_calc(seg->rdft[0], block);
+
+                coeff[coffset].re = block[0] * scale;
+                coeff[coffset].im = 0;
+                for (n = 1; n < seg->part_size; n++) {
+                    coeff[coffset + n].re = block[2 * n] * scale;
+                    coeff[coffset + n].im = block[2 * n + 1] * scale;
+                }
+                coeff[coffset + seg->part_size].re = block[1] * scale;
+                coeff[coffset + seg->part_size].im = 0;
+
+                toffset += size;
             }
-            coeff[coffset + s->part_size].re = block[1] * scale;
-            coeff[coffset + s->part_size].im = 0;
+
+            av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", seg->nb_partitions);
+            av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", seg->part_size);
+            av_log(ctx, AV_LOG_DEBUG, "block size: %d\n", seg->block_size);
+            av_log(ctx, AV_LOG_DEBUG, "fft_length: %d\n", seg->fft_length);
+            av_log(ctx, AV_LOG_DEBUG, "coeff_size: %d\n", seg->coeff_size);
+            av_log(ctx, AV_LOG_DEBUG, "input_size: %d\n", seg->input_size);
+            av_log(ctx, AV_LOG_DEBUG, "input_offset: %d\n", seg->input_offset);
         }
     }
 
     av_frame_free(&s->in[1]);
-    av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", s->nb_taps);
-    av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", s->nb_partitions);
-    av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", s->part_size);
-    av_log(ctx, AV_LOG_DEBUG, "ir_length: %d\n", s->ir_length);
-
     s->have_coeffs = 1;
 
     return 0;
@@ -441,8 +536,8 @@ static int activate(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
+    int ret, status, available, wanted;
     AVFrame *in = NULL;
-    int ret, status;
     int64_t pts;
 
     FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
@@ -461,6 +556,8 @@ static int activate(AVFilterContext *ctx)
         if (!s->eof_coeffs) {
             if (ff_outlink_frame_wanted(ctx->outputs[0]))
                 ff_inlink_request_frame(ctx->inputs[1]);
+            else if (s->response && ff_outlink_frame_wanted(ctx->outputs[1]))
+                ff_inlink_request_frame(ctx->inputs[1]);
             return 0;
         }
     }
@@ -471,34 +568,30 @@ static int activate(AVFilterContext *ctx)
             return ret;
     }
 
-    if (s->need_padding) {
-        in = ff_get_audio_buffer(outlink, s->part_size);
-        if (!in)
-            return AVERROR(ENOMEM);
-        s->need_padding = 0;
-        ret = 1;
-    } else {
-        ret = ff_inlink_consume_samples(ctx->inputs[0], s->part_size, s->part_size, &in);
-    }
-
-    if (ret > 0) {
+    available = ff_inlink_queued_samples(ctx->inputs[0]);
+    wanted = FFMAX(s->min_part_size, (available / s->min_part_size) * s->min_part_size);
+    ret = ff_inlink_consume_samples(ctx->inputs[0], wanted, wanted, &in);
+    if (ret > 0)
         ret = fir_frame(s, in, outlink);
-        if (ret < 0)
-            return ret;
-    }
 
     if (ret < 0)
         return ret;
 
     if (s->response && s->have_coeffs) {
-        if (ff_outlink_frame_wanted(ctx->outputs[1])) {
-            s->video->pts = s->pts;
-            ret = ff_filter_frame(ctx->outputs[1], av_frame_clone(s->video));
-            if (ret < 0)
-                return ret;
+        int64_t old_pts = s->video->pts;
+        int64_t new_pts = av_rescale_q(s->pts, ctx->inputs[0]->time_base, ctx->outputs[1]->time_base);
+
+        if (ff_outlink_frame_wanted(ctx->outputs[1]) && old_pts < new_pts) {
+            s->video->pts = new_pts;
+            return ff_filter_frame(ctx->outputs[1], av_frame_clone(s->video));
         }
     }
 
+    if (ff_inlink_queued_samples(ctx->inputs[0]) >= s->min_part_size) {
+        ff_filter_set_ready(ctx, 10);
+        return 0;
+    }
+
     if (ff_inlink_acknowledge_status(ctx->inputs[0], &status, &pts)) {
         if (status == AVERROR_EOF) {
             ff_outlink_set_status(ctx->outputs[0], status, pts);
@@ -508,17 +601,20 @@ static int activate(AVFilterContext *ctx)
         }
     }
 
-    if (ff_outlink_frame_wanted(ctx->outputs[0])) {
+    if (ff_outlink_frame_wanted(ctx->outputs[0]) &&
+        !ff_outlink_get_status(ctx->inputs[0])) {
         ff_inlink_request_frame(ctx->inputs[0]);
         return 0;
     }
 
-    if (s->response && ff_outlink_frame_wanted(ctx->outputs[1])) {
+    if (s->response &&
+        ff_outlink_frame_wanted(ctx->outputs[1]) &&
+        !ff_outlink_get_status(ctx->inputs[0])) {
         ff_inlink_request_frame(ctx->inputs[0]);
         return 0;
     }
 
-    return 0;
+    return FFERROR_NOT_READY;
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -585,67 +681,53 @@ static int config_output(AVFilterLink *outlink)
     outlink->channel_layout = ctx->inputs[0]->channel_layout;
     outlink->channels = ctx->inputs[0]->channels;
 
-    s->sum = av_calloc(outlink->channels, sizeof(*s->sum));
-    s->coeff = av_calloc(ctx->inputs[1]->channels, sizeof(*s->coeff));
-    s->block = av_calloc(ctx->inputs[0]->channels, sizeof(*s->block));
-    s->rdft = av_calloc(outlink->channels, sizeof(*s->rdft));
-    s->irdft = av_calloc(outlink->channels, sizeof(*s->irdft));
-    if (!s->sum || !s->coeff || !s->block || !s->rdft || !s->irdft)
-        return AVERROR(ENOMEM);
-
     s->nb_channels = outlink->channels;
     s->nb_coef_channels = ctx->inputs[1]->channels;
-    s->want_skip = 1;
-    s->need_padding = 1;
     s->pts = AV_NOPTS_VALUE;
 
     return 0;
 }
 
-static av_cold void uninit(AVFilterContext *ctx)
+static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) /* releases everything one segment owns */
 {
     AudioFIRContext *s = ctx->priv;
-    int ch;
 
-    if (s->sum) {
-        for (ch = 0; ch < s->nb_channels; ch++) {
-            av_freep(&s->sum[ch]);
+    if (seg->rdft) { /* the per-channel contexts can only be walked if the pointer array exists */
+        for (int ch = 0; ch < s->nb_channels; ch++) {
+            av_rdft_end(seg->rdft[ch]);
         }
     }
-    av_freep(&s->sum);
+    av_freep(&seg->rdft); /* then the pointer array itself */
 
-    if (s->coeff) {
-        for (ch = 0; ch < s->nb_coef_channels; ch++) {
-            av_freep(&s->coeff[ch]);
+    if (seg->irdft) { /* same teardown for the second set of transforms */
+        for (int ch = 0; ch < s->nb_channels; ch++) {
+            av_rdft_end(seg->irdft[ch]);
         }
     }
-    av_freep(&s->coeff);
+    av_freep(&seg->irdft);
 
-    if (s->block) {
-        for (ch = 0; ch < s->nb_channels; ch++) {
-            av_freep(&s->block[ch]);
-        }
-    }
-    av_freep(&s->block);
+    av_freep(&seg->output_offset); /* plain int arrays */
+    av_freep(&seg->part_index);
 
-    if (s->rdft) {
-        for (ch = 0; ch < s->nb_channels; ch++) {
-            av_rdft_end(s->rdft[ch]);
-        }
-    }
-    av_freep(&s->rdft);
+    av_frame_free(&seg->block); /* all six work buffers are AVFrames */
+    av_frame_free(&seg->sum);
+    av_frame_free(&seg->buffer);
+    av_frame_free(&seg->coeff);
+    av_frame_free(&seg->input);
+    av_frame_free(&seg->output);
+    seg->input_size = 0; /* the only scalar field that is reset here */
+}
 
-    if (s->irdft) {
-        for (ch = 0; ch < s->nb_channels; ch++) {
-            av_rdft_end(s->irdft[ch]);
-        }
-    }
-    av_freep(&s->irdft);
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioFIRContext *s = ctx->priv;
 
-    av_frame_free(&s->in[1]);
-    av_frame_free(&s->buffer);
+    for (int i = 0; i < s->nb_segments; i++) {
+        uninit_segment(ctx, &s->seg[i]);
+    }
 
     av_freep(&s->fdsp);
+    av_frame_free(&s->in[1]);
 
     for (int i = 0; i < ctx->nb_outputs; i++)
         av_freep(&ctx->output_pads[i].name);
@@ -660,6 +742,8 @@ static int config_video(AVFilterLink *outlink)
     outlink->sample_aspect_ratio = (AVRational){1,1};
     outlink->w = s->w;
     outlink->h = s->h;
+    outlink->frame_rate = s->frame_rate;
+    outlink->time_base = av_inv_q(outlink->frame_rate);
 
     av_frame_free(&s->video);
     s->video = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -669,6 +753,14 @@ static int config_video(AVFilterLink *outlink)
     return 0;
 }
 
+void ff_afir_init(AudioFIRDSPContext *dsp)
+{
+    dsp->fcmul_add = fcmul_add_c; /* fill the table with the C implementation first */
+    /* On x86 builds the arch-specific init gets a chance to replace it (presumably with SIMD code). */
+    if (ARCH_X86)
+        ff_afir_init_x86(dsp);
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     AudioFIRContext *s = ctx->priv;
@@ -708,25 +800,22 @@ static av_cold int init(AVFilterContext *ctx)
         }
     }
 
-    s->fcmul_add = fcmul_add_c;
-
     s->fdsp = avpriv_float_dsp_alloc(0);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
-    if (ARCH_X86)
-        ff_afir_init_x86(s);
+    ff_afir_init(&s->afirdsp);
 
     return 0;
 }
 
 static const AVFilterPad afir_inputs[] = {
     {
-        .name           = "main",
-        .type           = AVMEDIA_TYPE_AUDIO,
+        .name = "main", /* inputs[0]: activate() consumes its samples and passes them to fir_frame() */
+        .type = AVMEDIA_TYPE_AUDIO,
     },{
-        .name           = "ir",
-        .type           = AVMEDIA_TYPE_AUDIO,
+        .name = "ir", /* inputs[1]: its channel count becomes nb_coef_channels in config_output() */
+        .type = AVMEDIA_TYPE_AUDIO,
     },
     { NULL }
 };
@@ -752,6 +841,9 @@ static const AVOption afir_options[] = {
     { "response", "show IR frequency response", OFFSET(response), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, VF },
     { "channel", "set IR channel to display frequency response", OFFSET(ir_channel), AV_OPT_TYPE_INT, {.i64=0}, 0, 1024, VF },
     { "size",   "set video size",    OFFSET(w),          AV_OPT_TYPE_IMAGE_SIZE, {.str = "hd720"}, 0, 0, VF },
+    { "rate",   "set video rate",    OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT32_MAX, VF },
+    { "minp",   "set min partition size", OFFSET(minp),  AV_OPT_TYPE_INT,   {.i64=8192}, 8, 32768, AF },
+    { "maxp",   "set max partition size", OFFSET(maxp),  AV_OPT_TYPE_INT,   {.i64=8192}, 8, 32768, AF },
     { NULL }
 };
 
diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h
index 7d4f32eaebc99..f665c0ef808b9 100644
--- a/libavfilter/af_afir.h
+++ b/libavfilter/af_afir.h
@@ -21,7 +21,6 @@
 #ifndef AVFILTER_AFIR_H
 #define AVFILTER_AFIR_H
 
-#include "libavutil/audio_fifo.h"
 #include "libavutil/common.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/opt.h"
@@ -32,6 +31,33 @@
 #include "formats.h"
 #include "internal.h"
 
+typedef struct AudioFIRSegment { /* per-segment state; AudioFIRContext keeps an array of these in seg[] */
+    int nb_partitions; /* NOTE(review): the sizes/offsets below are set up outside this header -- document once confirmed */
+    int part_size;
+    int block_size;
+    int fft_length;
+    int coeff_size;
+    int input_size;    /* reset to 0 by uninit_segment() */
+    int input_offset;
+    /* Plain int arrays; uninit_segment() releases them with av_freep(). */
+    int *output_offset;
+    int *part_index;
+    /* Work buffers kept as AVFrames; uninit_segment() releases them with av_frame_free(). */
+    AVFrame *sum;
+    AVFrame *block;
+    AVFrame *buffer;
+    AVFrame *coeff;
+    AVFrame *input;
+    AVFrame *output;
+    /* Arrays indexed by channel; uninit_segment() closes each entry with av_rdft_end(), then frees the array. */
+    RDFTContext **rdft, **irdft;
+} AudioFIRSegment;
+
+typedef struct AudioFIRDSPContext { /* function table filled in by ff_afir_init() */
+    void (*fcmul_add)(float *sum, const float *t, const float *c,
+                      ptrdiff_t len); /* presumably accumulates the complex product of t and c into sum -- TODO confirm */
+} AudioFIRDSPContext;
+
 typedef struct AudioFIRContext {
     const AVClass *class;
 
@@ -44,45 +70,34 @@ typedef struct AudioFIRContext {
     float max_ir_len;
     int response;
     int w, h;
+    AVRational frame_rate;
     int ir_channel;
+    int minp;
+    int maxp;
 
     float gain;
 
     int eof_coeffs;
     int have_coeffs;
-    int nb_coeffs;
     int nb_taps;
-    int part_size;
-    int part_index;
-    int coeff_size;
-    int block_size;
-    int nb_partitions;
     int nb_channels;
-    int ir_length;
-    int fft_length;
     int nb_coef_channels;
     int one2many;
-    int nb_samples;
-    int want_skip;
-    int need_padding;
 
-    RDFTContext **rdft, **irdft;
-    float **sum;
-    float **block;
-    FFTComplex **coeff;
+    AudioFIRSegment seg[1024];
+    int nb_segments;
 
-    AVAudioFifo *fifo;
     AVFrame *in[2];
-    AVFrame *buffer;
     AVFrame *video;
+    int min_part_size;
     int64_t pts;
-    int index;
 
+    AudioFIRDSPContext afirdsp;
     AVFloatDSPContext *fdsp;
-    void (*fcmul_add)(float *sum, const float *t, const float *c,
-                      ptrdiff_t len);
+
 } AudioFIRContext;
 
-void ff_afir_init_x86(AudioFIRContext *s);
+void ff_afir_init(AudioFIRDSPContext *s);
+void ff_afir_init_x86(AudioFIRDSPContext *s);
 
 #endif /* AVFILTER_AFIR_H */
diff --git a/libavfilter/af_aiir.c b/libavfilter/af_aiir.c
index 845d542d29efb..20dea98cbbd75 100644
--- a/libavfilter/af_aiir.c
+++ b/libavfilter/af_aiir.c
@@ -63,6 +63,7 @@ typedef struct AudioIIRContext {
     int response;
     int w, h;
     int ir_channel;
+    AVRational rate;
 
     AVFrame *video;
 
@@ -939,11 +940,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     if (s->response) {
         AVFilterLink *outlink = ctx->outputs[1];
+        int64_t old_pts = s->video->pts;
+        int64_t new_pts = av_rescale_q(out->pts, ctx->inputs[0]->time_base, outlink->time_base);
 
-        s->video->pts = out->pts;
-        ret = ff_filter_frame(outlink, av_frame_clone(s->video));
-        if (ret < 0)
-            return ret;
+        if (new_pts > old_pts) {
+            s->video->pts = new_pts;
+            ret = ff_filter_frame(outlink, av_frame_clone(s->video));
+            if (ret < 0)
+                return ret;
+        }
     }
 
     return ff_filter_frame(outlink, out);
@@ -957,6 +962,8 @@ static int config_video(AVFilterLink *outlink)
     outlink->sample_aspect_ratio = (AVRational){1,1};
     outlink->w = s->w;
     outlink->h = s->h;
+    outlink->frame_rate = s->rate;
+    outlink->time_base = av_inv_q(outlink->frame_rate);
 
     return 0;
 }
@@ -1070,6 +1077,7 @@ static const AVOption aiir_options[] = {
     { "response", "show IR frequency response",    OFFSET(response), AV_OPT_TYPE_BOOL,   {.i64=0},     0, 1, VF },
     { "channel", "set IR channel to display frequency response", OFFSET(ir_channel), AV_OPT_TYPE_INT, {.i64=0}, 0, 1024, VF },
     { "size",   "set video size",                  OFFSET(w),        AV_OPT_TYPE_IMAGE_SIZE, {.str = "hd720"}, 0, 0, VF },
+    { "rate",   "set video rate",                  OFFSET(rate),     AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT32_MAX, VF },
     { NULL },
 };
 
diff --git a/libavfilter/af_amerge.c b/libavfilter/af_amerge.c
index 3961c90701d86..567f25982d8ff 100644
--- a/libavfilter/af_amerge.c
+++ b/libavfilter/af_amerge.c
@@ -23,9 +23,6 @@
  * Audio merging filter
  */
 
-#define FF_INTERNAL_FIELDS 1
-#include "framequeue.h"
-
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
 #include "libavutil/channel_layout.h"
@@ -285,9 +282,9 @@ static int activate(AVFilterContext *ctx)
 
     FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
 
-    nb_samples = ff_framequeue_queued_samples(&ctx->inputs[0]->fifo);
+    nb_samples = ff_inlink_queued_samples(ctx->inputs[0]);
     for (i = 1; i < ctx->nb_inputs && nb_samples > 0; i++) {
-        nb_samples = FFMIN(ff_framequeue_queued_samples(&ctx->inputs[i]->fifo), nb_samples);
+        nb_samples = FFMIN(ff_inlink_queued_samples(ctx->inputs[i]), nb_samples);
     }
 
     if (nb_samples) {
@@ -297,7 +294,7 @@ static int activate(AVFilterContext *ctx)
     }
 
     for (i = 0; i < ctx->nb_inputs; i++) {
-        if (ff_framequeue_queued_samples(&ctx->inputs[i]->fifo))
+        if (ff_inlink_queued_samples(ctx->inputs[i]))
             continue;
 
         if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
diff --git a/libavfilter/af_amultiply.c b/libavfilter/af_amultiply.c
index a742f6a9c6f98..b35eca725002a 100644
--- a/libavfilter/af_amultiply.c
+++ b/libavfilter/af_amultiply.c
@@ -24,9 +24,6 @@
 #include "libavutil/float_dsp.h"
 #include "libavutil/opt.h"
 
-#define FF_INTERNAL_FIELDS 1
-#include "framequeue.h"
-
 #include "audio.h"
 #include "avfilter.h"
 #include "formats.h"
@@ -85,8 +82,8 @@ static int activate(AVFilterContext *ctx)
 
     FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
 
-    nb_samples = FFMIN(ff_framequeue_queued_samples(&ctx->inputs[0]->fifo),
-                       ff_framequeue_queued_samples(&ctx->inputs[1]->fifo));
+    nb_samples = FFMIN(ff_inlink_queued_samples(ctx->inputs[0]),
+                       ff_inlink_queued_samples(ctx->inputs[1]));
     for (i = 0; i < ctx->nb_inputs && nb_samples > 0; i++) {
         if (s->frames[i])
             continue;
@@ -150,7 +147,7 @@ static int activate(AVFilterContext *ctx)
 
     if (ff_outlink_frame_wanted(ctx->outputs[0])) {
         for (i = 0; i < 2; i++) {
-            if (ff_framequeue_queued_samples(&ctx->inputs[i]->fifo) > 0)
+            if (ff_inlink_queued_samples(ctx->inputs[i]) > 0)
                 continue;
             ff_inlink_request_frame(ctx->inputs[i]);
             return 0;
diff --git a/libavfilter/af_anlmdn.c b/libavfilter/af_anlmdn.c
new file mode 100644
index 0000000000000..87c49c63b1c6a
--- /dev/null
+++ b/libavfilter/af_anlmdn.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/audio_fifo.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+#include "af_anlmdndsp.h"
+
+#define MAX_DIFF         11.f
+#define WEIGHT_LUT_NBITS 20
+#define WEIGHT_LUT_SIZE  (1<<WEIGHT_LUT_NBITS)
+
+#define SQR(x) ((x) * (x))
+
+typedef struct AudioNLMeansContext {
+    const AVClass *class;
+    /* User options, see anlmdn_options[]. */
+    float a;    /* "s": denoising strength */
+    int64_t pd; /* "p": patch duration, in AV_TIME_BASE units */
+    int64_t rd; /* "r": research duration, in AV_TIME_BASE units */
+    int om;     /* "o": output mode, one of enum OutModes */
+
+    float pdiff_lut_scale;             /* WEIGHT_LUT_SIZE / MAX_DIFF: maps a scaled distance to a LUT index */
+    float weight_lut[WEIGHT_LUT_SIZE]; /* weight_lut[i] = expf(-i / pdiff_lut_scale) */
+
+    int K; /* pd converted to samples */
+    int S; /* rd converted to samples */
+    int N; /* samples peeked from the FIFO per block: H + 2 * (K + S) */
+    int H; /* samples produced per block: 2 * K + 1 */
+
+    int offset;     /* write position inside the output frame being assembled */
+    AVFrame *in;    /* N-sample analysis window copied from the FIFO */
+    AVFrame *cache; /* 2 * S cached patch distances per channel */
+
+    int64_t pts; /* timestamp of the next output frame, advanced by the number of samples processed */
+
+    AVAudioFifo *fifo; /* buffered input samples */
+    int eof_left;      /* -1 until the input hits EOF, then the number of samples still to be output */
+
+    AudioNLMDNDSPContext dsp; /* distance kernels, set by ff_anlmdn_init() */
+} AudioNLMeansContext;
+
+enum OutModes {
+    IN_MODE,    /* output the unfiltered input sample */
+    OUT_MODE,   /* output the denoised sample */
+    NOISE_MODE, /* output input minus denoised, i.e. what the filter removed */
+    NB_MODES    /* number of modes; bounds the "o" option */
+};
+
+#define OFFSET(x) offsetof(AudioNLMeansContext, x)
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption anlmdn_options[] = { /* user options; the three constants below "o" are its named values */
+    { "s", "set denoising strength", OFFSET(a),  AV_OPT_TYPE_FLOAT,    {.dbl=0.00001},0.00001, 10, AF },
+    { "p", "set patch duration",     OFFSET(pd), AV_OPT_TYPE_DURATION, {.i64=2000}, 1000, 100000, AF }, /* becomes K (samples) in config_output() */
+    { "r", "set research duration",  OFFSET(rd), AV_OPT_TYPE_DURATION, {.i64=6000}, 2000, 300000, AF }, /* becomes S (samples) in config_output() */
+    { "o", "set output mode",        OFFSET(om), AV_OPT_TYPE_INT,      {.i64=OUT_MODE},  0, NB_MODES-1, AF, "mode" },
+    {  "i", "input",                 0,          AV_OPT_TYPE_CONST,    {.i64=IN_MODE},   0,  0, AF, "mode" },
+    {  "o", "output",                0,          AV_OPT_TYPE_CONST,    {.i64=OUT_MODE},  0,  0, AF, "mode" },
+    {  "n", "noise",                 0,          AV_OPT_TYPE_CONST,    {.i64=NOISE_MODE},0,  0, AF, "mode" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(anlmdn);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    /* Negotiate planar float samples only; every channel count and sample rate is offered. */
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *counts = NULL;
+    AVFilterFormats *fmts = NULL, *rates;
+    int err;
+
+    fmts = ff_make_format_list(sample_fmts);
+    if (!fmts)
+        return AVERROR(ENOMEM);
+    err = ff_set_common_formats(ctx, fmts);
+    if (err < 0)
+        return err;
+
+    counts = ff_all_channel_counts();
+    if (!counts)
+        return AVERROR(ENOMEM);
+    err = ff_set_common_channel_layouts(ctx, counts);
+    if (err < 0)
+        return err;
+
+    rates = ff_all_samplerates();
+    return ff_set_common_samplerates(ctx, rates);
+}
+
+static float compute_distance_ssd_c(const float *f1, const float *f2, ptrdiff_t K)
+{
+    /* Sum of squared differences over the 2 * K + 1 samples centred on f1 and on f2. */
+    float ssd = 0.;
+    int off = -K;
+    for (; off <= K; off++)
+        ssd += SQR(f1[off] - f2[off]);
+    return ssd;
+}
+
+static void compute_cache_c(float *cache, const float *f,
+                            ptrdiff_t S, ptrdiff_t K,
+                            ptrdiff_t i, ptrdiff_t jj)
+{
+    /* Slide S cached distances by one sample: subtract the term leaving each patch, add the one entering. */
+    int slot = 0, pos = jj;
+    for (; pos < jj + S; pos++, slot++)
+        cache[slot] += -SQR(f[i - K - 1] - f[pos - K - 1]) + SQR(f[i + K] - f[pos + K]);
+}
+
+void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
+{
+    /* Install the C kernels, then let the x86 init (on x86 builds) replace them, presumably with SIMD versions. */
+    dsp->compute_cache        = compute_cache_c;
+    dsp->compute_distance_ssd = compute_distance_ssd_c;
+    if (ARCH_X86)
+        ff_anlmdn_init_x86(dsp);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioNLMeansContext *s = ctx->priv;
+    int ret;
+    /* Convert the duration options to samples and (re)build every buffer that depends on them. */
+    s->K = av_rescale(s->pd, outlink->sample_rate, AV_TIME_BASE);
+    s->S = av_rescale(s->rd, outlink->sample_rate, AV_TIME_BASE);
+
+    s->eof_left = -1;
+    s->pts = AV_NOPTS_VALUE;
+    s->H = s->K * 2 + 1;             /* samples produced per block */
+    s->N = s->H + (s->K + s->S) * 2; /* samples needed in the FIFO to produce one block */
+
+    av_log(ctx, AV_LOG_DEBUG, "K:%d S:%d H:%d N:%d\n", s->K, s->S, s->H, s->N);
+    /* Release whatever an earlier configuration of this link left behind. */
+    av_frame_free(&s->in);
+    av_frame_free(&s->cache);
+    s->in = ff_get_audio_buffer(outlink, s->N);
+    if (!s->in)
+        return AVERROR(ENOMEM);
+
+    s->cache = ff_get_audio_buffer(outlink, s->S * 2);
+    if (!s->cache)
+        return AVERROR(ENOMEM);
+    av_audio_fifo_free(s->fifo); /* NULL-safe; drops the FIFO of an earlier configuration instead of leaking it */
+    s->fifo = av_audio_fifo_alloc(outlink->format, outlink->channels, s->N);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+    /* Prime the FIFO with K + S samples taken from the freshly allocated s->in (presumably silence). */
+    ret = av_audio_fifo_write(s->fifo, (void **)s->in->extended_data, s->K + s->S);
+    if (ret < 0)
+        return ret;
+    /* Tabulate exp(-x) for x in [0, MAX_DIFF) using WEIGHT_LUT_SIZE entries. */
+    s->pdiff_lut_scale = 1.f / MAX_DIFF * WEIGHT_LUT_SIZE;
+    for (int i = 0; i < WEIGHT_LUT_SIZE; i++) {
+        float w = -i / s->pdiff_lut_scale;
+
+        s->weight_lut[i] = expf(w);
+    }
+
+    ff_anlmdn_init(&s->dsp);
+
+    return 0;
+}
+
+static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
+{
+    AudioNLMeansContext *s = ctx->priv;
+    AVFrame *out = arg;
+    const int S = s->S;
+    const int K = s->K;
+    const int om = s->om;
+    const float *f = (const float *)(s->in->extended_data[ch]) + K; /* f[-K] is the first sample of the window */
+    float *cache = (float *)s->cache->extended_data[ch];
+    const float sw = (65536.f / (4 * K + 2)) / sqrtf(s->a);
+    float *dst = (float *)out->extended_data[ch] + s->offset;
+    /* Slice job: process the H samples of channel ch held in s->in and store the result at dst. */
+    for (int i = S; i < s->H + S; i++) {
+        float P = 0.f, Q = 0.f; /* weighted sample sum and sum of weights */
+        int v = 0;
+
+        if (i == S) { /* first sample of the block: compute all 2 * S patch distances from scratch */
+            for (int j = i - S; j <= i + S; j++) {
+                if (i == j)
+                    continue;
+                cache[v++] = s->dsp.compute_distance_ssd(f + i, f + j, K);
+            }
+        } else { /* later samples: update both halves of the cache incrementally */
+            s->dsp.compute_cache(cache, f, S, K, i, i - S);
+            s->dsp.compute_cache(cache + S, f, S, K, i, i + 1);
+        }
+
+        for (int j = 0; j < 2 * S && !ctx->is_disabled; j++) { /* skipped entirely while the timeline disables the filter */
+            const float distance = cache[j];
+            unsigned weight_lut_idx;
+            float w;
+            if (distance < 0.f) { /* float cancellation in the incremental update can drift below zero */
+                cache[j] = 0.f;   /* reset it; a negative value must never reach the unsigned LUT index */
+                continue;
+            }
+            w = distance * sw;
+            if (w >= MAX_DIFF)
+                continue;
+            weight_lut_idx = w * s->pdiff_lut_scale;
+            av_assert2(weight_lut_idx < WEIGHT_LUT_SIZE);
+            w = s->weight_lut[weight_lut_idx];
+            P += w * f[i - S + j + (j >= S)]; /* (j >= S) steps over the centre sample itself */
+            Q += w;
+        }
+        P += f[i]; /* the centre sample always contributes with weight 1 */
+        Q += 1;
+        switch (om) {
+        case IN_MODE:    dst[i - S] = f[i];           break;
+        case OUT_MODE:   dst[i - S] = P / Q;          break;
+        case NOISE_MODE: dst[i - S] = f[i] - (P / Q); break;
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioNLMeansContext *s = ctx->priv;
+    AVFrame *out = NULL;
+    int available, wanted, ret;
+    /* Queue the frame (which is freed here), then emit every complete H-sample block; <0 on error. */
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = in->pts;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data,
+                              in->nb_samples);
+    av_frame_free(&in);
+    if (ret < 0) /* report a failed write instead of silently losing the samples */
+        return ret;
+    s->offset = 0;
+    available = av_audio_fifo_size(s->fifo);
+    wanted = (available / s->H) * s->H;
+    if (wanted >= s->H && available >= s->N) {
+        out = ff_get_audio_buffer(outlink, wanted);
+        if (!out)
+            return AVERROR(ENOMEM);
+    }
+
+    while (available >= s->N) { /* each pass needs N samples and yields H */
+        ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data, s->N);
+        if (ret < 0) { /* propagate the error rather than sending a truncated frame */
+            av_frame_free(&out);
+            return ret;
+        }
+        ctx->internal->execute(ctx, filter_channel, out, NULL, inlink->channels);
+        av_audio_fifo_drain(s->fifo, s->H);
+
+        s->offset += s->H;
+        available -= s->H;
+    }
+
+    if (out) {
+        out->pts = s->pts;
+        out->nb_samples = s->offset;
+        if (s->eof_left >= 0) { /* flushing after EOF: never output more than is still owed */
+            out->nb_samples = FFMIN(s->eof_left, s->offset);
+            s->eof_left -= out->nb_samples;
+        }
+        s->pts += s->offset;
+
+        return ff_filter_frame(outlink, out);
+    }
+
+    return ret; /* nothing was emitted: ret is the non-negative result of the FIFO write */
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioNLMeansContext *s = ctx->priv;
+    int ret;
+    /* Forward the request upstream; once the input reports EOF, flush what is still buffered. */
+    ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && s->eof_left != 0) {
+        AVFrame *in;
+
+        if (s->eof_left < 0) /* first EOF: samples still to be output = FIFO size minus (S + K) */
+            s->eof_left = av_audio_fifo_size(s->fifo) - (s->S + s->K);
+        if (s->eof_left < 0) /* still negative: nothing to flush */
+            return AVERROR_EOF;
+        in = ff_get_audio_buffer(outlink, s->H); /* H-sample padding frame (presumably silent) to push the tail through */
+        if (!in)
+            return AVERROR(ENOMEM);
+
+        return filter_frame(ctx->inputs[0], in); /* filter_frame() caps its output at eof_left samples */
+    }
+
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioNLMeansContext *nlm = ctx->priv;
+    /* Give back the two work frames and the sample FIFO owned by the context. */
+    av_frame_free(&nlm->cache);
+    av_frame_free(&nlm->in);
+    av_audio_fifo_free(nlm->fifo);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame, /* queues the samples and emits whole H-sample blocks */
+    },
+    { NULL } /* terminator */
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output, /* sizes the buffers from the link's sample rate */
+        .request_frame = request_frame, /* also flushes the FIFO once the input reaches EOF */
+    },
+    { NULL } /* terminator */
+};
+
+AVFilter ff_af_anlmdn = {
+    .name          = "anlmdn",
+    .description   = NULL_IF_CONFIG_SMALL("Reduce broadband noise from stream using Non-Local Means."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioNLMeansContext), /* includes the WEIGHT_LUT_SIZE-entry float table */
+    .priv_class    = &anlmdn_class,
+    .uninit        = uninit,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | /* filter_channel() checks ctx->is_disabled itself */
+                     AVFILTER_FLAG_SLICE_THREADS,              /* filter_frame() runs one filter_channel() job per channel */
+};
diff --git a/libavfilter/af_anlmdndsp.h b/libavfilter/af_anlmdndsp.h
new file mode 100644
index 0000000000000..d8f5136cd8572
--- /dev/null
+++ b/libavfilter/af_anlmdndsp.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_ANLMDNDSP_H
+#define AVFILTER_ANLMDNDSP_H
+
+#include "libavutil/common.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct AudioNLMDNDSPContext { /* kernels used by anlmdn; set up by ff_anlmdn_init() */
+    float (*compute_distance_ssd)(const float *f1, const float *f2, ptrdiff_t K); /* SSD over offsets -K..K around f1 and f2 */
+    void (*compute_cache)(float *cache, const float *f, ptrdiff_t S, ptrdiff_t K, /* updates S cached distances for centre i, */
+                          ptrdiff_t i, ptrdiff_t jj);                             /* against centres jj .. jj + S - 1 */
+} AudioNLMDNDSPContext;
+
+void ff_anlmdn_init(AudioNLMDNDSPContext *s);
+void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
+
+#endif /* AVFILTER_ANLMDNDSP_H */
diff --git a/libavfilter/af_apad.c b/libavfilter/af_apad.c
index f7a4199c6486c..fbcf6d1349cb0 100644
--- a/libavfilter/af_apad.c
+++ b/libavfilter/af_apad.c
@@ -41,6 +41,8 @@ typedef struct APadContext {
     int packet_size;
     int64_t pad_len, pad_len_left;
     int64_t whole_len, whole_len_left;
+    int64_t pad_dur;
+    int64_t whole_dur;
 } APadContext;
 
 #define OFFSET(x) offsetof(APadContext, x)
@@ -50,6 +52,8 @@ static const AVOption apad_options[] = {
     { "packet_size", "set silence packet size",                                  OFFSET(packet_size), AV_OPT_TYPE_INT,   { .i64 = 4096 }, 0, INT_MAX, A },
     { "pad_len",     "set number of samples of silence to add",                  OFFSET(pad_len),     AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, A },
     { "whole_len",   "set minimum target number of samples in the audio stream", OFFSET(whole_len),   AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, A },
+    { "pad_dur",     "set duration of silence to add",                           OFFSET(pad_dur),     AV_OPT_TYPE_DURATION, { .i64 = 0 }, 0, INT64_MAX, A },
+    { "whole_dur",   "set minimum target duration in the audio stream",          OFFSET(whole_dur),   AV_OPT_TYPE_DURATION, { .i64 = 0 }, 0, INT64_MAX, A },
     { NULL }
 };
 
@@ -64,8 +68,6 @@ static av_cold int init(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_ERROR, "Both whole and pad length are set, this is not possible\n");
         return AVERROR(EINVAL);
     }
-    s->pad_len_left   = s->pad_len;
-    s->whole_len_left = s->whole_len;
 
     return 0;
 }
@@ -131,6 +133,22 @@ static int request_frame(AVFilterLink *outlink)
     return ret;
 }
 
+static int config_output(AVFilterLink *outlink)
+{
+    APadContext *apad = outlink->src->priv;
+
+    /* A non-zero duration option overrides the corresponding sample-count option. */
+    if (apad->whole_dur != 0)
+        apad->whole_len = av_rescale(apad->whole_dur, outlink->sample_rate, AV_TIME_BASE);
+    if (apad->pad_dur != 0)
+        apad->pad_len = av_rescale(apad->pad_dur, outlink->sample_rate, AV_TIME_BASE);
+    /* Start both countdowns from the (possibly just converted) lengths. */
+    apad->whole_len_left = apad->whole_len;
+    apad->pad_len_left   = apad->pad_len;
+
+    return 0;
+}
+
 static const AVFilterPad apad_inputs[] = {
     {
         .name         = "default",
@@ -144,6 +162,7 @@ static const AVFilterPad apad_outputs[] = {
     {
         .name          = "default",
         .request_frame = request_frame,
+        .config_props  = config_output,
         .type          = AVMEDIA_TYPE_AUDIO,
     },
     { NULL }
diff --git a/libavfilter/af_asetnsamples.c b/libavfilter/af_asetnsamples.c
index e8daec8d8f9a3..c60ce3063fa8c 100644
--- a/libavfilter/af_asetnsamples.c
+++ b/libavfilter/af_asetnsamples.c
@@ -76,6 +76,13 @@ static int activate(AVFilterContext *ctx)
             return AVERROR(ENOMEM);
         }
 
+        ret = av_frame_copy_props(pad_frame, frame);
+        if (ret < 0) {
+            av_frame_free(&pad_frame);
+            av_frame_free(&frame);
+            return ret;
+        }
+
         av_samples_copy(pad_frame->extended_data, frame->extended_data,
                         0, 0, frame->nb_samples, frame->channels, frame->format);
         av_samples_set_silence(pad_frame->extended_data, frame->nb_samples,
diff --git a/libavfilter/af_biquads.c b/libavfilter/af_biquads.c
index ae5e1c6adef17..86cd10df3b31e 100644
--- a/libavfilter/af_biquads.c
+++ b/libavfilter/af_biquads.c
@@ -63,6 +63,7 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/ffmath.h"
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
@@ -245,7 +246,7 @@ static int config_filter(AVFilterLink *outlink, int reset)
     AVFilterContext *ctx    = outlink->src;
     BiquadsContext *s       = ctx->priv;
     AVFilterLink *inlink    = ctx->inputs[0];
-    double A = exp(s->gain / 40 * log(10.));
+    double A = ff_exp10(s->gain / 40);
     double w0 = 2 * M_PI * s->frequency / inlink->sample_rate;
     double alpha, beta;
 
diff --git a/libavfilter/af_crossfeed.c b/libavfilter/af_crossfeed.c
index a0af280432600..beee67964ddd9 100644
--- a/libavfilter/af_crossfeed.c
+++ b/libavfilter/af_crossfeed.c
@@ -17,6 +17,7 @@
  */
 
 #include "libavutil/channel_layout.h"
+#include "libavutil/ffmath.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "audio.h"
@@ -57,7 +58,7 @@ static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
     CrossfeedContext *s = ctx->priv;
-    double A = exp(s->strength * -30 / 40 * log(10.));
+    double A = ff_exp10(s->strength * -30 / 40);
     double w0 = 2 * M_PI * (1. - s->range) * 2100 / inlink->sample_rate;
     double alpha;
 
diff --git a/libavfilter/af_headphone.c b/libavfilter/af_headphone.c
index 760b97b733c86..10638f9e7b57f 100644
--- a/libavfilter/af_headphone.c
+++ b/libavfilter/af_headphone.c
@@ -50,6 +50,7 @@ typedef struct HeadphoneContext {
     int eof_hrirs;
 
     int ir_len;
+    int air_len;
 
     int mapping[64];
 
@@ -72,6 +73,7 @@ typedef struct HeadphoneContext {
     float *data_ir[2];
     float *temp_src[2];
     FFTComplex *temp_fft[2];
+    FFTComplex *temp_afft[2];
 
     FFTContext *fft[2], *ifft[2];
     FFTComplex *data_hrtf[2];
@@ -157,6 +159,7 @@ typedef struct ThreadData {
     float **ringbuffer;
     float **temp_src;
     FFTComplex **temp_fft;
+    FFTComplex **temp_afft;
 } ThreadData;
 
 static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
@@ -172,6 +175,7 @@ static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
     float *ringbuffer = td->ringbuffer[jobnr];
     float *temp_src = td->temp_src[jobnr];
     const int ir_len = s->ir_len;
+    const int air_len = s->air_len;
     const float *src = (const float *)in->data[0];
     float *dst = (float *)out->data[0];
     const int in_channels = in->channels;
@@ -200,7 +204,7 @@ static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
 
             if (l == s->lfe_channel) {
                 *dst += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
-                temp_ir += FFALIGN(ir_len, 16);
+                temp_ir += air_len;
                 continue;
             }
 
@@ -209,18 +213,18 @@ static int headphone_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
             if (read + ir_len < buffer_length) {
                 memcpy(temp_src, bptr + read, ir_len * sizeof(*temp_src));
             } else {
-                int len = FFMIN(ir_len - (read % ir_len), buffer_length - read);
+                int len = FFMIN(air_len - (read % ir_len), buffer_length - read);
 
                 memcpy(temp_src, bptr + read, len * sizeof(*temp_src));
-                memcpy(temp_src + len, bptr, (ir_len - len) * sizeof(*temp_src));
+                memcpy(temp_src + len, bptr, (air_len - len) * sizeof(*temp_src));
             }
 
-            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, ir_len);
-            temp_ir += FFALIGN(ir_len, 16);
+            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, FFALIGN(ir_len, 32));
+            temp_ir += air_len;
         }
 
-        if (fabs(*dst) > 1)
-            *n_clippings += 1;
+        if (fabsf(dst[0]) > 1)
+            n_clippings[0]++;
 
         dst += 2;
         src += in_channels;
@@ -249,6 +253,7 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
     const int buffer_length = s->buffer_length;
     const uint32_t modulo = (uint32_t)buffer_length - 1;
     FFTComplex *fft_in = s->temp_fft[jobnr];
+    FFTComplex *fft_acc = s->temp_afft[jobnr];
     FFTContext *ifft = s->ifft[jobnr];
     FFTContext *fft = s->fft[jobnr];
     const int n_fft = s->n_fft;
@@ -260,7 +265,7 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
 
     dst += offset;
 
-    n_read = FFMIN(s->ir_len, in->nb_samples);
+    n_read = FFMIN(ir_len, in->nb_samples);
     for (j = 0; j < n_read; j++) {
         dst[2 * j]     = ringbuffer[wr];
         ringbuffer[wr] = 0.0;
@@ -271,6 +276,8 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
         dst[2 * j] = 0;
     }
 
+    memset(fft_acc, 0, sizeof(FFTComplex) * n_fft);
+
     for (i = 0; i < in_channels; i++) {
         if (i == s->lfe_channel) {
             for (j = 0; j < in->nb_samples; j++) {
@@ -295,26 +302,26 @@ static int headphone_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
             const float re = fft_in[j].re;
             const float im = fft_in[j].im;
 
-            fft_in[j].re = re * hcomplex->re - im * hcomplex->im;
-            fft_in[j].im = re * hcomplex->im + im * hcomplex->re;
+            fft_acc[j].re += re * hcomplex->re - im * hcomplex->im;
+            fft_acc[j].im += re * hcomplex->im + im * hcomplex->re;
         }
+    }
 
-        av_fft_permute(ifft, fft_in);
-        av_fft_calc(ifft, fft_in);
+    av_fft_permute(ifft, fft_acc);
+    av_fft_calc(ifft, fft_acc);
 
-        for (j = 0; j < in->nb_samples; j++) {
-            dst[2 * j] += fft_in[j].re * fft_scale;
-        }
+    for (j = 0; j < in->nb_samples; j++) {
+        dst[2 * j] += fft_acc[j].re * fft_scale;
+    }
 
-        for (j = 0; j < ir_len - 1; j++) {
-            int write_pos = (wr + j) & modulo;
+    for (j = 0; j < ir_len - 1; j++) {
+        int write_pos = (wr + j) & modulo;
 
-            *(ringbuffer + write_pos) += fft_in[in->nb_samples + j].re * fft_scale;
-        }
+        *(ringbuffer + write_pos) += fft_acc[in->nb_samples + j].re * fft_scale;
     }
 
     for (i = 0; i < out->nb_samples; i++) {
-        if (fabs(*dst) > 1) {
+        if (fabsf(dst[0]) > 1) {
             n_clippings[0]++;
         }
 
@@ -362,6 +369,7 @@ static int headphone_frame(HeadphoneContext *s, AVFrame *in, AVFilterLink *outli
     td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
     td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
     td.temp_fft = s->temp_fft;
+    td.temp_afft = s->temp_afft;
 
     if (s->type == TIME_DOMAIN) {
         ctx->internal->execute(ctx, headphone_convolute, &td, NULL, 2);
@@ -396,8 +404,9 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
     int n_fft;
     int i, j, k;
 
-    s->buffer_length = 1 << (32 - ff_clz(s->ir_len));
-    s->n_fft = n_fft = 1 << (32 - ff_clz(s->ir_len + s->size));
+    s->air_len = 1 << (32 - ff_clz(ir_len));
+    s->buffer_length = 1 << (32 - ff_clz(s->air_len));
+    s->n_fft = n_fft = 1 << (32 - ff_clz(ir_len + s->size));
 
     if (s->type == FREQUENCY_DOMAIN) {
         fft_in_l = av_calloc(n_fft, sizeof(*fft_in_l));
@@ -409,12 +418,12 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
 
         av_fft_end(s->fft[0]);
         av_fft_end(s->fft[1]);
-        s->fft[0] = av_fft_init(log2(s->n_fft), 0);
-        s->fft[1] = av_fft_init(log2(s->n_fft), 0);
+        s->fft[0] = av_fft_init(av_log2(s->n_fft), 0);
+        s->fft[1] = av_fft_init(av_log2(s->n_fft), 0);
         av_fft_end(s->ifft[0]);
         av_fft_end(s->ifft[1]);
-        s->ifft[0] = av_fft_init(log2(s->n_fft), 1);
-        s->ifft[1] = av_fft_init(log2(s->n_fft), 1);
+        s->ifft[0] = av_fft_init(av_log2(s->n_fft), 1);
+        s->ifft[1] = av_fft_init(av_log2(s->n_fft), 1);
 
         if (!s->fft[0] || !s->fft[1] || !s->ifft[0] || !s->ifft[1]) {
             av_log(ctx, AV_LOG_ERROR, "Unable to create FFT contexts of size %d.\n", s->n_fft);
@@ -423,8 +432,8 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
         }
     }
 
-    s->data_ir[0] = av_calloc(FFALIGN(s->ir_len, 16), sizeof(float) * s->nb_irs);
-    s->data_ir[1] = av_calloc(FFALIGN(s->ir_len, 16), sizeof(float) * s->nb_irs);
+    s->data_ir[0] = av_calloc(s->air_len, sizeof(float) * s->nb_irs);
+    s->data_ir[1] = av_calloc(s->air_len, sizeof(float) * s->nb_irs);
     s->delay[0] = av_calloc(s->nb_irs, sizeof(float));
     s->delay[1] = av_calloc(s->nb_irs, sizeof(float));
 
@@ -436,7 +445,10 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
         s->temp_fft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
         s->temp_fft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
-        if (!s->temp_fft[0] || !s->temp_fft[1]) {
+        s->temp_afft[0] = av_calloc(s->n_fft, sizeof(FFTComplex));
+        s->temp_afft[1] = av_calloc(s->n_fft, sizeof(FFTComplex));
+        if (!s->temp_fft[0] || !s->temp_fft[1] ||
+            !s->temp_afft[0] || !s->temp_afft[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
@@ -449,11 +461,11 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
     }
 
     if (s->type == TIME_DOMAIN) {
-        s->temp_src[0] = av_calloc(FFALIGN(ir_len, 16), sizeof(float));
-        s->temp_src[1] = av_calloc(FFALIGN(ir_len, 16), sizeof(float));
+        s->temp_src[0] = av_calloc(s->air_len, sizeof(float));
+        s->temp_src[1] = av_calloc(s->air_len, sizeof(float));
 
-        data_ir_l = av_calloc(nb_irs * FFALIGN(ir_len, 16), sizeof(*data_ir_l));
-        data_ir_r = av_calloc(nb_irs * FFALIGN(ir_len, 16), sizeof(*data_ir_r));
+        data_ir_l = av_calloc(nb_irs * s->air_len, sizeof(*data_ir_l));
+        data_ir_r = av_calloc(nb_irs * s->air_len, sizeof(*data_ir_r));
         if (!data_ir_r || !data_ir_l || !s->temp_src[0] || !s->temp_src[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -475,7 +487,7 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
 
         ret = ff_inlink_consume_samples(ctx->inputs[i + 1], len, len, &s->in[i + 1].frame);
         if (ret < 0)
-            return ret;
+            goto fail;
         ptr = (float *)s->in[i + 1].frame->extended_data[0];
 
         if (s->hrir_fmt == HRIR_STEREO) {
@@ -495,7 +507,7 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
             if (idx == -1)
                 continue;
             if (s->type == TIME_DOMAIN) {
-                offset = idx * FFALIGN(len, 16);
+                offset = idx * s->air_len;
                 for (j = 0; j < len; j++) {
                     data_ir_l[offset + j] = ptr[len * 2 - j * 2 - 2] * gain_lin;
                     data_ir_r[offset + j] = ptr[len * 2 - j * 2 - 1] * gain_lin;
@@ -538,7 +550,7 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
 
                 I = idx * 2;
                 if (s->type == TIME_DOMAIN) {
-                    offset = idx * FFALIGN(len, 16);
+                    offset = idx * s->air_len;
                     for (j = 0; j < len; j++) {
                         data_ir_l[offset + j] = ptr[len * N - j * N - N + I    ] * gain_lin;
                         data_ir_r[offset + j] = ptr[len * N - j * N - N + I + 1] * gain_lin;
@@ -567,8 +579,8 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
     }
 
     if (s->type == TIME_DOMAIN) {
-        memcpy(s->data_ir[0], data_ir_l, sizeof(float) * nb_irs * FFALIGN(ir_len, 16));
-        memcpy(s->data_ir[1], data_ir_r, sizeof(float) * nb_irs * FFALIGN(ir_len, 16));
+        memcpy(s->data_ir[0], data_ir_l, sizeof(float) * nb_irs * s->air_len);
+        memcpy(s->data_ir[1], data_ir_r, sizeof(float) * nb_irs * s->air_len);
     } else {
         s->data_hrtf[0] = av_calloc(n_fft * s->nb_irs, sizeof(FFTComplex));
         s->data_hrtf[1] = av_calloc(n_fft * s->nb_irs, sizeof(FFTComplex));
@@ -587,6 +599,9 @@ static int convert_coeffs(AVFilterContext *ctx, AVFilterLink *inlink)
 
 fail:
 
+    for (i = 0; i < s->nb_inputs - 1; i++)
+        av_frame_free(&s->in[i + 1].frame);
+
     av_freep(&data_ir_l);
     av_freep(&data_ir_r);
 
@@ -789,7 +804,7 @@ static int config_output(AVFilterLink *outlink)
         }
     }
 
-    s->gain_lfe = expf((s->gain - 3 * inlink->channels - 6 + s->lfe_gain) / 20 * M_LN10);
+    s->gain_lfe = expf((s->gain - 3 * inlink->channels + s->lfe_gain) / 20 * M_LN10);
 
     return 0;
 }
@@ -813,6 +828,8 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_freep(&s->temp_src[1]);
     av_freep(&s->temp_fft[0]);
     av_freep(&s->temp_fft[1]);
+    av_freep(&s->temp_afft[0]);
+    av_freep(&s->temp_afft[1]);
     av_freep(&s->data_hrtf[0]);
     av_freep(&s->data_hrtf[1]);
     av_freep(&s->fdsp);
diff --git a/libavfilter/af_ladspa.c b/libavfilter/af_ladspa.c
index 3be26bc849db6..a8562fc073527 100644
--- a/libavfilter/af_ladspa.c
+++ b/libavfilter/af_ladspa.c
@@ -548,8 +548,8 @@ static av_cold int init(AVFilterContext *ctx)
             break;
         p = NULL;
 
-        if (sscanf(arg, "c%d=%f", &i, &val) != 2) {
-            if (sscanf(arg, "%f", &val) != 1) {
+        if (av_sscanf(arg, "c%d=%f", &i, &val) != 2) {
+            if (av_sscanf(arg, "%f", &val) != 1) {
                 av_log(ctx, AV_LOG_ERROR, "Invalid syntax.\n");
                 return AVERROR(EINVAL);
             }
@@ -715,7 +715,7 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
     LADSPA_Data value;
     unsigned long port;
 
-    if (sscanf(cmd, "c%ld", &port) + sscanf(args, "%f", &value) != 2)
+    if (av_sscanf(cmd, "c%ld", &port) + av_sscanf(args, "%f", &value) != 2)
         return AVERROR(EINVAL);
 
     return set_control(ctx, port, value);
diff --git a/libavfilter/af_sofalizer.c b/libavfilter/af_sofalizer.c
index d9098d7679db2..109cf5695b32d 100644
--- a/libavfilter/af_sofalizer.c
+++ b/libavfilter/af_sofalizer.c
@@ -42,9 +42,13 @@
 #define FREQUENCY_DOMAIN 1
 
 typedef struct MySofa {  /* contains data of one SOFA file */
-    struct MYSOFA_EASY *easy;
-    int n_samples;       /* length of one impulse response (IR) */
+    struct MYSOFA_HRTF *hrtf;
+    struct MYSOFA_LOOKUP *lookup;
+    struct MYSOFA_NEIGHBORHOOD *neighborhood;
+    int ir_samples;      /* length of one impulse response (IR) */
+    int n_samples;       /* smallest power of 2 greater than ir_samples */
     float *lir, *rir;    /* IRs (time-domain) */
+    float *fir;
     int max_delay;
 } MySofa;
 
@@ -84,7 +88,8 @@ typedef struct SOFAlizerContext {
     float *data_ir[2];          /* IRs for all channels to be convolved */
                                 /* (this excludes the LFE) */
     float *temp_src[2];
-    FFTComplex *temp_fft[2];
+    FFTComplex *temp_fft[2];    /* Array to hold FFT values */
+    FFTComplex *temp_afft[2];   /* Array to accumulate FFT values prior to IFFT */
 
                          /* control variables */
     float gain;          /* filter gain (in dB) */
@@ -92,6 +97,12 @@ typedef struct SOFAlizerContext {
     float elevation;     /* elevation of virtual loudspeakers (in deg.) */
     float radius;        /* distance virtual loudspeakers to listener (in metres) */
     int type;            /* processing type */
+    int framesize;       /* size of buffer */
+    int normalize;       /* should all IRs be normalized upon import ? */
+    int interpolate;     /* should wanted IRs be interpolated from neighbors ? */
+    int minphase;        /* should all IRs be minphased upon import ? */
+    float anglestep;     /* neighbor search angle step, in degrees */
+    float radstep;       /* neighbor search radius step, in meters */
 
     VirtualSpeaker vspkrpos[64];
 
@@ -103,8 +114,16 @@ typedef struct SOFAlizerContext {
 
 static int close_sofa(struct MySofa *sofa)
 {
-    mysofa_close(sofa->easy);
-    sofa->easy = NULL;
+    if (sofa->neighborhood)
+        mysofa_neighborhood_free(sofa->neighborhood);
+    sofa->neighborhood = NULL;
+    if (sofa->lookup)
+        mysofa_lookup_free(sofa->lookup);
+    sofa->lookup = NULL;
+    if (sofa->hrtf)
+        mysofa_free(sofa->hrtf);
+    sofa->hrtf = NULL;
+    av_freep(&sofa->fir);
 
     return 0;
 }
@@ -113,19 +132,51 @@ static int preload_sofa(AVFilterContext *ctx, char *filename, int *samplingrate)
 {
     struct SOFAlizerContext *s = ctx->priv;
     struct MYSOFA_HRTF *mysofa;
+    char *license;
     int ret;
 
     mysofa = mysofa_load(filename, &ret);
+    s->sofa.hrtf = mysofa;
     if (ret || !mysofa) {
         av_log(ctx, AV_LOG_ERROR, "Can't find SOFA-file '%s'\n", filename);
         return AVERROR(EINVAL);
     }
 
+    ret = mysofa_check(mysofa);
+    if (ret != MYSOFA_OK) {
+        av_log(ctx, AV_LOG_ERROR, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
+        return ret;
+    }
+
+    if (s->normalize)
+        mysofa_loudness(s->sofa.hrtf);
+
+    if (s->minphase)
+        mysofa_minphase(s->sofa.hrtf, 0.01f);
+
+    mysofa_tocartesian(s->sofa.hrtf);
+
+    s->sofa.lookup = mysofa_lookup_init(s->sofa.hrtf);
+    if (s->sofa.lookup == NULL)
+        return AVERROR(EINVAL);
+
+    if (s->interpolate)
+        s->sofa.neighborhood = mysofa_neighborhood_init_withstepdefine(s->sofa.hrtf,
+                                                                       s->sofa.lookup,
+                                                                       s->anglestep,
+                                                                       s->radstep);
+
+    s->sofa.fir = av_calloc(s->sofa.hrtf->N * s->sofa.hrtf->R, sizeof(*s->sofa.fir));
+    if (!s->sofa.fir)
+        return AVERROR(ENOMEM);
+
     if (mysofa->DataSamplingRate.elements != 1)
         return AVERROR(EINVAL);
+    av_log(ctx, AV_LOG_DEBUG, "Original IR length: %d.\n", mysofa->N);
     *samplingrate = mysofa->DataSamplingRate.values[0];
-    s->sofa.n_samples = mysofa->N;
-    mysofa_free(mysofa);
+    license = mysofa_getAttribute(mysofa->attributes, (char *)"License");
+    if (license)
+        av_log(ctx, AV_LOG_INFO, "SOFA license: %s\n", license);
 
     return 0;
 }
@@ -136,7 +187,7 @@ static int parse_channel_name(char **arg, int *rchannel, char *buf)
     int64_t layout, layout0;
 
     /* try to parse a channel name, e.g. "FL" */
-    if (sscanf(*arg, "%7[A-Z]%n", buf, &len)) {
+    if (av_sscanf(*arg, "%7[A-Z]%n", buf, &len)) {
         layout0 = layout = av_get_channel_layout(buf);
         /* channel_id <- first set bit in layout */
         for (i = 32; i > 0; i >>= 1) {
@@ -174,11 +225,11 @@ static void parse_speaker_pos(AVFilterContext *ctx, int64_t in_channel_layout)
             av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", buf);
             continue;
         }
-        if (sscanf(arg, "%f %f", &azim, &elev) == 2) {
+        if (av_sscanf(arg, "%f %f", &azim, &elev) == 2) {
             s->vspkrpos[out_ch_id].set = 1;
             s->vspkrpos[out_ch_id].azim = azim;
             s->vspkrpos[out_ch_id].elev = elev;
-        } else if (sscanf(arg, "%f", &azim) == 1) {
+        } else if (av_sscanf(arg, "%f", &azim) == 1) {
             s->vspkrpos[out_ch_id].set = 1;
             s->vspkrpos[out_ch_id].azim = azim;
             s->vspkrpos[out_ch_id].elev = 0;
@@ -272,6 +323,7 @@ typedef struct ThreadData {
     float **ringbuffer;
     float **temp_src;
     FFTComplex **temp_fft;
+    FFTComplex **temp_afft;
 } ThreadData;
 
 static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
@@ -286,9 +338,12 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
     int *n_clippings = &td->n_clippings[jobnr];
     float *ringbuffer = td->ringbuffer[jobnr];
     float *temp_src = td->temp_src[jobnr];
-    const int n_samples = s->sofa.n_samples; /* length of one IR */
-    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
-    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int ir_samples = s->sofa.ir_samples; /* length of one IR */
+    const int n_samples = s->sofa.n_samples;
+    const int planar = in->format == AV_SAMPLE_FMT_FLTP;
+    const int mult = 1 + !planar;
+    const float *src = (const float *)in->extended_data[0]; /* get pointer to audio input buffer */
+    float *dst = (float *)out->extended_data[jobnr * planar]; /* get pointer to audio output buffer */
     const int in_channels = s->n_conv; /* number of input channels */
     /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
     const int buffer_length = s->buffer_length;
@@ -299,7 +354,9 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
     int read;
     int i, l;
 
-    dst += offset;
+    if (!planar)
+        dst += offset;
+
     for (l = 0; l < in_channels; l++) {
         /* get starting address of ringbuffer for each input channel */
         buffer[l] = ringbuffer + l * buffer_length;
@@ -309,9 +366,18 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
         const float *temp_ir = ir; /* using same set of IRs for each sample */
 
         dst[0] = 0;
-        for (l = 0; l < in_channels; l++) {
-            /* write current input sample to ringbuffer (for each channel) */
-            buffer[l][wr] = src[l];
+        if (planar) {
+            for (l = 0; l < in_channels; l++) {
+                const float *srcp = (const float *)in->extended_data[l];
+
+                /* write current input sample to ringbuffer (for each channel) */
+                buffer[l][wr] = srcp[i];
+            }
+        } else {
+            for (l = 0; l < in_channels; l++) {
+                /* write current input sample to ringbuffer (for each channel) */
+                buffer[l][wr] = src[l];
+            }
         }
 
         /* loop goes through all channels to be convolved */
@@ -321,36 +387,36 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int n
             if (l == s->lfe_channel) {
                 /* LFE is an input channel but requires no convolution */
                 /* apply gain to LFE signal and add to output buffer */
-                *dst += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
-                temp_ir += FFALIGN(n_samples, 32);
+                dst[0] += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
+                temp_ir += n_samples;
                 continue;
             }
 
             /* current read position in ringbuffer: input sample write position
              * - delay for l-th ch. + diff. betw. IR length and buffer length
              * (mod buffer length) */
-            read = (wr - delay[l] - (n_samples - 1) + buffer_length) & modulo;
+            read = (wr - delay[l] - (ir_samples - 1) + buffer_length) & modulo;
 
-            if (read + n_samples < buffer_length) {
-                memmove(temp_src, bptr + read, n_samples * sizeof(*temp_src));
+            if (read + ir_samples < buffer_length) {
+                memmove(temp_src, bptr + read, ir_samples * sizeof(*temp_src));
             } else {
-                int len = FFMIN(n_samples - (read % n_samples), buffer_length - read);
+                int len = FFMIN(n_samples - (read % ir_samples), buffer_length - read);
 
                 memmove(temp_src, bptr + read, len * sizeof(*temp_src));
                 memmove(temp_src + len, bptr, (n_samples - len) * sizeof(*temp_src));
             }
 
             /* multiply signal and IR, and add up the results */
-            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, n_samples);
-            temp_ir += FFALIGN(n_samples, 32);
+            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, FFALIGN(ir_samples, 32));
+            temp_ir += n_samples;
         }
 
         /* clippings counter */
-        if (fabs(dst[0]) > 1)
-            *n_clippings += 1;
+        if (fabsf(dst[0]) > 1)
+            n_clippings[0]++;
 
         /* move output buffer pointer by +2 to get to next sample of processed channel: */
-        dst += 2;
+        dst += mult;
         src += in_channels;
         wr   = (wr + 1) & modulo; /* update ringbuffer write position */
     }
@@ -370,15 +436,17 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
     FFTComplex *hrtf = s->data_hrtf[jobnr]; /* get pointers to current HRTF data */
     int *n_clippings = &td->n_clippings[jobnr];
     float *ringbuffer = td->ringbuffer[jobnr];
-    const int n_samples = s->sofa.n_samples; /* length of one IR */
-    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
-    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int ir_samples = s->sofa.ir_samples; /* length of one IR */
+    const int planar = in->format == AV_SAMPLE_FMT_FLTP;
+    const int mult = 1 + !planar;
+    float *dst = (float *)out->extended_data[jobnr * planar]; /* get pointer to audio output buffer */
     const int in_channels = s->n_conv; /* number of input channels */
     /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
     const int buffer_length = s->buffer_length;
     /* -1 for AND instead of MODULO (applied to powers of 2): */
     const uint32_t modulo = (uint32_t)buffer_length - 1;
     FFTComplex *fft_in = s->temp_fft[jobnr]; /* temporary array for FFT input/output data */
+    FFTComplex *fft_acc = s->temp_afft[jobnr];
     FFTContext *ifft = s->ifft[jobnr];
     FFTContext *fft = s->fft[jobnr];
     const int n_conv = s->n_conv;
@@ -389,29 +457,42 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
     int n_read;
     int i, j;
 
-    dst += offset;
+    if (!planar)
+        dst += offset;
 
     /* find minimum between number of samples and output buffer length:
      * (important, if one IR is longer than the output buffer) */
-    n_read = FFMIN(s->sofa.n_samples, in->nb_samples);
+    n_read = FFMIN(ir_samples, in->nb_samples);
     for (j = 0; j < n_read; j++) {
         /* initialize output buf with saved signal from overflow buf */
-        dst[2 * j]     = ringbuffer[wr];
-        ringbuffer[wr] = 0.0; /* re-set read samples to zero */
+        dst[mult * j]  = ringbuffer[wr];
+        ringbuffer[wr] = 0.0f; /* re-set read samples to zero */
         /* update ringbuffer read/write position */
         wr  = (wr + 1) & modulo;
     }
 
     /* initialize rest of output buffer with 0 */
     for (j = n_read; j < in->nb_samples; j++) {
-        dst[2 * j] = 0;
+        dst[mult * j] = 0;
     }
 
+    /* fill FFT accumulation with 0 */
+    memset(fft_acc, 0, sizeof(FFTComplex) * n_fft);
+
     for (i = 0; i < n_conv; i++) {
+        const float *src = (const float *)in->extended_data[i * planar]; /* get pointer to audio input buffer */
+
         if (i == s->lfe_channel) { /* LFE */
-            for (j = 0; j < in->nb_samples; j++) {
-                /* apply gain to LFE signal and add to output buffer */
-                dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
+            if (in->format == AV_SAMPLE_FMT_FLT) {
+                for (j = 0; j < in->nb_samples; j++) {
+                    /* apply gain to LFE signal and add to output buffer */
+                    dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
+                }
+            } else {
+                for (j = 0; j < in->nb_samples; j++) {
+                    /* apply gain to LFE signal and add to output buffer */
+                    dst[j] += src[j] * s->gain_lfe;
+                }
             }
             continue;
         }
@@ -423,10 +504,18 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
         /* fill FFT input with 0 (we want to zero-pad) */
         memset(fft_in, 0, sizeof(FFTComplex) * n_fft);
 
-        for (j = 0; j < in->nb_samples; j++) {
-            /* prepare input for FFT */
-            /* write all samples of current input channel to FFT input array */
-            fft_in[j].re = src[j * in_channels + i];
+        if (in->format == AV_SAMPLE_FMT_FLT) {
+            for (j = 0; j < in->nb_samples; j++) {
+                /* prepare input for FFT */
+                /* write all samples of current input channel to FFT input array */
+                fft_in[j].re = src[j * in_channels + i];
+            }
+        } else {
+            for (j = 0; j < in->nb_samples; j++) {
+                /* prepare input for FFT */
+                /* write all samples of current input channel to FFT input array */
+                fft_in[j].re = src[j];
+            }
         }
 
         /* transform input signal of current channel to frequency domain */
@@ -439,37 +528,34 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr,
 
             /* complex multiplication of input signal and HRTFs */
             /* output channel (real): */
-            fft_in[j].re = re * hcomplex->re - im * hcomplex->im;
+            fft_acc[j].re += re * hcomplex->re - im * hcomplex->im;
             /* output channel (imag): */
-            fft_in[j].im = re * hcomplex->im + im * hcomplex->re;
+            fft_acc[j].im += re * hcomplex->im + im * hcomplex->re;
         }
+    }
 
-        /* transform output signal of current channel back to time domain */
-        av_fft_permute(ifft, fft_in);
-        av_fft_calc(ifft, fft_in);
+    /* transform accumulated output signal of all channels back to time domain */
+    av_fft_permute(ifft, fft_acc);
+    av_fft_calc(ifft, fft_acc);
 
-        for (j = 0; j < in->nb_samples; j++) {
-            /* write output signal of current channel to output buffer */
-            dst[2 * j] += fft_in[j].re * fft_scale;
-        }
+    for (j = 0; j < in->nb_samples; j++) {
+        /* write accumulated output signal to output buffer */
+        dst[mult * j] += fft_acc[j].re * fft_scale;
+    }
 
-        for (j = 0; j < n_samples - 1; j++) { /* overflow length is IR length - 1 */
-            /* write the rest of output signal to overflow buffer */
-            int write_pos = (wr + j) & modulo;
+    for (j = 0; j < ir_samples - 1; j++) { /* overflow length is IR length - 1 */
+        /* write the rest of output signal to overflow buffer */
+        int write_pos = (wr + j) & modulo;
 
-            *(ringbuffer + write_pos) += fft_in[in->nb_samples + j].re * fft_scale;
-        }
+        *(ringbuffer + write_pos) += fft_acc[in->nb_samples + j].re * fft_scale;
     }
 
     /* go through all samples of current output buffer: count clippings */
     for (i = 0; i < out->nb_samples; i++) {
         /* clippings counter */
-        if (fabs(*dst) > 1) { /* if current output sample > 1 */
+        if (fabsf(dst[i * mult]) > 1) { /* if current output sample > 1 */
             n_clippings[0]++;
         }
-
-        /* move output buffer pointer by +2 to get to next sample of processed channel: */
-        dst += 2;
     }
 
     /* remember read/write position in ringbuffer for next call */
@@ -498,10 +584,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
     td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
     td.temp_fft = s->temp_fft;
+    td.temp_afft = s->temp_afft;
 
     if (s->type == TIME_DOMAIN) {
         ctx->internal->execute(ctx, sofalizer_convolute, &td, NULL, 2);
-    } else {
+    } else if (s->type == FREQUENCY_DOMAIN) {
         ctx->internal->execute(ctx, sofalizer_fast_convolute, &td, NULL, 2);
     }
     emms_c();
@@ -522,10 +609,14 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layouts = NULL;
     int ret, sample_rates[] = { 48000, -1 };
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
 
-    ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
-    if (ret)
-        return ret;
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
     ret = ff_set_common_formats(ctx, formats);
     if (ret)
         return ret;
@@ -554,10 +645,55 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_samplerates(ctx, formats);
 }
 
+static int getfilter_float(AVFilterContext *ctx, float x, float y, float z,
+                           float *left, float *right,
+                           float *delay_left, float *delay_right)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    float c[3], delays[2]; /* query position (x, y, z); left/right delay */
+    float *fl, *fr;        /* start of the left / right IR inside res */
+    int nearest;           /* index returned by mysofa_lookup() */
+    int *neighbors;
+    float *res;
+    /* Fetch the IR pair and delays for position (x, y, z); returns 0, or AVERROR(EINVAL) if the lookup fails. */
+    c[0] = x, c[1] = y, c[2] = z;
+    nearest = mysofa_lookup(s->sofa.lookup, c);
+    if (nearest < 0)
+        return AVERROR(EINVAL);
+    /* either interpolate around the nearest entry, or read that entry's data directly */
+    if (s->interpolate) { /* NOTE(review): assumes s->sofa.neighborhood is non-NULL here -- confirm */
+        neighbors = mysofa_neighborhood(s->sofa.neighborhood, nearest);
+        res = mysofa_interpolate(s->sofa.hrtf, c,
+                                 nearest, neighbors,
+                                 s->sofa.fir, delays);
+    } else {
+        if (s->sofa.hrtf->DataDelay.elements > s->sofa.hrtf->R) { /* presumably one delay set per entry -- TODO confirm */
+            delays[0] = s->sofa.hrtf->DataDelay.values[nearest * s->sofa.hrtf->R];
+            delays[1] = s->sofa.hrtf->DataDelay.values[nearest * s->sofa.hrtf->R + 1];
+        } else { /* otherwise use the first two delay values for every entry */
+            delays[0] = s->sofa.hrtf->DataDelay.values[0];
+            delays[1] = s->sofa.hrtf->DataDelay.values[1];
+        }
+        res = s->sofa.hrtf->DataIR.values + nearest * s->sofa.hrtf->N * s->sofa.hrtf->R;
+    }
+
+    *delay_left  = delays[0];
+    *delay_right = delays[1];
+    /* the left IR is read from the start of res, the right IR N samples later */
+    fl = res;
+    fr = res + s->sofa.hrtf->N;
+    /* left and right must each have room for hrtf->N floats */
+    memcpy(left, fl, sizeof(float) * s->sofa.hrtf->N);
+    memcpy(right, fr, sizeof(float) * s->sofa.hrtf->N);
+
+    return 0;
+}
+
 static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
 {
     struct SOFAlizerContext *s = ctx->priv;
     int n_samples;
+    int ir_samples;
     int n_conv = s->n_conv; /* no. channels to convolve */
     int n_fft;
     float delay_l; /* broadband delay for each IR */
@@ -572,39 +708,46 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
     float *data_ir_r = NULL;
     int offset = 0; /* used for faster pointer arithmetics in for-loop */
     int i, j, azim_orig = azim, elev_orig = elev;
-    int filter_length, ret = 0;
+    int ret = 0;
     int n_current;
     int n_max = 0;
 
-    s->sofa.easy = mysofa_open(s->filename, sample_rate, &filter_length, &ret);
-    if (!s->sofa.easy || ret) { /* if an invalid SOFA file has been selected */
-        av_log(ctx, AV_LOG_ERROR, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_log(ctx, AV_LOG_DEBUG, "IR length: %d.\n", s->sofa.hrtf->N);
+    s->sofa.ir_samples = s->sofa.hrtf->N;
+    s->sofa.n_samples = 1 << (32 - ff_clz(s->sofa.ir_samples));
 
     n_samples = s->sofa.n_samples;
+    ir_samples = s->sofa.ir_samples;
+
+    if (s->type == TIME_DOMAIN) {
+        s->data_ir[0] = av_calloc(n_samples, sizeof(float) * s->n_conv);
+        s->data_ir[1] = av_calloc(n_samples, sizeof(float) * s->n_conv);
+
+        if (!s->data_ir[0] || !s->data_ir[1]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
 
-    s->data_ir[0] = av_calloc(FFALIGN(n_samples, 32), sizeof(float) * s->n_conv);
-    s->data_ir[1] = av_calloc(FFALIGN(n_samples, 32), sizeof(float) * s->n_conv);
     s->delay[0] = av_calloc(s->n_conv, sizeof(int));
     s->delay[1] = av_calloc(s->n_conv, sizeof(int));
 
-    if (!s->data_ir[0] || !s->data_ir[1] || !s->delay[0] || !s->delay[1]) {
+    if (!s->delay[0] || !s->delay[1]) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
 
     /* get temporary IR for L and R channel */
-    data_ir_l = av_calloc(n_conv * FFALIGN(n_samples, 32), sizeof(*data_ir_l));
-    data_ir_r = av_calloc(n_conv * FFALIGN(n_samples, 32), sizeof(*data_ir_r));
+    data_ir_l = av_calloc(n_conv * n_samples, sizeof(*data_ir_l));
+    data_ir_r = av_calloc(n_conv * n_samples, sizeof(*data_ir_r));
     if (!data_ir_r || !data_ir_l) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
 
     if (s->type == TIME_DOMAIN) {
-        s->temp_src[0] = av_calloc(FFALIGN(n_samples, 32), sizeof(float));
-        s->temp_src[1] = av_calloc(FFALIGN(n_samples, 32), sizeof(float));
+        s->temp_src[0] = av_calloc(n_samples, sizeof(float));
+        s->temp_src[1] = av_calloc(n_samples, sizeof(float));
         if (!s->temp_src[0] || !s->temp_src[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -638,10 +781,12 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
         mysofa_s2c(coordinates);
 
         /* get id of IR closest to desired position */
-        mysofa_getfilter_float(s->sofa.easy, coordinates[0], coordinates[1], coordinates[2],
-                               data_ir_l + FFALIGN(n_samples, 32) * i,
-                               data_ir_r + FFALIGN(n_samples, 32) * i,
-                               &delay_l, &delay_r);
+        ret = getfilter_float(ctx, coordinates[0], coordinates[1], coordinates[2],
+                              data_ir_l + n_samples * i,
+                              data_ir_r + n_samples * i,
+                              &delay_l, &delay_r);
+        if (ret < 0)
+            goto fail;
 
         s->delay[0][i] = delay_l * sample_rate;
         s->delay[1][i] = delay_r * sample_rate;
@@ -651,24 +796,24 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
 
     /* get size of ringbuffer (longest IR plus max. delay) */
     /* then choose next power of 2 for performance optimization */
-    n_current = s->sofa.n_samples + s->sofa.max_delay;
+    n_current = n_samples + s->sofa.max_delay;
     /* length of longest IR plus max. delay */
     n_max = FFMAX(n_max, n_current);
 
     /* buffer length is longest IR plus max. delay -> next power of 2
        (32 - count leading zeros gives required exponent)  */
     s->buffer_length = 1 << (32 - ff_clz(n_max));
-    s->n_fft = n_fft = 1 << (32 - ff_clz(n_max + sample_rate));
+    s->n_fft = n_fft = 1 << (32 - ff_clz(n_max + s->framesize));
 
     if (s->type == FREQUENCY_DOMAIN) {
         av_fft_end(s->fft[0]);
         av_fft_end(s->fft[1]);
-        s->fft[0] = av_fft_init(log2(s->n_fft), 0);
-        s->fft[1] = av_fft_init(log2(s->n_fft), 0);
+        s->fft[0] = av_fft_init(av_log2(s->n_fft), 0);
+        s->fft[1] = av_fft_init(av_log2(s->n_fft), 0);
         av_fft_end(s->ifft[0]);
         av_fft_end(s->ifft[1]);
-        s->ifft[0] = av_fft_init(log2(s->n_fft), 1);
-        s->ifft[1] = av_fft_init(log2(s->n_fft), 1);
+        s->ifft[0] = av_fft_init(av_log2(s->n_fft), 1);
+        s->ifft[1] = av_fft_init(av_log2(s->n_fft), 1);
 
         if (!s->fft[0] || !s->fft[1] || !s->ifft[0] || !s->ifft[1]) {
             av_log(ctx, AV_LOG_ERROR, "Unable to create FFT contexts of size %d.\n", s->n_fft);
@@ -680,7 +825,7 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
     if (s->type == TIME_DOMAIN) {
         s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
-    } else {
+    } else if (s->type == FREQUENCY_DOMAIN) {
         /* get temporary HRTF memory for L and R channel */
         data_hrtf_l = av_malloc_array(n_fft, sizeof(*data_hrtf_l) * n_conv);
         data_hrtf_r = av_malloc_array(n_fft, sizeof(*data_hrtf_r) * n_conv);
@@ -693,7 +838,10 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
         s->temp_fft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
         s->temp_fft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
-        if (!s->temp_fft[0] || !s->temp_fft[1]) {
+        s->temp_afft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        s->temp_afft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        if (!s->temp_fft[0] || !s->temp_fft[1] ||
+            !s->temp_afft[0] || !s->temp_afft[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
@@ -716,28 +864,28 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int
     for (i = 0; i < s->n_conv; i++) {
         float *lir, *rir;
 
-        offset = i * FFALIGN(n_samples, 32); /* no. samples already written */
+        offset = i * n_samples; /* no. samples already written */
 
         lir = data_ir_l + offset;
         rir = data_ir_r + offset;
 
         if (s->type == TIME_DOMAIN) {
-            for (j = 0; j < n_samples; j++) {
+            for (j = 0; j < ir_samples; j++) {
                 /* load reversed IRs of the specified source position
                  * sample-by-sample for left and right ear; and apply gain */
-                s->data_ir[0][offset + j] = lir[n_samples - 1 - j] * gain_lin;
-                s->data_ir[1][offset + j] = rir[n_samples - 1 - j] * gain_lin;
+                s->data_ir[0][offset + j] = lir[ir_samples - 1 - j] * gain_lin;
+                s->data_ir[1][offset + j] = rir[ir_samples - 1 - j] * gain_lin;
             }
-        } else {
+        } else if (s->type == FREQUENCY_DOMAIN) {
             memset(fft_in_l, 0, n_fft * sizeof(*fft_in_l));
             memset(fft_in_r, 0, n_fft * sizeof(*fft_in_r));
 
             offset = i * n_fft; /* no. samples already written */
-            for (j = 0; j < n_samples; j++) {
+            for (j = 0; j < ir_samples; j++) {
                 /* load non-reversed IRs of the specified source position
                  * sample-by-sample and apply gain,
                  * L channel is loaded to real part, R channel to imag part,
-                 * IRs ared shifted by L and R delay */
+                 * IRs are shifted by L and R delay */
                 fft_in_l[s->delay[0][i] + j].re = lir[j] * gain_lin;
                 fft_in_r[s->delay[1][i] + j].re = rir[j] * gain_lin;
             }
@@ -819,11 +967,11 @@ static int config_input(AVFilterLink *inlink)
     if (s->type == FREQUENCY_DOMAIN) {
         inlink->partial_buf_size =
         inlink->min_samples =
-        inlink->max_samples = inlink->sample_rate;
+        inlink->max_samples = s->framesize;
     }
 
-    /* gain -3 dB per channel, -6 dB to get LFE on a similar level */
-    s->gain_lfe = expf((s->gain - 3 * inlink->channels - 6 + s->lfe_gain) / 20 * M_LN10);
+    /* gain -3 dB per channel */
+    s->gain_lfe = expf((s->gain - 3 * inlink->channels + s->lfe_gain) / 20 * M_LN10);
 
     s->n_conv = inlink->channels;
 
@@ -846,6 +994,10 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_fft_end(s->ifft[1]);
     av_fft_end(s->fft[0]);
     av_fft_end(s->fft[1]);
+    s->ifft[0] = NULL;
+    s->ifft[1] = NULL;
+    s->fft[0] = NULL;
+    s->fft[1] = NULL;
     av_freep(&s->delay[0]);
     av_freep(&s->delay[1]);
     av_freep(&s->data_ir[0]);
@@ -856,6 +1008,8 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_freep(&s->speaker_elev);
     av_freep(&s->temp_src[0]);
     av_freep(&s->temp_src[1]);
+    av_freep(&s->temp_afft[0]);
+    av_freep(&s->temp_afft[1]);
     av_freep(&s->temp_fft[0]);
     av_freep(&s->temp_fft[1]);
     av_freep(&s->data_hrtf[0]);
@@ -871,12 +1025,18 @@ static const AVOption sofalizer_options[] = {
     { "gain",      "set gain in dB", OFFSET(gain),      AV_OPT_TYPE_FLOAT,  {.dbl=0},     -20,  40, .flags = FLAGS },
     { "rotation",  "set rotation"  , OFFSET(rotation),  AV_OPT_TYPE_FLOAT,  {.dbl=0},    -360, 360, .flags = FLAGS },
     { "elevation", "set elevation",  OFFSET(elevation), AV_OPT_TYPE_FLOAT,  {.dbl=0},     -90,  90, .flags = FLAGS },
-    { "radius",    "set radius",     OFFSET(radius),    AV_OPT_TYPE_FLOAT,  {.dbl=1},       0,   3, .flags = FLAGS },
+    { "radius",    "set radius",     OFFSET(radius),    AV_OPT_TYPE_FLOAT,  {.dbl=1},       0,   5, .flags = FLAGS },
     { "type",      "set processing", OFFSET(type),      AV_OPT_TYPE_INT,    {.i64=1},       0,   1, .flags = FLAGS, "type" },
     { "time",      "time domain",      0,               AV_OPT_TYPE_CONST,  {.i64=0},       0,   0, .flags = FLAGS, "type" },
     { "freq",      "frequency domain", 0,               AV_OPT_TYPE_CONST,  {.i64=1},       0,   0, .flags = FLAGS, "type" },
     { "speakers",  "set speaker custom positions", OFFSET(speakers_pos), AV_OPT_TYPE_STRING,  {.str=0},    0, 0, .flags = FLAGS },
-    { "lfegain",   "set lfe gain",                 OFFSET(lfe_gain),     AV_OPT_TYPE_FLOAT,   {.dbl=0},   -9, 9, .flags = FLAGS },
+    { "lfegain",   "set lfe gain",                 OFFSET(lfe_gain),     AV_OPT_TYPE_FLOAT,   {.dbl=0},  -20,40, .flags = FLAGS },
+    { "framesize", "set frame size", OFFSET(framesize), AV_OPT_TYPE_INT,    {.i64=1024},1024,96000, .flags = FLAGS },
+    { "normalize", "normalize IRs",  OFFSET(normalize), AV_OPT_TYPE_BOOL,   {.i64=1},       0,   1, .flags = FLAGS },
+    { "interpolate","interpolate IRs from neighbors",   OFFSET(interpolate),AV_OPT_TYPE_BOOL,    {.i64=0},       0,   1, .flags = FLAGS },
+    { "minphase",  "minphase IRs",   OFFSET(minphase),  AV_OPT_TYPE_BOOL,   {.i64=0},       0,   1, .flags = FLAGS },
+    { "anglestep", "set neighbor search angle step",    OFFSET(anglestep),  AV_OPT_TYPE_FLOAT,   {.dbl=.5},      0.01, 10, .flags = FLAGS },
+    { "radstep",   "set neighbor search radius step",   OFFSET(radstep),    AV_OPT_TYPE_FLOAT,   {.dbl=.01},     0.01,  1, .flags = FLAGS },
     { NULL }
 };
 
diff --git a/libavfilter/af_surround.c b/libavfilter/af_surround.c
index f29afecbfb640..fcd84155a7680 100644
--- a/libavfilter/af_surround.c
+++ b/libavfilter/af_surround.c
@@ -229,8 +229,8 @@ static int config_output(AVFilterLink *outlink)
 
 static void stereo_position(float a, float p, float *x, float *y)
 {
-      *x = av_clipf(a+FFMAX(0, sinf(p-M_PI_2))*FFDIFFSIGN(a,0), -1, 1);
-      *y = av_clipf(cosf(a*M_PI_2+M_PI)*cosf(M_PI_2-p/M_PI)*M_LN10+1, -1, 1);
+    *x = av_clipf(a+FFMAX(0, sinf(p-M_PI_2))*FFDIFFSIGN(a,0), -1, 1);
+    *y = av_clipf(cosf(a*M_PI_2+M_PI)*cosf(M_PI_2-p/M_PI)*M_LN10+1, -1, 1);
 }
 
 static inline void get_lfe(int output_lfe, int n, float lowcut, float highcut,
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 5e72803b13d66..c51ae0f3c70ff 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -55,6 +55,7 @@ extern AVFilter ff_af_ametadata;
 extern AVFilter ff_af_amix;
 extern AVFilter ff_af_amultiply;
 extern AVFilter ff_af_anequalizer;
+extern AVFilter ff_af_anlmdn;
 extern AVFilter ff_af_anull;
 extern AVFilter ff_af_apad;
 extern AVFilter ff_af_aperms;
@@ -134,6 +135,7 @@ extern AVFilter ff_asrc_anoisesrc;
 extern AVFilter ff_asrc_anullsrc;
 extern AVFilter ff_asrc_flite;
 extern AVFilter ff_asrc_hilbert;
+extern AVFilter ff_asrc_sinc;
 extern AVFilter ff_asrc_sine;
 
 extern AVFilter ff_asink_anullsink;
@@ -155,7 +157,9 @@ extern AVFilter ff_vf_bm3d;
 extern AVFilter ff_vf_boxblur;
 extern AVFilter ff_vf_boxblur_opencl;
 extern AVFilter ff_vf_bwdif;
+extern AVFilter ff_vf_chromahold;
 extern AVFilter ff_vf_chromakey;
+extern AVFilter ff_vf_chromashift;
 extern AVFilter ff_vf_ciescope;
 extern AVFilter ff_vf_codecview;
 extern AVFilter ff_vf_colorbalance;
@@ -180,6 +184,7 @@ extern AVFilter ff_vf_deband;
 extern AVFilter ff_vf_deblock;
 extern AVFilter ff_vf_decimate;
 extern AVFilter ff_vf_deconvolve;
+extern AVFilter ff_vf_dedot;
 extern AVFilter ff_vf_deflate;
 extern AVFilter ff_vf_deflicker;
 extern AVFilter ff_vf_deinterlace_qsv;
@@ -220,11 +225,13 @@ extern AVFilter ff_vf_fps;
 extern AVFilter ff_vf_framepack;
 extern AVFilter ff_vf_framerate;
 extern AVFilter ff_vf_framestep;
+extern AVFilter ff_vf_freezedetect;
 extern AVFilter ff_vf_frei0r;
 extern AVFilter ff_vf_fspp;
 extern AVFilter ff_vf_gblur;
 extern AVFilter ff_vf_geq;
 extern AVFilter ff_vf_gradfun;
+extern AVFilter ff_vf_graphmonitor;
 extern AVFilter ff_vf_greyedge;
 extern AVFilter ff_vf_haldclut;
 extern AVFilter ff_vf_hflip;
@@ -259,6 +266,7 @@ extern AVFilter ff_vf_lutrgb;
 extern AVFilter ff_vf_lutyuv;
 extern AVFilter ff_vf_maskedclamp;
 extern AVFilter ff_vf_maskedmerge;
+extern AVFilter ff_vf_maskfun;
 extern AVFilter ff_vf_mcdeint;
 extern AVFilter ff_vf_mergeplanes;
 extern AVFilter ff_vf_mestimate;
@@ -309,6 +317,7 @@ extern AVFilter ff_vf_removegrain;
 extern AVFilter ff_vf_removelogo;
 extern AVFilter ff_vf_repeatfields;
 extern AVFilter ff_vf_reverse;
+extern AVFilter ff_vf_rgbashift;
 extern AVFilter ff_vf_roberts;
 extern AVFilter ff_vf_roberts_opencl;
 extern AVFilter ff_vf_rotate;
@@ -325,6 +334,7 @@ extern AVFilter ff_vf_sendcmd;
 extern AVFilter ff_vf_separatefields;
 extern AVFilter ff_vf_setdar;
 extern AVFilter ff_vf_setfield;
+extern AVFilter ff_vf_setparams;
 extern AVFilter ff_vf_setpts;
 extern AVFilter ff_vf_setrange;
 extern AVFilter ff_vf_setsar;
@@ -361,8 +371,11 @@ extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
 extern AVFilter ff_vf_tonemap_opencl;
+extern AVFilter ff_vf_tpad;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_transpose_opencl;
+extern AVFilter ff_vf_transpose_vaapi;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
 extern AVFilter ff_vf_unsharp;
@@ -372,6 +385,7 @@ extern AVFilter ff_vf_vaguedenoiser;
 extern AVFilter ff_vf_vectorscope;
 extern AVFilter ff_vf_vflip;
 extern AVFilter ff_vf_vfrdet;
+extern AVFilter ff_vf_vibrance;
 extern AVFilter ff_vf_vidstabdetect;
 extern AVFilter ff_vf_vidstabtransform;
 extern AVFilter ff_vf_vignette;
@@ -382,7 +396,9 @@ extern AVFilter ff_vf_w3fdif;
 extern AVFilter ff_vf_waveform;
 extern AVFilter ff_vf_weave;
 extern AVFilter ff_vf_xbr;
+extern AVFilter ff_vf_xstack;
 extern AVFilter ff_vf_yadif;
+extern AVFilter ff_vf_yadif_cuda;
 extern AVFilter ff_vf_zmq;
 extern AVFilter ff_vf_zoompan;
 extern AVFilter ff_vf_zscale;
@@ -413,6 +429,7 @@ extern AVFilter ff_vsink_nullsink;
 /* multimedia filters */
 extern AVFilter ff_avf_abitscope;
 extern AVFilter ff_avf_adrawgraph;
+extern AVFilter ff_avf_agraphmonitor;
 extern AVFilter ff_avf_ahistogram;
 extern AVFilter ff_avf_aphasemeter;
 extern AVFilter ff_avf_avectorscope;
diff --git a/libavfilter/asrc_hilbert.c b/libavfilter/asrc_hilbert.c
index a3a395254fef3..a51c676c6fe14 100644
--- a/libavfilter/asrc_hilbert.c
+++ b/libavfilter/asrc_hilbert.c
@@ -67,6 +67,7 @@ static const AVOption hilbert_options[] = {
         { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, FLAGS, "win_func" },
         { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, FLAGS, "win_func" },
         { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, FLAGS, "win_func" },
+        { "bohman" ,  "Bohman",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BOHMAN},   0, 0, FLAGS, "win_func" },
     {NULL}
 };
 
diff --git a/libavfilter/asrc_sinc.c b/libavfilter/asrc_sinc.c
new file mode 100644
index 0000000000000..0135eb9023ba0
--- /dev/null
+++ b/libavfilter/asrc_sinc.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2008-2009 Rob Sykes <robs@users.sourceforge.net>
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+
+#include "libavcodec/avfft.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct SincContext {
+    const AVClass *class;
+
+    int sample_rate, nb_samples;    /* "sample_rate"/"r" and "nb_samples"/"n" options */
+    float att, beta, phase, Fc0, Fc1, tbw0, tbw1; /* Fc0/tbw0: high-pass, Fc1/tbw1: low-pass; no option in this file sets tbw0/tbw1 */
+    int num_taps[2];                /* [0]: "hptaps", [1]: "lptaps"; 0 = computed in lpf() */
+    int round;                      /* "round" option */
+
+    int n, rdft_len;                /* n: allocated length of coeffs, in samples */
+    float *coeffs;                  /* generated coefficients, allocated in config_output() */
+    int64_t pts;                    /* index of the next coefficient request_frame() sends */
+
+    RDFTContext *rdft, *irdft;      /* transforms created by fir_to_phase() */
+} SincContext;
+
+/* Output the next run of at most s->nb_samples coefficients as one frame whose
+ * pts is the index of its first coefficient; EOF once all s->n were sent. */
+static int request_frame(AVFilterLink *outlink)
+{
+    SincContext *s = outlink->src->priv;
+    const int nb_samples = FFMIN(s->nb_samples, s->n - s->pts);
+    AVFrame *out;
+
+    if (nb_samples <= 0)
+        return AVERROR_EOF;
+
+    out = ff_get_audio_buffer(outlink, nb_samples);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    /* query_formats() negotiates mono float, so all samples go to data[0] */
+    memcpy(out->data[0], &s->coeffs[s->pts], sizeof(float) * nb_samples);
+    out->pts = s->pts;
+    s->pts  += nb_samples;
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* Advertise the only configuration this source produces: float samples,
+ * mono layout, and the sample rate taken from the filter's sample_rate field. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat fmt_list[] = { AV_SAMPLE_FMT_FLT,
+                                                    AV_SAMPLE_FMT_NONE };
+    static const int64_t layout_list[] = { AV_CH_LAYOUT_MONO, -1 };
+    SincContext *s = ctx->priv;
+    int rate_list[] = { s->sample_rate, -1 };
+    AVFilterChannelLayouts *ch_layouts;
+    AVFilterFormats *fmts, *rates;
+    int err;
+
+    /* sample format */
+    if (!(fmts = ff_make_format_list(fmt_list)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats (ctx, fmts)) < 0)
+        return err;
+
+    /* channel layout */
+    if (!(ch_layouts = avfilter_make_format64_list(layout_list)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, ch_layouts)) < 0)
+        return err;
+
+    /* sample rate */
+    if (!(rates = ff_make_format_list(rate_list)))
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, rates);
+}
+
+/* I0(x) by its power series, summed until another term stops growing the float total. */
+static float bessel_I_0(float x)
+{
+    float term = 1, sum = 1, prev, half = x / 2;
+
+    for (int k = 1; ; k++) {
+        const float ratio = half / k;
+        prev  = sum;
+        term *= ratio * ratio;
+        sum  += term;
+        if (!(sum > prev))  /* also false for NaN, where "!=" never terminated */
+            return sum;
+    }
+}
+/* Build a symmetric num_taps-long windowed sinc for cutoff Fc; returns NULL if allocation fails. */
+static float *make_lpf(int num_taps, float Fc, float beta, float rho,
+                       float scale, int dc_norm)
+{
+    int i, m = num_taps - 1;
+    float *h = av_calloc(num_taps, sizeof(*h)), sum = 0, span = .5f * m + rho;
+    float mult = scale / bessel_I_0(beta), mult1 = span ? 1.f / span : 0.f; /* span is 0 for a single tap: avoid 0 * inf = NaN below */
+
+    if (!h)
+        return NULL;
+    av_assert0(Fc >= 0 && Fc <= 1);
+
+    for (i = 0; i <= m / 2; i++) { /* compute the first half and mirror it into the second */
+        float z = i - .5f * m, x = z * M_PI, y = z * mult1;
+        h[i] = x ? sinf(Fc * x) / x : Fc;
+        sum += h[i] *= bessel_I_0(beta * sqrtf(1.f - y * y)) * mult;
+        if (m - i != i) /* the centre tap has no mirror image */
+            sum += h[m - i] = h[i];
+    }
+
+    for (i = 0; dc_norm && i < num_taps; i++)
+        h[i] *= scale / sum;
+
+    return h;
+}
+/* Choose the Kaiser window beta from attenuation att and tr_bw (presumably the transition width). */
+static float kaiser_beta(float att, float tr_bw)
+{
+    if (att >= 60.f) {
+        static const float coefs[][4] = { /* per row: cubic coefficients in att, highest power first */
+            {-6.784957e-10, 1.02856e-05, 0.1087556, -0.8988365 + .001},
+            {-6.897885e-10, 1.027433e-05, 0.10876, -0.8994658 + .002},
+            {-1.000683e-09, 1.030092e-05, 0.1087677, -0.9007898 + .003},
+            {-3.654474e-10, 1.040631e-05, 0.1087085, -0.8977766 + .006},
+            {8.106988e-09, 6.983091e-06, 0.1091387, -0.9172048 + .015},
+            {9.519571e-09, 7.272678e-06, 0.1090068, -0.9140768 + .025},
+            {-5.626821e-09, 1.342186e-05, 0.1083999, -0.9065452 + .05},
+            {-9.965946e-08, 5.073548e-05, 0.1040967, -0.7672778 + .085},
+            {1.604808e-07, -5.856462e-05, 0.1185998, -1.34824 + .1},
+            {-1.511964e-07, 6.363034e-05, 0.1064627, -0.9876665 + .18},
+        };
+        float realm = logf(tr_bw / .0005f) / logf(2.f); /* log2(tr_bw / .0005), used as a fractional row index */
+        float const *c0 = coefs[av_clip((int)realm, 0, FF_ARRAY_ELEMS(coefs) - 1)];
+        float const *c1 = coefs[av_clip(1 + (int)realm, 0, FF_ARRAY_ELEMS(coefs) - 1)];
+        float b0 = ((c0[0] * att + c0[1]) * att + c0[2]) * att + c0[3];
+        float b1 = ((c1[0] * att + c1[1]) * att + c1[2]) * att + c1[3];
+        /* blend the two rows' results by the fractional part of realm */
+        return b0 + (b1 - b0) * (realm - (int)realm);
+    }
+    if (att > 50.f)
+        return .1102f * (att - 8.7f);
+    if (att > 20.96f)
+        return .58417f * powf(att - 20.96f, .4f) + .07886f * (att - 20.96f);
+    return 0; /* att <= 20.96 */
+}
+/* Fill in *beta when it is negative and *num_taps when it is zero; values already set are kept. */
+static void kaiser_params(float att, float Fc, float tr_bw, float *beta, int *num_taps)
+{
+    *beta = *beta < 0.f ? kaiser_beta(att, tr_bw * .5f / Fc): *beta;
+    att = att < 60.f ? (att - 7.95f) / (2.285f * M_PI * 2.f) : /* att is reused: from here on it only scales the tap count */
+        ((.0007528358f-1.577737e-05 * *beta) * *beta + 0.6248022f) * *beta + .06186902f;
+    *num_taps = !*num_taps ? ceilf(att/tr_bw + 1) : *num_taps;
+}
+/* Design one low-pass via make_lpf(); returns NULL with *num_taps = 0 when Fc / Fn is outside (0, 1). */
+static float *lpf(float Fn, float Fc, float tbw, int *num_taps, float att, float *beta, int round)
+{
+    int n = *num_taps; /* non-zero when the caller supplied a tap count */
+
+    if ((Fc /= Fn) <= 0.f || Fc >= 1.f) { /* Fc is a fraction of Fn from here on */
+        *num_taps = 0;
+        return NULL;
+    }
+
+    att = att ? att : 120.f; /* att == 0 selects 120 */
+
+    kaiser_params(att, Fc, (tbw ? tbw / Fn : .05f) * .5f, beta, num_taps); /* tbw == 0 selects .05 */
+
+    if (!n) { /* only a computed tap count is clipped and optionally rounded */
+        n = *num_taps;
+        *num_taps = av_clip(n, 11, 32767);
+        if (round)
+            *num_taps = 1 + 2 * (int)((int)((*num_taps / 2) * Fc + .5f) / Fc + .5f);
+    }
+
+    return make_lpf(*num_taps |= 1, Fc, *beta, 0.f, 1.f, 0); /* |= 1 forces an odd length */
+}
+/* Negate every tap, then add 1 to the tap at index (n - 1) / 2. */
+static void invert(float *h, int n)
+{
+    const int mid = (n - 1) / 2;
+    for (int i = n - 1; i >= 0; i--)
+        h[i] = -h[i];
+    h[mid] += 1;
+}
+/* Shuffle h[1] <-> h[n] around av_rdft_calc() (presumably its packed last-bin value); arguments parenthesized, no trailing ';'. */
+#define PACK(h, n)   ((h)[1] = (h)[n])
+#define UNPACK(h, n) ((h)[n] = (h)[1], (h)[(n) + 1] = (h)[1] = 0)
+#define SQR(a) ((a) * (a))
+
+/* logf() for non-negative input (asserted); an input of exactly zero
+ * yields the finite stand-in -26 instead of -infinity. */
+static float safe_log(float x)
+{
+    av_assert0(x >= 0);
+    return x ? logf(x) : -26;
+}
+
+/* Reshape the FIR *h (length *len) towards the requested phase using the cepstral method. May reallocate
+ * *h and change *len; stores the post-peak length in *post_len. Returns 0 or AVERROR(ENOMEM). */
+static int fir_to_phase(SincContext *s, float **h, int *len, int *post_len, float phase)
+{
+    float *pi_wraps, *work, phase1 = (phase > 50.f ? 100.f - phase : phase) / 50.f;
+    int i, work_len, begin, end, imp_peak = 0, peak = 0;
+    float imp_sum = 0, peak_imp_sum = 0;
+    float prev_angle2 = 0, cum_2pi = 0, prev_angle1 = 0, cum_1pi = 0;
+
+    for (i = *len, work_len = 2 * 2 * 8; i > 1; work_len <<= 1, i >>= 1);
+
+    /* one block: work (+2: (UN)PACK) followed by pi_wraps, so each error path frees a single pointer */
+    work = av_calloc((work_len + 2) + (work_len + 2) / 2, sizeof(*work));
+    if (!work)
+        return AVERROR(ENOMEM);
+    pi_wraps = work + work_len + 2;
+    memcpy(work, *h, *len * sizeof(*work));
+
+    av_rdft_end(s->rdft);
+    av_rdft_end(s->irdft);
+    s->rdft = s->irdft = NULL;
+    s->rdft  = av_rdft_init(av_log2(work_len), DFT_R2C);
+    s->irdft = av_rdft_init(av_log2(work_len), IDFT_C2R);
+    if (!s->rdft || !s->irdft) {
+        av_free(work);
+        return AVERROR(ENOMEM);
+    }
+
+    av_rdft_calc(s->rdft, work);   /* Cepstral: */
+    UNPACK(work, work_len);
+
+    for (i = 0; i <= work_len; i += 2) {
+        float angle = atan2f(work[i + 1], work[i]);
+        float detect = 2 * M_PI;
+        float delta = angle - prev_angle2;
+        float adjust = detect * ((delta < -detect * .7f) - (delta > detect * .7f));
+
+        prev_angle2 = angle;
+        cum_2pi += adjust;
+        angle += cum_2pi;
+        detect = M_PI;
+        delta = angle - prev_angle1;
+        adjust = detect * ((delta < -detect * .7f) - (delta > detect * .7f));
+        prev_angle1 = angle;
+        cum_1pi += fabsf(adjust);        /* fabs for when 2pi and 1pi have combined */
+        pi_wraps[i >> 1] = cum_1pi;
+
+        work[i] = safe_log(sqrtf(SQR(work[i]) + SQR(work[i + 1])));
+        work[i + 1] = 0;
+    }
+
+    PACK(work, work_len);
+    av_rdft_calc(s->irdft, work);
+
+    for (i = 0; i < work_len; i++)
+        work[i] *= 2.f / work_len;
+
+    for (i = 1; i < work_len / 2; i++) {        /* Window to reject acausal components */
+        work[i] *= 2;
+        work[i + work_len / 2] = 0;
+    }
+    av_rdft_calc(s->rdft, work);
+
+    for (i = 2; i < work_len; i += 2)   /* Interpolate between linear & min phase */
+        work[i + 1] = phase1 * i / work_len * pi_wraps[work_len >> 1] + (1 - phase1) * (work[i + 1] + pi_wraps[i >> 1]) - pi_wraps[i >> 1];
+
+    work[0] = exp(work[0]);
+    work[1] = exp(work[1]);
+    for (i = 2; i < work_len; i += 2) {
+        float x = expf(work[i]);
+
+        work[i    ] = x * cosf(work[i + 1]);
+        work[i + 1] = x * sinf(work[i + 1]);
+    }
+
+    av_rdft_calc(s->irdft, work);
+    for (i = 0; i < work_len; i++)
+        work[i] *= 2.f / work_len;
+
+    /* Find peak pos. */
+    for (i = 0; i <= (int) (pi_wraps[work_len >> 1] / M_PI + .5f); i++) {
+        imp_sum += work[i];
+        if (fabs(imp_sum) > fabs(peak_imp_sum)) {
+            peak_imp_sum = imp_sum;
+            peak = i;
+        }
+        if (work[i] > work[imp_peak])   /* For debug check only */
+            imp_peak = i;
+    }
+
+    while (peak && fabsf(work[peak - 1]) > fabsf(work[peak]) && (work[peak - 1] * work[peak] > 0))
+        peak--;
+
+    if (!phase1) {
+        begin = 0;
+    } else if (phase1 == 1) {
+        begin = peak - *len / 2;
+    } else {
+        begin = (.997f - (2 - phase1) * .22f) * *len + .5f;
+        end = (.997f + (0 - phase1) * .22f) * *len + .5f;
+        begin = peak - (begin & ~3);
+        end = peak + 1 + ((end + 3) & ~3);
+        *len = end - begin;
+        *h = av_realloc_f(*h, *len, sizeof(**h));
+        if (!*h) {
+            av_free(work);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    for (i = 0; i < *len; i++)
+        (*h)[i] = work[(begin + (phase > 50.f ? *len - 1 - i : i) + work_len) & (work_len - 1)];
+    *post_len = phase > 50 ? peak - begin : begin + *len - (peak + 1);
+
+    av_log(s, AV_LOG_DEBUG, "%d nPI=%g peak-sum@%i=%g (val@%i=%g); len=%i post=%i (%g%%)\n",
+           work_len, pi_wraps[work_len >> 1] / M_PI, peak, peak_imp_sum, imp_peak,
+           work[imp_peak], *len, *post_len, 100.f - 100.f * *post_len / (*len - 1));
+
+    av_free(work);
+
+    return 0;
+}
+/* Design the requested filter into s->coeffs; every exit after the lpf() calls releases the temporary taps. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SincContext *s = ctx->priv;
+    float Fn = s->sample_rate * .5f;
+    float *h[2];
+    int i, n, post_peak, longer, ret = AVERROR(ENOMEM);
+
+    outlink->sample_rate = s->sample_rate;
+    s->pts = 0;
+
+    if (s->Fc0 >= Fn || s->Fc1 >= Fn) {
+        av_log(ctx, AV_LOG_ERROR,
+               "filter frequency must be less than %d/2.\n", s->sample_rate);
+        return AVERROR(EINVAL);
+    }
+
+    h[0] = lpf(Fn, s->Fc0, s->tbw0, &s->num_taps[0], s->att, &s->beta, s->round);
+    h[1] = lpf(Fn, s->Fc1, s->tbw1, &s->num_taps[1], s->att, &s->beta, s->round);
+    /* lpf() zeroes num_taps when it rejects Fc, so NULL with taps left means no memory */
+    if ((!h[0] && s->num_taps[0]) || (!h[1] && s->num_taps[1]))
+        goto fail;
+
+    if (h[0])
+        invert(h[0], s->num_taps[0]);
+
+    longer = s->num_taps[1] > s->num_taps[0];
+    n = s->num_taps[longer];
+
+    if (h[0] && h[1]) {
+        for (i = 0; i < s->num_taps[!longer]; i++)
+            h[longer][i + (n - s->num_taps[!longer]) / 2] += h[!longer][i];
+        if (s->Fc0 < s->Fc1)
+            invert(h[longer], n);
+        av_freep(&h[!longer]);
+    }
+
+    post_peak = n >> 1;
+    if (s->phase != 50.f) {
+        ret = fir_to_phase(s, &h[longer], &n, &post_peak, s->phase);
+        if (ret < 0)
+            goto fail;
+    }
+
+    s->n = 1 << (av_log2(n) + 1);
+    s->rdft_len = 1 << av_log2(n);
+    s->coeffs = av_calloc(s->n, sizeof(*s->coeffs));
+    ret = s->coeffs ? 0 : AVERROR(ENOMEM);
+    for (i = 0; s->coeffs && i < n; i++)
+        s->coeffs[i] = h[longer][i];
+
+fail:
+    av_free(h[0]);
+    av_free(h[1]);
+    av_rdft_end(s->rdft);
+    av_rdft_end(s->irdft);
+    s->rdft = s->irdft = NULL;
+
+    return ret;
+}
+/* Release both RDFT contexts and the coefficient buffer, clearing the pointers. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SincContext *const sinc = ctx->priv;
+    av_rdft_end(sinc->irdft);
+    sinc->irdft = NULL;
+    av_rdft_end(sinc->rdft);
+    sinc->rdft = NULL;
+    av_freep(&sinc->coeffs);
+}
+/* The filter's single audio output pad, configured by config_output() and fed by request_frame(). */
+static const AVFilterPad sinc_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+/* AF: flags given to every option below; OFFSET(x): byte offset of field x inside SincContext. */
+#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define OFFSET(x) offsetof(SincContext, x)
+/* "r" and "n" are aliases: they store to the same fields as "sample_rate" and "nb_samples". */
+static const AVOption sinc_options[] = {
+    { "sample_rate", "set sample rate",                               OFFSET(sample_rate), AV_OPT_TYPE_INT,   {.i64=44100},  1, INT_MAX, AF },
+    { "r",           "set sample rate",                               OFFSET(sample_rate), AV_OPT_TYPE_INT,   {.i64=44100},  1, INT_MAX, AF },
+    { "nb_samples",  "set the number of samples per requested frame", OFFSET(nb_samples),  AV_OPT_TYPE_INT,   {.i64=1024},   1, INT_MAX, AF },
+    { "n",           "set the number of samples per requested frame", OFFSET(nb_samples),  AV_OPT_TYPE_INT,   {.i64=1024},   1, INT_MAX, AF },
+    { "hp",          "set high-pass filter frequency",                OFFSET(Fc0),         AV_OPT_TYPE_FLOAT, {.dbl=0},      0, INT_MAX, AF },
+    { "lp",          "set low-pass filter frequency",                 OFFSET(Fc1),         AV_OPT_TYPE_FLOAT, {.dbl=0},      0, INT_MAX, AF },
+    { "phase",       "set filter phase response",                     OFFSET(phase),       AV_OPT_TYPE_FLOAT, {.dbl=50},     0,     100, AF },
+    { "beta",        "set kaiser window beta",                        OFFSET(beta),        AV_OPT_TYPE_FLOAT, {.dbl=-1},    -1,     256, AF },
+    { "att",         "set stop-band attenuation",                     OFFSET(att),         AV_OPT_TYPE_FLOAT, {.dbl=120},   40,     180, AF },
+    { "round",       "enable rounding",                               OFFSET(round),       AV_OPT_TYPE_BOOL,  {.i64=0},      0,       1, AF },
+    { "hptaps",      "set number of taps for high-pass filter",       OFFSET(num_taps[0]), AV_OPT_TYPE_INT,   {.i64=0},      0,   32768, AF },
+    { "lptaps",      "set number of taps for low-pass filter",        OFFSET(num_taps[1]), AV_OPT_TYPE_INT,   {.i64=0},      0,   32768, AF },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(sinc);
+/* Filter definition for "sinc": it has no input pads and one audio output pad. */
+AVFilter ff_asrc_sinc = {
+    .name          = "sinc",
+    .description   = NULL_IF_CONFIG_SMALL("Generate a sinc kaiser-windowed low-pass, high-pass, band-pass, or band-reject FIR coefficients."),
+    .priv_size     = sizeof(SincContext),
+    .priv_class    = &sinc_class,
+    .query_formats = query_formats,
+    .uninit        = uninit,
+    .inputs        = NULL,
+    .outputs       = sinc_outputs,
+};
diff --git a/libavfilter/avf_showfreqs.c b/libavfilter/avf_showfreqs.c
index 22f28ec387157..ff6a762547fe7 100644
--- a/libavfilter/avf_showfreqs.c
+++ b/libavfilter/avf_showfreqs.c
@@ -118,6 +118,7 @@ static const AVOption showfreqs_options[] = {
         { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, FLAGS, "win_func" },
         { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, FLAGS, "win_func" },
         { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, FLAGS, "win_func" },
+        { "bohman",   "Bohman",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BOHMAN} ,  0, 0, FLAGS, "win_func" },
     { "overlap",  "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=1.}, 0., 1., FLAGS },
     { "averaging", "set time averaging", OFFSET(avg), AV_OPT_TYPE_INT, {.i64=1}, 0, INT32_MAX, FLAGS },
     { "colors", "set channels colors", OFFSET(colors), AV_OPT_TYPE_STRING, {.str = "red|green|blue|yellow|orange|lime|pink|magenta|brown" }, 0, 0, FLAGS },
diff --git a/libavfilter/avf_showspectrum.c b/libavfilter/avf_showspectrum.c
index 41693a0ce1538..e8d3f1ec8d22b 100644
--- a/libavfilter/avf_showspectrum.c
+++ b/libavfilter/avf_showspectrum.c
@@ -46,7 +46,7 @@
 enum DisplayMode  { COMBINED, SEPARATE, NB_MODES };
 enum DataMode     { D_MAGNITUDE, D_PHASE, NB_DMODES };
 enum DisplayScale { LINEAR, SQRT, CBRT, LOG, FOURTHRT, FIFTHRT, NB_SCALES };
-enum ColorMode    { CHANNEL, INTENSITY, RAINBOW, MORELAND, NEBULAE, FIRE, FIERY, FRUIT, COOL, MAGMA, GREEN, NB_CLMODES };
+enum ColorMode    { CHANNEL, INTENSITY, RAINBOW, MORELAND, NEBULAE, FIRE, FIERY, FRUIT, COOL, MAGMA, GREEN, VIRIDIS, PLASMA, CIVIDIS, TERRAIN, NB_CLMODES };
 enum SlideMode    { REPLACE, SCROLL, FULLFRAME, RSCROLL, NB_SLIDES };
 enum Orientation  { VERTICAL, HORIZONTAL, NB_ORIENTATIONS };
 
@@ -123,6 +123,10 @@ static const AVOption showspectrum_options[] = {
         { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
         { "magma",     "magma based coloring",            0, AV_OPT_TYPE_CONST, {.i64=MAGMA},     0, 0, FLAGS, "color" },
         { "green",     "green based coloring",            0, AV_OPT_TYPE_CONST, {.i64=GREEN},     0, 0, FLAGS, "color" },
+        { "viridis",   "viridis based coloring",          0, AV_OPT_TYPE_CONST, {.i64=VIRIDIS},   0, 0, FLAGS, "color" },
+        { "plasma",    "plasma based coloring",           0, AV_OPT_TYPE_CONST, {.i64=PLASMA},    0, 0, FLAGS, "color" },
+        { "cividis",   "cividis based coloring",          0, AV_OPT_TYPE_CONST, {.i64=CIVIDIS},   0, 0, FLAGS, "color" },
+        { "terrain",   "terrain based coloring",          0, AV_OPT_TYPE_CONST, {.i64=TERRAIN},   0, 0, FLAGS, "color" },
     { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=SQRT}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
         { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
         { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
@@ -152,6 +156,7 @@ static const AVOption showspectrum_options[] = {
         { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, FLAGS, "win_func" },
         { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, FLAGS, "win_func" },
         { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, FLAGS, "win_func" },
+        { "bohman",   "Bohman",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BOHMAN},   0, 0, FLAGS, "win_func" },
     { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
         { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, FLAGS, "orientation" },
         { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
@@ -247,12 +252,43 @@ static const struct ColorTable {
     { 0.35,            85/256.,     (138-128)/256.,      (179-128)/256. },
     { 0.48,            96/256.,     (128-128)/256.,      (189-128)/256. },
     { 0.64,           128/256.,     (103-128)/256.,      (214-128)/256. },
-    { 0.78,           167/256.,      (85-128)/256.,      (174-128)/256. },
-    {    1,           205/256.,      (80-128)/256.,      (152-128)/256. }},
+    { 0.92,           205/256.,      (80-128)/256.,      (152-128)/256. },
+    {    1,                  1,                  0,                   0 }},
     [GREEN] = {
     {    0,                  0,                  0,                   0 },
     {  .75,                 .5,                  0,                 -.5 },
     {    1,                  1,                  0,                   0 }},
+    [VIRIDIS] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.10,          0x39/255.,   (0x9D -128)/255.,    (0x8F -128)/255. },
+    { 0.23,          0x5C/255.,   (0x9A -128)/255.,    (0x68 -128)/255. },
+    { 0.35,          0x69/255.,   (0x93 -128)/255.,    (0x57 -128)/255. },
+    { 0.48,          0x76/255.,   (0x88 -128)/255.,    (0x4B -128)/255. },
+    { 0.64,          0x8A/255.,   (0x72 -128)/255.,    (0x4F -128)/255. },
+    { 0.80,          0xA3/255.,   (0x50 -128)/255.,    (0x66 -128)/255. },
+    {    1,          0xCC/255.,   (0x2F -128)/255.,    (0x87 -128)/255. }},
+    [PLASMA] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.10,          0x27/255.,   (0xC2 -128)/255.,    (0x82 -128)/255. },
+    { 0.58,          0x5B/255.,   (0x9A -128)/255.,    (0xAE -128)/255. },
+    { 0.70,          0x89/255.,   (0x44 -128)/255.,    (0xAB -128)/255. },
+    { 0.80,          0xB4/255.,   (0x2B -128)/255.,    (0x9E -128)/255. },
+    { 0.91,          0xD2/255.,   (0x38 -128)/255.,    (0x92 -128)/255. },
+    {    1,                  1,                  0,                  0. }},
+    [CIVIDIS] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.20,          0x28/255.,   (0x98 -128)/255.,    (0x6F -128)/255. },
+    { 0.50,          0x48/255.,   (0x95 -128)/255.,    (0x74 -128)/255. },
+    { 0.63,          0x69/255.,   (0x84 -128)/255.,    (0x7F -128)/255. },
+    { 0.76,          0x89/255.,   (0x75 -128)/255.,    (0x84 -128)/255. },
+    { 0.90,          0xCE/255.,   (0x35 -128)/255.,    (0x95 -128)/255. },
+    {    1,                  1,                  0,                  0. }},
+    [TERRAIN] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.15,                  0,                 .5,                   0 },
+    { 0.60,                  1,                -.5,                 -.5 },
+    { 0.85,                  1,                -.5,                  .5 },
+    {    1,                  1,                  0,                   0 }},
 };
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -482,6 +518,10 @@ static void color_range(ShowSpectrumContext *s, int ch,
         case FRUIT:
         case COOL:
         case GREEN:
+        case VIRIDIS:
+        case PLASMA:
+        case CIVIDIS:
+        case TERRAIN:
         case MAGMA:
         case INTENSITY:
             *uf = *yf;
@@ -1396,6 +1436,10 @@ static const AVOption showspectrumpic_options[] = {
         { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
         { "magma",     "magma based coloring",            0, AV_OPT_TYPE_CONST, {.i64=MAGMA},     0, 0, FLAGS, "color" },
         { "green",     "green based coloring",            0, AV_OPT_TYPE_CONST, {.i64=GREEN},     0, 0, FLAGS, "color" },
+        { "viridis",   "viridis based coloring",          0, AV_OPT_TYPE_CONST, {.i64=VIRIDIS},   0, 0, FLAGS, "color" },
+        { "plasma",    "plasma based coloring",           0, AV_OPT_TYPE_CONST, {.i64=PLASMA},    0, 0, FLAGS, "color" },
+        { "cividis",   "cividis based coloring",          0, AV_OPT_TYPE_CONST, {.i64=CIVIDIS},   0, 0, FLAGS, "color" },
+        { "terrain",   "terrain based coloring",          0, AV_OPT_TYPE_CONST, {.i64=TERRAIN},   0, 0, FLAGS, "color" },
     { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, 0, NB_SCALES-1, FLAGS, "scale" },
         { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
         { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
@@ -1425,6 +1469,7 @@ static const AVOption showspectrumpic_options[] = {
         { "cauchy",   "Cauchy",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_CAUCHY},   0, 0, FLAGS, "win_func" },
         { "parzen",   "Parzen",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_PARZEN},   0, 0, FLAGS, "win_func" },
         { "poisson",  "Poisson",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_POISSON},  0, 0, FLAGS, "win_func" },
+        { "bohman",   "Bohman",           0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BOHMAN},   0, 0, FLAGS, "win_func" },
     { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
         { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, FLAGS, "orientation" },
         { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
diff --git a/libavfilter/blend.h b/libavfilter/blend.h
index eb2022683985b..00db51838d511 100644
--- a/libavfilter/blend.h
+++ b/libavfilter/blend.h
@@ -74,7 +74,7 @@ typedef struct FilterParams {
                   struct FilterParams *param, double *values, int starty);
 } FilterParams;
 
-void ff_blend_init(FilterParams *param, int is_16bit);
-void ff_blend_init_x86(FilterParams *param, int is_16bit);
+void ff_blend_init(FilterParams *param, int depth);
+void ff_blend_init_x86(FilterParams *param, int depth);
 
 #endif /* AVFILTER_BLEND_H */
diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c
index 0f87b5439acd0..f9b0b5e7d8685 100644
--- a/libavfilter/buffersink.c
+++ b/libavfilter/buffersink.c
@@ -320,7 +320,7 @@ static const AVOption buffersink_options[] = {
     { NULL },
 };
 #undef FLAGS
-#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption abuffersink_options[] = {
     { "sample_fmts",     "set the supported sample formats",  OFFSET(sample_fmts),     AV_OPT_TYPE_BINARY, .flags = FLAGS },
     { "sample_rates",    "set the supported sample rates",    OFFSET(sample_rates),    AV_OPT_TYPE_BINARY, .flags = FLAGS },
diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
index cd56f8ca457e4..e0ff7e4dd84c9 100644
--- a/libavfilter/buffersrc.c
+++ b/libavfilter/buffersrc.c
@@ -33,6 +33,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/timestamp.h"
 #include "audio.h"
 #include "avfilter.h"
 #include "buffersrc.h"
@@ -67,15 +68,20 @@ typedef struct BufferSourceContext {
     int eof;
 } BufferSourceContext;
 
-#define CHECK_VIDEO_PARAM_CHANGE(s, c, width, height, format)\
+#define CHECK_VIDEO_PARAM_CHANGE(s, c, width, height, format, pts)\
     if (c->w != width || c->h != height || c->pix_fmt != format) {\
-        av_log(s, AV_LOG_INFO, "Changing frame properties on the fly is not supported by all filters.\n");\
+        av_log(s, AV_LOG_INFO, "filter context - w: %d h: %d fmt: %d, incoming frame - w: %d h: %d fmt: %d pts_time: %s\n",\
+               c->w, c->h, c->pix_fmt, width, height, format, av_ts2timestr(pts, &s->outputs[0]->time_base));\
+        av_log(s, AV_LOG_WARNING, "Changing video frame properties on the fly is not supported by all filters.\n");\
     }
 
-#define CHECK_AUDIO_PARAM_CHANGE(s, c, srate, ch_layout, ch_count, format)\
+#define CHECK_AUDIO_PARAM_CHANGE(s, c, srate, ch_layout, ch_count, format, pts)\
     if (c->sample_fmt != format || c->sample_rate != srate ||\
         c->channel_layout != ch_layout || c->channels != ch_count) {\
-        av_log(s, AV_LOG_ERROR, "Changing frame properties on the fly is not supported.\n");\
+        av_log(s, AV_LOG_INFO, "filter context - fmt: %s r: %d layout: %"PRIX64" ch: %d, incoming frame - fmt: %s r: %d layout: %"PRIX64" ch: %d pts_time: %s\n",\
+               av_get_sample_fmt_name(c->sample_fmt), c->sample_rate, c->channel_layout, c->channels,\
+               av_get_sample_fmt_name(format), srate, ch_layout, ch_count, av_ts2timestr(pts, &s->outputs[0]->time_base));\
+        av_log(s, AV_LOG_ERROR, "Changing audio frame properties on the fly is not supported.\n");\
         return AVERROR(EINVAL);\
     }
 
@@ -205,21 +211,21 @@ static int av_buffersrc_add_frame_internal(AVFilterContext *ctx,
 
     if (!(flags & AV_BUFFERSRC_FLAG_NO_CHECK_FORMAT)) {
 
-    switch (ctx->outputs[0]->type) {
-    case AVMEDIA_TYPE_VIDEO:
-        CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
-                                 frame->format);
-        break;
-    case AVMEDIA_TYPE_AUDIO:
-        /* For layouts unknown on input but known on link after negotiation. */
-        if (!frame->channel_layout)
-            frame->channel_layout = s->channel_layout;
-        CHECK_AUDIO_PARAM_CHANGE(ctx, s, frame->sample_rate, frame->channel_layout,
-                                 frame->channels, frame->format);
-        break;
-    default:
-        return AVERROR(EINVAL);
-    }
+        switch (ctx->outputs[0]->type) {
+        case AVMEDIA_TYPE_VIDEO:
+            CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
+                                     frame->format, frame->pts);
+            break;
+        case AVMEDIA_TYPE_AUDIO:
+            /* For layouts unknown on input but known on link after negotiation. */
+            if (!frame->channel_layout)
+                frame->channel_layout = s->channel_layout;
+            CHECK_AUDIO_PARAM_CHANGE(ctx, s, frame->sample_rate, frame->channel_layout,
+                                     frame->channels, frame->format, frame->pts);
+            break;
+        default:
+            return AVERROR(EINVAL);
+        }
 
     }
 
diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
index 8b42c760a0468..889ff772edd8d 100644
--- a/libavfilter/bwdif.h
+++ b/libavfilter/bwdif.h
@@ -21,36 +21,10 @@
 
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
-
-enum BWDIFMode {
-    BWDIF_MODE_SEND_FRAME = 0, ///< send 1 frame for each frame
-    BWDIF_MODE_SEND_FIELD = 1, ///< send 1 frame for each field
-};
-
-enum BWDIFParity {
-    BWDIF_PARITY_TFF  =  0, ///< top field first
-    BWDIF_PARITY_BFF  =  1, ///< bottom field first
-    BWDIF_PARITY_AUTO = -1, ///< auto detection
-};
-
-enum BWDIFDeint {
-    BWDIF_DEINT_ALL        = 0, ///< deinterlace all frames
-    BWDIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced
-};
+#include "yadif.h"
 
 typedef struct BWDIFContext {
-    const AVClass *class;
-
-    int mode;           ///< BWDIFMode
-    int parity;         ///< BWDIFParity
-    int deint;          ///< BWDIFDeint
-
-    int frame_pending;
-
-    AVFrame *cur;
-    AVFrame *next;
-    AVFrame *prev;
-    AVFrame *out;
+    YADIFContext yadif;
 
     void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs,
                          int prefs3, int mrefs3, int parity, int clip_max);
@@ -61,10 +35,6 @@ typedef struct BWDIFContext {
     void (*filter_edge)(void *dst, void *prev, void *cur, void *next,
                         int w, int prefs, int mrefs, int prefs2, int mrefs2,
                         int parity, int clip_max, int spat);
-
-    const AVPixFmtDescriptor *csp;
-    int inter_field;
-    int eof;
 } BWDIFContext;
 
 void ff_bwdif_init_x86(BWDIFContext *bwdif);
diff --git a/libavfilter/ebur128.c b/libavfilter/ebur128.c
index e11008078d7f0..c8986fb5e1a07 100644
--- a/libavfilter/ebur128.c
+++ b/libavfilter/ebur128.c
@@ -368,7 +368,7 @@ EBUR128_FILTER(double, 1.0)
 
 static double ebur128_energy_to_loudness(double energy)
 {
-    return 10 * (log(energy) / log(10.0)) - 0.691;
+    return 10 * log10(energy) - 0.691;
 }
 
 static size_t find_histogram_index(double energy)
diff --git a/libavfilter/f_drawgraph.c b/libavfilter/f_drawgraph.c
index f49d5b8022012..955047368dd6d 100644
--- a/libavfilter/f_drawgraph.c
+++ b/libavfilter/f_drawgraph.c
@@ -20,6 +20,7 @@
 
 #include "float.h"
 
+#include "libavutil/avstring.h"
 #include "libavutil/eval.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
@@ -215,7 +216,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         if (!e || !e->value)
             continue;
 
-        if (sscanf(e->value, "%f", &vf) != 1)
+        if (av_sscanf(e->value, "%f", &vf) != 1)
             continue;
 
         vf = av_clipf(vf, s->min, s->max);
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index e03adc9ba117b..f613d8def23f9 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -777,6 +777,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
                 int x, y, ret;
                 uint8_t *p;
                 double gauge_value;
+                int y_loudness_lu_graph, y_loudness_lu_gauge;
 
                 if (ebur128->gauge_type == GAUGE_TYPE_MOMENTARY) {
                     gauge_value = loudness_400 - ebur128->target;
@@ -784,8 +785,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
                     gauge_value = loudness_3000 - ebur128->target;
                 }
 
-                const int y_loudness_lu_graph = lu_to_y(ebur128, loudness_3000 - ebur128->target);
-                const int y_loudness_lu_gauge = lu_to_y(ebur128, gauge_value);
+                y_loudness_lu_graph = lu_to_y(ebur128, loudness_3000 - ebur128->target);
+                y_loudness_lu_gauge = lu_to_y(ebur128, gauge_value);
 
                 /* draw the graph using the short-term loudness */
                 p = pic->data[0] + ebur128->graph.y*pic->linesize[0] + ebur128->graph.x*3;
diff --git a/libavfilter/f_graphmonitor.c b/libavfilter/f_graphmonitor.c
new file mode 100644
index 0000000000000..c001835364b27
--- /dev/null
+++ b/libavfilter/f_graphmonitor.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "float.h"
+
+#include "libavutil/pixdesc.h"
+#include "libavutil/eval.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/timestamp.h"
+#include "libavutil/xga_font_data.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct GraphMonitorContext {
+    const AVClass *class;
+
+    int w, h;               ///< output size; w is the target of the "size"/"s" option
+    float opacity;          ///< "opacity" option; scaled by 255 into bg[3]
+    int mode;               ///< "mode" option: 0 = full, 1 = compact
+    int flags;              ///< "flags" option: bitmask of MODE_* values
+    AVRational frame_rate;  ///< "rate" option; copied to the output link
+
+    int64_t pts;            ///< pts of the most recently created output frame
+    uint8_t white[4];       ///< text colours, initialised in config_output()
+    uint8_t yellow[4];
+    uint8_t red[4];
+    uint8_t green[4];
+    uint8_t bg[4];          ///< background pixel written by clear_image()
+} GraphMonitorContext;
+
+enum {
+    MODE_QUEUE = 1 << 0, ///< show the number of frames queued on the link
+    MODE_FCIN  = 1 << 1, ///< show the link's frame_count_in
+    MODE_FCOUT = 1 << 2, ///< show the link's frame_count_out
+    MODE_PTS   = 1 << 3, ///< show the link's current_pts_us as a raw timestamp
+    MODE_TIME  = 1 << 4, ///< show the link's current_pts_us as a time string
+    MODE_TB    = 1 << 5, ///< show the link's time base
+    MODE_FMT   = 1 << 6, ///< show the pixel or sample format name
+    MODE_SIZE  = 1 << 7, ///< show the video size or the audio channel count
+    MODE_RATE  = 1 << 8, ///< show the video frame rate or the audio sample rate
+};
+
+#define OFFSET(x) offsetof(GraphMonitorContext, x)
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption graphmonitor_options[] = {
+    { "size", "set monitor size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, VF },
+    { "s",    "set monitor size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, VF },
+    { "opacity", "set video opacity", OFFSET(opacity), AV_OPT_TYPE_FLOAT, {.dbl=.9}, 0, 1, VF },
+    { "o",       "set video opacity", OFFSET(opacity), AV_OPT_TYPE_FLOAT, {.dbl=.9}, 0, 1, VF },
+    { "mode", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, VF, "mode" },
+    { "m",    "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, VF, "mode" },
+        { "full",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=0},   0, 0, VF, "mode" },
+        { "compact",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1},   0, 0, VF, "mode" },
+    { "flags", "set flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64=MODE_QUEUE}, 0, INT_MAX, VF, "flags" },
+    { "f",     "set flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64=MODE_QUEUE}, 0, INT_MAX, VF, "flags" }, /* each constant below enables one field in draw_items() */
+        { "queue",            NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_QUEUE},   0, 0, VF, "flags" },
+        { "frame_count_in",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_FCIN},    0, 0, VF, "flags" }, /* drawn as "in:" */
+        { "frame_count_out",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_FCOUT},   0, 0, VF, "flags" }, /* drawn as "out:" */
+        { "pts",              NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_PTS},     0, 0, VF, "flags" },
+        { "time",             NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_TIME},    0, 0, VF, "flags" },
+        { "timebase",         NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_TB},      0, 0, VF, "flags" },
+        { "format",           NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_FMT},     0, 0, VF, "flags" },
+        { "size",             NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_SIZE},    0, 0, VF, "flags" },
+        { "rate",             NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_RATE},    0, 0, VF, "flags" },
+    { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT_MAX, VF },
+    { "r",    "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT_MAX, VF },
+    { NULL }
+};
+
+/* Advertise RGBA as the only pixel format offered on the output link. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(supported);
+    int err = ff_formats_ref(formats, &ctx->outputs[0]->in_formats);
+
+    /* a negative result is passed to the caller unchanged */
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+static void clear_image(GraphMonitorContext *s, AVFrame *out, AVFilterLink *outlink)
+{
+    const int fill = AV_RN32(s->bg); /* background colour read as one 32-bit pixel */
+    uint8_t *row = out->data[0];
+    for (int y = 0; y < out->height; y++, row += out->linesize[0])
+        for (int x = 0; x < out->width; x++)
+            AV_WN32(row + x * 4, fill);
+}
+
+static void drawtext(AVFrame *pic, int x, int y, const char *txt, uint8_t *color)
+{
+    const uint8_t *font = avpriv_cga_font;
+    const int font_height = 8;
+    int i;
+
+    /* 8x8 glyphs at (x, y); text that would not fit is dropped, not clipped. */
+    if (y + 8 >= pic->height ||
+        x + strlen(txt) * 8 >= pic->width)
+        return;
+
+    for (i = 0; txt[i]; i++) {
+        /* Cast: plain char may be signed; bytes >= 0x80 must not index backwards. */
+        const uint8_t *glyph = font + (uint8_t)txt[i] * font_height;
+        uint8_t *p = pic->data[0] + y*pic->linesize[0] + (x + i*8)*4;
+        int char_y, mask;
+        for (char_y = 0; char_y < font_height; char_y++) {
+            for (mask = 0x80; mask; mask >>= 1) {
+                if (glyph[char_y] & mask) {
+                    p[0] = color[0];
+                    p[1] = color[1];
+                    p[2] = color[2];
+                }
+                p += 4;
+            }
+            p += pic->linesize[0] - 8 * 4;
+        }
+    }
+}
+
+/* Tell whether any link attached to filter, input or output, currently
+ * holds queued frames: returns 1 if so, 0 otherwise. */
+static int filter_have_queued(AVFilterContext *filter)
+{
+    int idx;
+
+    /* links on the input side */
+    for (idx = 0; idx < filter->nb_inputs; idx++) {
+        if (ff_inlink_queued_frames(filter->inputs[idx]))
+            return 1;
+    }
+
+    /* links on the output side */
+    for (idx = 0; idx < filter->nb_outputs; idx++) {
+        if (ff_inlink_queued_frames(filter->outputs[idx]))
+            return 1;
+    }
+
+    return 0;
+}
+
+static void draw_items(AVFilterContext *ctx, AVFrame *out,
+                       int xpos, int ypos,
+                       AVFilterLink *l,
+                       size_t frames)
+{
+    GraphMonitorContext *s = ctx->priv;
+    char buffer[1024] = { 0 };
+    /* Draw one " | key: value" field of link l per flag set in s->flags, left to right. */
+    if (s->flags & MODE_FMT) {
+        if (l->type == AVMEDIA_TYPE_VIDEO) {
+            snprintf(buffer, sizeof(buffer)-1, " | format: %s",
+                     av_get_pix_fmt_name(l->format));
+        } else if (l->type == AVMEDIA_TYPE_AUDIO) {
+            snprintf(buffer, sizeof(buffer)-1, " | format: %s",
+                     av_get_sample_fmt_name(l->format));
+        }
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8; /* advance 8 pixels per character drawn */
+    }
+    if (s->flags & MODE_SIZE) {
+        if (l->type == AVMEDIA_TYPE_VIDEO) {
+            snprintf(buffer, sizeof(buffer)-1, " | size: %dx%d", l->w, l->h);
+        } else if (l->type == AVMEDIA_TYPE_AUDIO) {
+            snprintf(buffer, sizeof(buffer)-1, " | channels: %d", l->channels);
+        }
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_RATE) {
+        if (l->type == AVMEDIA_TYPE_VIDEO) {
+            snprintf(buffer, sizeof(buffer)-1, " | fps: %d/%d", l->frame_rate.num, l->frame_rate.den);
+        } else if (l->type == AVMEDIA_TYPE_AUDIO) {
+            snprintf(buffer, sizeof(buffer)-1, " | samplerate: %d", l->sample_rate);
+        }
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_TB) {
+        snprintf(buffer, sizeof(buffer)-1, " | tb: %d/%d", l->time_base.num, l->time_base.den);
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_QUEUE) {
+        snprintf(buffer, sizeof(buffer)-1, " | queue: ");
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+        snprintf(buffer, sizeof(buffer)-1, "%"SIZE_SPECIFIER, frames); /* colour below: 0 white, <10 green, <50 yellow, else red */
+        drawtext(out, xpos, ypos, buffer, frames > 0 ? frames >= 10 ? frames >= 50 ? s->red : s->yellow : s->green : s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_FCIN) {
+        snprintf(buffer, sizeof(buffer)-1, " | in: %"PRId64, l->frame_count_in);
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_FCOUT) {
+        snprintf(buffer, sizeof(buffer)-1, " | out: %"PRId64, l->frame_count_out);
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_PTS) {
+        snprintf(buffer, sizeof(buffer)-1, " | pts: %s", av_ts2str(l->current_pts_us));
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+    if (s->flags & MODE_TIME) {
+        snprintf(buffer, sizeof(buffer)-1, " | time: %s", av_ts2timestr(l->current_pts_us, &AV_TIME_BASE_Q));
+        drawtext(out, xpos, ypos, buffer, s->white);
+        xpos += strlen(buffer) * 8;
+    }
+}
+
+static int create_frame(AVFilterContext *ctx, int64_t pts)
+{
+    GraphMonitorContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int xpos, ypos = 0;
+    /* Render one frame listing every filter of ctx->graph and its links, then send it out. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    clear_image(s, out, outlink);
+
+    for (int i = 0; i < ctx->graph->nb_filters; i++) {
+        AVFilterContext *filter = ctx->graph->filters[i];
+        char buffer[1024] = { 0 };
+
+        if (s->mode && !filter_have_queued(filter)) /* non-zero mode: skip filters with nothing queued */
+            continue;
+        /* header row: instance name followed by the filter's type name */
+        xpos = 0;
+        drawtext(out, xpos, ypos, filter->name, s->white);
+        xpos += strlen(filter->name) * 8 + 10;
+        drawtext(out, xpos, ypos, filter->filter->name, s->white);
+        ypos += 10;
+        for (int j = 0; j < filter->nb_inputs; j++) {
+            AVFilterLink *l = filter->inputs[j];
+            size_t frames = ff_inlink_queued_frames(l);
+
+            if (s->mode && !frames) /* non-zero mode: skip links with nothing queued */
+                continue;
+            /* one row per input link: "inN: <source filter name>" plus the selected fields */
+            xpos = 10;
+            snprintf(buffer, sizeof(buffer)-1, "in%d: ", j);
+            drawtext(out, xpos, ypos, buffer, s->white);
+            xpos += strlen(buffer) * 8;
+            drawtext(out, xpos, ypos, l->src->name, s->white);
+            xpos += strlen(l->src->name) * 8 + 10;
+            draw_items(ctx, out, xpos, ypos, l, frames);
+            ypos += 10;
+        }
+
+        ypos += 2;
+        for (int j = 0; j < filter->nb_outputs; j++) {
+            AVFilterLink *l = filter->outputs[j];
+            size_t frames = ff_inlink_queued_frames(l);
+
+            if (s->mode && !frames)
+                continue;
+            /* one row per output link: "outN: <destination filter name>" plus the selected fields */
+            xpos = 10;
+            snprintf(buffer, sizeof(buffer)-1, "out%d: ", j);
+            drawtext(out, xpos, ypos, buffer, s->white);
+            xpos += strlen(buffer) * 8;
+            drawtext(out, xpos, ypos, l->dst->name, s->white);
+            xpos += strlen(l->dst->name) * 8 + 10;
+            draw_items(ctx, out, xpos, ypos, l, frames);
+            ypos += 10;
+        }
+        ypos += 5;
+    }
+    /* remember the timestamp: activate() only emits when the new pts is greater than s->pts */
+    out->pts = pts;
+    s->pts = pts;
+    return ff_filter_frame(outlink, out);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    GraphMonitorContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    int64_t pts = AV_NOPTS_VALUE;
+    /* Input frames serve only as a clock: their pts decides when a monitor frame is drawn. */
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (ff_inlink_queued_frames(inlink)) {
+        AVFrame *frame = NULL;
+        int ret;
+
+        ret = ff_inlink_consume_frame(inlink, &frame);
+        if (ret < 0)
+            return ret;
+        if (ret > 0) {
+            pts = frame->pts;
+            av_frame_free(&frame); /* only the timestamp is kept; the frame data is not used */
+        }
+    }
+    /* Emit a frame only if the rescaled pts is past the last one sent and output is wanted. */
+    if (pts != AV_NOPTS_VALUE) {
+        pts = av_rescale_q(pts, inlink->time_base, outlink->time_base);
+        if (s->pts < pts && ff_outlink_frame_wanted(outlink))
+            return create_frame(ctx, pts);
+    }
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    GraphMonitorContext *gm = outlink->src->priv;
+
+    /* output geometry and timing are copied from the private context */
+    outlink->w                   = gm->w;
+    outlink->h                   = gm->h;
+    outlink->frame_rate          = gm->frame_rate;
+    outlink->time_base           = av_inv_q(gm->frame_rate);
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    /* palette: only the channels listed here are written */
+    gm->white[0]  = gm->white[1]  = gm->white[2] = 255;
+    gm->yellow[0] = gm->yellow[1] = gm->red[0] = gm->green[1] = 255;
+    gm->bg[3]     = 255 * gm->opacity;
+
+    return 0;
+}
+
+#if CONFIG_GRAPHMONITOR_FILTER
+/* Video variant: one video input pad and one video output pad. */
+AVFILTER_DEFINE_CLASS(graphmonitor);
+
+static const AVFilterPad graphmonitor_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad graphmonitor_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+/* Frames are handled by the activate callback; the pads set no filter_frame. */
+AVFilter ff_vf_graphmonitor = {
+    .name          = "graphmonitor",
+    .description   = NULL_IF_CONFIG_SMALL("Show various filtergraph stats."),
+    .priv_size     = sizeof(GraphMonitorContext),
+    .priv_class    = &graphmonitor_class,
+    .query_formats = query_formats,
+    .activate      = activate,
+    .inputs        = graphmonitor_inputs,
+    .outputs       = graphmonitor_outputs,
+};
+
+#endif // CONFIG_GRAPHMONITOR_FILTER
+
+#if CONFIG_AGRAPHMONITOR_FILTER
+/* Audio variant: the input pad is audio, the output pad is still video. */
+#define agraphmonitor_options graphmonitor_options /* same option table as graphmonitor */
+AVFILTER_DEFINE_CLASS(agraphmonitor);
+
+static const AVFilterPad agraphmonitor_inputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad agraphmonitor_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+/* Uses the same callbacks and private context as the video variant. */
+AVFilter ff_avf_agraphmonitor = {
+    .name          = "agraphmonitor",
+    .description   = NULL_IF_CONFIG_SMALL("Show various filtergraph stats."),
+    .priv_size     = sizeof(GraphMonitorContext),
+    .priv_class    = &agraphmonitor_class,
+    .query_formats = query_formats,
+    .activate      = activate,
+    .inputs        = agraphmonitor_inputs,
+    .outputs       = agraphmonitor_outputs,
+};
+#endif // CONFIG_AGRAPHMONITOR_FILTER
diff --git a/libavfilter/f_loop.c b/libavfilter/f_loop.c
index 255fe643da853..d9d55f983722f 100644
--- a/libavfilter/f_loop.c
+++ b/libavfilter/f_loop.c
@@ -25,6 +25,7 @@
 #include "libavutil/opt.h"
 #include "avfilter.h"
 #include "audio.h"
+#include "filters.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
@@ -44,6 +45,7 @@ typedef struct LoopContext {
     int64_t ignored_samples;
 
     int loop;
+    int eof;
     int64_t size;
     int64_t start;
     int64_t pts;
@@ -267,7 +269,7 @@ static int push_frame(AVFilterContext *ctx)
 {
     AVFilterLink *outlink = ctx->outputs[0];
     LoopContext *s = ctx->priv;
-    int64_t pts;
+    int64_t pts, duration;
     int ret;
 
     AVFrame *out = av_frame_clone(s->frames[s->current_frame]);
@@ -275,7 +277,11 @@ static int push_frame(AVFilterContext *ctx)
     if (!out)
         return AVERROR(ENOMEM);
     out->pts += s->duration - s->start_pts;
-    pts = out->pts + out->pkt_duration;
+    if (out->pkt_duration)
+        duration = out->pkt_duration;
+    else
+        duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
+    pts = out->pts + duration;
     ret = ff_filter_frame(outlink, out);
     s->current_frame++;
 
@@ -295,6 +301,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     LoopContext *s = ctx->priv;
+    int64_t duration;
     int ret = 0;
 
     if (inlink->frame_count_out >= s->start && s->size > 0 && s->loop != 0) {
@@ -307,7 +314,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
                 return AVERROR(ENOMEM);
             }
             s->nb_frames++;
-            s->duration = frame->pts + frame->pkt_duration;
+            if (frame->pkt_duration)
+                duration = frame->pkt_duration;
+            else
+                duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
+            s->duration = frame->pts + duration;
             ret = ff_filter_frame(outlink, frame);
         } else {
             av_frame_free(&frame);
@@ -321,25 +332,44 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     return ret;
 }
 
-static int request_frame(AVFilterLink *outlink)
+static int activate(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
     LoopContext *s = ctx->priv;
-    int ret = 0;
+    AVFrame *frame = NULL;
+    int ret, status;
+    int64_t pts;
 
-    if ((!s->size) ||
-        (s->nb_frames < s->size) ||
-        (s->nb_frames >= s->size && s->loop == 0)) {
-        ret = ff_request_frame(ctx->inputs[0]);
-    } else {
-        ret = push_frame(ctx);
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+    /* Pull input while filling the store, when not looping, or when size is 0 (nothing is stored). */
+    if (!s->eof && (s->nb_frames < s->size || !s->loop || !s->size)) {
+        ret = ff_inlink_consume_frame(inlink, &frame);
+        if (ret < 0)
+            return ret;
+        if (ret > 0)
+            return filter_frame(inlink, frame);
     }
 
-    if (ret == AVERROR_EOF && s->nb_frames > 0 && s->loop != 0) {
-        ret = push_frame(ctx);
+    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        if (status == AVERROR_EOF)
+            s->eof = 1;
     }
 
-    return ret;
+    if (s->eof && (s->loop == 0 || !s->size || s->nb_frames < s->size)) { /* no replay possible */
+        ff_outlink_set_status(outlink, AVERROR_EOF, s->duration);
+        return 0;
+    }
+    /* Request more input while frames are still needed; replay once the store is full. */
+    if (!s->eof && (!s->size ||
+        (s->nb_frames < s->size) ||
+        (s->nb_frames >= s->size && s->loop == 0))) {
+        FF_FILTER_FORWARD_WANTED(outlink, inlink);
+    } else if (s->loop && s->nb_frames == s->size) {
+        return push_frame(ctx);
+    }
+
+    return FFERROR_NOT_READY;
 }
 
 static const AVOption loop_options[] = {
@@ -353,18 +383,16 @@ AVFILTER_DEFINE_CLASS(loop);
 
 static const AVFilterPad inputs[] = {
     {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
     },
     { NULL }
 };
 
 static const AVFilterPad outputs[] = {
     {
-        .name          = "default",
-        .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
     },
     { NULL }
 };
@@ -376,6 +404,7 @@ AVFilter ff_vf_loop = {
     .priv_class  = &loop_class,
     .init        = init,
     .uninit      = uninit,
+    .activate    = activate,
     .inputs      = inputs,
     .outputs     = outputs,
 };
diff --git a/libavfilter/f_select.c b/libavfilter/f_select.c
index b1b2cbc21a985..1132375758800 100644
--- a/libavfilter/f_select.c
+++ b/libavfilter/f_select.c
@@ -28,12 +28,12 @@
 #include "libavutil/fifo.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
-#include "libavutil/pixelutils.h"
 #include "avfilter.h"
 #include "audio.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "scene_sad.h"
 
 static const char *const var_names[] = {
     "TB",                ///< timebase
@@ -145,7 +145,7 @@ typedef struct SelectContext {
     AVExpr *expr;
     double var_values[VAR_VARS_NB];
     int do_scene_detect;            ///< 1 if the expression requires scene detection variables, 0 otherwise
-    av_pixelutils_sad_fn sad;       ///< Sum of the absolute difference function (scene detect only)
+    ff_scene_sad_fn sad;            ///< Sum of the absolute difference function (scene detect only)
     double prev_mafd;               ///< previous MAFD                           (scene detect only)
     AVFrame *prev_picref;           ///< previous frame                          (scene detect only)
     double select;
@@ -241,8 +241,8 @@ static int config_input(AVFilterLink *inlink)
     select->var_values[VAR_SAMPLE_RATE] =
         inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
 
-    if (select->do_scene_detect) {
-        select->sad = av_pixelutils_get_sad_fn(3, 3, 2, select); // 8x8 both sources aligned
+    if (CONFIG_SELECT_FILTER && select->do_scene_detect) {
+        select->sad = ff_scene_sad_get_fn(8);
         if (!select->sad)
             return AVERROR(EINVAL);
     }
@@ -258,24 +258,12 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
     if (prev_picref &&
         frame->height == prev_picref->height &&
         frame->width  == prev_picref->width) {
-        int x, y, nb_sad = 0;
-        int64_t sad = 0;
+        uint64_t sad;
         double mafd, diff;
-        uint8_t *p1 =      frame->data[0];
-        uint8_t *p2 = prev_picref->data[0];
-        const int p1_linesize =       frame->linesize[0];
-        const int p2_linesize = prev_picref->linesize[0];
-
-        for (y = 0; y < frame->height - 7; y += 8) {
-            for (x = 0; x < frame->width*3 - 7; x += 8) {
-                sad += select->sad(p1 + x, p1_linesize, p2 + x, p2_linesize);
-                nb_sad += 8 * 8;
-            }
-            p1 += 8 * p1_linesize;
-            p2 += 8 * p2_linesize;
-        }
+
+        select->sad(prev_picref->data[0], prev_picref->linesize[0], frame->data[0], frame->linesize[0], frame->width * 3, frame->height, &sad);
         emms_c();
-        mafd = nb_sad ? (double)sad / nb_sad : 0;
+        mafd = (double)sad / (frame->width * 3 * frame->height);
         diff = fabs(mafd - select->prev_mafd);
         ret  = av_clipf(FFMIN(mafd, diff) / 100., 0, 1);
         select->prev_mafd = mafd;
@@ -430,29 +418,6 @@ static av_cold void uninit(AVFilterContext *ctx)
     }
 }
 
-static int query_formats(AVFilterContext *ctx)
-{
-    SelectContext *select = ctx->priv;
-
-    if (!select->do_scene_detect) {
-        return ff_default_query_formats(ctx);
-    } else {
-        int ret;
-        static const enum AVPixelFormat pix_fmts[] = {
-            AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
-            AV_PIX_FMT_NONE
-        };
-        AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
-
-        if (!fmts_list)
-            return AVERROR(ENOMEM);
-        ret = ff_set_common_formats(ctx, fmts_list);
-        if (ret < 0)
-            return ret;
-    }
-    return 0;
-}
-
 #if CONFIG_ASELECT_FILTER
 
 DEFINE_OPTIONS(aselect, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM);
@@ -498,6 +463,29 @@ AVFilter ff_af_aselect = {
 
 #if CONFIG_SELECT_FILTER
 
+static int query_formats(AVFilterContext *ctx)
+{
+    /* Packed 8-bit RGB layouts offered when scene detection is enabled. */
+    static const enum AVPixelFormat scene_pix_fmts[] = {
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_NONE
+    };
+    SelectContext *select = ctx->priv;
+    AVFilterFormats *formats;
+    int err;
+
+    /* Without scene detection, defer to the default format negotiation. */
+    if (!select->do_scene_detect)
+        return ff_default_query_formats(ctx);
+
+    formats = ff_make_format_list(scene_pix_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+
+    err = ff_set_common_formats(ctx, formats);
+    return err < 0 ? err : 0;
+}
+
 DEFINE_OPTIONS(select, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM);
 AVFILTER_DEFINE_CLASS(select);
 
diff --git a/libavfilter/framerate.h b/libavfilter/framerate.h
index a42d5af68a293..8048dfa36a6eb 100644
--- a/libavfilter/framerate.h
+++ b/libavfilter/framerate.h
@@ -19,7 +19,7 @@
 #ifndef AVFILTER_FRAMERATE_H
 #define AVFILTER_FRAMERATE_H
 
-#include "libavutil/pixelutils.h"
+#include "scene_sad.h"
 #include "avfilter.h"
 
 #define BLEND_FUNC_PARAMS const uint8_t *src1, ptrdiff_t src1_linesize, \
@@ -48,7 +48,7 @@ typedef struct FrameRateContext {
     AVRational srce_time_base;          ///< timebase of source
     AVRational dest_time_base;          ///< timebase of destination
 
-    av_pixelutils_sad_fn sad;           ///< Sum of the absolute difference function (scene detect only)
+    ff_scene_sad_fn sad;                ///< Sum of the absolute difference function (scene detect only)
     double prev_mafd;                   ///< previous MAFD                           (scene detect only)
 
     int blend_factor_max;
diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
new file mode 100644
index 0000000000000..e6388aba8fc7b
--- /dev/null
+++ b/libavfilter/opencl/transpose.cl
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Each work-item fills dst(x, y) from src at the swapped coordinates, which are
+ * mirrored per axis when bit 1 (source x) or bit 0 (source y) of dir is set. */
+kernel void transpose(__write_only image2d_t dst,
+                      __read_only image2d_t src,
+                      int dir) {
+    const sampler_t nearest = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+    const int2 dst_dim = get_image_dim(dst);
+    const int2 dst_pos = (int2)((int)get_global_id(0), (int)get_global_id(1));
+    const int2 src_pos = (int2)((dir & 2) ? dst_dim.y - 1 - dst_pos.y : dst_pos.y,
+                                (dir & 1) ? dst_dim.x - 1 - dst_pos.x : dst_pos.x);
+    const float4 texel = read_imagef(src, nearest, src_pos);
+
+    /* work-items outside the destination image must not write */
+    if (dst_pos.x < dst_dim.x && dst_pos.y < dst_dim.y)
+        write_imagef(dst, dst_pos, texel);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 2f67d890b38ea..4118138c30e09 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_neighbor;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_transpose;
 extern const char *ff_opencl_source_unsharp;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
new file mode 100644
index 0000000000000..73d3eacbfa816
--- /dev/null
+++ b/libavfilter/scene_sad.c
@@ -0,0 +1,72 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Scene SAD functions
+ */
+
+#include "scene_sad.h"
+
+/* 16-bit samples: stores in *sum the sum of |src1 - src2| over width x height. */
+void ff_scene_sad16_c(SCENE_SAD_PARAMS)
+{
+    const uint16_t *row1 = (const uint16_t *)src1;
+    const uint16_t *row2 = (const uint16_t *)src2;
+    /* the strides are halved to step through uint16_t rows */
+    const ptrdiff_t step1 = stride1 / 2;
+    const ptrdiff_t step2 = stride2 / 2;
+    uint64_t total = 0;
+    int row, col;
+
+    for (row = 0; row < height; row++, row1 += step1, row2 += step2) {
+        for (col = 0; col < width; col++)
+            total += FFABS(row1[col] - row2[col]);
+    }
+
+    *sum = total;
+}
+
+/* 8-bit samples: stores in *sum the sum of |src1 - src2| over width bytes on
+ * each of height rows, stepping both sources by their own stride. */
+void ff_scene_sad_c(SCENE_SAD_PARAMS)
+{
+    uint64_t total = 0;
+    int row, col;
+
+    for (row = 0; row < height; row++, src1 += stride1, src2 += stride2)
+        for (col = 0; col < width; col++)
+            total += FFABS(src1[col] - src2[col]);
+
+    *sum = total;
+}
+
+/* Returns the SAD routine for depth 8 or 16 (x86 version first), else NULL. */
+ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
+{
+    ff_scene_sad_fn fn = NULL;
+
+    if (ARCH_X86)
+        fn = ff_scene_sad_get_fn_x86(depth);
+    if (fn)
+        return fn;
+
+    /* portable C fallbacks */
+    return depth == 8 ? ff_scene_sad_c : depth == 16 ? ff_scene_sad16_c : NULL;
+}
+
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
new file mode 100644
index 0000000000000..173a051f2bb4b
--- /dev/null
+++ b/libavfilter/scene_sad.h
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Scene SAD functions
+ */
+
+#ifndef AVFILTER_SCENE_SAD_H
+#define AVFILTER_SCENE_SAD_H
+
+#include "avfilter.h"
+
+#define SCENE_SAD_PARAMS const uint8_t *src1, ptrdiff_t stride1, \
+                         const uint8_t *src2, ptrdiff_t stride2, \
+                         ptrdiff_t width, ptrdiff_t height, \
+                         uint64_t *sum
+/* Signature of a SAD routine: the sum of absolute differences is stored in *sum. */
+typedef void (*ff_scene_sad_fn)(SCENE_SAD_PARAMS);
+/* C implementation operating on 8-bit samples. */
+void ff_scene_sad_c(SCENE_SAD_PARAMS);
+/* C implementation operating on 16-bit samples. */
+void ff_scene_sad16_c(SCENE_SAD_PARAMS);
+/* x86 selector; a NULL result makes ff_scene_sad_get_fn() fall back to the C code. */
+ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
+/* NULL when neither the x86 selector nor the C fallbacks (depth 8 and 16) provide a routine. */
+ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
+
+#endif /* AVFILTER_SCENE_SAD_H */
diff --git a/libavfilter/split.c b/libavfilter/split.c
index 8b260a9ba3371..89af360cb09d7 100644
--- a/libavfilter/split.c
+++ b/libavfilter/split.c
@@ -30,11 +30,9 @@
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 
-#define FF_INTERNAL_FIELDS 1
-#include "framequeue.h"
-
 #include "avfilter.h"
 #include "audio.h"
+#include "filters.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
@@ -84,7 +82,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     for (i = 0; i < ctx->nb_outputs; i++) {
         AVFrame *buf_out;
 
-        if (ctx->outputs[i]->status_in)
+        if (ff_outlink_get_status(ctx->outputs[i]))
             continue;
         buf_out = av_frame_clone(frame);
         if (!buf_out) {
diff --git a/libavfilter/tests/integral.c b/libavfilter/tests/integral.c
index 2a8e8ff55f38c..03b1e77367758 100644
--- a/libavfilter/tests/integral.c
+++ b/libavfilter/tests/integral.c
@@ -33,8 +33,11 @@ static void display_integral(const uint32_t *ii, int w, int h, int lz_32)
 int main(void)
 {
     int ret = 0, xoff, yoff;
+    uint32_t *ii_start;
+    uint32_t *ii_start2;
+    NLMeansDSPContext dsp = {0};
 
-    // arbitrary test source of size 6x4 and linesize=8
+    // arbitrary test source of size 6x5 and linesize=8
     const int w = 6, h = 5, lz = 8;
     static const uint8_t src[] = {
         0xb0, 0x71, 0xfb, 0xd8, 0x01, 0xd9, /***/ 0x01, 0x02,
@@ -54,16 +57,14 @@ int main(void)
     uint32_t *ii  = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii));
     uint32_t *ii2 = av_mallocz_array(ii_h + 1, ii_lz_32 * sizeof(*ii2));
 
-    uint32_t *ii_start  = ii  + ii_lz_32 + 1; // skip top 0-line and left 0-column
-    uint32_t *ii_start2 = ii2 + ii_lz_32 + 1; // skip top 0-line and left 0-column
+    if (!ii || !ii2)
+        return -1;
 
-    NLMeansDSPContext dsp = {0};
+    ii_start  = ii  + ii_lz_32 + 1; // skip top 0-line and left 0-column
+    ii_start2 = ii2 + ii_lz_32 + 1; // skip top 0-line and left 0-column
 
     ff_nlmeans_init(&dsp);
 
-    if (!ii || !ii2)
-        return -1;
-
     for (yoff = -e; yoff <= e; yoff++) {
         for (xoff = -e; xoff <= e; xoff++) {
             printf("xoff=%d yoff=%d\n", xoff, yoff);
diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
new file mode 100644
index 0000000000000..aa262b9487fb0
--- /dev/null
+++ b/libavfilter/transpose.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_TRANSPOSE_H
+#define AVFILTER_TRANSPOSE_H
+
+enum PassthroughType {
+    TRANSPOSE_PT_TYPE_NONE,        // = 0
+    TRANSPOSE_PT_TYPE_LANDSCAPE,   // = 1
+    TRANSPOSE_PT_TYPE_PORTRAIT,    // = 2
+};
+
+enum TransposeDir {
+    TRANSPOSE_CCLOCK_FLIP, // = 0
+    TRANSPOSE_CLOCK,       // = 1
+    TRANSPOSE_CCLOCK,      // = 2
+    TRANSPOSE_CLOCK_FLIP,  // = 3
+    TRANSPOSE_REVERSAL,    // = 4, rotate by half-turn
+    TRANSPOSE_HFLIP,       // = 5
+    TRANSPOSE_VFLIP,       // = 6
+};
+
+#endif /* AVFILTER_TRANSPOSE_H */
diff --git a/libavfilter/vaapi_vpp.h b/libavfilter/vaapi_vpp.h
index 0bc31018d4c7a..96f720f07d97a 100644
--- a/libavfilter/vaapi_vpp.h
+++ b/libavfilter/vaapi_vpp.h
@@ -27,6 +27,9 @@
 
 #include "avfilter.h"
 
+// ARGB black, for VAProcPipelineParameterBuffer.output_background_color.
+#define VAAPI_VPP_BACKGROUND_BLACK 0xff000000
+
 typedef struct VAAPIVPPContext {
     const AVClass *class;
 
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 30e961b999c17..c71282c036b66 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,8 +30,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  33
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR  48
+#define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
diff --git a/libavfilter/vf_alphamerge.c b/libavfilter/vf_alphamerge.c
index 45fa340fcc63d..c5602b6227811 100644
--- a/libavfilter/vf_alphamerge.c
+++ b/libavfilter/vf_alphamerge.c
@@ -28,9 +28,9 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/pixfmt.h"
 #include "avfilter.h"
-#include "bufferqueue.h"
 #include "drawutils.h"
 #include "formats.h"
+#include "filters.h"
 #include "internal.h"
 #include "video.h"
 
@@ -39,17 +39,10 @@ enum { Y, U, V, A };
 typedef struct AlphaMergeContext {
     int is_packed_rgb;
     uint8_t rgba_map[4];
-    struct FFBufQueue queue_main;
-    struct FFBufQueue queue_alpha;
+    AVFrame *main_frame;
+    AVFrame *alpha_frame;
 } AlphaMergeContext;
 
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    AlphaMergeContext *merge = ctx->priv;
-    ff_bufqueue_discard_all(&merge->queue_main);
-    ff_bufqueue_discard_all(&merge->queue_alpha);
-}
-
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat main_fmts[] = {
@@ -140,44 +133,52 @@ static void draw_frame(AVFilterContext *ctx,
     }
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+static int activate(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = inlink->dst;
-    AlphaMergeContext *merge = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AlphaMergeContext *s = ctx->priv;
+    int ret;
 
-    int ret = 0;
-    int is_alpha = (inlink == ctx->inputs[1]);
-    struct FFBufQueue *queue =
-        (is_alpha ? &merge->queue_alpha : &merge->queue_main);
-    ff_bufqueue_add(ctx, queue, buf);
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
 
-    do {
-        AVFrame *main_buf, *alpha_buf;
+    if (!s->main_frame) { /* fetch the next main frame unless one is already pending */
+        ret = ff_inlink_consume_frame(ctx->inputs[0], &s->main_frame);
+        if (ret < 0)
+            return ret;
+    }
 
-        if (!ff_bufqueue_peek(&merge->queue_main, 0) ||
-            !ff_bufqueue_peek(&merge->queue_alpha, 0)) break;
+    if (!s->alpha_frame) { /* likewise for the alpha input */
+        ret = ff_inlink_consume_frame(ctx->inputs[1], &s->alpha_frame);
+        if (ret < 0)
+            return ret;
+    }
+    /* NOTE(review): this patch drops uninit(); confirm a frame still pending here is freed on teardown. */
+    if (s->main_frame && s->alpha_frame) { /* both present: merge, then emit the main frame */
+        draw_frame(ctx, s->main_frame, s->alpha_frame);
+        ret = ff_filter_frame(outlink, s->main_frame);
+        av_frame_free(&s->alpha_frame);
+        s->main_frame = NULL;
+        return ret;
+    }
 
-        main_buf = ff_bufqueue_get(&merge->queue_main);
-        alpha_buf = ff_bufqueue_get(&merge->queue_alpha);
+    FF_FILTER_FORWARD_STATUS(ctx->inputs[0], outlink);
+    FF_FILTER_FORWARD_STATUS(ctx->inputs[1], outlink);
 
-        draw_frame(ctx, main_buf, alpha_buf);
-        ret = ff_filter_frame(ctx->outputs[0], main_buf);
-        av_frame_free(&alpha_buf);
-    } while (ret >= 0);
-    return ret;
-}
+    if (ff_outlink_frame_wanted(ctx->outputs[0]) &&
+        !ff_outlink_get_status(ctx->inputs[0]) &&
+        !s->main_frame) { /* output wants data and no main frame is pending: ask input 0 */
+        ff_inlink_request_frame(ctx->inputs[0]);
+        return 0;
+    }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AlphaMergeContext *merge = ctx->priv;
-    int in, ret;
+    if (ff_outlink_frame_wanted(ctx->outputs[0]) &&
+        !ff_outlink_get_status(ctx->inputs[1]) &&
+        !s->alpha_frame) { /* output wants data and no alpha frame is pending: ask input 1 */
+        ff_inlink_request_frame(ctx->inputs[1]);
+        return 0;
+    }
 
-    in = ff_bufqueue_peek(&merge->queue_main, 0) ? 1 : 0;
-    ret = ff_request_frame(ctx->inputs[in]);
-    if (ret < 0)
-        return ret;
-    return 0;
+    return FFERROR_NOT_READY;
 }
 
 static const AVFilterPad alphamerge_inputs[] = {
@@ -185,12 +186,10 @@ static const AVFilterPad alphamerge_inputs[] = {
         .name             = "main",
         .type             = AVMEDIA_TYPE_VIDEO,
         .config_props     = config_input_main,
-        .filter_frame     = filter_frame,
         .needs_writable   = 1,
     },{
         .name             = "alpha",
         .type             = AVMEDIA_TYPE_VIDEO,
-        .filter_frame     = filter_frame,
     },
     { NULL }
 };
@@ -200,7 +199,6 @@ static const AVFilterPad alphamerge_outputs[] = {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
         .config_props  = config_output,
-        .request_frame = request_frame,
     },
     { NULL }
 };
@@ -209,9 +207,9 @@ AVFilter ff_vf_alphamerge = {
     .name           = "alphamerge",
     .description    = NULL_IF_CONFIG_SMALL("Copy the luma value of the second "
                       "input into the alpha channel of the first input."),
-    .uninit         = uninit,
     .priv_size      = sizeof(AlphaMergeContext),
     .query_formats  = query_formats,
     .inputs         = alphamerge_inputs,
     .outputs        = alphamerge_outputs,
+    .activate       = activate,
 };
diff --git a/libavfilter/vf_amplify.c b/libavfilter/vf_amplify.c
index 08243ad953dac..48dcb93a67c77 100644
--- a/libavfilter/vf_amplify.c
+++ b/libavfilter/vf_amplify.c
@@ -34,6 +34,7 @@ typedef struct AmplifyContext {
     int radius;
     float factor;
     float threshold;
+    float tolerance;
     int planes;
 
     int llimit;
@@ -104,6 +105,7 @@ static int amplify_frame(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
     const int radius = s->radius;
     const int nb_inputs = s->nb_inputs;
     const float threshold = s->threshold;
+    const float tolerance = s->tolerance;
     const float factor = s->factor;
     const int llimit = s->llimit;
     const int hlimit = s->hlimit;
@@ -136,7 +138,7 @@ static int amplify_frame(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
 
                     avg = sum / (float)nb_inputs;
                     diff = src - avg;
-                    if (fabsf(diff) < threshold) {
+                    if (fabsf(diff) < threshold && fabsf(diff) > tolerance) {
                         int amp;
                         if (diff < 0) {
                             amp = -FFMIN(FFABS(diff * factor), llimit);
@@ -179,7 +181,7 @@ static int amplify_frame(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
                     avg = sum / (float)nb_inputs;
                     diff = src - avg;
 
-                    if (fabsf(diff) < threshold) {
+                    if (fabsf(diff) < threshold && fabsf(diff) > tolerance) {
                         int amp;
                         if (diff < 0) {
                             amp = -FFMIN(FFABS(diff * factor), llimit);
@@ -271,6 +273,7 @@ static const AVOption amplify_options[] = {
     { "radius", "set radius", OFFSET(radius), AV_OPT_TYPE_INT, {.i64=2}, 1, 63, .flags = FLAGS },
     { "factor", "set factor", OFFSET(factor), AV_OPT_TYPE_FLOAT, {.dbl=2}, 0, UINT16_MAX, .flags = FLAGS },
     { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl=10}, 0, UINT16_MAX, .flags = FLAGS },
+    { "tolerance", "set tolerance", OFFSET(tolerance), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, UINT16_MAX, .flags = FLAGS },
     { "low", "set low limit for amplification", OFFSET(llimit), AV_OPT_TYPE_INT, {.i64=UINT16_MAX}, 0, UINT16_MAX, .flags = FLAGS },
     { "high", "set high limit for amplification", OFFSET(hlimit), AV_OPT_TYPE_INT, {.i64=UINT16_MAX}, 0, UINT16_MAX, .flags = FLAGS },
     { "planes", "set what planes to filter", OFFSET(planes), AV_OPT_TYPE_FLAGS, {.i64=7},    0, 15,  FLAGS },
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index e83a0db640233..80d3e24699d17 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -42,6 +42,7 @@ typedef struct BlendContext {
     enum BlendMode all_mode;
     double all_opacity;
 
+    int depth;
     FilterParams params[4];
     int tblend;
     AVFrame *prev_frame;        /* only used with tblend */
@@ -121,19 +122,22 @@ static const AVOption blend_options[] = {
 
 FRAMESYNC_DEFINE_CLASS(blend, BlendContext, fs);
 
-#define COPY(src)                                                            \
-static void blend_copy ## src(const uint8_t *top, ptrdiff_t top_linesize,    \
+#define COPY(src, depth)                                                            \
+static void blend_copy ## src##_##depth(const uint8_t *top, ptrdiff_t top_linesize,    \
                             const uint8_t *bottom, ptrdiff_t bottom_linesize,\
                             uint8_t *dst, ptrdiff_t dst_linesize,            \
                             ptrdiff_t width, ptrdiff_t height,               \
                             FilterParams *param, double *values, int starty) \
 {                                                                            \
     av_image_copy_plane(dst, dst_linesize, src, src ## _linesize,            \
-                        width, height);                                 \
+                        width * depth / 8, height); /* row size in bytes */  \
 }
 
-COPY(top)
-COPY(bottom)
+COPY(top, 8)
+COPY(bottom, 8)
+
+COPY(top, 16)
+COPY(bottom, 16)
 
 #undef COPY
 
@@ -201,15 +205,15 @@ static void blend_## name##_8bit(const uint8_t *top, ptrdiff_t top_linesize,
     }                                                                          \
 }
 
-#define DEFINE_BLEND16(name, expr)                                             \
-static void blend_## name##_16bit(const uint8_t *_top, ptrdiff_t top_linesize,       \
+#define DEFINE_BLEND16(name, expr, depth)                                            \
+static void blend_## name##_##depth##bit(const uint8_t *_top, ptrdiff_t top_linesize,\
                                   const uint8_t *_bottom, ptrdiff_t bottom_linesize, \
                                   uint8_t *_dst, ptrdiff_t dst_linesize,             \
                                   ptrdiff_t width, ptrdiff_t height,           \
                                   FilterParams *param, double *values, int starty)         \
 {                                                                              \
-    const uint16_t *top = (uint16_t*)_top;                                     \
-    const uint16_t *bottom = (uint16_t*)_bottom;                               \
+    const uint16_t *top = (const uint16_t*)_top;                               \
+    const uint16_t *bottom = (const uint16_t*)_bottom;                         \
     uint16_t *dst = (uint16_t*)_dst;                                           \
     double opacity = param->opacity;                                           \
     int i, j;                                                                  \
@@ -278,38 +282,124 @@ DEFINE_BLEND8(linearlight,av_clip_uint8((B < 128) ? B + 2 * A - 255 : B + 2 * (A
 #define BURN(a, b)        (((a) == 0) ? (a) : FFMAX(0, 65535 - ((65535 - (b)) << 16) / (a)))
 #define DODGE(a, b)       (((a) == 65535) ? (a) : FFMIN(65535, (((b) << 16) / (65535 - (a)))))
 
-DEFINE_BLEND16(addition,   FFMIN(65535, A + B))
-DEFINE_BLEND16(grainmerge, av_clip_uint16(A + B - 32768))
-DEFINE_BLEND16(average,    (A + B) / 2)
-DEFINE_BLEND16(subtract,   FFMAX(0, A - B))
-DEFINE_BLEND16(multiply,   MULTIPLY(1, A, B))
-DEFINE_BLEND16(multiply128, av_clip_uint16((A - 32768) * B / 8192. + 32768))
-DEFINE_BLEND16(negation,   65535 - FFABS(65535 - A - B))
-DEFINE_BLEND16(extremity,  FFABS(65535 - A - B))
-DEFINE_BLEND16(difference, FFABS(A - B))
-DEFINE_BLEND16(grainextract, av_clip_uint16(32768 + A - B))
-DEFINE_BLEND16(screen,     SCREEN(1, A, B))
-DEFINE_BLEND16(overlay,    (A < 32768) ? MULTIPLY(2, A, B) : SCREEN(2, A, B))
-DEFINE_BLEND16(hardlight,  (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
-DEFINE_BLEND16(hardmix,    (A < (65535 - B)) ? 0: 65535)
-DEFINE_BLEND16(heat,       (A == 0) ? 0 : 65535 - FFMIN(((65535 - B) * (65535 - B)) / A, 65535))
-DEFINE_BLEND16(freeze,     (B == 0) ? 0 : 65535 - FFMIN(((65535 - A) * (65535 - A)) / B, 65535))
-DEFINE_BLEND16(darken,     FFMIN(A, B))
-DEFINE_BLEND16(lighten,    FFMAX(A, B))
-DEFINE_BLEND16(divide,     av_clip_uint16(B == 0 ? 65535 : 65535 * A / B))
-DEFINE_BLEND16(dodge,      DODGE(A, B))
-DEFINE_BLEND16(burn,       BURN(A, B))
-DEFINE_BLEND16(softlight,  (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))
-DEFINE_BLEND16(exclusion,  A + B - 2 * A * B / 65535)
-DEFINE_BLEND16(pinlight,   (B < 32768) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 32768)))
-DEFINE_BLEND16(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 65535)
-DEFINE_BLEND16(reflect,    (B == 65535) ? B : FFMIN(65535, (A * A / (65535 - B))))
-DEFINE_BLEND16(glow,       (A == 65535) ? A : FFMIN(65535, (B * B / (65535 - A))))
-DEFINE_BLEND16(and,        A & B)
-DEFINE_BLEND16(or,         A | B)
-DEFINE_BLEND16(xor,        A ^ B)
-DEFINE_BLEND16(vividlight, (A < 32768) ? BURN(2 * A, B) : DODGE(2 * (A - 32768), B))
-DEFINE_BLEND16(linearlight,av_clip_uint16((B < 32768) ? B + 2 * A - 65535 : B + 2 * (A - 32768)))
+DEFINE_BLEND16(addition,   FFMIN(65535, A + B), 16)
+DEFINE_BLEND16(grainmerge, av_clip_uint16(A + B - 32768), 16)
+DEFINE_BLEND16(average,    (A + B) / 2, 16)
+DEFINE_BLEND16(subtract,   FFMAX(0, A - B), 16)
+DEFINE_BLEND16(multiply,   MULTIPLY(1, A, B), 16)
+DEFINE_BLEND16(multiply128, av_clip_uint16((A - 32768) * B / 8192. + 32768), 16)
+DEFINE_BLEND16(negation,   65535 - FFABS(65535 - A - B), 16)
+DEFINE_BLEND16(extremity,  FFABS(65535 - A - B), 16)
+DEFINE_BLEND16(difference, FFABS(A - B), 16)
+DEFINE_BLEND16(grainextract, av_clip_uint16(32768 + A - B), 16)
+DEFINE_BLEND16(screen,     SCREEN(1, A, B), 16)
+DEFINE_BLEND16(overlay,    (A < 32768) ? MULTIPLY(2, A, B) : SCREEN(2, A, B), 16)
+DEFINE_BLEND16(hardlight,  (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A), 16)
+DEFINE_BLEND16(hardmix,    (A < (65535 - B)) ? 0: 65535, 16)
+DEFINE_BLEND16(heat,       (A == 0) ? 0 : 65535 - FFMIN(((65535 - B) * (65535 - B)) / A, 65535), 16)
+DEFINE_BLEND16(freeze,     (B == 0) ? 0 : 65535 - FFMIN(((65535 - A) * (65535 - A)) / B, 65535), 16)
+DEFINE_BLEND16(darken,     FFMIN(A, B), 16)
+DEFINE_BLEND16(lighten,    FFMAX(A, B), 16)
+DEFINE_BLEND16(divide,     av_clip_uint16(B == 0 ? 65535 : 65535 * A / B), 16)
+DEFINE_BLEND16(dodge,      DODGE(A, B), 16)
+DEFINE_BLEND16(burn,       BURN(A, B), 16)
+DEFINE_BLEND16(softlight,  (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535), 16)
+DEFINE_BLEND16(exclusion,  A + B - 2 * A * B / 65535, 16)
+DEFINE_BLEND16(pinlight,   (B < 32768) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 32768)), 16)
+DEFINE_BLEND16(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 65535, 16)
+DEFINE_BLEND16(reflect,    (B == 65535) ? B : FFMIN(65535, (A * A / (65535 - B))), 16)
+DEFINE_BLEND16(glow,       (A == 65535) ? A : FFMIN(65535, (B * B / (65535 - A))), 16)
+DEFINE_BLEND16(and,        A & B, 16)
+DEFINE_BLEND16(or,         A | B, 16)
+DEFINE_BLEND16(xor,        A ^ B, 16)
+DEFINE_BLEND16(vividlight, (A < 32768) ? BURN(2 * A, B) : DODGE(2 * (A - 32768), B), 16)
+DEFINE_BLEND16(linearlight,av_clip_uint16((B < 32768) ? B + 2 * A - 65535 : B + 2 * (A - 32768)), 16)
+
+#undef MULTIPLY
+#undef SCREEN
+#undef BURN
+#undef DODGE
+
+#define MULTIPLY(x, a, b) ((x) * (((a) * (b)) / 1023))
+#define SCREEN(x, a, b)   (1023 - (x) * ((1023 - (a)) * (1023 - (b)) / 1023))
+#define BURN(a, b)        (((a) == 0) ? (a) : FFMAX(0, 1023 - ((1023 - (b)) << 10) / (a)))
+#define DODGE(a, b)       (((a) == 1023) ? (a) : FFMIN(1023, (((b) << 10) / (1023 - (a)))))
+
+DEFINE_BLEND16(addition,   FFMIN(1023, A + B), 10)
+DEFINE_BLEND16(grainmerge, (int)av_clip_uintp2(A + B - 512, 10), 10)
+DEFINE_BLEND16(average,    (A + B) / 2, 10)
+DEFINE_BLEND16(subtract,   FFMAX(0, A - B), 10)
+DEFINE_BLEND16(multiply,   MULTIPLY(1, A, B), 10)
+DEFINE_BLEND16(multiply128, (int)av_clip_uintp2((A - 512) * B / 128. + 512, 10), 10)
+DEFINE_BLEND16(negation,   1023 - FFABS(1023 - A - B), 10)
+DEFINE_BLEND16(extremity,  FFABS(1023 - A - B), 10)
+DEFINE_BLEND16(difference, FFABS(A - B), 10)
+DEFINE_BLEND16(grainextract, (int)av_clip_uintp2(512 + A - B, 10), 10)
+DEFINE_BLEND16(screen,     SCREEN(1, A, B), 10)
+DEFINE_BLEND16(overlay,    (A < 512) ? MULTIPLY(2, A, B) : SCREEN(2, A, B), 10)
+DEFINE_BLEND16(hardlight,  (B < 512) ? MULTIPLY(2, B, A) : SCREEN(2, B, A), 10)
+DEFINE_BLEND16(hardmix,    (A < (1023 - B)) ? 0: 1023, 10)
+DEFINE_BLEND16(heat,       (A == 0) ? 0 : 1023 - FFMIN(((1023 - B) * (1023 - B)) / A, 1023), 10)
+DEFINE_BLEND16(freeze,     (B == 0) ? 0 : 1023 - FFMIN(((1023 - A) * (1023 - A)) / B, 1023), 10)
+DEFINE_BLEND16(darken,     FFMIN(A, B), 10)
+DEFINE_BLEND16(lighten,    FFMAX(A, B), 10)
+DEFINE_BLEND16(divide,     (int)av_clip_uintp2(B == 0 ? 1023 : 1023 * A / B, 10), 10)
+DEFINE_BLEND16(dodge,      DODGE(A, B), 10)
+DEFINE_BLEND16(burn,       BURN(A, B), 10)
+DEFINE_BLEND16(softlight,  (A > 511) ? B + (1023 - B) * (A - 511.5) / 511.5 * (0.5 - fabs(B - 511.5) / 1023): B - B * ((511.5 - A) / 511.5) * (0.5 - fabs(B - 511.5)/1023), 10)
+DEFINE_BLEND16(exclusion,  A + B - 2 * A * B / 1023, 10)
+DEFINE_BLEND16(pinlight,   (B < 512) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 512)), 10)
+DEFINE_BLEND16(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 1023, 10)
+DEFINE_BLEND16(reflect,    (B == 1023) ? B : FFMIN(1023, (A * A / (1023 - B))), 10)
+DEFINE_BLEND16(glow,       (A == 1023) ? A : FFMIN(1023, (B * B / (1023 - A))), 10)
+DEFINE_BLEND16(and,        A & B, 10)
+DEFINE_BLEND16(or,         A | B, 10)
+DEFINE_BLEND16(xor,        A ^ B, 10)
+DEFINE_BLEND16(vividlight, (A < 512) ? BURN(2 * A, B) : DODGE(2 * (A - 512), B), 10)
+DEFINE_BLEND16(linearlight,(int)av_clip_uintp2((B < 512) ? B + 2 * A - 1023 : B + 2 * (A - 512), 10), 10)
+
+#undef MULTIPLY
+#undef SCREEN
+#undef BURN
+#undef DODGE
+
+#define MULTIPLY(x, a, b) ((x) * (((a) * (b)) / 4095))
+#define SCREEN(x, a, b)   (4095 - (x) * ((4095 - (a)) * (4095 - (b)) / 4095))
+#define BURN(a, b)        (((a) == 0) ? (a) : FFMAX(0, 4095 - ((4095 - (b)) << 12) / (a)))
+#define DODGE(a, b)       (((a) == 4095) ? (a) : FFMIN(4095, (((b) << 12) / (4095 - (a)))))
+
+DEFINE_BLEND16(addition,   FFMIN(4095, A + B), 12)
+DEFINE_BLEND16(grainmerge, (int)av_clip_uintp2(A + B - 2048, 12), 12)
+DEFINE_BLEND16(average,    (A + B) / 2, 12)
+DEFINE_BLEND16(subtract,   FFMAX(0, A - B), 12)
+DEFINE_BLEND16(multiply,   MULTIPLY(1, A, B), 12)
+DEFINE_BLEND16(multiply128, (int)av_clip_uintp2((A - 2048) * B / 512. + 2048, 12), 12)
+DEFINE_BLEND16(negation,   4095 - FFABS(4095 - A - B), 12)
+DEFINE_BLEND16(extremity,  FFABS(4095 - A - B), 12)
+DEFINE_BLEND16(difference, FFABS(A - B), 12)
+DEFINE_BLEND16(grainextract, (int)av_clip_uintp2(2048 + A - B, 12), 12)
+DEFINE_BLEND16(screen,     SCREEN(1, A, B), 12)
+DEFINE_BLEND16(overlay,    (A < 2048) ? MULTIPLY(2, A, B) : SCREEN(2, A, B), 12)
+DEFINE_BLEND16(hardlight,  (B < 2048) ? MULTIPLY(2, B, A) : SCREEN(2, B, A), 12)
+DEFINE_BLEND16(hardmix,    (A < (4095 - B)) ? 0: 4095, 12)
+DEFINE_BLEND16(heat,       (A == 0) ? 0 : 4095 - FFMIN(((4095 - B) * (4095 - B)) / A, 4095), 12)
+DEFINE_BLEND16(freeze,     (B == 0) ? 0 : 4095 - FFMIN(((4095 - A) * (4095 - A)) / B, 4095), 12)
+DEFINE_BLEND16(darken,     FFMIN(A, B), 12)
+DEFINE_BLEND16(lighten,    FFMAX(A, B), 12)
+DEFINE_BLEND16(divide,     (int)av_clip_uintp2(B == 0 ? 4095 : 4095 * A / B, 12), 12)
+DEFINE_BLEND16(dodge,      DODGE(A, B), 12)
+DEFINE_BLEND16(burn,       BURN(A, B), 12)
+DEFINE_BLEND16(softlight,  (A > 2047) ? B + (4095 - B) * (A - 2047.5) / 2047.5 * (0.5 - fabs(B - 2047.5) / 4095): B - B * ((2047.5 - A) / 2047.5) * (0.5 - fabs(B - 2047.5)/4095), 12)
+DEFINE_BLEND16(exclusion,  A + B - 2 * A * B / 4095, 12)
+DEFINE_BLEND16(pinlight,   (B < 2048) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 2048)), 12)
+DEFINE_BLEND16(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 4095, 12)
+DEFINE_BLEND16(reflect,    (B == 4095) ? B : FFMIN(4095, (A * A / (4095 - B))), 12)
+DEFINE_BLEND16(glow,       (A == 4095) ? A : FFMIN(4095, (B * B / (4095 - A))), 12)
+DEFINE_BLEND16(and,        A & B, 12)
+DEFINE_BLEND16(or,         A | B, 12)
+DEFINE_BLEND16(xor,        A ^ B, 12)
+DEFINE_BLEND16(vividlight, (A < 2048) ? BURN(2 * A, B) : DODGE(2 * (A - 2048), B), 12)
+DEFINE_BLEND16(linearlight,(int)av_clip_uintp2((B < 2048) ? B + 2 * A - 4095 : B + 2 * (A - 2048), 12), 12)
 
 #define DEFINE_BLEND_EXPR(type, name, div)                                     \
 static void blend_expr_## name(const uint8_t *_top, ptrdiff_t top_linesize,          \
@@ -437,6 +527,11 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GRAY10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GRAY12,
         AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
         AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
         AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_GRAY16,
@@ -461,52 +556,173 @@ static av_cold void uninit(AVFilterContext *ctx)
         av_expr_free(s->params[i].e);
 }
 
-void ff_blend_init(FilterParams *param, int is_16bit)
+void ff_blend_init(FilterParams *param, int depth)
 {
-    switch (param->mode) {
-    case BLEND_ADDITION:   param->blend = is_16bit ? blend_addition_16bit   : blend_addition_8bit;   break;
-    case BLEND_GRAINMERGE: param->blend = is_16bit ? blend_grainmerge_16bit : blend_grainmerge_8bit; break;
-    case BLEND_AND:        param->blend = is_16bit ? blend_and_16bit        : blend_and_8bit;        break;
-    case BLEND_AVERAGE:    param->blend = is_16bit ? blend_average_16bit    : blend_average_8bit;    break;
-    case BLEND_BURN:       param->blend = is_16bit ? blend_burn_16bit       : blend_burn_8bit;       break;
-    case BLEND_DARKEN:     param->blend = is_16bit ? blend_darken_16bit     : blend_darken_8bit;     break;
-    case BLEND_DIFFERENCE: param->blend = is_16bit ? blend_difference_16bit : blend_difference_8bit; break;
-    case BLEND_GRAINEXTRACT: param->blend = is_16bit ? blend_grainextract_16bit: blend_grainextract_8bit; break;
-    case BLEND_DIVIDE:     param->blend = is_16bit ? blend_divide_16bit     : blend_divide_8bit;     break;
-    case BLEND_DODGE:      param->blend = is_16bit ? blend_dodge_16bit      : blend_dodge_8bit;      break;
-    case BLEND_EXCLUSION:  param->blend = is_16bit ? blend_exclusion_16bit  : blend_exclusion_8bit;  break;
-    case BLEND_EXTREMITY:  param->blend = is_16bit ? blend_extremity_16bit  : blend_extremity_8bit;  break;
-    case BLEND_FREEZE:     param->blend = is_16bit ? blend_freeze_16bit     : blend_freeze_8bit;     break;
-    case BLEND_GLOW:       param->blend = is_16bit ? blend_glow_16bit       : blend_glow_8bit;       break;
-    case BLEND_HARDLIGHT:  param->blend = is_16bit ? blend_hardlight_16bit  : blend_hardlight_8bit;  break;
-    case BLEND_HARDMIX:    param->blend = is_16bit ? blend_hardmix_16bit    : blend_hardmix_8bit;    break;
-    case BLEND_HEAT:       param->blend = is_16bit ? blend_heat_16bit       : blend_heat_8bit;       break;
-    case BLEND_LIGHTEN:    param->blend = is_16bit ? blend_lighten_16bit    : blend_lighten_8bit;    break;
-    case BLEND_LINEARLIGHT:param->blend = is_16bit ? blend_linearlight_16bit: blend_linearlight_8bit;break;
-    case BLEND_MULTIPLY:   param->blend = is_16bit ? blend_multiply_16bit   : blend_multiply_8bit;   break;
-    case BLEND_MULTIPLY128:param->blend = is_16bit ? blend_multiply128_16bit: blend_multiply128_8bit;break;
-    case BLEND_NEGATION:   param->blend = is_16bit ? blend_negation_16bit   : blend_negation_8bit;   break;
-    case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop :
-                                          param->opacity == 0 ? blend_copybottom :
-                                          is_16bit ? blend_normal_16bit     : blend_normal_8bit;     break;
-    case BLEND_OR:         param->blend = is_16bit ? blend_or_16bit         : blend_or_8bit;         break;
-    case BLEND_OVERLAY:    param->blend = is_16bit ? blend_overlay_16bit    : blend_overlay_8bit;    break;
-    case BLEND_PHOENIX:    param->blend = is_16bit ? blend_phoenix_16bit    : blend_phoenix_8bit;    break;
-    case BLEND_PINLIGHT:   param->blend = is_16bit ? blend_pinlight_16bit   : blend_pinlight_8bit;   break;
-    case BLEND_REFLECT:    param->blend = is_16bit ? blend_reflect_16bit    : blend_reflect_8bit;    break;
-    case BLEND_SCREEN:     param->blend = is_16bit ? blend_screen_16bit     : blend_screen_8bit;     break;
-    case BLEND_SOFTLIGHT:  param->blend = is_16bit ? blend_softlight_16bit  : blend_softlight_8bit;  break;
-    case BLEND_SUBTRACT:   param->blend = is_16bit ? blend_subtract_16bit   : blend_subtract_8bit;   break;
-    case BLEND_VIVIDLIGHT: param->blend = is_16bit ? blend_vividlight_16bit : blend_vividlight_8bit; break;
-    case BLEND_XOR:        param->blend = is_16bit ? blend_xor_16bit        : blend_xor_8bit;        break;
+    switch (depth) {
+    case 8:
+        switch (param->mode) {
+        case BLEND_ADDITION:   param->blend = blend_addition_8bit;   break;
+        case BLEND_GRAINMERGE: param->blend = blend_grainmerge_8bit; break;
+        case BLEND_AND:        param->blend = blend_and_8bit;        break;
+        case BLEND_AVERAGE:    param->blend = blend_average_8bit;    break;
+        case BLEND_BURN:       param->blend = blend_burn_8bit;       break;
+        case BLEND_DARKEN:     param->blend = blend_darken_8bit;     break;
+        case BLEND_DIFFERENCE: param->blend = blend_difference_8bit; break;
+        case BLEND_GRAINEXTRACT: param->blend = blend_grainextract_8bit; break;
+        case BLEND_DIVIDE:     param->blend = blend_divide_8bit;     break;
+        case BLEND_DODGE:      param->blend = blend_dodge_8bit;      break;
+        case BLEND_EXCLUSION:  param->blend = blend_exclusion_8bit;  break;
+        case BLEND_EXTREMITY:  param->blend = blend_extremity_8bit;  break;
+        case BLEND_FREEZE:     param->blend = blend_freeze_8bit;     break;
+        case BLEND_GLOW:       param->blend = blend_glow_8bit;       break;
+        case BLEND_HARDLIGHT:  param->blend = blend_hardlight_8bit;  break;
+        case BLEND_HARDMIX:    param->blend = blend_hardmix_8bit;    break;
+        case BLEND_HEAT:       param->blend = blend_heat_8bit;       break;
+        case BLEND_LIGHTEN:    param->blend = blend_lighten_8bit;    break;
+        case BLEND_LINEARLIGHT:param->blend = blend_linearlight_8bit;break;
+        case BLEND_MULTIPLY:   param->blend = blend_multiply_8bit;   break;
+        case BLEND_MULTIPLY128:param->blend = blend_multiply128_8bit;break;
+        case BLEND_NEGATION:   param->blend = blend_negation_8bit;   break;
+        case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop_8 :
+                                              param->opacity == 0 ? blend_copybottom_8 :
+                                              blend_normal_8bit;     break;
+        case BLEND_OR:         param->blend = blend_or_8bit;         break;
+        case BLEND_OVERLAY:    param->blend = blend_overlay_8bit;    break;
+        case BLEND_PHOENIX:    param->blend = blend_phoenix_8bit;    break;
+        case BLEND_PINLIGHT:   param->blend = blend_pinlight_8bit;   break;
+        case BLEND_REFLECT:    param->blend = blend_reflect_8bit;    break;
+        case BLEND_SCREEN:     param->blend = blend_screen_8bit;     break;
+        case BLEND_SOFTLIGHT:  param->blend = blend_softlight_8bit;  break;
+        case BLEND_SUBTRACT:   param->blend = blend_subtract_8bit;   break;
+        case BLEND_VIVIDLIGHT: param->blend = blend_vividlight_8bit; break;
+        case BLEND_XOR:        param->blend = blend_xor_8bit;        break;
+        }
+        break;
+    case 10:
+        switch (param->mode) {
+        case BLEND_ADDITION:   param->blend = blend_addition_10bit;   break;
+        case BLEND_GRAINMERGE: param->blend = blend_grainmerge_10bit; break;
+        case BLEND_AND:        param->blend = blend_and_10bit;        break;
+        case BLEND_AVERAGE:    param->blend = blend_average_10bit;    break;
+        case BLEND_BURN:       param->blend = blend_burn_10bit;       break;
+        case BLEND_DARKEN:     param->blend = blend_darken_10bit;     break;
+        case BLEND_DIFFERENCE: param->blend = blend_difference_10bit; break;
+        case BLEND_GRAINEXTRACT: param->blend = blend_grainextract_10bit; break;
+        case BLEND_DIVIDE:     param->blend = blend_divide_10bit;     break;
+        case BLEND_DODGE:      param->blend = blend_dodge_10bit;      break;
+        case BLEND_EXCLUSION:  param->blend = blend_exclusion_10bit;  break;
+        case BLEND_EXTREMITY:  param->blend = blend_extremity_10bit;  break;
+        case BLEND_FREEZE:     param->blend = blend_freeze_10bit;     break;
+        case BLEND_GLOW:       param->blend = blend_glow_10bit;       break;
+        case BLEND_HARDLIGHT:  param->blend = blend_hardlight_10bit;  break;
+        case BLEND_HARDMIX:    param->blend = blend_hardmix_10bit;    break;
+        case BLEND_HEAT:       param->blend = blend_heat_10bit;       break;
+        case BLEND_LIGHTEN:    param->blend = blend_lighten_10bit;    break;
+        case BLEND_LINEARLIGHT:param->blend = blend_linearlight_10bit;break;
+        case BLEND_MULTIPLY:   param->blend = blend_multiply_10bit;   break;
+        case BLEND_MULTIPLY128:param->blend = blend_multiply128_10bit;break;
+        case BLEND_NEGATION:   param->blend = blend_negation_10bit;   break;
+        case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop_16 :
+                                              param->opacity == 0 ? blend_copybottom_16 :
+                                              blend_normal_16bit;    break;
+        case BLEND_OR:         param->blend = blend_or_10bit;        break;
+        case BLEND_OVERLAY:    param->blend = blend_overlay_10bit;   break;
+        case BLEND_PHOENIX:    param->blend = blend_phoenix_10bit;   break;
+        case BLEND_PINLIGHT:   param->blend = blend_pinlight_10bit;  break;
+        case BLEND_REFLECT:    param->blend = blend_reflect_10bit;   break;
+        case BLEND_SCREEN:     param->blend = blend_screen_10bit;    break;
+        case BLEND_SOFTLIGHT:  param->blend = blend_softlight_10bit; break;
+        case BLEND_SUBTRACT:   param->blend = blend_subtract_10bit;  break;
+        case BLEND_VIVIDLIGHT: param->blend = blend_vividlight_10bit;break;
+        case BLEND_XOR:        param->blend = blend_xor_10bit;       break;
+        }
+        break;
+    case 12:
+        switch (param->mode) {
+        case BLEND_ADDITION:   param->blend = blend_addition_12bit;   break;
+        case BLEND_GRAINMERGE: param->blend = blend_grainmerge_12bit; break;
+        case BLEND_AND:        param->blend = blend_and_12bit;        break;
+        case BLEND_AVERAGE:    param->blend = blend_average_12bit;    break;
+        case BLEND_BURN:       param->blend = blend_burn_12bit;       break;
+        case BLEND_DARKEN:     param->blend = blend_darken_12bit;     break;
+        case BLEND_DIFFERENCE: param->blend = blend_difference_12bit; break;
+        case BLEND_GRAINEXTRACT: param->blend = blend_grainextract_12bit; break;
+        case BLEND_DIVIDE:     param->blend = blend_divide_12bit;     break;
+        case BLEND_DODGE:      param->blend = blend_dodge_12bit;      break;
+        case BLEND_EXCLUSION:  param->blend = blend_exclusion_12bit;  break;
+        case BLEND_EXTREMITY:  param->blend = blend_extremity_12bit;  break;
+        case BLEND_FREEZE:     param->blend = blend_freeze_12bit;     break;
+        case BLEND_GLOW:       param->blend = blend_glow_12bit;       break;
+        case BLEND_HARDLIGHT:  param->blend = blend_hardlight_12bit;  break;
+        case BLEND_HARDMIX:    param->blend = blend_hardmix_12bit;    break;
+        case BLEND_HEAT:       param->blend = blend_heat_12bit;       break;
+        case BLEND_LIGHTEN:    param->blend = blend_lighten_12bit;    break;
+        case BLEND_LINEARLIGHT:param->blend = blend_linearlight_12bit;break;
+        case BLEND_MULTIPLY:   param->blend = blend_multiply_12bit;   break;
+        case BLEND_MULTIPLY128:param->blend = blend_multiply128_12bit;break;
+        case BLEND_NEGATION:   param->blend = blend_negation_12bit;   break;
+        case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop_16 :
+                                              param->opacity == 0 ? blend_copybottom_16 :
+                                              blend_normal_16bit;    break;
+        case BLEND_OR:         param->blend = blend_or_12bit;        break;
+        case BLEND_OVERLAY:    param->blend = blend_overlay_12bit;   break;
+        case BLEND_PHOENIX:    param->blend = blend_phoenix_12bit;   break;
+        case BLEND_PINLIGHT:   param->blend = blend_pinlight_12bit;  break;
+        case BLEND_REFLECT:    param->blend = blend_reflect_12bit;   break;
+        case BLEND_SCREEN:     param->blend = blend_screen_12bit;    break;
+        case BLEND_SOFTLIGHT:  param->blend = blend_softlight_12bit; break;
+        case BLEND_SUBTRACT:   param->blend = blend_subtract_12bit;  break;
+        case BLEND_VIVIDLIGHT: param->blend = blend_vividlight_12bit;break;
+        case BLEND_XOR:        param->blend = blend_xor_12bit;       break;
+        }
+        break;
+    case 16:
+        switch (param->mode) {
+        case BLEND_ADDITION:   param->blend = blend_addition_16bit;   break;
+        case BLEND_GRAINMERGE: param->blend = blend_grainmerge_16bit; break;
+        case BLEND_AND:        param->blend = blend_and_16bit;        break;
+        case BLEND_AVERAGE:    param->blend = blend_average_16bit;    break;
+        case BLEND_BURN:       param->blend = blend_burn_16bit;       break;
+        case BLEND_DARKEN:     param->blend = blend_darken_16bit;     break;
+        case BLEND_DIFFERENCE: param->blend = blend_difference_16bit; break;
+        case BLEND_GRAINEXTRACT: param->blend = blend_grainextract_16bit; break;
+        case BLEND_DIVIDE:     param->blend = blend_divide_16bit;     break;
+        case BLEND_DODGE:      param->blend = blend_dodge_16bit;      break;
+        case BLEND_EXCLUSION:  param->blend = blend_exclusion_16bit;  break;
+        case BLEND_EXTREMITY:  param->blend = blend_extremity_16bit;  break;
+        case BLEND_FREEZE:     param->blend = blend_freeze_16bit;     break;
+        case BLEND_GLOW:       param->blend = blend_glow_16bit;       break;
+        case BLEND_HARDLIGHT:  param->blend = blend_hardlight_16bit;  break;
+        case BLEND_HARDMIX:    param->blend = blend_hardmix_16bit;    break;
+        case BLEND_HEAT:       param->blend = blend_heat_16bit;       break;
+        case BLEND_LIGHTEN:    param->blend = blend_lighten_16bit;    break;
+        case BLEND_LINEARLIGHT:param->blend = blend_linearlight_16bit;break;
+        case BLEND_MULTIPLY:   param->blend = blend_multiply_16bit;   break;
+        case BLEND_MULTIPLY128:param->blend = blend_multiply128_16bit;break;
+        case BLEND_NEGATION:   param->blend = blend_negation_16bit;   break;
+        case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop_16 :
+                                              param->opacity == 0 ? blend_copybottom_16 :
+                                              blend_normal_16bit;    break;
+        case BLEND_OR:         param->blend = blend_or_16bit;        break;
+        case BLEND_OVERLAY:    param->blend = blend_overlay_16bit;   break;
+        case BLEND_PHOENIX:    param->blend = blend_phoenix_16bit;   break;
+        case BLEND_PINLIGHT:   param->blend = blend_pinlight_16bit;  break;
+        case BLEND_REFLECT:    param->blend = blend_reflect_16bit;   break;
+        case BLEND_SCREEN:     param->blend = blend_screen_16bit;    break;
+        case BLEND_SOFTLIGHT:  param->blend = blend_softlight_16bit; break;
+        case BLEND_SUBTRACT:   param->blend = blend_subtract_16bit;  break;
+        case BLEND_VIVIDLIGHT: param->blend = blend_vividlight_16bit;break;
+        case BLEND_XOR:        param->blend = blend_xor_16bit;       break;
+        }
+        break;
     }
 
     if (param->opacity == 0 && param->mode != BLEND_NORMAL) {
-        param->blend = blend_copytop;
+        param->blend = depth > 8 ? blend_copytop_16 : blend_copytop_8;
     }
 
     if (ARCH_X86)
-        ff_blend_init_x86(param, is_16bit);
+        ff_blend_init_x86(param, depth);
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -515,7 +731,7 @@ static int config_output(AVFilterLink *outlink)
     AVFilterLink *toplink = ctx->inputs[TOP];
     BlendContext *s = ctx->priv;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(toplink->format);
-    int ret, plane, is_16bit;
+    int ret, plane;
 
     if (!s->tblend) {
         AVFilterLink *bottomlink = ctx->inputs[BOTTOM];
@@ -543,7 +759,7 @@ static int config_output(AVFilterLink *outlink)
     s->hsub = pix_desc->log2_chroma_w;
     s->vsub = pix_desc->log2_chroma_h;
 
-    is_16bit = pix_desc->comp[0].depth == 16;
+    s->depth = pix_desc->comp[0].depth;
     s->nb_planes = av_pix_fmt_count_planes(toplink->format);
 
     if (!s->tblend)
@@ -558,7 +774,7 @@ static int config_output(AVFilterLink *outlink)
         if (s->all_opacity < 1)
             param->opacity = s->all_opacity;
 
-        ff_blend_init(param, is_16bit);
+        ff_blend_init(param, s->depth);
 
         if (s->all_expr && !param->expr_str) {
             param->expr_str = av_strdup(s->all_expr);
@@ -570,7 +786,7 @@ static int config_output(AVFilterLink *outlink)
                                 NULL, NULL, NULL, NULL, 0, ctx);
             if (ret < 0)
                 return ret;
-            param->blend = is_16bit? blend_expr_16bit : blend_expr_8bit;
+            param->blend = s->depth > 8 ? blend_expr_16bit : blend_expr_8bit;
         }
     }
 
diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
index b691983611fc3..37165584cf8cd 100644
--- a/libavfilter/vf_bwdif.c
+++ b/libavfilter/vf_bwdif.c
@@ -216,10 +216,11 @@ static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1,
 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     BWDIFContext *s = ctx->priv;
+    YADIFContext *yadif = &s->yadif;
     ThreadData *td  = arg;
-    int linesize = s->cur->linesize[td->plane];
-    int clip_max = (1 << (s->csp->comp[td->plane].depth)) - 1;
-    int df = (s->csp->comp[td->plane].depth + 7) / 8;
+    int linesize = yadif->cur->linesize[td->plane];
+    int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1;
+    int df = (yadif->csp->comp[td->plane].depth + 7) / 8;
     int refs = linesize / df;
     int slice_start = (td->h *  jobnr   ) / nb_jobs;
     int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
@@ -227,11 +228,11 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 
     for (y = slice_start; y < slice_end; y++) {
         if ((y ^ td->parity) & 1) {
-            uint8_t *prev = &s->prev->data[td->plane][y * linesize];
-            uint8_t *cur  = &s->cur ->data[td->plane][y * linesize];
-            uint8_t *next = &s->next->data[td->plane][y * linesize];
+            uint8_t *prev = &yadif->prev->data[td->plane][y * linesize];
+            uint8_t *cur  = &yadif->cur ->data[td->plane][y * linesize];
+            uint8_t *next = &yadif->next->data[td->plane][y * linesize];
             uint8_t *dst  = &td->frame->data[td->plane][y * td->frame->linesize[td->plane]];
-            if (!s->inter_field) {
+            if (yadif->current_field == YADIF_FIELD_END) {
                 s->filter_intra(dst, cur, td->w, (y + df) < td->h ? refs : -refs,
                                 y > (df - 1) ? -refs : refs,
                                 (y + 3*df) < td->h ? 3 * refs : -refs,
@@ -252,7 +253,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
             }
         } else {
             memcpy(&td->frame->data[td->plane][y * td->frame->linesize[td->plane]],
-                   &s->cur->data[td->plane][y * linesize], td->w * df);
+                   &yadif->cur->data[td->plane][y * linesize], td->w * df);
         }
     }
     return 0;
@@ -262,16 +263,17 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
                    int parity, int tff)
 {
     BWDIFContext *bwdif = ctx->priv;
+    YADIFContext *yadif = &bwdif->yadif;
     ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
     int i;
 
-    for (i = 0; i < bwdif->csp->nb_components; i++) {
+    for (i = 0; i < yadif->csp->nb_components; i++) {
         int w = dstpic->width;
         int h = dstpic->height;
 
         if (i == 1 || i == 2) {
-            w = AV_CEIL_RSHIFT(w, bwdif->csp->log2_chroma_w);
-            h = AV_CEIL_RSHIFT(h, bwdif->csp->log2_chroma_h);
+            w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
+            h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
         }
 
         td.w     = w;
@@ -280,186 +282,21 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
 
         ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(h, ff_filter_get_nb_threads(ctx)));
     }
-    if (!bwdif->inter_field) {
-        bwdif->inter_field = 1;
+    if (yadif->current_field == YADIF_FIELD_END) {
+        yadif->current_field = YADIF_FIELD_NORMAL;
     }
 
     emms_c();
 }
 
-static int return_frame(AVFilterContext *ctx, int is_second)
-{
-    BWDIFContext *bwdif = ctx->priv;
-    AVFilterLink *link  = ctx->outputs[0];
-    int tff, ret;
-
-    if (bwdif->parity == -1) {
-        tff = bwdif->cur->interlaced_frame ?
-              bwdif->cur->top_field_first : 1;
-    } else {
-        tff = bwdif->parity ^ 1;
-    }
-
-    if (is_second) {
-        bwdif->out = ff_get_video_buffer(link, link->w, link->h);
-        if (!bwdif->out)
-            return AVERROR(ENOMEM);
-
-        av_frame_copy_props(bwdif->out, bwdif->cur);
-        bwdif->out->interlaced_frame = 0;
-        if (bwdif->inter_field < 0)
-            bwdif->inter_field = 0;
-    }
-
-    filter(ctx, bwdif->out, tff ^ !is_second, tff);
-
-    if (is_second) {
-        int64_t cur_pts  = bwdif->cur->pts;
-        int64_t next_pts = bwdif->next->pts;
-
-        if (next_pts != AV_NOPTS_VALUE && cur_pts != AV_NOPTS_VALUE) {
-            bwdif->out->pts = cur_pts + next_pts;
-        } else {
-            bwdif->out->pts = AV_NOPTS_VALUE;
-        }
-    }
-    ret = ff_filter_frame(ctx->outputs[0], bwdif->out);
-
-    bwdif->frame_pending = (bwdif->mode&1) && !is_second;
-    return ret;
-}
-
-static int checkstride(BWDIFContext *bwdif, const AVFrame *a, const AVFrame *b)
-{
-    int i;
-    for (i = 0; i < bwdif->csp->nb_components; i++)
-        if (a->linesize[i] != b->linesize[i])
-            return 1;
-    return 0;
-}
-
-static void fixstride(AVFilterLink *link, AVFrame *f)
-{
-    AVFrame *dst = ff_default_get_video_buffer(link, f->width, f->height);
-    if(!dst)
-        return;
-    av_frame_copy_props(dst, f);
-    av_image_copy(dst->data, dst->linesize,
-                  (const uint8_t **)f->data, f->linesize,
-                  dst->format, dst->width, dst->height);
-    av_frame_unref(f);
-    av_frame_move_ref(f, dst);
-    av_frame_free(&dst);
-}
-
-static int filter_frame(AVFilterLink *link, AVFrame *frame)
-{
-    AVFilterContext *ctx = link->dst;
-    BWDIFContext *bwdif = ctx->priv;
-
-    av_assert0(frame);
-
-    if (bwdif->frame_pending)
-        return_frame(ctx, 1);
-
-    if (bwdif->prev)
-        av_frame_free(&bwdif->prev);
-    bwdif->prev = bwdif->cur;
-    bwdif->cur  = bwdif->next;
-    bwdif->next = frame;
-
-    if (!bwdif->cur) {
-        bwdif->cur = av_frame_clone(bwdif->next);
-        if (!bwdif->cur)
-            return AVERROR(ENOMEM);
-        bwdif->inter_field = 0;
-    }
-
-    if (checkstride(bwdif, bwdif->next, bwdif->cur)) {
-        av_log(ctx, AV_LOG_VERBOSE, "Reallocating frame due to differing stride\n");
-        fixstride(link, bwdif->next);
-    }
-    if (checkstride(bwdif, bwdif->next, bwdif->cur))
-        fixstride(link, bwdif->cur);
-    if (bwdif->prev && checkstride(bwdif, bwdif->next, bwdif->prev))
-        fixstride(link, bwdif->prev);
-    if (checkstride(bwdif, bwdif->next, bwdif->cur) || (bwdif->prev && checkstride(bwdif, bwdif->next, bwdif->prev))) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate frame\n");
-        return -1;
-    }
-
-    if (!bwdif->prev)
-        return 0;
-
-    if ((bwdif->deint && !bwdif->cur->interlaced_frame) ||
-        ctx->is_disabled ||
-        (bwdif->deint && !bwdif->prev->interlaced_frame && bwdif->prev->repeat_pict) ||
-        (bwdif->deint && !bwdif->next->interlaced_frame && bwdif->next->repeat_pict)
-    ) {
-        bwdif->out  = av_frame_clone(bwdif->cur);
-        if (!bwdif->out)
-            return AVERROR(ENOMEM);
-
-        av_frame_free(&bwdif->prev);
-        if (bwdif->out->pts != AV_NOPTS_VALUE)
-            bwdif->out->pts *= 2;
-        return ff_filter_frame(ctx->outputs[0], bwdif->out);
-    }
-
-    bwdif->out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
-    if (!bwdif->out)
-        return AVERROR(ENOMEM);
-
-    av_frame_copy_props(bwdif->out, bwdif->cur);
-    bwdif->out->interlaced_frame = 0;
-
-    if (bwdif->out->pts != AV_NOPTS_VALUE)
-        bwdif->out->pts *= 2;
-
-    return return_frame(ctx, 0);
-}
-
-static int request_frame(AVFilterLink *link)
-{
-    AVFilterContext *ctx = link->src;
-    BWDIFContext *bwdif = ctx->priv;
-    int ret;
-
-    if (bwdif->frame_pending) {
-        return_frame(ctx, 1);
-        return 0;
-    }
-
-    if (bwdif->eof)
-        return AVERROR_EOF;
-
-    ret  = ff_request_frame(link->src->inputs[0]);
-
-    if (ret == AVERROR_EOF && bwdif->cur) {
-        AVFrame *next = av_frame_clone(bwdif->next);
-
-        if (!next)
-            return AVERROR(ENOMEM);
-
-        bwdif->inter_field = -1;
-        next->pts = bwdif->next->pts * 2 - bwdif->cur->pts;
-
-        filter_frame(link->src->inputs[0], next);
-        bwdif->eof = 1;
-    } else if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     BWDIFContext *bwdif = ctx->priv;
+    YADIFContext *yadif = &bwdif->yadif;
 
-    av_frame_free(&bwdif->prev);
-    av_frame_free(&bwdif->cur );
-    av_frame_free(&bwdif->next);
+    av_frame_free(&yadif->prev);
+    av_frame_free(&yadif->cur );
+    av_frame_free(&yadif->next);
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -496,13 +333,14 @@ static int config_props(AVFilterLink *link)
 {
     AVFilterContext *ctx = link->src;
     BWDIFContext *s = link->src->priv;
+    YADIFContext *yadif = &s->yadif;
 
     link->time_base.num = link->src->inputs[0]->time_base.num;
     link->time_base.den = link->src->inputs[0]->time_base.den * 2;
     link->w             = link->src->inputs[0]->w;
     link->h             = link->src->inputs[0]->h;
 
-    if(s->mode&1)
+    if(yadif->mode&1)
         link->frame_rate = av_mul_q(link->src->inputs[0]->frame_rate, (AVRational){2,1});
 
     if (link->w < 3 || link->h < 3) {
@@ -510,8 +348,9 @@ static int config_props(AVFilterLink *link)
         return AVERROR(EINVAL);
     }
 
-    s->csp = av_pix_fmt_desc_get(link->format);
-    if (s->csp->comp[0].depth > 8) {
+    yadif->csp = av_pix_fmt_desc_get(link->format);
+    yadif->filter = filter;
+    if (yadif->csp->comp[0].depth > 8) {
         s->filter_intra = filter_intra_16bit;
         s->filter_line  = filter_line_c_16bit;
         s->filter_edge  = filter_edge_16bit;
@@ -528,24 +367,24 @@ static int config_props(AVFilterLink *link)
 }
 
 
-#define OFFSET(x) offsetof(BWDIFContext, x)
+#define OFFSET(x) offsetof(YADIFContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
 #define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
 
 static const AVOption bwdif_options[] = {
-    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=BWDIF_MODE_SEND_FIELD}, 0, 1, FLAGS, "mode"},
-    CONST("send_frame", "send one frame for each frame", BWDIF_MODE_SEND_FRAME, "mode"),
-    CONST("send_field", "send one frame for each field", BWDIF_MODE_SEND_FIELD, "mode"),
+    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FIELD}, 0, 1, FLAGS, "mode"},
+    CONST("send_frame", "send one frame for each frame", YADIF_MODE_SEND_FRAME, "mode"),
+    CONST("send_field", "send one frame for each field", YADIF_MODE_SEND_FIELD, "mode"),
 
-    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=BWDIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
-    CONST("tff",  "assume top field first",    BWDIF_PARITY_TFF,  "parity"),
-    CONST("bff",  "assume bottom field first", BWDIF_PARITY_BFF,  "parity"),
-    CONST("auto", "auto detect parity",        BWDIF_PARITY_AUTO, "parity"),
+    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
+    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,  "parity"),
+    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,  "parity"),
+    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO, "parity"),
 
-    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=BWDIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
-    CONST("all",        "deinterlace all frames",                       BWDIF_DEINT_ALL,        "deint"),
-    CONST("interlaced", "only deinterlace frames marked as interlaced", BWDIF_DEINT_INTERLACED, "deint"),
+    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
+    CONST("all",        "deinterlace all frames",                       YADIF_DEINT_ALL,        "deint"),
+    CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"),
 
     { NULL }
 };
@@ -556,7 +395,7 @@ static const AVFilterPad avfilter_vf_bwdif_inputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .filter_frame  = filter_frame,
+        .filter_frame  = ff_yadif_filter_frame,
     },
     { NULL }
 };
@@ -565,7 +404,7 @@ static const AVFilterPad avfilter_vf_bwdif_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
+        .request_frame = ff_yadif_request_frame,
         .config_props  = config_props,
     },
     { NULL }
diff --git a/libavfilter/vf_chromakey.c b/libavfilter/vf_chromakey.c
index 88414783bc800..42b7d71d49406 100644
--- a/libavfilter/vf_chromakey.c
+++ b/libavfilter/vf_chromakey.c
@@ -38,6 +38,9 @@ typedef struct ChromakeyContext {
 
     int hsub_log2;
     int vsub_log2;
+
+    int (*do_slice)(AVFilterContext *ctx, void *arg,
+                    int jobnr, int nb_jobs);
 } ChromakeyContext;
 
 static uint8_t do_chromakey_pixel(ChromakeyContext *ctx, uint8_t u[9], uint8_t v[9])
@@ -103,12 +106,45 @@ static int do_chromakey_slice(AVFilterContext *avctx, void *arg, int jobnr, int
     return 0;
 }
 
+/* Slice worker for chromahold: sets U/V to 128 (gray) for every chroma sample
+ * whose distance from the key colour exceeds the similarity threshold. */
+static int do_chromahold_slice(AVFilterContext *avctx, void *arg, int jobnr, int nb_jobs)
+{
+    ChromakeyContext *ctx = avctx->priv;
+    AVFrame *frame = arg;
+    /* Round the chroma size up so odd frame sizes keep their last row/column. */
+    const int chroma_w = AV_CEIL_RSHIFT(frame->width,  ctx->hsub_log2);
+    const int chroma_h = AV_CEIL_RSHIFT(frame->height, ctx->vsub_log2);
+    const int slice_start = (chroma_h * jobnr) / nb_jobs;
+    const int slice_end = (chroma_h * (jobnr + 1)) / nb_jobs;
+    int x, y;
+
+    for (y = slice_start; y < slice_end; ++y) {
+        uint8_t *urow = frame->data[1] + frame->linesize[1] * y;
+        uint8_t *vrow = frame->data[2] + frame->linesize[2] * y;
+
+        for (x = 0; x < chroma_w; ++x) {
+            const int du = urow[x] - ctx->chromakey_uv[0];
+            const int dv = vrow[x] - ctx->chromakey_uv[1];
+            /* Euclidean U/V distance, scaled by 1/255. */
+            const double diff = sqrt((du * du + dv * dv) / (255.0 * 255.0));
+            if (diff > ctx->similarity) {
+                urow[x] = 128;
+                vrow[x] = 128;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *frame)
 {
     AVFilterContext *avctx = link->dst;
+    ChromakeyContext *ctx = avctx->priv;
     int res;
 
-    if (res = avctx->internal->execute(avctx, do_chromakey_slice, frame, NULL, FFMIN(frame->height, ff_filter_get_nb_threads(avctx))))
+    if (res = avctx->internal->execute(avctx, ctx->do_slice, frame, NULL, FFMIN(frame->height, ff_filter_get_nb_threads(avctx))))
         return res;
 
     return ff_filter_frame(avctx->outputs[0], frame);
@@ -130,6 +166,12 @@ static av_cold int initialize_chromakey(AVFilterContext *avctx)
         ctx->chromakey_uv[1] = RGB_TO_V(ctx->chromakey_rgba);
     }
 
+    if (!strcmp(avctx->filter->name, "chromakey")) {
+        ctx->do_slice = do_chromakey_slice;
+    } else {
+        ctx->do_slice = do_chromahold_slice;
+    }
+
     return 0;
 }
 
@@ -142,9 +184,19 @@ static av_cold int query_formats(AVFilterContext *avctx)
         AV_PIX_FMT_NONE
     };
 
+    static const enum AVPixelFormat hold_pixel_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_NONE
+    };
+
     AVFilterFormats *formats = NULL;
 
-    formats = ff_make_format_list(pixel_fmts);
+    formats = ff_make_format_list(!strcmp(avctx->filter->name, "chromahold") ? hold_pixel_fmts : pixel_fmts);
     if (!formats)
         return AVERROR(ENOMEM);
 
@@ -206,3 +258,43 @@ AVFilter ff_vf_chromakey = {
     .outputs       = chromakey_outputs,
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };
+
+static const AVOption chromahold_options[] = { /* user-settable options of the chromahold filter */
+    { "color", "set the chromahold key color", OFFSET(chromakey_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "similarity", "set the chromahold similarity value", OFFSET(similarity), AV_OPT_TYPE_FLOAT, { .dbl = 0.01 }, 0.01, 1.0, FLAGS }, /* default equals the minimum */
+    { "yuv", "color parameter is in yuv instead of rgb", OFFSET(is_yuv), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { NULL } /* terminator */
+};
+
+static const AVFilterPad chromahold_inputs[] = { /* single video input pad */
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .needs_writable = 1,            /* the slice worker overwrites chroma samples of the incoming frame */
+        .filter_frame   = filter_frame,
+        .config_props   = config_input,
+    },
+    { NULL } /* terminator */
+};
+
+static const AVFilterPad chromahold_outputs[] = { /* single video output pad */
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO, /* no callbacks set: filter_frame() forwards the processed frame to outputs[0] */
+    },
+    { NULL } /* terminator */
+};
+
+AVFILTER_DEFINE_CLASS(chromahold);
+/* chromahold keeps chroma close to the key colour and grays out the rest; it reuses the chromakey context and callbacks. */
+AVFilter ff_vf_chromahold = {
+    .name          = "chromahold",
+    .description   = NULL_IF_CONFIG_SMALL("Turns everything except a certain color range into gray."),
+    .priv_size     = sizeof(ChromakeyContext),
+    .priv_class    = &chromahold_class,
+    .init          = initialize_chromakey, /* picks do_chromahold_slice for any filter name other than "chromakey" */
+    .query_formats = query_formats,        /* offers hold_pixel_fmts when the filter name is "chromahold" */
+    .inputs        = chromahold_inputs,
+    .outputs       = chromahold_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_chromashift.c b/libavfilter/vf_chromashift.c
new file mode 100644
index 0000000000000..f4ac28ad18e25
--- /dev/null
+++ b/libavfilter/vf_chromashift.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "framesync.h"
+#include "video.h"
+
+typedef struct ChromaShiftContext { /* private state shared by the chroma/rgba shift slice workers */
+    const AVClass *class;
+    int cbh, cbv;       /* plane 1 shift: workers compute dst(x, y) = src(x - cbh, y - cbv) */
+    int crh, crv;       /* plane 2 shift, same convention */
+    int rh, rv;         /* rgba workers: shift of plane 2 (named r there) */
+    int gh, gv;         /* rgba workers: shift of plane 0 (named g there) */
+    int bh, bv;         /* rgba workers: shift of plane 1 (named b there) */
+    int ah, av;         /* rgba workers: shift of plane 3 (alpha), applied only when nb_planes >= 4 */
+    int edge;           /* NOTE(review): presumably selects smear vs. wrap edge handling -- confirm */
+
+    int nb_planes;      /* plane count; the rgba workers skip the alpha plane when it is below 4 */
+    int depth;          /* NOTE(review): presumably the sample bit depth -- set outside this view, confirm */
+    int height[4];      /* per-plane heights; the visible workers read index 1 only */
+    int width[4];       /* per-plane widths; the visible workers read index 1 only */
+    int linesize[4];    /* NOTE(review): not read by the visible workers -- confirm its use */
+
+    AVFrame *in;        /* source frame the slice workers read from */
+
+    int is_rgbashift;   /* NOTE(review): presumably non-zero for the rgbashift variant -- confirm */
+    int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); /* NOTE(review): presumably one of the *_slice workers -- confirm */
+} ChromaShiftContext;
+
+/* Advertise the supported pixel formats: the planar GBR(A) list for the
+ * "rgbashift" filter, the planar YUV(A) list for any other filter name. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat yuv_pix_fmts[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat rgb_pix_fmts[] = {
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
+        AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_NONE
+    };
+    const int want_rgb = !strcmp(ctx->filter->name, "rgbashift");
+    const enum AVPixelFormat *supported = want_rgb ? rgb_pix_fmts : yuv_pix_fmts;
+    AVFilterFormats *formats = ff_make_format_list(supported);
+
+    /* A NULL list from ff_make_format_list() is reported as ENOMEM. */
+    if (!formats)
+        return AVERROR(ENOMEM);
+
+    /* Hand the list over to the common-format helper. */
+    return ff_set_common_formats(ctx, formats);
+}
+
+#define DEFINE_SMEAR(depth, type, div)                                                    \
+static int smear_slice ## depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)  \
+{                                                                                         \
+    /* Shift U and V; clipped coordinates make border samples repeat. */                  \
+    ChromaShiftContext *s = ctx->priv;                                                    \
+    const AVFrame *src = s->in;                                                           \
+    AVFrame *dst = arg;                                                                   \
+    const int h = s->height[1];                                                           \
+    const int w = s->width[1];                                                            \
+    const int row_begin = (h * jobnr) / nb_jobs;                                          \
+    const int row_end = (h * (jobnr+1)) / nb_jobs;                                        \
+    const int cbh = s->cbh;                                                               \
+    const int cbv = s->cbv;                                                               \
+    const int crh = s->crh;                                                               \
+    const int crv = s->crv;                                                               \
+    const int src_ustride = src->linesize[1] / div;                                       \
+    const int src_vstride = src->linesize[2] / div;                                       \
+    const int dst_ustride = dst->linesize[1] / div;                                       \
+    const int dst_vstride = dst->linesize[2] / div;                                       \
+    const type *src_u = (const type *)src->data[1];                                       \
+    const type *src_v = (const type *)src->data[2];                                       \
+    type *dst_u = (type *)dst->data[1] + row_begin * dst_ustride;                         \
+    type *dst_v = (type *)dst->data[2] + row_begin * dst_vstride;                         \
+                                                                                          \
+    for (int y = row_begin; y < row_end; y++) {                                           \
+        const type *urow = src_u + av_clip(y - cbv, 0, h-1) * src_ustride;                \
+        const type *vrow = src_v + av_clip(y - crv, 0, h-1) * src_vstride;                \
+                                                                                          \
+        for (int x = 0; x < w; x++) {                                                     \
+            dst_u[x] = urow[av_clip(x - cbh, 0, w - 1)];                                  \
+            dst_v[x] = vrow[av_clip(x - crh, 0, w - 1)];                                  \
+        }                                                                                 \
+        dst_u += dst_ustride;                                                             \
+        dst_v += dst_vstride;                                                             \
+    }                                                                                     \
+                                                                                          \
+    return 0;                                                                             \
+}
+
+DEFINE_SMEAR(8, uint8_t, 1)
+DEFINE_SMEAR(16, uint16_t, 2)
+
+#define DEFINE_WRAP(depth, type, div)                                                     \
+static int wrap_slice ## depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)   \
+{                                                                                         \
+    /* Shift U and V; coordinates wrap around modulo the plane size. */                   \
+    ChromaShiftContext *s = ctx->priv;                                                    \
+    const AVFrame *src = s->in;                                                           \
+    AVFrame *dst = arg;                                                                   \
+    const int h = s->height[1];                                                           \
+    const int w = s->width[1];                                                            \
+    const int row_begin = (h * jobnr) / nb_jobs;                                          \
+    const int row_end = (h * (jobnr+1)) / nb_jobs;                                        \
+    const int cbh = s->cbh;                                                               \
+    const int cbv = s->cbv;                                                               \
+    const int crh = s->crh;                                                               \
+    const int crv = s->crv;                                                               \
+    const int src_ustride = src->linesize[1] / div;                                       \
+    const int src_vstride = src->linesize[2] / div;                                       \
+    const int dst_ustride = dst->linesize[1] / div;                                       \
+    const int dst_vstride = dst->linesize[2] / div;                                       \
+    const type *src_u = (const type *)src->data[1];                                       \
+    const type *src_v = (const type *)src->data[2];                                       \
+    type *dst_u = (type *)dst->data[1] + row_begin * dst_ustride;                         \
+    type *dst_v = (type *)dst->data[2] + row_begin * dst_vstride;                         \
+                                                                                          \
+    for (int y = row_begin; y < row_end; y++) {                                           \
+        int uy = (y - cbv) % h;                                                           \
+        int vy = (y - crv) % h;                                                           \
+        const type *urow, *vrow;                                                          \
+                                                                                          \
+        /* % keeps the sign of the dividend; fold negatives back into range. */           \
+        uy += uy < 0 ? h : 0;                                                             \
+        vy += vy < 0 ? h : 0;                                                             \
+        urow = src_u + uy * src_ustride;                                                  \
+        vrow = src_v + vy * src_vstride;                                                  \
+                                                                                          \
+        for (int x = 0; x < w; x++) {                                                     \
+            int ux = (x - cbh) % w;                                                       \
+            int vx = (x - crh) % w;                                                       \
+                                                                                          \
+            ux += ux < 0 ? w : 0;                                                         \
+            vx += vx < 0 ? w : 0;                                                         \
+            dst_u[x] = urow[ux];                                                          \
+            dst_v[x] = vrow[vx];                                                          \
+        }                                                                                 \
+                                                                                          \
+        dst_u += dst_ustride;                                                             \
+        dst_v += dst_vstride;                                                             \
+    }                                                                                     \
+                                                                                          \
+    return 0;                                                                             \
+}
+
+DEFINE_WRAP(8, uint8_t, 1)
+DEFINE_WRAP(16, uint16_t, 2)
+
+#define DEFINE_RGBASMEAR(depth, type, div)                                                    \
+static int rgbasmear_slice ## depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)  \
+{                                                                                         \
+    ChromaShiftContext *s = ctx->priv;                                                    \
+    AVFrame *in = s->in;                                                                  \
+    AVFrame *out = arg;                                                                   \
+    const int srlinesize = in->linesize[2] / div;                                         \
+    const int sglinesize = in->linesize[0] / div;                                         \
+    const int sblinesize = in->linesize[1] / div;                                         \
+    const int salinesize = in->linesize[3] / div;                                         \
+    const int rlinesize = out->linesize[2] / div;                                         \
+    const int glinesize = out->linesize[0] / div;                                         \
+    const int blinesize = out->linesize[1] / div;                                         \
+    const int alinesize = out->linesize[3] / div;                                         \
+    const int rh = s->rh;                                                                 \
+    const int rv = s->rv;                                                                 \
+    const int gh = s->gh;                                                                 \
+    const int gv = s->gv;                                                                 \
+    const int bh = s->bh;                                                                 \
+    const int bv = s->bv;                                                                 \
+    const int ah = s->ah;                                                                 \
+    const int av = s->av;                                                                 \
+    const int h = s->height[1];                                                           \
+    const int w = s->width[1];                                                            \
+    const int slice_start = (h * jobnr) / nb_jobs;                                        \
+    const int slice_end = (h * (jobnr+1)) / nb_jobs;                                      \
+    const type *sr = (const type *)in->data[2];                                           \
+    const type *sg = (const type *)in->data[0];                                           \
+    const type *sb = (const type *)in->data[1];                                           \
+    const type *sa = (const type *)in->data[3];                                           \
+    type *dr = (type *)out->data[2] + slice_start * rlinesize;                            \
+    type *dg = (type *)out->data[0] + slice_start * glinesize;                            \
+    type *db = (type *)out->data[1] + slice_start * blinesize;                            \
+    type *da = (type *)out->data[3] + slice_start * alinesize;                            \
+    /* Each colour plane is shifted independently; clipping repeats border samples. */    \
+    for (int y = slice_start; y < slice_end; y++) {                                       \
+        const int ry = av_clip(y - rv, 0, h-1) * srlinesize;                              \
+        const int gy = av_clip(y - gv, 0, h-1) * sglinesize;                              \
+        const int by = av_clip(y - bv, 0, h-1) * sblinesize;                              \
+        int ay;                                                                           \
+                                                                                          \
+        for (int x = 0; x < w; x++) {                                                     \
+            dr[x] = sr[av_clip(x - rh, 0, w - 1) + ry];                                   \
+            dg[x] = sg[av_clip(x - gh, 0, w - 1) + gy];                                   \
+            db[x] = sb[av_clip(x - bh, 0, w - 1) + by];                                   \
+        }                                                                                 \
+                                                                                          \
+        dr += rlinesize;                                                                  \
+        dg += glinesize;                                                                  \
+        db += blinesize;                                                                  \
+        /* Alpha is handled only when a fourth plane exists. */                           \
+        if (s->nb_planes < 4)                                                             \
+            continue;                                                                     \
+        ay = av_clip(y - av, 0, h-1) * salinesize;                                        \
+        for (int x = 0; x < w; x++) {                                                     \
+            da[x] = sa[av_clip(x - ah, 0, w - 1) + ay];                                   \
+        }                                                                                 \
+                                                                                          \
+        da += alinesize;                                                                  \
+    }                                                                                     \
+                                                                                          \
+    return 0;                                                                             \
+}
+
+DEFINE_RGBASMEAR(8, uint8_t, 1)
+DEFINE_RGBASMEAR(16, uint16_t, 2)
+
+#define DEFINE_RGBAWRAP(depth, type, div) /* rgbashift slice worker used when edge=wrap */    \
+static int rgbawrap_slice ## depth(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)   \
+{                                                                                         \
+    ChromaShiftContext *s = ctx->priv;                                                    \
+    AVFrame *in = s->in; /* source frame stored by filter_frame() */                      \
+    AVFrame *out = arg; /* destination frame */                                           \
+    const int srlinesize = in->linesize[2] / div; /* byte strides divided by div */       \
+    const int sglinesize = in->linesize[0] / div;                                         \
+    const int sblinesize = in->linesize[1] / div;                                         \
+    const int salinesize = in->linesize[3] / div;                                         \
+    const int rlinesize = out->linesize[2] / div;                                         \
+    const int glinesize = out->linesize[0] / div;                                         \
+    const int blinesize = out->linesize[1] / div;                                         \
+    const int alinesize = out->linesize[3] / div;                                         \
+    const int rh = s->rh;                                                                 \
+    const int rv = s->rv;                                                                 \
+    const int gh = s->gh;                                                                 \
+    const int gv = s->gv;                                                                 \
+    const int bh = s->bh;                                                                 \
+    const int bv = s->bv;                                                                 \
+    const int ah = s->ah;                                                                 \
+    const int av = s->av;                                                                 \
+    const int h = s->height[1]; /* plane-1 size is used for all planes */                 \
+    const int w = s->width[1];                                                            \
+    const int slice_start = (h * jobnr) / nb_jobs; /* this job: rows [start, end) */      \
+    const int slice_end = (h * (jobnr+1)) / nb_jobs;                                      \
+    const type *sr = (const type *)in->data[2]; /* r/g/b/a use planes 2/0/1/3 */          \
+    const type *sg = (const type *)in->data[0];                                           \
+    const type *sb = (const type *)in->data[1];                                           \
+    const type *sa = (const type *)in->data[3];                                           \
+    type *dr = (type *)out->data[2] + slice_start * rlinesize;                            \
+    type *dg = (type *)out->data[0] + slice_start * glinesize;                            \
+    type *db = (type *)out->data[1] + slice_start * blinesize;                            \
+    type *da = (type *)out->data[3] + slice_start * alinesize;                            \
+                                                                                          \
+    for (int y = slice_start; y < slice_end; y++) {                                       \
+        int ry = (y - rv) % h; /* '%' may yield a negative value; fixed up below */       \
+        int gy = (y - gv) % h;                                                            \
+        int by = (y - bv) % h;                                                            \
+                                                                                          \
+        if (ry < 0)                                                                       \
+            ry += h;                                                                      \
+        if (gy < 0)                                                                       \
+            gy += h;                                                                      \
+        if (by < 0)                                                                       \
+            by += h;                                                                      \
+                                                                                          \
+        for (int x = 0; x < w; x++) {                                                     \
+            int rx = (x - rh) % w;                                                        \
+            int gx = (x - gh) % w;                                                        \
+            int bx = (x - bh) % w;                                                        \
+                                                                                          \
+            if (rx < 0)                                                                   \
+                rx += w;                                                                  \
+            if (gx < 0)                                                                   \
+                gx += w;                                                                  \
+            if (bx < 0)                                                                   \
+                bx += w;                                                                  \
+            dr[x] = sr[rx + ry * srlinesize];                                             \
+            dg[x] = sg[gx + gy * sglinesize];                                             \
+            db[x] = sb[bx + by * sblinesize];                                             \
+        }                                                                                 \
+                                                                                          \
+        dr += rlinesize;                                                                  \
+        dg += glinesize;                                                                  \
+        db += blinesize;                                                                  \
+                                                                                          \
+        if (s->nb_planes < 4) /* fewer than 4 planes: skip the alpha pass */              \
+            continue;                                                                     \
+        for (int x = 0; x < w; x++) {                                                     \
+            int ax = (x - ah) % w;                                                        \
+            int ay = (y - av) % h; /* source row follows y, like ry/gy/by above */        \
+                                                                                          \
+            if (ax < 0)                                                                   \
+                ax += w;                                                                  \
+            if (ay < 0)                                                                   \
+                ay += h;                                                                  \
+            da[x] = sa[ax + ay * salinesize];                                             \
+        }                                                                                 \
+                                                                                          \
+        da += alinesize;                                                                  \
+    }                                                                                     \
+                                                                                          \
+    return 0;                                                                             \
+}
+
+DEFINE_RGBAWRAP(8, uint8_t, 1)
+DEFINE_RGBAWRAP(16, uint16_t, 2)
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ChromaShiftContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    int nb_jobs;
+
+    /* No output buffer: release the input and report ENOMEM. */
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    /* When running as chromashift, plane 0 is copied through unchanged. */
+    if (!s->is_rgbashift)
+        av_image_copy_plane(out->data[0], out->linesize[0],
+                            in->data[0], in->linesize[0],
+                            s->linesize[0], s->height[0]);
+
+    /* Slice workers take the destination as argument and the source via s->in. */
+    s->in = in;
+    nb_jobs = FFMIN3(s->height[1], s->height[2], ff_filter_get_nb_threads(ctx));
+    ctx->internal->execute(ctx, s->filter_slice, out, NULL, nb_jobs);
+    s->in = NULL;
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    ChromaShiftContext *s = ctx->priv;
+    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+
+    s->is_rgbashift = !strcmp(ctx->filter->name, "rgbashift"); /* callback is shared by both filters */
+    s->depth = desc->comp[0].depth;
+    s->nb_planes = desc->nb_components;
+    /* Worker: filter kind first, then edge mode (non-zero selects wrap), then sample depth. */
+    if (s->is_rgbashift && s->edge)
+        s->filter_slice = s->depth > 8 ? rgbawrap_slice16 : rgbawrap_slice8;
+    else if (s->is_rgbashift)
+        s->filter_slice = s->depth > 8 ? rgbasmear_slice16 : rgbasmear_slice8;
+    else if (s->edge)
+        s->filter_slice = s->depth > 8 ? wrap_slice16 : wrap_slice8;
+    else
+        s->filter_slice = s->depth > 8 ? smear_slice16 : smear_slice8;
+    /* Planes 0 and 3 keep the link size; planes 1 and 2 use the chroma-subsampled size. */
+    s->width[0]  = s->width[3]  = inlink->w;
+    s->width[1]  = s->width[2]  = chroma_w;
+    s->height[0] = s->height[3] = inlink->h;
+    s->height[1] = s->height[2] = chroma_h;
+    return av_image_fill_linesizes(s->linesize, inlink->format, inlink->w);
+}
+
+#define OFFSET(x) offsetof(ChromaShiftContext, x) /* byte offset of the context field backing an option */
+#define VF AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM /* flags set on every option in this file */
+
+static const AVOption chromashift_options[] = { /* options of the chromashift filter */
+    { "cbh", "shift chroma-blue horizontally", OFFSET(cbh),  AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF }, /* shifts: default 0, range -255..255 */
+    { "cbv", "shift chroma-blue vertically",   OFFSET(cbv),  AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "crh", "shift chroma-red horizontally",  OFFSET(crh),  AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "crv", "shift chroma-red vertically",    OFFSET(crv),  AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "edge", "set edge operation",            OFFSET(edge), AV_OPT_TYPE_INT,   {.i64=0},    0,   1, .flags = VF, "edge" }, /* default 0, the value named "smear" */
+    { "smear",                              0,            0, AV_OPT_TYPE_CONST, {.i64=0},    0,   0, .flags = VF, "edge" }, /* named value 0 of "edge" */
+    { "wrap",                               0,            0, AV_OPT_TYPE_CONST, {.i64=1},    0,   0, .flags = VF, "edge" }, /* named value 1 of "edge" */
+    { NULL }, /* table terminator */
+};
+
+static const AVFilterPad inputs[] = { /* single video input pad, used by both chromashift and rgbashift */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* per-frame processing */
+        .config_props = config_input, /* picks the slice worker and fills plane sizes */
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = { /* single video output pad; no callbacks are set on it */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(chromashift); /* NOTE(review): presumably defines chromashift_class from chromashift_options - confirm */
+
+AVFilter ff_vf_chromashift = { /* definition of the "chromashift" filter */
+    .name          = "chromashift", /* config_input() compares this name against "rgbashift" */
+    .description   = NULL_IF_CONFIG_SMALL("Shift chroma."),
+    .priv_size     = sizeof(ChromaShiftContext), /* same private context type as ff_vf_rgbashift */
+    .priv_class    = &chromashift_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .inputs        = inputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
+
+static const AVOption rgbashift_options[] = { /* options of the rgbashift filter */
+    { "rh", "shift red horizontally",   OFFSET(rh),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF }, /* shifts: default 0, range -255..255 */
+    { "rv", "shift red vertically",     OFFSET(rv),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "gh", "shift green horizontally", OFFSET(gh),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "gv", "shift green vertically",   OFFSET(gv),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "bh", "shift blue horizontally",  OFFSET(bh),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "bv", "shift blue vertically",    OFFSET(bv),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "ah", "shift alpha horizontally", OFFSET(ah),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "av", "shift alpha vertically",   OFFSET(av),   AV_OPT_TYPE_INT,   {.i64=0}, -255, 255, .flags = VF },
+    { "edge", "set edge operation",     OFFSET(edge), AV_OPT_TYPE_INT,   {.i64=0},    0,   1, .flags = VF, "edge" }, /* default 0, the value named "smear" */
+    { "smear",                          0,         0, AV_OPT_TYPE_CONST, {.i64=0},    0,   0, .flags = VF, "edge" }, /* named value 0 of "edge" */
+    { "wrap",                           0,         0, AV_OPT_TYPE_CONST, {.i64=1},    0,   0, .flags = VF, "edge" }, /* named value 1 of "edge" */
+    { NULL }, /* table terminator */
+};
+
+AVFILTER_DEFINE_CLASS(rgbashift); /* NOTE(review): presumably defines rgbashift_class from rgbashift_options - confirm */
+
+AVFilter ff_vf_rgbashift = { /* definition of the "rgbashift" filter */
+    .name          = "rgbashift", /* config_input() sets is_rgbashift when the name matches this string */
+    .description   = NULL_IF_CONFIG_SMALL("Shift RGBA."),
+    .priv_size     = sizeof(ChromaShiftContext), /* same private context type as ff_vf_chromashift */
+    .priv_class    = &rgbashift_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .inputs        = inputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index 421c169fd7944..1305569c8889a 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -157,7 +157,7 @@ static void filter16_prewitt(uint8_t *dstp, int width,
         int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -1 +
                    AV_RN16A(&c[5][2 * x]) *  1 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
 
-        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
+        dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
 }
 
@@ -173,7 +173,7 @@ static void filter16_roberts(uint8_t *dstp, int width,
         int suma = AV_RN16A(&c[0][2 * x]) *  1 + AV_RN16A(&c[1][2 * x]) * -1;
         int sumb = AV_RN16A(&c[4][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -1;
 
-        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
+        dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
 }
 
@@ -191,7 +191,7 @@ static void filter16_sobel(uint8_t *dstp, int width,
         int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 + AV_RN16A(&c[3][2 * x]) * -2 +
                    AV_RN16A(&c[5][2 * x]) *  2 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
 
-        dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak);
+        dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, peak);
     }
 }
 
@@ -211,7 +211,7 @@ static void filter_prewitt(uint8_t *dst, int width,
         int sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -1 +
                    c5[x] *  1 + c6[x] * -1 + c8[x] *  1;
 
-        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
+        dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
     }
 }
 
@@ -226,7 +226,7 @@ static void filter_roberts(uint8_t *dst, int width,
         int suma = c[0][x] *  1 + c[1][x] * -1;
         int sumb = c[4][x] *  1 + c[3][x] * -1;
 
-        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
+        dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
     }
 }
 
@@ -246,7 +246,7 @@ static void filter_sobel(uint8_t *dst, int width,
         int sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
                    c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
 
-        dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta);
+        dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
     }
 }
 
diff --git a/libavfilter/vf_datascope.c b/libavfilter/vf_datascope.c
index 6bcc18e85e045..c9039a60f65d2 100644
--- a/libavfilter/vf_datascope.c
+++ b/libavfilter/vf_datascope.c
@@ -802,8 +802,8 @@ static void draw_trace8(OscilloscopeContext *s, AVFrame *frame)
             if ((1 << c) & s->components) {
                 int x = i * s->width / s->nb_values;
                 int px = (i - 1) * s->width / s->nb_values;
-                int py = s->height - s->values[i-1].p[c] * s->height / 256;
-                int y = s->height - s->values[i].p[c] * s->height / 256;
+                int py = s->height - s->values[i-1].p[s->rgba_map[c]] * s->height / 256;
+                int y = s->height - s->values[i].p[s->rgba_map[c]] * s->height / 256;
 
                 draw_line(&s->draw, s->ox + x, s->oy + y, s->ox + px, s->oy + py, frame, s->colors[c]);
             }
@@ -821,8 +821,8 @@ static void draw_trace16(OscilloscopeContext *s, AVFrame *frame)
             if ((1 << c) & s->components) {
                 int x = i * s->width / s->nb_values;
                 int px = (i - 1) * s->width / s->nb_values;
-                int py = s->height - s->values[i-1].p[c] * s->height / s->max;
-                int y = s->height - s->values[i].p[c] * s->height / s->max;
+                int py = s->height - s->values[i-1].p[s->rgba_map[c]] * s->height / s->max;
+                int y = s->height - s->values[i].p[s->rgba_map[c]] * s->height / s->max;
 
                 draw_line(&s->draw, s->ox + x, s->oy + y, s->ox + px, s->oy + py, frame, s->colors[c]);
             }
@@ -996,9 +996,9 @@ static int oscilloscope_filter_frame(AVFilterLink *inlink, AVFrame *frame)
     for (i = 0; i < s->nb_values; i++) {
         for (c = 0; c < s->nb_comps; c++) {
             if ((1 << c) & s->components) {
-                max[c] = FFMAX(max[c], s->values[i].p[c]);
-                min[c] = FFMIN(min[c], s->values[i].p[c]);
-                average[c] += s->values[i].p[c];
+                max[c] = FFMAX(max[c], s->values[i].p[s->rgba_map[c]]);
+                min[c] = FFMIN(min[c], s->values[i].p[s->rgba_map[c]]);
+                average[c] += s->values[i].p[s->rgba_map[c]];
             }
         }
     }
@@ -1013,7 +1013,7 @@ static int oscilloscope_filter_frame(AVFilterLink *inlink, AVFrame *frame)
                 const char yuva[4] = { 'Y', 'U', 'V', 'A' };
                 char text[128];
 
-                snprintf(text, sizeof(text), "%c avg:%.1f min:%d max:%d\n", s->is_rgb ? rgba[c] : yuva[c], average[s->rgba_map[c]], min[s->rgba_map[c]], max[s->rgba_map[c]]);
+                snprintf(text, sizeof(text), "%c avg:%.1f min:%d max:%d\n", s->is_rgb ? rgba[c] : yuva[c], average[c], min[c], max[c]);
                 draw_text(&s->draw, frame, &s->white, s->ox +  2 + 280 * i++, s->oy + s->height + 4, text, 0);
             }
         }
diff --git a/libavfilter/vf_dedot.c b/libavfilter/vf_dedot.c
new file mode 100644
index 0000000000000..bb0f9e5ac8f4f
--- /dev/null
+++ b/libavfilter/vf_dedot.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct DedotContext { /* private state of the dedot filter */
+    const AVClass *class;
+    int m;    /* bitmask tested in activate(): bit 0 runs dedotcrawl, bit 1 runs derainbow */
+    float lt; /* multiplied by max to give luma2d */
+    float tl; /* multiplied by max to give lumaT */
+    float tc; /* multiplied by max to give chromaT1 */
+    float ct; /* multiplied by max to give chromaT2 */
+
+    const AVPixFmtDescriptor *desc; /* descriptor of the output link's pixel format */
+    int depth;    /* depth of component 0 */
+    int max;      /* (1 << depth) - 1 */
+    int luma2d;   /* luma: pixels whose spatial second differences are <= this are skipped */
+    int lumaT;    /* luma: bound on the temporal differences required before blending */
+    int chromaT1; /* chroma: upper bound on the temporal "similar" differences */
+    int chromaT2; /* chroma: current sample must differ from both neighbours by more than this */
+
+    int eof;        /* activate() consumes input frames only while this is 0 */
+    int eof_frames; /* decremented in activate() whenever it runs without a new input frame */
+    int nb_planes;
+    int planewidth[4];  /* [0],[3] = link width; [1],[2] = chroma-subsampled width */
+    int planeheight[4]; /* [0],[3] = link height; [1],[2] = chroma-subsampled height */
+
+    AVFrame *frames[5]; /* frame window; the output is cloned from frames[2] */
+
+    int (*dedotcrawl)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); /* plane-0 worker, arg is the output AVFrame */
+    int (*derainbow)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); /* per-plane worker, arg is a ThreadData */
+} DedotContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported);
+
+    /* A NULL list is reported as ENOMEM; otherwise it becomes the common format set. */
+    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
+}
+
+#define DEFINE_DEDOTCRAWL(name, type, div) /* plane-0 slice worker */ \
+static int dedotcrawl##name(AVFilterContext *ctx, void *arg,     \
+                            int jobnr, int nb_jobs)              \
+{                                                                \
+    DedotContext *s = ctx->priv;                                 \
+    AVFrame *out = arg; /* output frame, passed by activate() */ \
+    int src_linesize = s->frames[2]->linesize[0] / div; /* byte strides divided by div */ \
+    int dst_linesize = out->linesize[0] / div;                   \
+    int p0_linesize = s->frames[0]->linesize[0] / div;           \
+    int p1_linesize = s->frames[1]->linesize[0] / div;           \
+    int p3_linesize = s->frames[3]->linesize[0] / div;           \
+    int p4_linesize = s->frames[4]->linesize[0] / div;           \
+    const int h = s->planeheight[0];                             \
+    int slice_start = (h * jobnr) / nb_jobs;                     \
+    int slice_end = (h * (jobnr+1)) / nb_jobs;                   \
+    type *p0 = (type *)s->frames[0]->data[0]; /* p0,p1,src,p3,p4 = frames[0..4] */ \
+    type *p1 = (type *)s->frames[1]->data[0];                    \
+    type *p3 = (type *)s->frames[3]->data[0];                    \
+    type *p4 = (type *)s->frames[4]->data[0];                    \
+    type *src = (type *)s->frames[2]->data[0];                   \
+    type *dst = (type *)out->data[0];                            \
+    const int luma2d = s->luma2d;                                \
+    const int lumaT = s->lumaT;                                  \
+                                                                 \
+    if (!slice_start) { /* row 0 is skipped: the loop reads the row above */ \
+        slice_start++;                                           \
+    }                                                            \
+    p0 += p0_linesize * slice_start;                             \
+    p1 += p1_linesize * slice_start;                             \
+    p3 += p3_linesize * slice_start;                             \
+    p4 += p4_linesize * slice_start;                             \
+    src += src_linesize * slice_start;                           \
+    dst += dst_linesize * slice_start;                           \
+    if (slice_end == h) { /* last row is skipped: the loop reads the row below */ \
+        slice_end--;                                             \
+    }                                                            \
+    for (int y = slice_start; y < slice_end; y++) {              \
+        for (int x = 1; x < s->planewidth[0] - 1; x++) { /* first/last column skipped */ \
+            int above = src[x - src_linesize];                   \
+            int bellow = src[x + src_linesize];                  \
+            int cur = src[x];                                    \
+            int left = src[x - 1];                               \
+            int right = src[x + 1];                              \
+                                                                 \
+            if (FFABS(above + bellow - 2 * cur) <= luma2d &&     \
+                FFABS(left + right - 2 * cur) <= luma2d)         \
+                continue; /* both second differences small: dst[x] is not written */ \
+                                                                 \
+            if (FFABS(cur - p0[x]) <= lumaT && /* cur close to frames 0 and 4, */ \
+                FFABS(cur - p4[x]) <= lumaT && /* and frames 1 and 3 close to each other */ \
+                FFABS(p1[x] - p3[x]) <= lumaT) {                 \
+                int diff1 = FFABS(cur - p1[x]);                  \
+                int diff2 = FFABS(cur - p3[x]);                  \
+                                                                 \
+                if (diff1 < diff2) /* rounded average with the closer of p1/p3 */ \
+                    dst[x] = (src[x] + p1[x] + 1) >> 1;          \
+                else                                             \
+                    dst[x] = (src[x] + p3[x] + 1) >> 1;          \
+            }                                                    \
+        }                                                        \
+                                                                 \
+        dst += dst_linesize;                                     \
+        src += src_linesize;                                     \
+        p0 += p0_linesize;                                       \
+        p1 += p1_linesize;                                       \
+        p3 += p3_linesize;                                       \
+        p4 += p4_linesize;                                       \
+    }                                                            \
+    return 0;                                                    \
+}
+
+DEFINE_DEDOTCRAWL(8, uint8_t, 1)
+DEFINE_DEDOTCRAWL(16, uint16_t, 2)
+
+typedef struct ThreadData { /* argument handed to the derainbow slice workers */
+    AVFrame *out; /* frame the workers write to */
+    int plane;    /* plane index to process; activate() uses 1 and then 2 */
+} ThreadData;
+
+#define DEFINE_DERAINBOW(name, type, div) /* slice worker for the plane named in ThreadData */ \
+static int derainbow##name(AVFilterContext *ctx, void *arg,  \
+                           int jobnr, int nb_jobs)           \
+{                                                            \
+    DedotContext *s = ctx->priv;                             \
+    ThreadData *td = arg;                                    \
+    AVFrame *out = td->out;                                  \
+    const int plane = td->plane;                             \
+    const int h = s->planeheight[plane];                     \
+    int slice_start = (h * jobnr) / nb_jobs; /* this job: rows [start, end) */ \
+    int slice_end = (h * (jobnr+1)) / nb_jobs;               \
+    int src_linesize = s->frames[2]->linesize[plane] / div; /* byte strides divided by div */ \
+    int dst_linesize = out->linesize[plane] / div;           \
+    int p0_linesize = s->frames[0]->linesize[plane] / div;   \
+    int p1_linesize = s->frames[1]->linesize[plane] / div;   \
+    int p3_linesize = s->frames[3]->linesize[plane] / div;   \
+    int p4_linesize = s->frames[4]->linesize[plane] / div;   \
+    type *p0 = (type *)s->frames[0]->data[plane]; /* p0,p1,src,p3,p4 = frames[0..4] */ \
+    type *p1 = (type *)s->frames[1]->data[plane];            \
+    type *p3 = (type *)s->frames[3]->data[plane];            \
+    type *p4 = (type *)s->frames[4]->data[plane];            \
+    type *src = (type *)s->frames[2]->data[plane];           \
+    type *dst = (type *)out->data[plane];                    \
+    const int chromaT1 = s->chromaT1;                        \
+    const int chromaT2 = s->chromaT2;                        \
+                                                             \
+    p0 += slice_start * p0_linesize;                         \
+    p1 += slice_start * p1_linesize;                         \
+    p3 += slice_start * p3_linesize;                         \
+    p4 += slice_start * p4_linesize;                         \
+    src += slice_start * src_linesize;                       \
+    dst += slice_start * dst_linesize;                       \
+    for (int y = slice_start; y < slice_end; y++) {          \
+        for (int x = 0; x < s->planewidth[plane]; x++) {     \
+            int cur = src[x];                                \
+                                                             \
+            if (FFABS(cur - p0[x]) <= chromaT1 && /* cur close to frames 0 and 4 */ \
+                FFABS(cur - p4[x]) <= chromaT1 &&            \
+                FFABS(p1[x] - p3[x]) <= chromaT1 && /* frames 1 and 3 close to each other */ \
+                FFABS(cur - p1[x]) > chromaT2 && /* but cur differs from both of them */ \
+                FFABS(cur - p3[x]) > chromaT2) {             \
+                int diff1 = FFABS(cur - p1[x]);              \
+                int diff2 = FFABS(cur - p3[x]);              \
+                                                             \
+                if (diff1 < diff2) /* rounded average with the closer of p1/p3 */ \
+                    dst[x] = (src[x] + p1[x] + 1) >> 1;      \
+                else                                         \
+                    dst[x] = (src[x] + p3[x] + 1) >> 1;      \
+            }                                                \
+        }                                                    \
+                                                             \
+        dst += dst_linesize;                                 \
+        src += src_linesize;                                 \
+        p0 += p0_linesize;                                   \
+        p1 += p1_linesize;                                   \
+        p3 += p3_linesize;                                   \
+        p4 += p4_linesize;                                   \
+    }                                                        \
+    return 0;                                                \
+}
+
+DEFINE_DERAINBOW(8, uint8_t, 1)
+DEFINE_DERAINBOW(16, uint16_t, 2)
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    DedotContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
+    /* Derives per-format state: thresholds, plane sizes and worker functions. */
+    s->desc = desc;
+    if (!desc)
+        return AVERROR_BUG;
+
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+    s->depth     = desc->comp[0].depth;
+    s->max       = (1 << s->depth) - 1;
+
+    /* lt/tl/tc/ct are multiplied by the maximum sample value and stored as int. */
+    s->luma2d   = s->lt * s->max;
+    s->lumaT    = s->tl * s->max;
+    s->chromaT1 = s->tc * s->max;
+    s->chromaT2 = s->ct * s->max;
+
+    /* Planes 0 and 3 keep the link size; planes 1 and 2 are chroma-subsampled. */
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+
+    /* Depth up to 8 uses the uint8_t workers, anything deeper the uint16_t ones. */
+    s->dedotcrawl = s->depth <= 8 ? dedotcrawl8 : dedotcrawl16;
+    s->derainbow  = s->depth <= 8 ? derainbow8  : derainbow16;
+
+    return 0;
+}
+
+/* Slide a five-frame window over the input and emit its (optionally filtered)
+ * middle frame; at EOF the newest frame is repeated twice to drain the window. */
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    DedotContext *s = ctx->priv;
+    AVFrame *frame = NULL;
+    int64_t pts;
+    int status;
+    int ret = 0;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (s->eof == 0) {
+        ret = ff_inlink_consume_frame(inlink, &frame);
+        if (ret < 0)
+            return ret;
+    }
+    if (frame || s->eof_frames > 0) {
+        AVFrame *out = NULL;
+
+        if (frame) {
+            /* Slot 4 takes the new frame; the very first frame also seeds slots 2-3. */
+            for (int i = 2; i < 5; i++) {
+                if (!s->frames[i] && !(s->frames[i] = av_frame_clone(frame)))
+                    ret = AVERROR(ENOMEM);
+            }
+            av_frame_free(&frame);
+        } else { /* draining: repeat the newest frame in place of the missing input */
+            s->eof_frames--;
+            if (!s->frames[3] || !(s->frames[4] = av_frame_clone(s->frames[3])))
+                ret = AVERROR(ENOMEM);
+        }
+
+        if (s->frames[0] && s->frames[1] && s->frames[2] &&
+            s->frames[3] && s->frames[4]) {
+            out = av_frame_clone(s->frames[2]);
+            if (!out) {
+                ret = AVERROR(ENOMEM);
+            } else if (!ctx->is_disabled && (ret = av_frame_make_writable(out)) >= 0) {
+                if (s->m & 1) /* "dotcrawl" flag of the m option */
+                    ctx->internal->execute(ctx, s->dedotcrawl, out, NULL,
+                                           FFMIN(s->planeheight[0], ff_filter_get_nb_threads(ctx)));
+                if (s->m & 2) { /* "rainbows" flag: one pass each for planes 1 and 2 */
+                    ThreadData td;
+                    td.out = out; td.plane = 1;
+                    ctx->internal->execute(ctx, s->derainbow, &td, NULL,
+                                           FFMIN(s->planeheight[1], ff_filter_get_nb_threads(ctx)));
+                    td.plane = 2;
+                    ctx->internal->execute(ctx, s->derainbow, &td, NULL,
+                                           FFMIN(s->planeheight[2], ff_filter_get_nb_threads(ctx)));
+                }
+            }
+        }
+
+        av_frame_free(&s->frames[0]);
+        s->frames[0] = s->frames[1];
+        s->frames[1] = s->frames[2];
+        s->frames[2] = s->frames[3];
+        s->frames[3] = s->frames[4];
+        s->frames[4] = NULL;
+
+        if (ret < 0) {
+            av_frame_free(&out); /* may hold a clone whose make_writable failed */
+            return ret;
+        }
+        if (out)
+            return ff_filter_frame(outlink, out);
+    }
+
+    if (s->eof) {
+        if (s->eof_frames <= 0)
+            ff_outlink_set_status(outlink, AVERROR_EOF, s->frames[2]->pts);
+        else
+            ff_filter_set_ready(ctx, 10);
+        return 0;
+    }
+
+    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        if (status == AVERROR_EOF) {
+            if (!s->frames[3]) { /* no frame was ever buffered: nothing to drain */
+                ff_outlink_set_status(outlink, AVERROR_EOF, pts);
+                return 0;
+            }
+            s->eof = 1;
+            s->eof_frames = 2;
+            ff_filter_set_ready(ctx, 10);
+            return 0;
+        }
+    }
+
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DedotContext *dedot = ctx->priv;
+    AVFrame **slot = dedot->frames; /* release every frame still held in the window */
+    for (AVFrame **end = slot + 5; slot < end; slot++)
+        av_frame_free(slot);
+}
+
+#define OFFSET(x) offsetof(DedotContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption dedot_options[] = {
+    { "m",   "set filtering mode",                          OFFSET( m), AV_OPT_TYPE_FLAGS, {.i64=3},    0, 3, FLAGS, "m" },
+    { "dotcrawl",                                           0,       0, AV_OPT_TYPE_CONST, {.i64=1},    0, 0, FLAGS, "m" },
+    { "rainbows",                                           0,       0, AV_OPT_TYPE_CONST, {.i64=2},    0, 0, FLAGS, "m" },
+    { "lt",  "set spatial luma threshold",                  OFFSET(lt), AV_OPT_TYPE_FLOAT, {.dbl=.079}, 0, 1, FLAGS },
+    { "tl",  "set tolerance for temporal luma",             OFFSET(tl), AV_OPT_TYPE_FLOAT, {.dbl=.079}, 0, 1, FLAGS },
+    { "tc",  "set tolerance for chroma temporal variation", OFFSET(tc), AV_OPT_TYPE_FLOAT, {.dbl=.058}, 0, 1, FLAGS },
+    { "ct",  "set temporal chroma threshold",               OFFSET(ct), AV_OPT_TYPE_FLOAT, {.dbl=.019}, 0, 1, FLAGS },
+    { NULL },
+};
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(dedot);
+
+AVFilter ff_vf_dedot = {
+    .name          = "dedot",
+    .description   = NULL_IF_CONFIG_SMALL("Reduce cross-luminance and cross-color."),
+    .priv_size     = sizeof(DedotContext),
+    .priv_class    = &dedot_class,
+    .query_formats = query_formats,
+    .activate      = activate,
+    .uninit        = uninit,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
index d6b02e98c5b28..bee10c220f6c7 100644
--- a/libavfilter/vf_deinterlace_qsv.c
+++ b/libavfilter/vf_deinterlace_qsv.c
@@ -419,9 +419,11 @@ static int submit_frame(AVFilterContext *ctx, AVFrame *frame,
     qf->surface.Info.PicStruct = !qf->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
                                  (qf->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
                                                            MFX_PICSTRUCT_FIELD_BFF);
-    if (qf->frame->repeat_pict == 1)
+    if (qf->frame->repeat_pict == 1) {
         qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
-    else if (qf->frame->repeat_pict == 2)
+        qf->surface.Info.PicStruct |= qf->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
+                                                            MFX_PICSTRUCT_FIELD_BFF;
+    } else if (qf->frame->repeat_pict == 2)
         qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
     else if (qf->frame->repeat_pict == 4)
         qf->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
diff --git a/libavfilter/vf_deinterlace_vaapi.c b/libavfilter/vf_deinterlace_vaapi.c
index f7a262d0c6be1..f67a1c8e79675 100644
--- a/libavfilter/vf_deinterlace_vaapi.c
+++ b/libavfilter/vf_deinterlace_vaapi.c
@@ -113,6 +113,7 @@ static int deint_vaapi_build_filter_params(AVFilterContext *avctx)
             av_log(avctx, AV_LOG_ERROR, "Deinterlacing mode %d (%s) is "
                    "not supported.\n", ctx->mode,
                    deint_vaapi_mode_name(ctx->mode));
+            return AVERROR(EINVAL);
         }
     }
 
@@ -256,7 +257,7 @@ static int deint_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
             ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
         params.output_region = NULL;
-        params.output_background_color = 0xff000000;
+        params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK;
         params.output_color_standard = params.surface_color_standard;
 
         params.pipeline_flags = 0;
diff --git a/libavfilter/vf_detelecine.c b/libavfilter/vf_detelecine.c
index 0d1e2f2ffbb3a..0199214c3ee95 100644
--- a/libavfilter/vf_detelecine.c
+++ b/libavfilter/vf_detelecine.c
@@ -206,6 +206,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
 
     if (s->nskip_fields >= 2) {
         s->nskip_fields -= 2;
+        av_frame_free(&inpicref);
         return 0;
     } else if (s->nskip_fields >= 1) {
         for (i = 0; i < s->nb_planes; i++) {
@@ -216,6 +217,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
         }
         s->occupied = 1;
         s->nskip_fields--;
+        av_frame_free(&inpicref);
         return 0;
     }
 
diff --git a/libavfilter/vf_extractplanes.c b/libavfilter/vf_extractplanes.c
index c1c8e694cac6d..739c2420cb614 100644
--- a/libavfilter/vf_extractplanes.c
+++ b/libavfilter/vf_extractplanes.c
@@ -23,11 +23,9 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
-#define FF_INTERNAL_FIELDS 1
-#include "libavfilter/framequeue.h"
-
 #include "avfilter.h"
 #include "drawutils.h"
+#include "filters.h"
 #include "internal.h"
 
 #define PLANE_R 0x01
@@ -101,6 +99,8 @@ AVFILTER_DEFINE_CLASS(extractplanes);
         AV_PIX_FMT_YUV422P12##suf,                             \
         AV_PIX_FMT_YUV444P12##suf,                             \
         AV_PIX_FMT_YUV440P12##suf,                             \
+        AV_PIX_FMT_YUVA422P12##suf,                            \
+        AV_PIX_FMT_YUVA444P12##suf,                            \
         AV_PIX_FMT_GBRP10##suf, AV_PIX_FMT_GBRAP10##suf,       \
         AV_PIX_FMT_GBRP12##suf, AV_PIX_FMT_GBRAP12##suf,       \
         AV_PIX_FMT_YUV420P9##suf,                              \
@@ -282,7 +282,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         const int idx = s->map[i];
         AVFrame *out;
 
-        if (outlink->status_in)
+        if (ff_outlink_get_status(outlink))
             continue;
 
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
diff --git a/libavfilter/vf_fade.c b/libavfilter/vf_fade.c
index c30c41db0da4a..17eca109b6ccb 100644
--- a/libavfilter/vf_fade.c
+++ b/libavfilter/vf_fade.c
@@ -386,13 +386,13 @@ static const AVOption fade_options[] = {
                                                     OFFSET(nb_frames),   AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, FLAGS },
     { "alpha",       "fade alpha if it is available on the input", OFFSET(alpha),       AV_OPT_TYPE_BOOL, {.i64 = 0    }, 0,       1, FLAGS },
     { "start_time",  "Number of seconds of the beginning of the effect.",
-                                                    OFFSET(start_time),  AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+                                                    OFFSET(start_time),  AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "st",          "Number of seconds of the beginning of the effect.",
-                                                    OFFSET(start_time),  AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+                                                    OFFSET(start_time),  AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "duration",    "Duration of the effect in seconds.",
-                                                    OFFSET(duration),    AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+                                                    OFFSET(duration),    AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "d",           "Duration of the effect in seconds.",
-                                                    OFFSET(duration),    AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+                                                    OFFSET(duration),    AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT64_MAX, FLAGS },
     { "color",       "set color",                   OFFSET(color_rgba),  AV_OPT_TYPE_COLOR,    {.str = "black"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "c",           "set color",                   OFFSET(color_rgba),  AV_OPT_TYPE_COLOR,    {.str = "black"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { NULL }
diff --git a/libavfilter/vf_fps.c b/libavfilter/vf_fps.c
index 9167a00a13ccb..6b99f20d2bb36 100644
--- a/libavfilter/vf_fps.c
+++ b/libavfilter/vf_fps.c
@@ -249,6 +249,8 @@ static int write_frame(AVFilterContext *ctx, FPSContext *s, AVFilterLink *outlin
         frame = av_frame_clone(s->frames[0]);
         if (!frame)
             return AVERROR(ENOMEM);
+        // Make sure Closed Captions will not be duplicated
+        av_frame_remove_side_data(s->frames[0], AV_FRAME_DATA_A53_CC);
         frame->pts = s->next_pts++;
 
         av_log(ctx, AV_LOG_DEBUG, "Writing frame with pts %"PRId64" to pts %"PRId64"\n",
diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
index fb6538192347f..06e463e4d77d8 100644
--- a/libavfilter/vf_framerate.c
+++ b/libavfilter/vf_framerate.c
@@ -33,13 +33,13 @@
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
-#include "libavutil/pixelutils.h"
 
 #include "avfilter.h"
 #include "internal.h"
 #include "video.h"
 #include "filters.h"
 #include "framerate.h"
+#include "scene_sad.h"
 
 #define OFFSET(x) offsetof(FrameRateContext, x)
 #define V AV_OPT_FLAG_VIDEO_PARAM
@@ -62,52 +62,6 @@ static const AVOption framerate_options[] = {
 
 AVFILTER_DEFINE_CLASS(framerate);
 
-static av_always_inline int64_t sad_8x8_16(const uint16_t *src1, ptrdiff_t stride1,
-                                           const uint16_t *src2, ptrdiff_t stride2)
-{
-    int sum = 0;
-    int x, y;
-
-    for (y = 0; y < 8; y++) {
-        for (x = 0; x < 8; x++)
-            sum += FFABS(src1[x] - src2[x]);
-        src1 += stride1;
-        src2 += stride2;
-    }
-    return sum;
-}
-
-static int64_t scene_sad16(FrameRateContext *s, const uint16_t *p1, int p1_linesize, const uint16_t* p2, int p2_linesize, const int width, const int height)
-{
-    int64_t sad;
-    int x, y;
-    for (sad = y = 0; y < height - 7; y += 8) {
-        for (x = 0; x < width - 7; x += 8) {
-            sad += sad_8x8_16(p1 + y * p1_linesize + x,
-                              p1_linesize,
-                              p2 + y * p2_linesize + x,
-                              p2_linesize);
-        }
-    }
-    return sad;
-}
-
-static int64_t scene_sad8(FrameRateContext *s, uint8_t *p1, int p1_linesize, uint8_t* p2, int p2_linesize, const int width, const int height)
-{
-    int64_t sad;
-    int x, y;
-    for (sad = y = 0; y < height - 7; y += 8) {
-        for (x = 0; x < width - 7; x += 8) {
-            sad += s->sad(p1 + y * p1_linesize + x,
-                          p1_linesize,
-                          p2 + y * p2_linesize + x,
-                          p2_linesize);
-        }
-    }
-    emms_c();
-    return sad;
-}
-
 static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
 {
     FrameRateContext *s = ctx->priv;
@@ -117,16 +71,13 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next
 
     if (crnt->height == next->height &&
         crnt->width  == next->width) {
-        int64_t sad;
+        uint64_t sad;
         double mafd, diff;
 
         ff_dlog(ctx, "get_scene_score() process\n");
-        if (s->bitdepth == 8)
-            sad = scene_sad8(s, crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height);
-        else
-            sad = scene_sad16(s, (const uint16_t*)crnt->data[0], crnt->linesize[0] / 2, (const uint16_t*)next->data[0], next->linesize[0] / 2, crnt->width, crnt->height);
-
-        mafd = (double)sad * 100.0 / FFMAX(1, (crnt->height & ~7) * (crnt->width & ~7)) / (1 << s->bitdepth);
+        s->sad(crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height, &sad);
+        emms_c();
+        mafd = (double)sad * 100.0 / (crnt->width * crnt->height) / (1 << s->bitdepth);
         diff = fabs(mafd - s->prev_mafd);
         ret  = av_clipf(FFMIN(mafd, diff), 0, 100.0);
         s->prev_mafd = mafd;
@@ -350,7 +301,7 @@ static int config_input(AVFilterLink *inlink)
     s->bitdepth = pix_desc->comp[0].depth;
     s->vsub = pix_desc->log2_chroma_h;
 
-    s->sad = av_pixelutils_get_sad_fn(3, 3, 2, s); // 8x8 both sources aligned
+    s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
     if (!s->sad)
         return AVERROR(EINVAL);
 
diff --git a/libavfilter/vf_freezedetect.c b/libavfilter/vf_freezedetect.c
new file mode 100644
index 0000000000000..299a5dfbf67c5
--- /dev/null
+++ b/libavfilter/vf_freezedetect.c
@@ -0,0 +1,236 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * video freeze detection filter
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/timestamp.h"
+
+#include "avfilter.h"
+#include "filters.h"
+#include "scene_sad.h"
+
+typedef struct FreezeDetectContext {
+    const AVClass *class;
+
+    ptrdiff_t width[4];          ///< per-plane width in samples (line size in bytes, halved above 8 bits)
+    ptrdiff_t height[4];         ///< per-plane height in rows
+    ff_scene_sad_fn sad;         ///< SAD routine chosen in config_input for 8- or 16-bit samples
+    int bitdepth;                ///< depth of component 0 of the input pixel format
+    AVFrame *reference_frame;    ///< most recent frame not judged frozen; later frames are compared to it
+    int64_t n;                   ///< number of frames consumed so far
+    int64_t reference_n;         ///< value of n when reference_frame was stored
+    int frozen;                  ///< frozen state recorded by the last check that met the minimum duration
+
+    double noise;                ///< noise tolerance, compared against the normalised mean absolute difference
+    int64_t duration;            ///< minimum duration of frozen frame until notification
+} FreezeDetectContext;
+
+#define OFFSET(x) offsetof(FreezeDetectContext, x)
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption freezedetect_options[] = {
+    { "n",                   "set noise tolerance",                       OFFSET(noise),  AV_OPT_TYPE_DOUBLE,   {.dbl=0.001},     0,       1.0, V|F },
+    { "noise",               "set noise tolerance",                       OFFSET(noise),  AV_OPT_TYPE_DOUBLE,   {.dbl=0.001},     0,       1.0, V|F },
+    { "d",                   "set minimum duration in seconds",        OFFSET(duration),  AV_OPT_TYPE_DURATION, {.i64=2000000},   0, INT64_MAX, V|F },
+    { "duration",            "set minimum duration in seconds",        OFFSET(duration),  AV_OPT_TYPE_DURATION, {.i64=2000000},   0, INT64_MAX, V|F },
+
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(freezedetect);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUYV422, AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_UYVY422, AV_PIX_FMT_NV12, AV_PIX_FMT_NV21, AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_ABGR, AV_PIX_FMT_BGRA, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YA8, AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV420P10,
+        AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV422P9, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9,
+        AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP16, AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9,
+        AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10,
+        AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16,
+        AV_PIX_FMT_YUVA444P16, AV_PIX_FMT_NV16, AV_PIX_FMT_YVYU422,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV422P14,
+        AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14, AV_PIX_FMT_GBRP12,
+        AV_PIX_FMT_GBRP14, AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV440P12, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP10,
+        AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY9,
+        AV_PIX_FMT_GRAY14,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+/* Cache per-plane dimensions (in samples) and select the SAD routine for the input format. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FreezeDetectContext *s = ctx->priv;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
+
+    s->bitdepth = pix_desc->comp[0].depth;
+    for (int plane = 0; plane < 4; plane++) {
+        const int vsub = (plane == 1 || plane == 2) ? pix_desc->log2_chroma_h : 0;
+        ptrdiff_t line_size = av_image_get_linesize(inlink->format, inlink->w, plane);
+        s->width[plane] = line_size >> (s->bitdepth > 8); /* bytes -> samples */
+        s->height[plane] = AV_CEIL_RSHIFT(inlink->h, vsub); /* round up: keep the last chroma row of odd heights */
+    }
+
+    s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
+    if (!s->sad)
+        return AVERROR(EINVAL);
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    FreezeDetectContext *s = ctx->priv;
+    av_frame_free(&s->reference_frame); /* drop the retained comparison frame, if any */
+}
+
+/* Return nonzero when the normalised mean absolute difference over all planes is within s->noise. */
+static int is_frozen(FreezeDetectContext *s, AVFrame *reference, AVFrame *frame)
+{
+    uint64_t total_sad = 0, nb_samples = 0;
+    double mafd;
+    for (int p = 0; p < 4; p++) {
+        uint64_t plane_sad;
+        if (!s->width[p])
+            continue; /* zero width: presumably a plane this format does not have */
+        s->sad(frame->data[p], frame->linesize[p],
+               reference->data[p], reference->linesize[p],
+               s->width[p], s->height[p], &plane_sad);
+        total_sad  += plane_sad;
+        nb_samples += s->width[p] * s->height[p];
+    }
+    emms_c();
+    mafd = (double)total_sad / nb_samples / (1ULL << s->bitdepth); /* scale by sample count and value range */
+    return mafd <= s->noise;
+}
+
+static int set_meta(FreezeDetectContext *s, AVFrame *frame, const char *key, const char *value)
+{
+    av_log(s, AV_LOG_INFO, "%s: %s\n", key, value); /* report the event in the log ... */
+    return av_dict_set(&frame->metadata, key, value, 0); /* ... and store it in the frame metadata */
+}
+
+/* Compare each frame with the last changed one and tag freeze start/end events as frame metadata. */
+static int activate(AVFilterContext *ctx)
+{
+    int ret;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    FreezeDetectContext *s = ctx->priv;
+    AVFrame *frame;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    ret = ff_inlink_consume_frame(inlink, &frame);
+    if (ret < 0)
+        return ret;
+
+    if (frame) {
+        int frozen = 0;
+        s->n++;
+        if (s->reference_frame) {
+            int64_t duration;
+            if (s->reference_frame->pts == AV_NOPTS_VALUE || frame->pts == AV_NOPTS_VALUE || frame->pts < s->reference_frame->pts)     // Discontinuity?
+                duration = inlink->frame_rate.num > 0 ? av_rescale_q(s->n - s->reference_n, av_inv_q(inlink->frame_rate), AV_TIME_BASE_Q) : 0;
+            else
+                duration = av_rescale_q(frame->pts - s->reference_frame->pts, inlink->time_base, AV_TIME_BASE_Q);
+
+            frozen = is_frozen(s, s->reference_frame, frame);
+            if (duration >= s->duration) {
+                /* The minimum duration may first be met on the frame that ends the freeze: report the start anyway. */
+                if (!s->frozen)
+                    set_meta(s, frame, "lavfi.freezedetect.freeze_start", av_ts2timestr(s->reference_frame->pts, &inlink->time_base));
+                if (!frozen) {
+                    set_meta(s, frame, "lavfi.freezedetect.freeze_duration", av_ts2timestr(duration, &AV_TIME_BASE_Q));
+                    set_meta(s, frame, "lavfi.freezedetect.freeze_end", av_ts2timestr(frame->pts, &inlink->time_base));
+                }
+                s->frozen = frozen;
+            }
+        }
+
+        if (!frozen) {
+            av_frame_free(&s->reference_frame);
+            s->reference_frame = av_frame_clone(frame);
+            s->reference_n = s->n;
+            if (!s->reference_frame) {
+                av_frame_free(&frame);
+                return AVERROR(ENOMEM);
+            }
+        }
+        return ff_filter_frame(outlink, frame);
+    }
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
+static const AVFilterPad freezedetect_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad freezedetect_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_freezedetect = {
+    .name          = "freezedetect",
+    .description   = NULL_IF_CONFIG_SMALL("Detects frozen video input."),
+    .priv_size     = sizeof(FreezeDetectContext),
+    .priv_class    = &freezedetect_class,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = freezedetect_inputs,
+    .outputs       = freezedetect_outputs,
+    .activate      = activate,
+};
diff --git a/libavfilter/vf_limiter.c b/libavfilter/vf_limiter.c
index bb7f1d37cddd8..6f340069da65a 100644
--- a/libavfilter/vf_limiter.c
+++ b/libavfilter/vf_limiter.c
@@ -18,7 +18,6 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
-#include "libavutil/eval.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -138,13 +137,14 @@ static int config_props(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     LimiterContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    int vsub, hsub, ret;
+    int depth, vsub, hsub, ret;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
 
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
+    depth = desc->comp[0].depth;
     hsub = desc->log2_chroma_w;
     vsub = desc->log2_chroma_h;
     s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
@@ -152,10 +152,11 @@ static int config_props(AVFilterLink *inlink)
     s->width[1]  = s->width[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
     s->width[0]  = s->width[3]  = inlink->w;
 
-    if (desc->comp[0].depth == 8) {
+    s->max = FFMIN(s->max, (1 << depth) - 1);
+    s->min = FFMIN(s->min, (1 << depth) - 1);
+
+    if (depth == 8) {
         s->dsp.limiter = limiter8;
-        s->max = FFMIN(s->max, 255);
-        s->min = FFMIN(s->min, 255);
     } else {
         s->dsp.limiter = limiter16;
     }
diff --git a/libavfilter/vf_lut2.c b/libavfilter/vf_lut2.c
index 66c481e570410..d765dc9ad2a44 100644
--- a/libavfilter/vf_lut2.c
+++ b/libavfilter/vf_lut2.c
@@ -54,12 +54,17 @@ typedef struct LUT2Context {
     const AVClass *class;
     FFFrameSync fs;
 
+    int odepth;
     char   *comp_expr_str[4];
 
     AVExpr *comp_expr[4];
     double var_values[VAR_VARS_NB];
     uint16_t *lut[4];  ///< lookup table for each component
     int width[4], height[4];
+    int widthx[4], heightx[4];
+    int widthy[4], heighty[4];
+    int nb_planesx;
+    int nb_planesy;
     int nb_planes;
     int depth, depthx, depthy;
     int tlut2;
@@ -77,6 +82,7 @@ static const AVOption options[] = {
     { "c1", "set component #1 expression", OFFSET(comp_expr_str[1]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
     { "c2", "set component #2 expression", OFFSET(comp_expr_str[2]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
     { "c3", "set component #3 expression", OFFSET(comp_expr_str[3]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
+    { "d",  "set output depth",            OFFSET(odepth),            AV_OPT_TYPE_INT,    { .i64 =  0  }, 0, 16, .flags = FLAGS },
     { NULL }
 };
 
@@ -96,27 +102,93 @@ static av_cold void uninit(AVFilterContext *ctx)
     }
 }
 
+#define BIT8_FMTS \
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, \
+    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, \
+    AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, \
+    AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, \
+    AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, \
+    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+
+#define BIT9_FMTS \
+    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GRAY9, \
+    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, \
+    AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+
+#define BIT10_FMTS \
+    AV_PIX_FMT_GRAY10, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, \
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, \
+    AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+
+#define BIT12_FMTS \
+    AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12, \
+    AV_PIX_FMT_GRAY12, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRP12,
+
+#define BIT14_FMTS /* formats with 14-bit components, selected when the d option is 14 */ \
+    AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, \
+    AV_PIX_FMT_GRAY14, AV_PIX_FMT_GBRP14,
+
+#define BIT16_FMTS \
+    AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, \
+    AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, \
+    AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_GRAY16,
+
 static int query_formats(AVFilterContext *ctx)
 {
-    static const enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
-        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
-        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
-        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
-        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
-        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
-        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
-        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
-        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
-        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
-        AV_PIX_FMT_GBRP12,
-        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
+    LUT2Context *s = ctx->priv;
+    static const enum AVPixelFormat all_pix_fmts[] = {
+        BIT8_FMTS
+        BIT9_FMTS
+        BIT10_FMTS
+        BIT12_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit8_pix_fmts[] = {
+        BIT8_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit9_pix_fmts[] = {
+        BIT9_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit10_pix_fmts[] = {
+        BIT10_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit12_pix_fmts[] = {
+        BIT12_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit14_pix_fmts[] = {
+        BIT14_FMTS
+        AV_PIX_FMT_NONE
+    };
+    static const enum AVPixelFormat bit16_pix_fmts[] = {
+        BIT16_FMTS
         AV_PIX_FMT_NONE
     };
+    const enum AVPixelFormat *pix_fmts;
+    int ret;
+
+    if (s->tlut2 || !s->odepth)
+        return ff_set_common_formats(ctx, ff_make_format_list(all_pix_fmts));
 
-    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+    ret = ff_formats_ref(ff_make_format_list(all_pix_fmts), &ctx->inputs[0]->out_formats);
+    if (ret < 0)
+        return ret;
+
+    switch (s->odepth) {
+    case 8:  pix_fmts = bit8_pix_fmts;  break;
+    case 9:  pix_fmts = bit9_pix_fmts;  break;
+    case 10: pix_fmts = bit10_pix_fmts; break;
+    case 12: pix_fmts = bit12_pix_fmts; break;
+    case 14: pix_fmts = bit14_pix_fmts; break;
+    case 16: pix_fmts = bit16_pix_fmts; break;
+    default: av_log(ctx, AV_LOG_ERROR, "Unsupported output bit depth %d.\n", s->odepth);
+             return AVERROR(EINVAL);
+    }
+
+    return ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->outputs[0]->in_formats);
 }
 
 static int config_inputx(AVFilterLink *inlink)
@@ -127,11 +199,11 @@ static int config_inputx(AVFilterLink *inlink)
     int hsub = desc->log2_chroma_w;
     int vsub = desc->log2_chroma_h;
 
-    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
-    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
-    s->height[0] = s->height[3] = inlink->h;
-    s->width[1]  = s->width[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
-    s->width[0]  = s->width[3]  = inlink->w;
+    s->nb_planesx = av_pix_fmt_count_planes(inlink->format);
+    s->heightx[1] = s->heightx[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
+    s->heightx[0] = s->heightx[3] = inlink->h;
+    s->widthx[1]  = s->widthx[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
+    s->widthx[0]  = s->widthx[3]  = inlink->w;
 
     s->var_values[VAR_W] = inlink->w;
     s->var_values[VAR_H] = inlink->h;
@@ -151,62 +223,58 @@ static int config_inputy(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     LUT2Context *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int hsub = desc->log2_chroma_w;
+    int vsub = desc->log2_chroma_h;
 
+    s->nb_planesy = av_pix_fmt_count_planes(inlink->format);
     s->depthy = desc->comp[0].depth;
     s->var_values[VAR_BITDEPTHY] = s->depthy;
+    s->heighty[1] = s->heighty[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
+    s->heighty[0] = s->heighty[3] = inlink->h;
+    s->widthy[1]  = s->widthy[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
+    s->widthy[0]  = s->widthy[3]  = inlink->w;
 
     return 0;
 }
 
-static void lut2_8bit(struct LUT2Context *s, AVFrame *out, AVFrame *srcx, AVFrame *srcy)
-{
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
-        const uint16_t *lut = s->lut[p];
-        const uint8_t *srcxx, *srcyy;
-        uint8_t *dst;
-
-        dst   = out->data[p];
-        srcxx = srcx->data[p];
-        srcyy = srcy->data[p];
-
-        for (y = 0; y < s->height[p]; y++) {
-            for (x = 0; x < s->width[p]; x++) {
-                dst[x] = lut[(srcyy[x] << s->depthx) | srcxx[x]];
-            }
-
-            dst   += out->linesize[p];
-            srcxx += srcx->linesize[p];
-            srcyy += srcy->linesize[p];
-        }
-    }
+#define DEFINE_LUT2(zname, xname, yname, ztype, xtype, ytype, zdiv, xdiv, ydiv)  \
+static void lut2_##zname##_##xname##_##yname(struct LUT2Context *s,              \
+                                             AVFrame *out,                       \
+                                             AVFrame *srcx, AVFrame *srcy)       \
+{                                                                                \
+    const int odepth = s->odepth;                                                \
+    int p, y, x;                                                                 \
+    /* ztype/xtype/ytype are the sample types of out, srcx and srcy */           \
+    for (p = 0; p < s->nb_planes; p++) {                                         \
+        const uint16_t *lut = s->lut[p];                                         \
+        const xtype *srcxx;                                                      \
+        const ytype *srcyy;                                                      \
+        ztype *dst;                                                              \
+        /* lut[p] is indexed by (y sample << depthx) | x sample */               \
+        dst   = (ztype *)out->data[p];                                           \
+        srcxx = (const xtype *)srcx->data[p];                                    \
+        srcyy = (const ytype *)srcy->data[p];                                    \
+        /* iterate over heightx[p] rows of widthx[p] samples each */             \
+        for (y = 0; y < s->heightx[p]; y++) {                                    \
+            for (x = 0; x < s->widthx[p]; x++) {                                 \
+                dst[x] = av_clip_uintp2_c(lut[(srcyy[x] << s->depthx) | srcxx[x]], odepth); \
+            }                                                                    \
+            /* linesize is in bytes, so divide by the sample size */             \
+            dst   += out->linesize[p] / zdiv;                                    \
+            srcxx += srcx->linesize[p] / xdiv;                                   \
+            srcyy += srcy->linesize[p] / ydiv;                                   \
+        }                                                                        \
+    }                                                                            \
 }
 
-static void lut2_16bit(struct LUT2Context *s, AVFrame *out, AVFrame *srcx, AVFrame *srcy)
-{
-    int p, y, x;
-
-    for (p = 0; p < s->nb_planes; p++) {
-        const uint16_t *lut = s->lut[p];
-        const uint16_t *srcxx, *srcyy;
-        uint16_t *dst;
-
-        dst   = (uint16_t *)out->data[p];
-        srcxx = (uint16_t *)srcx->data[p];
-        srcyy = (uint16_t *)srcy->data[p];
-
-        for (y = 0; y < s->height[p]; y++) {
-            for (x = 0; x < s->width[p]; x++) {
-                dst[x] = lut[(srcyy[x] << s->depthx) | srcxx[x]];
-            }
-
-            dst   += out->linesize[p]  / 2;
-            srcxx += srcx->linesize[p] / 2;
-            srcyy += srcy->linesize[p] / 2;
-        }
-    }
-}
+DEFINE_LUT2(8,   8,  8,  uint8_t,  uint8_t,  uint8_t, 1, 1, 1) /* args: out/x/y name suffix, out/x/y sample type, out/x/y bytes per sample */
+DEFINE_LUT2(8,   8, 16,  uint8_t,  uint8_t, uint16_t, 1, 1, 2)
+DEFINE_LUT2(8,  16,  8,  uint8_t, uint16_t,  uint8_t, 1, 2, 1)
+DEFINE_LUT2(8,  16, 16,  uint8_t, uint16_t, uint16_t, 1, 2, 2)
+DEFINE_LUT2(16,  8,  8, uint16_t,  uint8_t,  uint8_t, 2, 1, 1)
+DEFINE_LUT2(16,  8, 16, uint16_t,  uint8_t, uint16_t, 2, 1, 2)
+DEFINE_LUT2(16, 16,  8, uint16_t, uint16_t,  uint8_t, 2, 2, 1)
+DEFINE_LUT2(16, 16, 16, uint16_t, uint16_t, uint16_t, 2, 2, 2)
 
 static int process_frame(FFFrameSync *fs)
 {
@@ -245,8 +313,27 @@ static int config_output(AVFilterLink *outlink)
     int p, ret;
 
     s->depth = s->depthx + s->depthy;
-
-    s->lut2 = s->depth > 16 ? lut2_16bit : lut2_8bit;
+    s->nb_planes = s->nb_planesx;
+
+    s->lut2 = s->depth > 16 ? lut2_16_16_16 : lut2_8_8_8;
+    if (s->odepth) {
+        if (s->depthx == 8 && s->depthy == 8 && s->odepth > 8)
+            s->lut2 = lut2_16_8_8;
+        if (s->depthx > 8 && s->depthy == 8 && s->odepth > 8)
+            s->lut2 = lut2_16_16_8;
+        if (s->depthx == 8 && s->depthy > 8 && s->odepth > 8)
+            s->lut2 = lut2_16_8_16;
+        if (s->depthx == 8 && s->depthy == 8 && s->odepth == 8)
+            s->lut2 = lut2_8_8_8;
+        if (s->depthx > 8 && s->depthy == 8 && s->odepth == 8)
+            s->lut2 = lut2_8_16_8;
+        if (s->depthx == 8 && s->depthy > 8 && s->odepth == 8)
+            s->lut2 = lut2_8_8_16;
+        if (s->depthx > 8 && s->depthy > 8 && s->odepth == 8)
+            s->lut2 = lut2_8_16_16;
+    } else {
+        s->odepth = s->depthx;
+    }
 
     for (p = 0; p < s->nb_planes; p++) {
         s->lut[p] = av_malloc_array(1 << s->depth, sizeof(uint16_t));
@@ -271,7 +358,7 @@ static int config_output(AVFilterLink *outlink)
         }
 
         /* compute the lut */
-        for (y = 0; y < (1 << s->depthx); y++) {
+        for (y = 0; y < (1 << s->depthy); y++) {
             s->var_values[VAR_Y] = y;
             for (x = 0; x < (1 << s->depthx); x++) {
                 s->var_values[VAR_X] = x;
@@ -298,12 +385,28 @@ static int lut2_config_output(AVFilterLink *outlink)
     AVFilterLink *srcx = ctx->inputs[0];
     AVFilterLink *srcy = ctx->inputs[1];
     FFFrameSyncIn *in;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
+    int hsub = desc->log2_chroma_w;
+    int vsub = desc->log2_chroma_h;
     int ret;
 
-    if (srcx->format != srcy->format) {
+    outlink->w = srcx->w;
+    outlink->h = srcx->h;
+    outlink->time_base = srcx->time_base;
+    outlink->sample_aspect_ratio = srcx->sample_aspect_ratio;
+    outlink->frame_rate = srcx->frame_rate;
+
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(outlink->h, vsub);
+    s->height[0] = s->height[3] = outlink->h;
+    s->width[1]  = s->width[2]  = AV_CEIL_RSHIFT(outlink->w, hsub);
+    s->width[0]  = s->width[3]  = outlink->w;
+
+    if (!s->odepth && srcx->format != srcy->format) {
         av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
         return AVERROR(EINVAL);
     }
+
     if (srcx->w != srcy->w || srcx->h != srcy->h) {
         av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
                "(size %dx%d) do not match the corresponding "
@@ -314,11 +417,61 @@ static int lut2_config_output(AVFilterLink *outlink)
         return AVERROR(EINVAL);
     }
 
-    outlink->w = srcx->w;
-    outlink->h = srcx->h;
-    outlink->time_base = srcx->time_base;
-    outlink->sample_aspect_ratio = srcx->sample_aspect_ratio;
-    outlink->frame_rate = srcx->frame_rate;
+    if (s->nb_planesx != s->nb_planesy) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s number of planes "
+               "(%d) do not match the corresponding "
+               "second input link %s number of planes (%d)\n",
+               ctx->input_pads[0].name, s->nb_planesx,
+               ctx->input_pads[1].name, s->nb_planesy);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->nb_planesx != s->nb_planes) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s number of planes "
+               "(%d) do not match the corresponding "
+               "output link %s number of planes (%d)\n",
+               ctx->input_pads[0].name, s->nb_planesx,
+               ctx->output_pads[0].name, s->nb_planes);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->widthx[1] != s->widthy[1] || s->heightx[1] != s->heighty[1]) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s 2nd plane "
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s 2nd plane (size %dx%d)\n",
+               ctx->input_pads[0].name, s->widthx[1], s->heightx[1],
+               ctx->input_pads[1].name,
+               s->widthy[1], s->heighty[1]);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->widthx[2] != s->widthy[2] || s->heightx[2] != s->heighty[2]) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s 3rd plane "
+               "(size %dx%d) do not match the corresponding "
+               "second input link %s 3rd plane (size %dx%d)\n",
+               ctx->input_pads[0].name, s->widthx[2], s->heightx[2],
+               ctx->input_pads[1].name,
+               s->widthy[2], s->heighty[2]);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->widthx[1] != s->width[1] || s->heightx[1] != s->height[1]) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s 2nd plane "
+               "(size %dx%d) do not match the corresponding "
+               "output link %s 2nd plane (size %dx%d)\n",
+               ctx->input_pads[0].name, s->widthx[1], s->heightx[1],
+               ctx->output_pads[0].name, s->width[1], s->height[1]);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->widthx[2] != s->width[2] || s->heightx[2] != s->height[2]) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s 3rd plane "
+               "(size %dx%d) do not match the corresponding "
+               "output link %s 3rd plane (size %dx%d)\n",
+               ctx->input_pads[0].name, s->widthx[2], s->heightx[2],
+               ctx->output_pads[0].name, s->width[2], s->height[2]);
+        return AVERROR(EINVAL);
+    }
 
     if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
         return ret;
@@ -429,7 +582,13 @@ static int tlut2_filter_frame(AVFilterLink *inlink, AVFrame *frame)
     return 0;
 }
 
-#define tlut2_options options
+static const AVOption tlut2_options[] = { /* tlut2's own table: one expression per component, each defaulting to "x" */
+    { "c0", "set component #0 expression", OFFSET(comp_expr_str[0]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
+    { "c1", "set component #1 expression", OFFSET(comp_expr_str[1]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
+    { "c2", "set component #2 expression", OFFSET(comp_expr_str[2]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
+    { "c3", "set component #3 expression", OFFSET(comp_expr_str[3]),  AV_OPT_TYPE_STRING, { .str = "x" }, .flags = FLAGS },
+    { NULL }
+};
 
 AVFILTER_DEFINE_CLASS(tlut2);
 
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 4d985c599ffae..93053ed263462 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -367,7 +367,7 @@ static int parse_dat(AVFilterContext *ctx, FILE *f)
                 struct rgbvec *vec = &lut3d->lut[k][j][i];
                 if (k != 0 || j != 0 || i != 0)
                     NEXT_LINE(skip_line(line));
-                if (sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+                if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
                     return AVERROR_INVALIDDATA;
             }
         }
@@ -384,7 +384,7 @@ static int parse_cube(AVFilterContext *ctx, FILE *f)
     float max[3] = {1.0, 1.0, 1.0};
 
     while (fgets(line, sizeof(line), f)) {
-        if (!strncmp(line, "LUT_3D_SIZE ", 12)) {
+        if (!strncmp(line, "LUT_3D_SIZE", 11)) {
             int i, j, k;
-            const int size = strtol(line + 12, NULL, 0);
+            const int size = strtol(line + 11, NULL, 0); /* start right after the keyword; strtol() skips the separator */
 
@@ -407,13 +407,15 @@ static int parse_cube(AVFilterContext *ctx, FILE *f)
                                 else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
                                 if (!vals)
                                     return AVERROR_INVALIDDATA;
-                                sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
+                                av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
                                 av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
                                        min[0], min[1], min[2], max[0], max[1], max[2]);
                                 goto try_again;
+                            } else if (!strncmp(line, "TITLE", 5)) {
+                                goto try_again;
                             }
                         } while (skip_line(line));
-                        if (sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
+                        if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3)
                             return AVERROR_INVALIDDATA;
                         vec->r *= max[0] - min[0];
                         vec->g *= max[1] - min[1];
@@ -446,7 +448,7 @@ static int parse_3dl(AVFilterContext *ctx, FILE *f)
                 struct rgbvec *vec = &lut3d->lut[k][j][i];
 
                 NEXT_LINE(skip_line(line));
-                if (sscanf(line, "%d %d %d", &r, &g, &b) != 3)
+                if (av_sscanf(line, "%d %d %d", &r, &g, &b) != 3)
                     return AVERROR_INVALIDDATA;
                 vec->r = r / scale;
                 vec->g = g / scale;
@@ -510,7 +512,7 @@ static int parse_m3d(AVFilterContext *ctx, FILE *f)
                 float val[3];
 
                 NEXT_LINE(0);
-                if (sscanf(line, "%f %f %f", val, val + 1, val + 2) != 3)
+                if (av_sscanf(line, "%f %f %f", val, val + 1, val + 2) != 3)
                     return AVERROR_INVALIDDATA;
                 vec->r = val[rgb_map[0]] * scale;
                 vec->g = val[rgb_map[1]] * scale;
@@ -983,6 +985,8 @@ enum interp_1d_mode {
     INTERPOLATE_1D_NEAREST,
     INTERPOLATE_1D_LINEAR,
     INTERPOLATE_1D_CUBIC,
+    INTERPOLATE_1D_COSINE,
+    INTERPOLATE_1D_SPLINE,
     NB_INTERP_1D_MODE
 };
 
@@ -1023,7 +1027,7 @@ static int parse_cube_1d(AVFilterContext *ctx, FILE *f)
     float max[3] = {1.0, 1.0, 1.0};
 
     while (fgets(line, sizeof(line), f)) {
-        if (!strncmp(line, "LUT_1D_SIZE ", 12)) {
-            const int size = strtol(line + 12, NULL, 0);
+        if (!strncmp(line, "LUT_1D_SIZE", 11)) {
+            const int size = strtol(line + 11, NULL, 0); /* start right after the keyword; strtol() skips the separator */
             int i;
 
@@ -1042,18 +1046,20 @@ static int parse_cube_1d(AVFilterContext *ctx, FILE *f)
                         else if (!strncmp(line + 7, "MAX ", 4)) vals = max;
                         if (!vals)
                             return AVERROR_INVALIDDATA;
-                        sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
+                        av_sscanf(line + 11, "%f %f %f", vals, vals + 1, vals + 2);
                         av_log(ctx, AV_LOG_DEBUG, "min: %f %f %f | max: %f %f %f\n",
                                min[0], min[1], min[2], max[0], max[1], max[2]);
                         goto try_again;
                     } else if (!strncmp(line, "LUT_1D_INPUT_RANGE ", 19)) {
-                        sscanf(line + 19, "%f %f", min, max);
+                        av_sscanf(line + 19, "%f %f", min, max);
                         min[1] = min[2] = min[0];
                         max[1] = max[2] = max[0];
                         goto try_again;
+                    } else if (!strncmp(line, "TITLE", 5)) {
+                        goto try_again;
                     }
                 } while (skip_line(line));
-                if (sscanf(line, "%f %f %f", &lut1d->lut[0][i], &lut1d->lut[1][i], &lut1d->lut[2][i]) != 3)
+                if (av_sscanf(line, "%f %f %f", &lut1d->lut[0][i], &lut1d->lut[1][i], &lut1d->lut[2][i]) != 3)
                     return AVERROR_INVALIDDATA;
                 lut1d->lut[0][i] *= max[0] - min[0];
                 lut1d->lut[1][i] *= max[1] - min[1];
@@ -1070,7 +1076,9 @@ static const AVOption lut1d_options[] = {
     { "interp", "select interpolation mode", OFFSET(interpolation),    AV_OPT_TYPE_INT, {.i64=INTERPOLATE_1D_LINEAR}, 0, NB_INTERP_1D_MODE-1, FLAGS, "interp_mode" },
         { "nearest", "use values from the nearest defined points", 0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_NEAREST},   INT_MIN, INT_MAX, FLAGS, "interp_mode" },
         { "linear",  "use values from the linear interpolation",   0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_LINEAR},    INT_MIN, INT_MAX, FLAGS, "interp_mode" },
+        { "cosine",  "use values from the cosine interpolation",   0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_COSINE},    INT_MIN, INT_MAX, FLAGS, "interp_mode" },
         { "cubic",   "use values from the cubic interpolation",    0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_CUBIC},     INT_MIN, INT_MAX, FLAGS, "interp_mode" },
+        { "spline",  "use values from the spline interpolation",   0, AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_1D_SPLINE},    INT_MIN, INT_MAX, FLAGS, "interp_mode" },
     { NULL }
 };
 
@@ -1096,6 +1104,19 @@ static inline float interp_1d_linear(const LUT1DContext *lut1d,
     return lerpf(p, n, d);
 }
 
+static inline float interp_1d_cosine(const LUT1DContext *lut1d,
+                                     int idx, const float s)
+{
+    /* Blend the two neighbouring table entries with a half-cosine weight
+     * instead of the plain fractional distance. */
+    const float *table = lut1d->lut[idx];
+    const int lo = PREV(s), hi = NEXT1D(s);
+    const float frac = s - lo;
+    const float weight = (1.f - cosf(frac * M_PI)) * .5f;
+
+    return lerpf(table[lo], table[hi], weight);
+}
+
 static inline float interp_1d_cubic(const LUT1DContext *lut1d,
                                     int idx, const float s)
 {
@@ -1119,6 +1140,27 @@ static inline float interp_1d_cubic(const LUT1DContext *lut1d,
     return a0 * mu * mu2 + a1 * mu2 + a2 * mu + a3;
 }
 
+static inline float interp_1d_spline(const LUT1DContext *lut1d,
+                                     int idx, const float s)
+{
+    /* Cubic through four neighbouring samples; the two outer indices are
+     * clamped to the table bounds (coefficients are in Catmull-Rom form). */
+    const float *table = lut1d->lut[idx];
+    const int lo = PREV(s), hi = NEXT1D(s);
+    const float t = s - lo;
+
+    const float y0 = table[FFMAX(lo - 1, 0)];
+    const float y1 = table[lo];
+    const float y2 = table[hi];
+    const float y3 = table[FFMIN(hi + 1, lut1d->lutsize - 1)];
+
+    const float c1 = .5f * (y2 - y0);
+    const float c2 = y0 - 2.5f * y1 + 2.f * y2 - .5f * y3;
+    const float c3 = .5f * (y3 - y0) + 1.5f * (y1 - y2);
+
+    return ((c3 * t + c2) * t + c1) * t + y1;
+}
+
 #define DEFINE_INTERP_FUNC_PLANAR_1D(name, nbits, depth)                     \
 static int interp_1d_##nbits##_##name##_p##depth(AVFilterContext *ctx,       \
                                                  void *arg, int jobnr,       \
@@ -1179,27 +1221,39 @@ static int interp_1d_##nbits##_##name##_p##depth(AVFilterContext *ctx,       \
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     8, 8)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      8, 8)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      8, 8)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       8, 8)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      8, 8)
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 9)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 9)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      16, 9)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 9)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      16, 9)
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 10)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 10)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      16, 10)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 10)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      16, 10)
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 12)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 12)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      16, 12)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 12)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      16, 12)
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 14)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 14)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      16, 14)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 14)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      16, 14)
 
 DEFINE_INTERP_FUNC_PLANAR_1D(nearest,     16, 16)
 DEFINE_INTERP_FUNC_PLANAR_1D(linear,      16, 16)
+DEFINE_INTERP_FUNC_PLANAR_1D(cosine,      16, 16)
 DEFINE_INTERP_FUNC_PLANAR_1D(cubic,       16, 16)
+DEFINE_INTERP_FUNC_PLANAR_1D(spline,      16, 16)
 
 #define DEFINE_INTERP_FUNC_1D(name, nbits)                                   \
 static int interp_1d_##nbits##_##name(AVFilterContext *ctx, void *arg,       \
@@ -1247,11 +1301,15 @@ static int interp_1d_##nbits##_##name(AVFilterContext *ctx, void *arg,       \
 
 DEFINE_INTERP_FUNC_1D(nearest,     8)
 DEFINE_INTERP_FUNC_1D(linear,      8)
+DEFINE_INTERP_FUNC_1D(cosine,      8)
 DEFINE_INTERP_FUNC_1D(cubic,       8)
+DEFINE_INTERP_FUNC_1D(spline,      8)
 
 DEFINE_INTERP_FUNC_1D(nearest,     16)
 DEFINE_INTERP_FUNC_1D(linear,      16)
+DEFINE_INTERP_FUNC_1D(cosine,      16)
 DEFINE_INTERP_FUNC_1D(cubic,       16)
+DEFINE_INTERP_FUNC_1D(spline,      16)
 
 static int config_input_1d(AVFilterLink *inlink)
 {
@@ -1303,7 +1361,9 @@ static int config_input_1d(AVFilterLink *inlink)
     switch (lut1d->interpolation) {
     case INTERPOLATE_1D_NEAREST:     SET_FUNC_1D(nearest);  break;
     case INTERPOLATE_1D_LINEAR:      SET_FUNC_1D(linear);   break;
+    case INTERPOLATE_1D_COSINE:      SET_FUNC_1D(cosine);   break;
     case INTERPOLATE_1D_CUBIC:       SET_FUNC_1D(cubic);    break;
+    case INTERPOLATE_1D_SPLINE:      SET_FUNC_1D(spline);   break;
     default:
         av_assert0(0);
     }
diff --git a/libavfilter/vf_maskfun.c b/libavfilter/vf_maskfun.c
new file mode 100644
index 0000000000000..a8c6466d221f0
--- /dev/null
+++ b/libavfilter/vf_maskfun.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct MaskFunContext {
+    const AVClass *class;
+
+    int low, high;      ///< thresholds: samples <= low become 0, samples > high become max
+    int planes;         ///< bitmask of processed planes (bit p selects plane p)
+    int fill;           ///< sample value written into the "empty" frame
+    int sum;            ///< per-sample factor used to derive max_sum
+
+    int linesize[4];
+    int width[4], height[4];
+    int nb_planes;
+    int depth;
+    int max;            ///< largest sample value for the input bit depth
+    uint64_t max_sum;   ///< sample total at which a frame is replaced by "empty"
+
+    AVFrame *empty;     ///< pre-filled frame owned by the filter, cloned on output
+    int (*getsum)(AVFilterContext *ctx, AVFrame *out);
+    int (*maskfun)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+} MaskFunContext;
+
+#define OFFSET(x) offsetof(MaskFunContext, x)
+#define VF AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption maskfun_options[] = {
+    { "low",    "set low threshold",  OFFSET(low),    AV_OPT_TYPE_INT, {.i64=10},  0, UINT16_MAX, VF },
+    { "high",   "set high threshold", OFFSET(high),   AV_OPT_TYPE_INT, {.i64=10},  0, UINT16_MAX, VF },
+    { "planes", "set planes",         OFFSET(planes), AV_OPT_TYPE_INT, {.i64=0xF}, 0, 0xF,        VF },
+    { "fill",   "set fill value",     OFFSET(fill),   AV_OPT_TYPE_INT, {.i64=0},   0, UINT16_MAX, VF },
+    { "sum",    "set sum value",      OFFSET(sum),    AV_OPT_TYPE_INT, {.i64=10},  0, UINT16_MAX, VF },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(maskfun);
+
+/* Advertise the pixel formats (8 to 16 bits per sample) the filter accepts. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+
+    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+}
+
+/**
+ * Per-frame entry point: when getsum() reports that the selected planes reach
+ * max_sum, a clone of the pre-filled "empty" frame (carrying the input pts) is
+ * sent instead; otherwise the input frame is thresholded in place by the
+ * slice-threaded maskfun() callback and passed on.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    MaskFunContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+
+    if (s->getsum(ctx, frame)) {
+        AVFrame *out = av_frame_clone(s->empty);
+
+        if (!out) {
+            av_frame_free(&frame);
+            return AVERROR(ENOMEM);
+        }
+        out->pts = frame->pts;
+        av_frame_free(&frame);
+
+        return ff_filter_frame(outlink, out);
+    }
+
+    ctx->internal->execute(ctx, s->maskfun, frame, NULL,
+                           FFMIN(s->height[1], ff_filter_get_nb_threads(ctx)));
+
+    return ff_filter_frame(outlink, frame);
+}
+
+/*
+ * getsum<name>(): add up the samples of every selected plane and return 1 as
+ * soon as the running total, checked after each row, reaches s->max_sum;
+ * return 0 otherwise.
+ */
+#define GETSUM(name, type, div)                              \
+static int getsum##name(AVFilterContext *ctx, AVFrame *out)  \
+{                                                            \
+    MaskFunContext *s = ctx->priv;                           \
+    uint64_t sum = 0;                                        \
+    int p;                                                   \
+                                                             \
+    for (p = 0; p < s->nb_planes; p++) {                     \
+        const int linesize = out->linesize[p] / div;         \
+        const int w = s->width[p];                           \
+        const int h = s->height[p];                          \
+        type *dst = (type *)out->data[p];                    \
+                                                             \
+        if (!((1 << p) & s->planes))                         \
+            continue;                                        \
+                                                             \
+        for (int y = 0; y < h; y++) {                        \
+            for (int x = 0; x < w; x++)                      \
+                sum += dst[x];                               \
+            if (sum >= s->max_sum)                           \
+                return 1;                                    \
+            dst += linesize;                                 \
+        }                                                    \
+    }                                                        \
+                                                             \
+    return 0;                                                \
+}
+
+GETSUM(8, uint8_t, 1)
+GETSUM(16, uint16_t, 2)
+
+/*
+ * maskfun<name>(): slice worker. For every selected plane, rows
+ * [slice_start, slice_end) are rewritten in place: samples <= low become 0,
+ * samples > high become s->max, anything in between is left untouched.
+ */
+#define MASKFUN(name, type, div)                             \
+static int maskfun##name(AVFilterContext *ctx, void *arg,    \
+                         int jobnr, int nb_jobs)             \
+{                                                            \
+    MaskFunContext *s = ctx->priv;                           \
+    AVFrame *out = arg;                                      \
+    const int low = s->low;                                  \
+    const int high = s->high;                                \
+    const int max = s->max;                                  \
+    int p;                                                   \
+                                                             \
+    for (p = 0; p < s->nb_planes; p++) {                     \
+        const int linesize = out->linesize[p] / div;         \
+        const int w = s->width[p];                           \
+        const int h = s->height[p];                          \
+        const int slice_start = (h * jobnr) / nb_jobs;       \
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;     \
+        type *dst = (type *)out->data[p] + slice_start * linesize; \
+                                                             \
+        if (!((1 << p) & s->planes))                         \
+            continue;                                        \
+                                                             \
+        for (int y = slice_start; y < slice_end; y++) {      \
+            for (int x = 0; x < w; x++) {                    \
+                if (dst[x] <= low)                           \
+                    dst[x] = 0;                              \
+                else if (dst[x] > high)                      \
+                    dst[x] = max;                            \
+            }                                                \
+                                                             \
+            dst += linesize;                                 \
+        }                                                    \
+    }                                                        \
+                                                             \
+    return 0;                                                \
+}
+
+MASKFUN(8, uint8_t, 1)
+MASKFUN(16, uint16_t, 2)
+
+/**
+ * Cache plane geometry and bit depth, pick the 8/16-bit workers, (re)build the
+ * constant "empty" frame and derive max_sum from the selected planes.
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    MaskFunContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int vsub, hsub, ret;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
+        return ret;
+
+    hsub = desc->log2_chroma_w;
+    vsub = desc->log2_chroma_h;
+    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
+    s->height[0] = s->height[3] = inlink->h;
+    s->width[1]  = s->width[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
+    s->width[0]  = s->width[3]  = inlink->w;
+
+    s->depth = desc->comp[0].depth;
+    s->max = (1 << s->depth) - 1;
+    s->fill = FFMIN(s->fill, s->max);
+
+    if (s->depth == 8) {
+        s->maskfun = maskfun8;
+        s->getsum = getsum8;
+    } else {
+        s->maskfun = maskfun16;
+        s->getsum = getsum16;
+    }
+
+    /* release a frame left over from an earlier configuration so it cannot leak */
+    av_frame_free(&s->empty);
+    s->empty = ff_get_video_buffer(inlink, inlink->w, inlink->h);
+    if (!s->empty)
+        return AVERROR(ENOMEM);
+
+    if (s->depth == 8) {
+        for (int p = 0; p < s->nb_planes; p++) {
+            uint8_t *dst = s->empty->data[p];
+
+            for (int y = 0; y < s->height[p]; y++) {
+                memset(dst, s->fill, s->width[p]);
+                dst += s->empty->linesize[p];
+            }
+        }
+    } else {
+        for (int p = 0; p < s->nb_planes; p++) {
+            uint16_t *dst = (uint16_t *)s->empty->data[p];
+
+            for (int y = 0; y < s->height[p]; y++) {
+                for (int x = 0; x < s->width[p]; x++)
+                    dst[x] = s->fill;
+                dst += s->empty->linesize[p] / 2;
+            }
+        }
+    }
+
+    s->max_sum = 0;
+    for (int p = 0; p < s->nb_planes; p++) {
+        if (!((1 << p) & s->planes))
+            continue;
+        s->max_sum += (uint64_t)s->sum * s->width[p] * s->height[p];
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MaskFunContext *s = ctx->priv;
+
+    /* s->empty is allocated in config_input() and owned by the filter */
+    av_frame_free(&s->empty);
+}
+
+static const AVFilterPad maskfun_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .filter_frame   = filter_frame,
+        .config_props   = config_input,
+        .needs_writable = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad maskfun_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_maskfun = {
+    .name          = "maskfun",
+    .description   = NULL_IF_CONFIG_SMALL("Create Mask."),
+    .priv_size     = sizeof(MaskFunContext),
+    .query_formats = query_formats,
+    .uninit        = uninit,
+    .inputs        = maskfun_inputs,
+    .outputs       = maskfun_outputs,
+    .priv_class    = &maskfun_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_minterpolate.c b/libavfilter/vf_minterpolate.c
index c6a5e63f90beb..b0bb238ade7b4 100644
--- a/libavfilter/vf_minterpolate.c
+++ b/libavfilter/vf_minterpolate.c
@@ -26,11 +26,11 @@
 #include "libavutil/motion_vector.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
-#include "libavutil/pixelutils.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "scene_sad.h"
 
 #define ME_MODE_BIDIR 0
 #define ME_MODE_BILAT 1
@@ -188,7 +188,7 @@ typedef struct MIContext {
 
     int scd_method;
     int scene_changed;
-    av_pixelutils_sad_fn sad;
+    ff_scene_sad_fn sad;
     double prev_mafd;
     double scd_threshold;
 
@@ -383,7 +383,7 @@ static int config_input(AVFilterLink *inlink)
     }
 
     if (mi_ctx->scd_method == SCD_METHOD_FDIFF) {
-        mi_ctx->sad = av_pixelutils_get_sad_fn(3, 3, 2, mi_ctx);
+        mi_ctx->sad = ff_scene_sad_get_fn(8);
         if (!mi_ctx->sad)
             return AVERROR(EINVAL);
     }
@@ -826,19 +826,15 @@ static int inject_frame(AVFilterLink *inlink, AVFrame *avf_in)
 static int detect_scene_change(MIContext *mi_ctx)
 {
     AVMotionEstContext *me_ctx = &mi_ctx->me_ctx;
-    int x, y;
-    int linesize = me_ctx->linesize;
     uint8_t *p1 = mi_ctx->frames[1].avf->data[0];
+    ptrdiff_t linesize1 = mi_ctx->frames[1].avf->linesize[0];
     uint8_t *p2 = mi_ctx->frames[2].avf->data[0];
+    ptrdiff_t linesize2 = mi_ctx->frames[2].avf->linesize[0];
 
     if (mi_ctx->scd_method == SCD_METHOD_FDIFF) {
         double ret = 0, mafd, diff;
-        int64_t sad;
-
-        for (sad = y = 0; y < me_ctx->height; y += 8)
-            for (x = 0; x < linesize; x += 8)
-                sad += mi_ctx->sad(p1 + x + y * linesize, linesize, p2 + x + y * linesize, linesize);
-
+        uint64_t sad;
+        mi_ctx->sad(p1, linesize1, p2, linesize2, me_ctx->width, me_ctx->height, &sad);
         emms_c();
         mafd = (double) sad / (me_ctx->height * me_ctx->width * 3);
         diff = fabs(mafd - mi_ctx->prev_mafd);
diff --git a/libavfilter/vf_misc_vaapi.c b/libavfilter/vf_misc_vaapi.c
index 30b808a993460..e227c9ff6b9a3 100644
--- a/libavfilter/vf_misc_vaapi.c
+++ b/libavfilter/vf_misc_vaapi.c
@@ -174,7 +174,7 @@ static int misc_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
         ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
     params.output_region = NULL;
-    params.output_background_color = 0xff000000;
+    params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK;
     params.output_color_standard = params.surface_color_standard;
 
     params.pipeline_flags = 0;
diff --git a/libavfilter/vf_mix.c b/libavfilter/vf_mix.c
index d0cc7cb83071f..873bef9dc3c82 100644
--- a/libavfilter/vf_mix.c
+++ b/libavfilter/vf_mix.c
@@ -108,7 +108,7 @@ static av_cold int init(AVFilterContext *ctx)
             break;
 
         p = NULL;
-        sscanf(arg, "%f", &s->weights[i]);
+        av_sscanf(arg, "%f", &s->weights[i]);
         s->wfactor += s->weights[i];
         last = i;
     }
@@ -348,16 +348,27 @@ static int tmix_filter_frame(AVFilterLink *inlink, AVFrame *in)
     ThreadData td;
     AVFrame *out;
 
+    if (s->nb_inputs == 1)
+        return ff_filter_frame(outlink, in);
+
     if (s->nb_frames < s->nb_inputs) {
         s->frames[s->nb_frames] = in;
         s->nb_frames++;
-        return 0;
+        if (s->nb_frames < s->nb_inputs)
+            return 0;
     } else {
         av_frame_free(&s->frames[0]);
         memmove(&s->frames[0], &s->frames[1], sizeof(*s->frames) * (s->nb_inputs - 1));
         s->frames[s->nb_inputs - 1] = in;
     }
 
+    if (ctx->is_disabled) {
+        out = av_frame_clone(s->frames[0]);
+        if (!out)
+            return AVERROR(ENOMEM);
+        return ff_filter_frame(outlink, out);
+    }
+
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out)
         return AVERROR(ENOMEM);
@@ -371,7 +382,7 @@ static int tmix_filter_frame(AVFilterLink *inlink, AVFrame *in)
 }
 
 static const AVOption tmix_options[] = {
-    { "frames", "set number of successive frames to mix", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=3}, 2, 128, .flags = FLAGS },
+    { "frames", "set number of successive frames to mix", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=3}, 1, 128, .flags = FLAGS },
     { "weights", "set weight for each frame", OFFSET(weights_str), AV_OPT_TYPE_STRING, {.str="1 1 1"}, 0, 0, .flags = FLAGS },
     { "scale", "set scale", OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=0}, 0, INT16_MAX, .flags = FLAGS },
     { NULL },
@@ -398,7 +409,7 @@ AVFilter ff_vf_tmix = {
     .inputs        = inputs,
     .init          = init,
     .uninit        = uninit,
-    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };
 
 #endif /* CONFIG_TMIX_FILTER */
diff --git a/libavfilter/vf_neighbor.c b/libavfilter/vf_neighbor.c
index 2db1e5e57c3ca..e50d4b4ed06d1 100644
--- a/libavfilter/vf_neighbor.c
+++ b/libavfilter/vf_neighbor.c
@@ -41,10 +41,12 @@ typedef struct NContext {
     int coordinates;
 
     int depth;
+    int max;
     int bpc;
 
     void (*filter)(uint8_t *dst, const uint8_t *p1, int width,
-                   int threshold, const uint8_t *coordinates[], int coord);
+                   int threshold, const uint8_t *coordinates[], int coord,
+                   int maxc);
 } NContext;
 
 static int query_formats(AVFilterContext *ctx)
@@ -74,7 +76,8 @@ static int query_formats(AVFilterContext *ctx)
 }
 
 static void erosion(uint8_t *dst, const uint8_t *p1, int width,
-                    int threshold, const uint8_t *coordinates[], int coord)
+                    int threshold, const uint8_t *coordinates[], int coord,
+                    int maxc)
 {
     int x, i;
 
@@ -94,7 +97,8 @@ static void erosion(uint8_t *dst, const uint8_t *p1, int width,
 }
 
 static void erosion16(uint8_t *dstp, const uint8_t *p1, int width,
-                      int threshold, const uint8_t *coordinates[], int coord)
+                      int threshold, const uint8_t *coordinates[], int coord,
+                      int maxc)
 {
     uint16_t *dst = (uint16_t *)dstp;
     int x, i;
@@ -115,7 +119,8 @@ static void erosion16(uint8_t *dstp, const uint8_t *p1, int width,
 }
 
 static void dilation(uint8_t *dst, const uint8_t *p1, int width,
-                     int threshold, const uint8_t *coordinates[], int coord)
+                     int threshold, const uint8_t *coordinates[], int coord,
+                     int maxc)
 {
     int x, i;
 
@@ -135,14 +140,15 @@ static void dilation(uint8_t *dst, const uint8_t *p1, int width,
 }
 
 static void dilation16(uint8_t *dstp, const uint8_t *p1, int width,
-                       int threshold, const uint8_t *coordinates[], int coord)
+                       int threshold, const uint8_t *coordinates[], int coord,
+                       int maxc)
 {
     uint16_t *dst = (uint16_t *)dstp;
     int x, i;
 
     for (x = 0; x < width; x++) {
         int max = AV_RN16A(&p1[x * 2]);
-        int limit = FFMIN(max + threshold, 255);
+        int limit = FFMIN(max + threshold, maxc);
 
         for (i = 0; i < 8; i++) {
             if (coord & (1 << i)) {
@@ -156,7 +162,8 @@ static void dilation16(uint8_t *dstp, const uint8_t *p1, int width,
 }
 
 static void deflate(uint8_t *dst, const uint8_t *p1, int width,
-                    int threshold, const uint8_t *coordinates[], int coord)
+                    int threshold, const uint8_t *coordinates[], int coord,
+                    int maxc)
 {
     int x, i;
 
@@ -171,7 +178,8 @@ static void deflate(uint8_t *dst, const uint8_t *p1, int width,
 }
 
 static void deflate16(uint8_t *dstp, const uint8_t *p1, int width,
-                      int threshold, const uint8_t *coordinates[], int coord)
+                      int threshold, const uint8_t *coordinates[], int coord,
+                      int maxc)
 {
     uint16_t *dst = (uint16_t *)dstp;
     int x, i;
@@ -182,12 +190,13 @@ static void deflate16(uint8_t *dstp, const uint8_t *p1, int width,
 
         for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2));
 
-        dst[x] = FFMAX(FFMIN(sum / 8, p1[x]), limit);
+        dst[x] = FFMAX(FFMIN(sum / 8, AV_RN16A(&p1[2 * x])), limit);
     }
 }
 
 static void inflate(uint8_t *dst, const uint8_t *p1, int width,
-                    int threshold, const uint8_t *coordinates[], int coord)
+                    int threshold, const uint8_t *coordinates[], int coord,
+                    int maxc)
 {
     int x, i;
 
@@ -202,18 +211,19 @@ static void inflate(uint8_t *dst, const uint8_t *p1, int width,
 }
 
 static void inflate16(uint8_t *dstp, const uint8_t *p1, int width,
-                      int threshold, const uint8_t *coordinates[], int coord)
+                      int threshold, const uint8_t *coordinates[], int coord,
+                      int maxc) /* maxc: upper clamp for output samples; the caller passes (1 << depth) - 1 */
 {
     uint16_t *dst = (uint16_t *)dstp;
     int x, i;
 
     for (x = 0; x < width; x++) {
         int sum = 0;
-        int limit = FFMIN(AV_RN16A(&p1[2 * x]) + threshold, 255);
+        int limit = FFMIN(AV_RN16A(&p1[2 * x]) + threshold, maxc); /* clamp to maxc instead of the 8-bit constant 255 */
 
         for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2));
 
-        dst[x] = FFMIN(FFMAX(sum / 8, p1[x]), limit);
+        dst[x] = FFMIN(FFMAX(sum / 8, AV_RN16A(&p1[x * 2])), limit); /* p1 is a byte pointer: read the centre sample as 16 bits at byte offset 2*x */
     }
 }
 
@@ -224,6 +234,7 @@ static int config_input(AVFilterLink *inlink)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
     s->depth = desc->comp[0].depth;
+    s->max = (1 << s->depth) - 1;
     s->bpc = (s->depth + 7) / 8;
 
     s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
@@ -285,9 +296,9 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
                                                src + (width - 2) * bpc,                                                      src + (width - 2) * bpc,
                                                src + (width - 2) * bpc + ph * stride, src + (width - 1) * bpc + ph * stride, src + (width - 2) * bpc + ph * stride};
 
-            s->filter(dst,                     src,                     1,         threshold, coordinateslb, s->coordinates);
-            s->filter(dst          + 1  * bpc, src          + 1  * bpc, width - 2, threshold, coordinates,   s->coordinates);
-            s->filter(dst + (width - 1) * bpc, src + (width - 1) * bpc, 1,         threshold, coordinatesrb, s->coordinates);
+            s->filter(dst,                     src,                     1,         threshold, coordinateslb, s->coordinates, s->max);
+            s->filter(dst          + 1  * bpc, src          + 1  * bpc, width - 2, threshold, coordinates,   s->coordinates, s->max);
+            s->filter(dst + (width - 1) * bpc, src + (width - 1) * bpc, 1,         threshold, coordinatesrb, s->coordinates, s->max);
 
             src += stride;
             dst += dstride;
diff --git a/libavfilter/vf_nlmeans.c b/libavfilter/vf_nlmeans.c
index 82e779ce854d5..dcb5a03953345 100644
--- a/libavfilter/vf_nlmeans.c
+++ b/libavfilter/vf_nlmeans.c
@@ -43,9 +43,6 @@ struct weighted_avg {
     float sum;
 };
 
-#define WEIGHT_LUT_NBITS 9
-#define WEIGHT_LUT_SIZE  (1<<WEIGHT_LUT_NBITS)
-
 typedef struct NLMeansContext {
     const AVClass *class;
     int nb_planes;
@@ -62,8 +59,7 @@ typedef struct NLMeansContext {
     ptrdiff_t ii_lz_32;                         // linesize in 32-bit units of the integral image
     struct weighted_avg *wa;                    // weighted average of every pixel
     ptrdiff_t wa_linesize;                      // linesize for wa in struct size unit
-    float weight_lut[WEIGHT_LUT_SIZE];          // lookup table mapping (scaled) patch differences to their associated weights
-    float pdiff_lut_scale;                      // scale factor for patch differences before looking into the LUT
+    float *weight_lut;                          // lookup table mapping (scaled) patch differences to their associated weights
     uint32_t max_meaningful_diff;               // maximum difference considered (if the patch difference is too high we ignore the pixel)
     NLMeansDSPContext dsp;
 } NLMeansContext;
@@ -401,8 +397,7 @@ static int nlmeans_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs
             const uint32_t patch_diff_sq = e - d - b + a;
 
             if (patch_diff_sq < s->max_meaningful_diff) {
-                const unsigned weight_lut_idx = patch_diff_sq * s->pdiff_lut_scale;
-                const float weight = s->weight_lut[weight_lut_idx]; // exp(-patch_diff_sq * s->pdiff_scale)
+                const float weight = s->weight_lut[patch_diff_sq]; // exp(-patch_diff_sq * s->pdiff_scale)
                 wa[x].total_weight += weight;
                 wa[x].sum += weight * src[x];
             }
@@ -526,11 +521,12 @@ static av_cold int init(AVFilterContext *ctx)
     const double h = s->sigma * 10.;
 
     s->pdiff_scale = 1. / (h * h);
-    s->max_meaningful_diff = -log(1/255.) / s->pdiff_scale;
-    s->pdiff_lut_scale = 1./s->max_meaningful_diff * WEIGHT_LUT_SIZE;
-    av_assert0((s->max_meaningful_diff - 1) * s->pdiff_lut_scale < FF_ARRAY_ELEMS(s->weight_lut));
-    for (i = 0; i < WEIGHT_LUT_SIZE; i++)
-        s->weight_lut[i] = exp(-i / s->pdiff_lut_scale * s->pdiff_scale);
+    s->max_meaningful_diff = log(255.) / s->pdiff_scale;
+    s->weight_lut = av_calloc(s->max_meaningful_diff, sizeof(*s->weight_lut));
+    if (!s->weight_lut)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < s->max_meaningful_diff; i++)
+        s->weight_lut[i] = exp(-i * s->pdiff_scale);
 
     CHECK_ODD_FIELD(research_size,   "Luma research window");
     CHECK_ODD_FIELD(patch_size,      "Luma patch");
@@ -558,6 +554,7 @@ static av_cold int init(AVFilterContext *ctx)
 static av_cold void uninit(AVFilterContext *ctx)
 {
     NLMeansContext *s = ctx->priv;
+    av_freep(&s->weight_lut); /* weight_lut is heap-allocated with av_calloc() in init() */
     av_freep(&s->ii_orig);
     av_freep(&s->wa);
 }
diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index ba25893739fc4..0a8f089c0d57d 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -380,15 +380,15 @@ static av_always_inline void blend_slice_packed_rgb(AVFilterContext *ctx,
     uint8_t *S, *sp, *d, *dp;
 
     i = FFMAX(-y, 0);
-    imax = FFMIN(-y + dst_h, src_h);
+    imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h);
 
-    slice_start = (imax * jobnr) / nb_jobs;
-    slice_end = (imax * (jobnr+1)) / nb_jobs;
+    slice_start = i + (imax * jobnr) / nb_jobs;
+    slice_end = i + (imax * (jobnr+1)) / nb_jobs;
 
-    sp = src->data[0] + (i + slice_start)     * src->linesize[0];
-    dp = dst->data[0] + (y + i + slice_start) * dst->linesize[0];
+    sp = src->data[0] + (slice_start)     * src->linesize[0];
+    dp = dst->data[0] + (y + slice_start) * dst->linesize[0];
 
-    for (i = i + slice_start; i < slice_end; i++) {
+    for (i = slice_start; i < slice_end; i++) {
         j = FFMAX(-x, 0);
         S = sp + j     * sstep;
         d = dp + (x+j) * dstep;
@@ -468,19 +468,19 @@ static av_always_inline void blend_plane(AVFilterContext *ctx,
     int slice_start, slice_end;
 
     j = FFMAX(-yp, 0);
-    jmax = FFMIN(-yp + dst_hp, src_hp);
+    jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp);
 
-    slice_start = (jmax * jobnr) / nb_jobs;
-    slice_end = (jmax * (jobnr+1)) / nb_jobs;
+    slice_start = j + (jmax * jobnr) / nb_jobs;
+    slice_end = j + (jmax * (jobnr+1)) / nb_jobs;
 
-    sp = src->data[i] + slice_start * src->linesize[i];
+    sp = src->data[i] + (slice_start) * src->linesize[i];
     dp = dst->data[dst_plane]
                       + (yp + slice_start) * dst->linesize[dst_plane]
                       + dst_offset;
     ap = src->data[3] + (slice_start << vsub) * src->linesize[3];
     dap = dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3];
 
-    for (j = j + slice_start; j < slice_end; j++) {
+    for (j = slice_start; j < slice_end; j++) {
         k = FFMAX(-xp, 0);
         d = dp + (xp+k) * dst_step;
         s = sp + k;
@@ -961,13 +961,13 @@ static int do_blend(FFFrameSync *fs)
                s->var_values[VAR_Y], s->y);
     }
 
-    if (s->x < mainpic->width  && s->x + second->width  >= 0 ||
+    if (s->x < mainpic->width  && s->x + second->width  >= 0 &&
         s->y < mainpic->height && s->y + second->height >= 0) {
         ThreadData td;
 
         td.dst = mainpic;
         td.src = second;
-        ctx->internal->execute(ctx, s->blend_slice, &td, NULL, FFMIN(FFMIN(mainpic->height - s->y, second->height),
+        ctx->internal->execute(ctx, s->blend_slice, &td, NULL, FFMIN(FFMAX(1, FFMIN3(s->y + second->height, FFMIN(second->height, mainpic->height), mainpic->height - s->y)),
                                                                      ff_filter_get_nb_threads(ctx)));
     }
     return ff_filter_frame(ctx->outputs[0], mainpic);
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index 20871786eea41..9aabb594ba9e2 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -160,7 +160,7 @@ static int eval_expr(AVFilterContext *ctx)
 
 static int have_alpha_planar(AVFilterLink *link)
 {
-    enum AVPixelFormat pix_fmt;
+    enum AVPixelFormat pix_fmt = link->format;
     const AVPixFmtDescriptor *desc;
     AVHWFramesContext *fctx;
 
diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
index 5ff73e6b2b0d9..44323782d22a3 100644
--- a/libavfilter/vf_palettegen.c
+++ b/libavfilter/vf_palettegen.c
@@ -245,7 +245,7 @@ static void write_palette(AVFilterContext *ctx, AVFrame *out)
                     av_log(ctx, AV_LOG_WARNING, "Dupped color: %08"PRIX32"\n", pal[x]);
                 last_color = pal[x];
             } else {
-                pal[x] = 0xff000000; // pad with black
+                pal[x] = last_color; // pad with last color
             }
         }
         pal += pal_linesize;
diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
index 604a8af29c655..ed128813d6513 100644
--- a/libavfilter/vf_paletteuse.c
+++ b/libavfilter/vf_paletteuse.c
@@ -119,7 +119,7 @@ static const AVOption paletteuse_options[] = {
     { "diff_mode",   "set frame difference mode",     OFFSET(diff_mode),   AV_OPT_TYPE_INT, {.i64=DIFF_MODE_NONE}, 0, NB_DIFF_MODE-1, FLAGS, "diff_mode" },
         { "rectangle", "process smallest different rectangle", 0, AV_OPT_TYPE_CONST, {.i64=DIFF_MODE_RECTANGLE}, INT_MIN, INT_MAX, FLAGS, "diff_mode" },
     { "new", "take new palette for each output frame", OFFSET(new), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
-    { "alpha_threshold", "set the alpha threshold for transparency", OFFSET(trans_thresh), AV_OPT_TYPE_INT, {.i64=128}, 0, 255 },
+    { "alpha_threshold", "set the alpha threshold for transparency", OFFSET(trans_thresh), AV_OPT_TYPE_INT, {.i64=128}, 0, 255, FLAGS },
 
     /* following are the debug options, not part of the official API */
     { "debug_kdtree", "save Graphviz graph of the kdtree in specified file", OFFSET(dot_filename), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS },
@@ -814,7 +814,7 @@ static void set_processing_window(enum diff_mode diff_mode,
     int width  = cur_src->width;
     int height = cur_src->height;
 
-    if (prv_src && diff_mode == DIFF_MODE_RECTANGLE) {
+    if (prv_src->data[0] && diff_mode == DIFF_MODE_RECTANGLE) {
         int y;
         int x_end = cur_src->width  - 1,
             y_end = cur_src->height - 1;
@@ -911,11 +911,10 @@ static int apply_palette(AVFilterLink *inlink, AVFrame *in, AVFrame **outf)
 
     set_processing_window(s->diff_mode, s->last_in, in,
                           s->last_out, out, &x, &y, &w, &h);
-    av_frame_free(&s->last_in);
-    av_frame_free(&s->last_out);
-    s->last_in  = av_frame_clone(in);
-    s->last_out = av_frame_clone(out);
-    if (!s->last_in || !s->last_out ||
+    av_frame_unref(s->last_in);
+    av_frame_unref(s->last_out);
+    if (av_frame_ref(s->last_in, in) < 0 ||
+        av_frame_ref(s->last_out, out) < 0 ||
         av_frame_make_writable(s->last_in) < 0) {
         av_frame_free(&in);
         av_frame_free(&out);
@@ -1086,6 +1085,14 @@ static av_cold int init(AVFilterContext *ctx)
 {
     PaletteUseContext *s = ctx->priv;
 
+    s->last_in  = av_frame_alloc();
+    s->last_out = av_frame_alloc();
+    if (!s->last_in || !s->last_out) {
+        av_frame_free(&s->last_in);
+        av_frame_free(&s->last_out);
+        return AVERROR(ENOMEM);
+    }
+
     s->set_frame = set_frame_lut[s->color_search_method][s->dither];
 
     if (s->dither == DITHERING_BAYER) {
diff --git a/libavfilter/vf_pixdesctest.c b/libavfilter/vf_pixdesctest.c
index 2d0749e20bc0b..680d1a772ab37 100644
--- a/libavfilter/vf_pixdesctest.c
+++ b/libavfilter/vf_pixdesctest.c
@@ -31,7 +31,7 @@
 
 typedef struct PixdescTestContext {
     const AVPixFmtDescriptor *pix_desc;
-    uint16_t *line;
+    uint32_t *line; /* line buffer handed to av_read_image_line2()/av_write_image_line2() with an element size of 4 */
 } PixdescTestContext;
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -89,17 +89,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         const int h1 = c == 1 || c == 2 ? ch : h;
 
         for (i = 0; i < h1; i++) {
-            av_read_image_line(priv->line,
+            av_read_image_line2(priv->line,
                                (void*)in->data,
                                in->linesize,
                                priv->pix_desc,
-                               0, i, c, w1, 0);
+                               0, i, c, w1, 0, 4);
 
-            av_write_image_line(priv->line,
+            av_write_image_line2(priv->line,
                                 out->data,
                                 out->linesize,
                                 priv->pix_desc,
-                                0, i, c, w1);
+                                0, i, c, w1, 4);
         }
     }
 
diff --git a/libavfilter/vf_procamp_vaapi.c b/libavfilter/vf_procamp_vaapi.c
index 10eccbe97d783..46f3ab6465a49 100644
--- a/libavfilter/vf_procamp_vaapi.c
+++ b/libavfilter/vf_procamp_vaapi.c
@@ -171,7 +171,7 @@ static int procamp_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame
         ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
     params.output_region = NULL;
-    params.output_background_color = 0xff000000;
+    params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK;
     params.output_color_standard = params.surface_color_standard;
 
     params.pipeline_flags = 0;
diff --git a/libavfilter/vf_program_opencl.c b/libavfilter/vf_program_opencl.c
index dfb25652bc255..ec25e931f5624 100644
--- a/libavfilter/vf_program_opencl.c
+++ b/libavfilter/vf_program_opencl.c
@@ -144,7 +144,8 @@ static int program_opencl_run(AVFilterContext *avctx)
             goto fail;
 
         av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
-               "(%zux%zu).\n", plane, global_work[0], global_work[1]);
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               plane, global_work[0], global_work[1]);
 
         cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
                                      global_work, NULL, 0, NULL, NULL);
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 23ac27a7dc4c0..c97a802ddc6d9 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -20,7 +20,6 @@
 * DEALINGS IN THE SOFTWARE.
 */
 
-#include <cuda.h>
 #include <stdio.h>
 #include <string.h>
 
@@ -28,6 +27,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -52,8 +52,13 @@ static const enum AVPixelFormat supported_formats[] = {
 #define BLOCKX 32
 #define BLOCKY 16
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
+
 typedef struct CUDAScaleContext {
     const AVClass *class;
+
+    AVCUDADeviceContext *hwctx;
+
     enum AVPixelFormat in_fmt;
     enum AVPixelFormat out_fmt;
 
@@ -77,7 +82,6 @@ typedef struct CUDAScaleContext {
     char *h_expr;               ///< height expression string
 
     CUcontext   cu_ctx;
-    CUevent     cu_event;
     CUmodule    cu_module;
     CUfunction  cu_func_uchar;
     CUfunction  cu_func_uchar2;
@@ -85,12 +89,7 @@ typedef struct CUDAScaleContext {
     CUfunction  cu_func_ushort;
     CUfunction  cu_func_ushort2;
     CUfunction  cu_func_ushort4;
-    CUtexref    cu_tex_uchar;
-    CUtexref    cu_tex_uchar2;
-    CUtexref    cu_tex_uchar4;
-    CUtexref    cu_tex_ushort;
-    CUtexref    cu_tex_ushort2;
-    CUtexref    cu_tex_ushort4;
+    CUstream    cu_stream;
 
     CUdeviceptr srcBuffer;
     CUdeviceptr dstBuffer;
@@ -255,55 +254,49 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
-    CUresult err;
+    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
     int w, h;
     int ret;
 
     extern char vf_scale_cuda_ptx[];
 
-    err = cuCtxPushCurrent(cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
-        ret = AVERROR_UNKNOWN;
+    s->hwctx = device_hwctx;
+    s->cu_stream = s->hwctx->stream;
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
-    err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
+    if (ret < 0)
+        goto fail;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); /* store the status: the test below must see this call's result, not a stale ret */
+    if (ret < 0)
         goto fail;
-    }
 
-    cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar");
-    cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2");
-    cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4");
-    cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort");
-    cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2");
-    cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4");
-
-    cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex");
-
-    cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER);
-
-    cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR);
-
-    cuCtxPopCurrent(&dummy);
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"));
+    if (ret < 0)
+        goto fail;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"));
+    if (ret < 0)
+        goto fail;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"));
+    if (ret < 0)
+        goto fail;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"));
+    if (ret < 0)
+        goto fail;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"));
+    if (ret < 0)
+        goto fail;
+
+    /* Balances the cuCtxPushCurrent() above; this status is deliberately not stored in ret. */
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     if ((ret = ff_scale_eval_dimensions(s,
                                         s->w_expr, s->h_expr,
@@ -339,29 +332,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     return ret;
 }
 
-static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels,
+static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, /* wraps the source plane in a texture object, launches func on it, returns the CHECK_CU status */
                               uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
                               uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
                               int pixel_size)
 {
-    CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
+    CUDAScaleContext *s = ctx->priv;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
     CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
-    void *args_uchar[] = { &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height };
-    CUDA_ARRAY_DESCRIPTOR desc;
-
-    desc.Width  = src_width;
-    desc.Height = src_height;
-    desc.NumChannels = channels;
-    if (pixel_size == 1) {
-        desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
-    } else {
-        desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
-    }
+    CUtexObject tex = 0; /* 0 means no texture object has been created yet; tested at exit */
+    void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height }; /* kernel arguments: the texture object comes first */
+    int ret;
 
-    cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size);
-    cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL);
+    CUDA_TEXTURE_DESC tex_desc = {
+        .filterMode = CU_TR_FILTER_MODE_LINEAR,
+        .flags = CU_TRSF_READ_AS_INTEGER,
+    };
 
-    return 0;
+    CUDA_RESOURCE_DESC res_desc = {
+        .resType = CU_RESOURCE_TYPE_PITCH2D,
+        .res.pitch2D.format = pixel_size == 1 ? /* pixel_size 1 selects 8-bit samples, anything else 16-bit */
+                              CU_AD_FORMAT_UNSIGNED_INT8 :
+                              CU_AD_FORMAT_UNSIGNED_INT16,
+        .res.pitch2D.numChannels = channels,
+        .res.pitch2D.width = src_width,
+        .res.pitch2D.height = src_height,
+        .res.pitch2D.pitchInBytes = src_pitch, /* NOTE(review): src_pitch is used as a byte count here, whereas the removed code passed src_pitch * pixel_size -- confirm callers */
+        .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
+    };
+
+    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuLaunchKernel(func,
+                                      DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
+                                      BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL)); /* launched on s->cu_stream; the removed code passed stream 0 */
+
+exit:
+    if (tex) /* destroy the texture object only if it was created */
+        CHECK_CU(cu->cuTexObjectDestroy(tex)); /* status not stored, so ret keeps the create/launch result */
+
+    return ret;
 }
 
 static int scalecuda_resize(AVFilterContext *ctx,
@@ -372,59 +384,59 @@ static int scalecuda_resize(AVFilterContext *ctx,
 
     switch (in_frames_ctx->sw_format) {
     case AV_PIX_FMT_YUV420P:
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0], in->width, in->height, in->linesize[0],
                            out->data[0], out->width, out->height, out->linesize[0],
                            1);
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0]+in->linesize[0]*in->height, in->width/2, in->height/2, in->linesize[0]/2,
                            out->data[0]+out->linesize[0]*out->height, out->width/2, out->height/2, out->linesize[0]/2,
                            1);
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0]+ ALIGN_UP((in->linesize[0]*in->height*5)/4, s->tex_alignment), in->width/2, in->height/2, in->linesize[0]/2,
                            out->data[0]+(out->linesize[0]*out->height*5)/4, out->width/2, out->height/2, out->linesize[0]/2,
                            1);
         break;
     case AV_PIX_FMT_YUV444P:
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0], in->width, in->height, in->linesize[0],
                            out->data[0], out->width, out->height, out->linesize[0],
                            1);
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0]+in->linesize[0]*in->height, in->width, in->height, in->linesize[0],
                            out->data[0]+out->linesize[0]*out->height, out->width, out->height, out->linesize[0],
                            1);
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0]+in->linesize[0]*in->height*2, in->width, in->height, in->linesize[0],
                            out->data[0]+out->linesize[0]*out->height*2, out->width, out->height, out->linesize[0],
                            1);
         break;
     case AV_PIX_FMT_NV12:
-        call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0], in->width, in->height, in->linesize[0],
                            out->data[0], out->width, out->height, out->linesize[0],
                            1);
-        call_resize_kernel(s, s->cu_func_uchar2, s->cu_tex_uchar2, 2,
+        call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                            in->data[1], in->width/2, in->height/2, in->linesize[1],
                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width/2, out->height/2, out->linesize[1]/2,
                            1);
         break;
     case AV_PIX_FMT_P010LE:
-        call_resize_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
+        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                            in->data[0], in->width, in->height, in->linesize[0]/2,
                            out->data[0], out->width, out->height, out->linesize[0]/2,
                            2);
-        call_resize_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
+        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2,
                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
                            2);
         break;
     case AV_PIX_FMT_P016LE:
-        call_resize_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
+        call_resize_kernel(ctx, s->cu_func_ushort, 1,
                            in->data[0], in->width, in->height, in->linesize[0] / 2,
                            out->data[0], out->width, out->height, out->linesize[0] / 2,
                            2);
-        call_resize_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
+        call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
                            2);
@@ -463,14 +475,12 @@ static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
 
 static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    AVFilterContext              *ctx = link->dst;
-    CUDAScaleContext               *s = ctx->priv;
-    AVFilterLink             *outlink = ctx->outputs[0];
-    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)s->frames_ctx->data;
-    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    AVFilterContext       *ctx = link->dst;
+    CUDAScaleContext        *s = ctx->priv;
+    AVFilterLink      *outlink = ctx->outputs[0];
+    CudaFunctions          *cu = s->hwctx->internal->cuda_dl;
 
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -480,15 +490,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = cudascale_scale(ctx, out, in);
 
-    cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 5f5ec81989a61..3f3f40546d4bc 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -22,14 +22,8 @@
 
 extern "C" {
 
-texture<unsigned char, 2> uchar_tex;
-texture<uchar2, 2>  uchar2_tex;
-texture<uchar4, 2>  uchar4_tex;
-texture<unsigned short, 2> ushort_tex;
-texture<ushort2, 2>  ushort2_tex;
-texture<ushort4, 2>  ushort4_tex;
-
-__global__ void Subsample_Bilinear_uchar(unsigned char *dst,
+__global__ void Subsample_Bilinear_uchar(cudaTextureObject_t uchar_tex,
+                                    unsigned char *dst,
                                     int dst_width, int dst_height, int dst_pitch,
                                     int src_width, int src_height)
 {
@@ -48,15 +42,16 @@ __global__ void Subsample_Bilinear_uchar(unsigned char *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        int y0 = tex2D(uchar_tex, xi-dx, yi-dy);
-        int y1 = tex2D(uchar_tex, xi+dx, yi-dy);
-        int y2 = tex2D(uchar_tex, xi-dx, yi+dy);
-        int y3 = tex2D(uchar_tex, xi+dx, yi+dy);
+        int y0 = tex2D<unsigned char>(uchar_tex, xi-dx, yi-dy);
+        int y1 = tex2D<unsigned char>(uchar_tex, xi+dx, yi-dy);
+        int y2 = tex2D<unsigned char>(uchar_tex, xi-dx, yi+dy);
+        int y3 = tex2D<unsigned char>(uchar_tex, xi+dx, yi+dy);
         dst[yo*dst_pitch+xo] = (unsigned char)((y0+y1+y2+y3+2) >> 2);
     }
 }
 
-__global__ void Subsample_Bilinear_uchar2(uchar2 *dst,
+__global__ void Subsample_Bilinear_uchar2(cudaTextureObject_t uchar2_tex,
+                                    uchar2 *dst,
                                     int dst_width, int dst_height, int dst_pitch2,
                                     int src_width, int src_height)
 {
@@ -75,10 +70,10 @@ __global__ void Subsample_Bilinear_uchar2(uchar2 *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        uchar2 c0 = tex2D(uchar2_tex, xi-dx, yi-dy);
-        uchar2 c1 = tex2D(uchar2_tex, xi+dx, yi-dy);
-        uchar2 c2 = tex2D(uchar2_tex, xi-dx, yi+dy);
-        uchar2 c3 = tex2D(uchar2_tex, xi+dx, yi+dy);
+        uchar2 c0 = tex2D<uchar2>(uchar2_tex, xi-dx, yi-dy);
+        uchar2 c1 = tex2D<uchar2>(uchar2_tex, xi+dx, yi-dy);
+        uchar2 c2 = tex2D<uchar2>(uchar2_tex, xi-dx, yi+dy);
+        uchar2 c3 = tex2D<uchar2>(uchar2_tex, xi+dx, yi+dy);
         int2 uv;
         uv.x = ((int)c0.x+(int)c1.x+(int)c2.x+(int)c3.x+2) >> 2;
         uv.y = ((int)c0.y+(int)c1.y+(int)c2.y+(int)c3.y+2) >> 2;
@@ -86,7 +81,8 @@ __global__ void Subsample_Bilinear_uchar2(uchar2 *dst,
     }
 }
 
-__global__ void Subsample_Bilinear_uchar4(uchar4 *dst,
+__global__ void Subsample_Bilinear_uchar4(cudaTextureObject_t uchar4_tex,
+                                    uchar4 *dst,
                                     int dst_width, int dst_height, int dst_pitch,
                                     int src_width, int src_height)
 {
@@ -105,10 +101,10 @@ __global__ void Subsample_Bilinear_uchar4(uchar4 *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        uchar4 c0 = tex2D(uchar4_tex, xi-dx, yi-dy);
-        uchar4 c1 = tex2D(uchar4_tex, xi+dx, yi-dy);
-        uchar4 c2 = tex2D(uchar4_tex, xi-dx, yi+dy);
-        uchar4 c3 = tex2D(uchar4_tex, xi+dx, yi+dy);
+        uchar4 c0 = tex2D<uchar4>(uchar4_tex, xi-dx, yi-dy);
+        uchar4 c1 = tex2D<uchar4>(uchar4_tex, xi+dx, yi-dy);
+        uchar4 c2 = tex2D<uchar4>(uchar4_tex, xi-dx, yi+dy);
+        uchar4 c3 = tex2D<uchar4>(uchar4_tex, xi+dx, yi+dy);
         int4 res;
         res.x =  ((int)c0.x+(int)c1.x+(int)c2.x+(int)c3.x+2) >> 2;
         res.y =  ((int)c0.y+(int)c1.y+(int)c2.y+(int)c3.y+2) >> 2;
@@ -119,7 +115,8 @@ __global__ void Subsample_Bilinear_uchar4(uchar4 *dst,
     }
 }
 
-__global__ void Subsample_Bilinear_ushort(unsigned short *dst,
+__global__ void Subsample_Bilinear_ushort(cudaTextureObject_t ushort_tex,
+                                    unsigned short *dst,
                                     int dst_width, int dst_height, int dst_pitch,
                                     int src_width, int src_height)
 {
@@ -138,15 +135,16 @@ __global__ void Subsample_Bilinear_ushort(unsigned short *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        int y0 = tex2D(ushort_tex, xi-dx, yi-dy);
-        int y1 = tex2D(ushort_tex, xi+dx, yi-dy);
-        int y2 = tex2D(ushort_tex, xi-dx, yi+dy);
-        int y3 = tex2D(ushort_tex, xi+dx, yi+dy);
+        int y0 = tex2D<unsigned short>(ushort_tex, xi-dx, yi-dy);
+        int y1 = tex2D<unsigned short>(ushort_tex, xi+dx, yi-dy);
+        int y2 = tex2D<unsigned short>(ushort_tex, xi-dx, yi+dy);
+        int y3 = tex2D<unsigned short>(ushort_tex, xi+dx, yi+dy);
         dst[yo*dst_pitch+xo] = (unsigned short)((y0+y1+y2+y3+2) >> 2);
     }
 }
 
-__global__ void Subsample_Bilinear_ushort2(ushort2 *dst,
+__global__ void Subsample_Bilinear_ushort2(cudaTextureObject_t ushort2_tex,
+                                    ushort2 *dst,
                                     int dst_width, int dst_height, int dst_pitch2,
                                     int src_width, int src_height)
 {
@@ -165,10 +163,10 @@ __global__ void Subsample_Bilinear_ushort2(ushort2 *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        ushort2 c0 = tex2D(ushort2_tex, xi-dx, yi-dy);
-        ushort2 c1 = tex2D(ushort2_tex, xi+dx, yi-dy);
-        ushort2 c2 = tex2D(ushort2_tex, xi-dx, yi+dy);
-        ushort2 c3 = tex2D(ushort2_tex, xi+dx, yi+dy);
+        ushort2 c0 = tex2D<ushort2>(ushort2_tex, xi-dx, yi-dy);
+        ushort2 c1 = tex2D<ushort2>(ushort2_tex, xi+dx, yi-dy);
+        ushort2 c2 = tex2D<ushort2>(ushort2_tex, xi-dx, yi+dy);
+        ushort2 c3 = tex2D<ushort2>(ushort2_tex, xi+dx, yi+dy);
         int2 uv;
         uv.x = ((int)c0.x+(int)c1.x+(int)c2.x+(int)c3.x+2) >> 2;
         uv.y = ((int)c0.y+(int)c1.y+(int)c2.y+(int)c3.y+2) >> 2;
@@ -176,7 +174,8 @@ __global__ void Subsample_Bilinear_ushort2(ushort2 *dst,
     }
 }
 
-__global__ void Subsample_Bilinear_ushort4(ushort4 *dst,
+__global__ void Subsample_Bilinear_ushort4(cudaTextureObject_t ushort4_tex,
+                                    ushort4 *dst,
                                     int dst_width, int dst_height, int dst_pitch,
                                     int src_width, int src_height)
 {
@@ -195,10 +194,10 @@ __global__ void Subsample_Bilinear_ushort4(ushort4 *dst,
         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
         float dx = wh / (0.5f + wh);
         float dy = wv / (0.5f + wv);
-        ushort4 c0 = tex2D(ushort4_tex, xi-dx, yi-dy);
-        ushort4 c1 = tex2D(ushort4_tex, xi+dx, yi-dy);
-        ushort4 c2 = tex2D(ushort4_tex, xi-dx, yi+dy);
-        ushort4 c3 = tex2D(ushort4_tex, xi+dx, yi+dy);
+        ushort4 c0 = tex2D<ushort4>(ushort4_tex, xi-dx, yi-dy);
+        ushort4 c1 = tex2D<ushort4>(ushort4_tex, xi+dx, yi-dy);
+        ushort4 c2 = tex2D<ushort4>(ushort4_tex, xi-dx, yi+dy);
+        ushort4 c3 = tex2D<ushort4>(ushort4_tex, xi+dx, yi+dy);
         int4 res;
         res.x =  ((int)c0.x+(int)c1.x+(int)c2.x+(int)c3.x+2) >> 2;
         res.y =  ((int)c0.y+(int)c1.y+(int)c2.y+(int)c3.y+2) >> 2;
diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c
index 8a277ce8e1d33..a3e085764a88e 100644
--- a/libavfilter/vf_scale_npp.c
+++ b/libavfilter/vf_scale_npp.c
@@ -29,6 +29,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -39,6 +40,8 @@
 #include "scale.h"
 #include "video.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) /* needs locals named ctx and device_hwctx at the call site */
+
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NV12,
@@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = nppscale_scale(ctx, out, in);
 
-    device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_scale_vaapi.c b/libavfilter/vf_scale_vaapi.c
index d6529d5235a66..3699363140d7d 100644
--- a/libavfilter/vf_scale_vaapi.c
+++ b/libavfilter/vf_scale_vaapi.c
@@ -35,10 +35,27 @@ typedef struct ScaleVAAPIContext {
 
     char *output_format_string;
 
+    int   mode;
+
     char *w_expr;      // width expression string
     char *h_expr;      // height expression string
 } ScaleVAAPIContext;
 
+/* Name of a VA_FILTER_SCALING_* mode for log output. */
+static const char *scale_vaapi_mode_name(int mode)
+{
+    if (mode == VA_FILTER_SCALING_DEFAULT)
+        return "DEFAULT";
+    if (mode == VA_FILTER_SCALING_FAST)
+        return "FAST";
+    if (mode == VA_FILTER_SCALING_HQ)
+        return "HQ";
+    if (mode == VA_FILTER_SCALING_NL_ANAMORPHIC)
+        return "NL_ANAMORPHIC";
+    return "Invalid"; /* any value without a known scaling-mode name */
+}
+
+
 static int scale_vaapi_config_output(AVFilterLink *outlink)
 {
     AVFilterLink *inlink     = outlink->src->inputs[0];
@@ -70,6 +87,7 @@ static int scale_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
     AVFilterContext *avctx   = inlink->dst;
     AVFilterLink *outlink    = avctx->outputs[0];
     VAAPIVPPContext *vpp_ctx = avctx->priv;
+    ScaleVAAPIContext *ctx   = avctx->priv;
     AVFrame *output_frame    = NULL;
     VASurfaceID input_surface, output_surface;
     VAProcPipelineParameterBuffer params;
@@ -115,11 +133,11 @@ static int scale_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
         ff_vaapi_vpp_colour_standard(input_frame->colorspace);
 
     params.output_region = 0;
-    params.output_background_color = 0xff000000;
+    params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK;
     params.output_color_standard = params.surface_color_standard;
 
     params.pipeline_flags = 0;
-    params.filter_flags = VA_FILTER_SCALING_HQ;
+    params.filter_flags = ctx->mode;
 
     err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
     if (err < 0)
@@ -131,9 +149,10 @@ static int scale_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
 
     av_frame_free(&input_frame);
 
-    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64"), mode: %s.\n",
            av_get_pix_fmt_name(output_frame->format),
-           output_frame->width, output_frame->height, output_frame->pts);
+           output_frame->width, output_frame->height, output_frame->pts,
+           scale_vaapi_mode_name(ctx->mode));
 
     return ff_filter_frame(outlink, output_frame);
 
@@ -174,6 +193,17 @@ static const AVOption scale_vaapi_options[] = {
       OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
     { "format", "Output video format (software format of hardware frames)",
       OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "mode", "Scaling mode",
+      OFFSET(mode), AV_OPT_TYPE_INT, { .i64 = VA_FILTER_SCALING_HQ },
+      0, VA_FILTER_SCALING_NL_ANAMORPHIC, FLAGS, "mode" }, /* copied into params.filter_flags; HQ by default */
+        { "default", "Use the default (depends on the driver) scaling algorithm",
+          0, AV_OPT_TYPE_CONST, { .i64 = VA_FILTER_SCALING_DEFAULT }, 0, 0, FLAGS, "mode" },
+        { "fast", "Use fast scaling algorithm",
+          0, AV_OPT_TYPE_CONST, { .i64 = VA_FILTER_SCALING_FAST }, 0, 0, FLAGS, "mode" },
+        { "hq", "Use high quality scaling algorithm",
+          0, AV_OPT_TYPE_CONST, { .i64 = VA_FILTER_SCALING_HQ }, 0, 0, FLAGS,  "mode" },
+        { "nl_anamorphic", "Use nonlinear anamorphic scaling algorithm",
+          0, AV_OPT_TYPE_CONST, { .i64 = VA_FILTER_SCALING_NL_ANAMORPHIC }, 0, 0, FLAGS,  "mode" },
     { NULL },
 };
 
diff --git a/libavfilter/vf_setfield.c b/libavfilter/vf_setfield.c
deleted file mode 100644
index f4dc33d7e596e..0000000000000
--- a/libavfilter/vf_setfield.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2012 Stefano Sabatini
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * set field order
- */
-
-#include "libavutil/opt.h"
-#include "avfilter.h"
-#include "internal.h"
-#include "video.h"
-
-enum SetFieldMode {
-    MODE_AUTO = -1,
-    MODE_BFF,
-    MODE_TFF,
-    MODE_PROG,
-};
-
-typedef struct SetFieldContext {
-    const AVClass *class;
-    int mode;                   ///< SetFieldMode
-} SetFieldContext;
-
-#define OFFSET(x) offsetof(SetFieldContext, x)
-#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
-
-static const AVOption setfield_options[] = {
-    {"mode", "select interlace mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_AUTO}, -1, MODE_PROG, FLAGS, "mode"},
-    {"auto", "keep the same input field",  0, AV_OPT_TYPE_CONST, {.i64=MODE_AUTO}, INT_MIN, INT_MAX, FLAGS, "mode"},
-    {"bff",  "mark as bottom-field-first", 0, AV_OPT_TYPE_CONST, {.i64=MODE_BFF},  INT_MIN, INT_MAX, FLAGS, "mode"},
-    {"tff",  "mark as top-field-first",    0, AV_OPT_TYPE_CONST, {.i64=MODE_TFF},  INT_MIN, INT_MAX, FLAGS, "mode"},
-    {"prog", "mark as progressive",        0, AV_OPT_TYPE_CONST, {.i64=MODE_PROG}, INT_MIN, INT_MAX, FLAGS, "mode"},
-    {NULL}
-};
-
-AVFILTER_DEFINE_CLASS(setfield);
-
-static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
-{
-    SetFieldContext *setfield = inlink->dst->priv;
-
-    if (setfield->mode == MODE_PROG) {
-        picref->interlaced_frame = 0;
-    } else if (setfield->mode != MODE_AUTO) {
-        picref->interlaced_frame = 1;
-        picref->top_field_first = setfield->mode;
-    }
-    return ff_filter_frame(inlink->dst->outputs[0], picref);
-}
-
-static const AVFilterPad setfield_inputs[] = {
-    {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
-    },
-    { NULL }
-};
-
-static const AVFilterPad setfield_outputs[] = {
-    {
-        .name = "default",
-        .type = AVMEDIA_TYPE_VIDEO,
-    },
-    { NULL }
-};
-
-AVFilter ff_vf_setfield = {
-    .name        = "setfield",
-    .description = NULL_IF_CONFIG_SMALL("Force field for the output video frame."),
-    .priv_size   = sizeof(SetFieldContext),
-    .priv_class  = &setfield_class,
-    .inputs      = setfield_inputs,
-    .outputs     = setfield_outputs,
-};
diff --git a/libavfilter/vf_setparams.c b/libavfilter/vf_setparams.c
index 8427f98ba824a..fe298e5a06eb2 100644
--- a/libavfilter/vf_setparams.c
+++ b/libavfilter/vf_setparams.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) 2012 Stefano Sabatini
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -22,15 +24,32 @@
 #include "internal.h"
 #include "video.h"
 
+enum SetFieldMode {
+    MODE_AUTO = -1, ///< leave the frame's interlacing flags untouched
+    MODE_BFF,       ///< mark interlaced, bottom field first (stored as top_field_first = 0)
+    MODE_TFF,       ///< mark interlaced, top field first (stored as top_field_first = 1)
+    MODE_PROG,      ///< mark progressive (clears interlaced_frame)
+};
+
 typedef struct SetParamsContext {
     const AVClass *class;
+    int field_mode;      ///< SetFieldMode
     int color_range;
+    int color_primaries; ///< AVCOL_PRI_* to force, or -1 to keep the input value
+    int color_trc;       ///< AVCOL_TRC_* to force, or -1 to keep the input value
+    int colorspace;      ///< AVCOL_SPC_* to force, or -1 to keep the input value
 } SetParamsContext;
 
 #define OFFSET(x) offsetof(SetParamsContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
-static const AVOption setrange_options[] = {
+static const AVOption setparams_options[] = {
+    {"field_mode", "select interlace mode", OFFSET(field_mode), AV_OPT_TYPE_INT, {.i64=MODE_AUTO}, -1, MODE_PROG, FLAGS, "mode"},
+    {"auto", "keep the same input field",  0, AV_OPT_TYPE_CONST, {.i64=MODE_AUTO}, INT_MIN, INT_MAX, FLAGS, "mode"},
+    {"bff",  "mark as bottom-field-first", 0, AV_OPT_TYPE_CONST, {.i64=MODE_BFF},  INT_MIN, INT_MAX, FLAGS, "mode"},
+    {"tff",  "mark as top-field-first",    0, AV_OPT_TYPE_CONST, {.i64=MODE_TFF},  INT_MIN, INT_MAX, FLAGS, "mode"},
+    {"prog", "mark as progressive",        0, AV_OPT_TYPE_CONST, {.i64=MODE_PROG}, INT_MIN, INT_MAX, FLAGS, "mode"},
+
     {"range", "select color range", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=-1},-1, AVCOL_RANGE_NB-1, FLAGS, "range"},
     {"auto",  "keep the same color range",   0, AV_OPT_TYPE_CONST, {.i64=-1},                       0, 0, FLAGS, "range"},
     {"unspecified",                  NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_UNSPECIFIED},  0, 0, FLAGS, "range"},
@@ -41,18 +60,87 @@ static const AVOption setrange_options[] = {
     {"full",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
     {"pc",                           NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
     {"jpeg",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
+
+    {"color_primaries", "select color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_PRI_NB-1, FLAGS, "color_primaries"},
+    {"auto", "keep the same color primaries",  0, AV_OPT_TYPE_CONST, {.i64=-1},                     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt709",                           NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT709},        INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"unknown",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_UNSPECIFIED},  INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt470m",                          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT470M},       INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt470bg",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT470BG},      INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte170m",                       NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE170M},    INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte240m",                       NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE240M},    INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"film",                            NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_FILM},         INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"bt2020",                          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_BT2020},       INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte428",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE428},     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte431",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE431},     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"smpte432",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_SMPTE432},     INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+    {"jedec-p22",                       NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_PRI_JEDEC_P22},    INT_MIN, INT_MAX, FLAGS, "color_primaries"},
+
+    {"color_trc", "select color transfer", OFFSET(color_trc), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_TRC_NB-1, FLAGS, "color_trc"},
+    {"auto", "keep the same color transfer",  0, AV_OPT_TYPE_CONST, {.i64=-1},                     INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt709",                          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_BT709},        INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"unknown",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_UNSPECIFIED},  INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt470m",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_GAMMA22},      INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt470bg",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_GAMMA28},      INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"smpte170m",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_SMPTE170M},    INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"smpte240m",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_SMPTE240M},    INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"linear",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_LINEAR},       INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"log100",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_LOG},          INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"log316",                         NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_LOG_SQRT},     INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"iec61966-2-4",                   NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_IEC61966_2_4}, INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt1361e",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_BT1361_ECG},   INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"iec61966-2-1",                   NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_IEC61966_2_1}, INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt2020-10",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_BT2020_10},    INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"bt2020-12",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_BT2020_12},    INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"smpte2084",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_SMPTE2084},    INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"smpte428",                       NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_SMPTE428},     INT_MIN, INT_MAX, FLAGS, "color_trc"},
+    {"arib-std-b67",                   NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_TRC_ARIB_STD_B67}, INT_MIN, INT_MAX, FLAGS, "color_trc"},
+
+    {"colorspace", "select colorspace", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64=-1}, -1, AVCOL_SPC_NB-1, FLAGS, "colorspace"},
+    {"auto", "keep the same colorspace",  0, AV_OPT_TYPE_CONST, {.i64=-1},                          INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"gbr",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_RGB},               INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt709",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT709},             INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"unknown",                    NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_UNSPECIFIED},       INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"fcc",                        NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_FCC},               INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt470bg",                    NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT470BG},           INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"smpte170m",                  NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_SMPTE170M},         INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"smpte240m",                  NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_SMPTE240M},         INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"ycgco",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_YCGCO},             INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt2020nc",                   NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT2020_NCL},        INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"bt2020c",                    NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_BT2020_CL},         INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"smpte2085",                  NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_SMPTE2085},         INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"chroma-derived-nc",          NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_CHROMA_DERIVED_NCL},INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"chroma-derived-c",           NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_CHROMA_DERIVED_CL}, INT_MIN, INT_MAX, FLAGS, "colorspace"},
+    {"ictcp",                      NULL,  0, AV_OPT_TYPE_CONST, {.i64=AVCOL_SPC_ICTCP},             INT_MIN, INT_MAX, FLAGS, "colorspace"},
     {NULL}
 };
 
-AVFILTER_DEFINE_CLASS(setrange);
+AVFILTER_DEFINE_CLASS(setparams);
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     AVFilterContext *ctx = inlink->dst;
     SetParamsContext *s = ctx->priv;
 
+    /* set field: MODE_PROG clears the interlaced flag, MODE_BFF/MODE_TFF set it and pick the first field, MODE_AUTO keeps the input */
+    if (s->field_mode == MODE_PROG) {
+        frame->interlaced_frame = 0;
+    } else if (s->field_mode != MODE_AUTO) {
+        frame->interlaced_frame = 1;
+        frame->top_field_first = s->field_mode; /* MODE_BFF == 0, MODE_TFF == 1 */
+    }
+
+    /* set range: a negative option value keeps the frame's own value */
     if (s->color_range >= 0)
         frame->color_range = s->color_range;
+
+    /* set color prim, trc, space: same rule, a negative value keeps the input */
+    if (s->color_primaries >= 0)
+        frame->color_primaries = s->color_primaries;
+    if (s->color_trc >= 0)
+        frame->color_trc = s->color_trc;
+    if (s->colorspace >= 0)
+        frame->colorspace = s->colorspace;
     return ff_filter_frame(ctx->outputs[0], frame);
 }
 
@@ -73,11 +161,85 @@ static const AVFilterPad outputs[] = {
     { NULL }
 };
 
+AVFilter ff_vf_setparams = { /* combined filter: forces field order and/or color properties on each frame */
+    .name        = "setparams",
+    .description = NULL_IF_CONFIG_SMALL("Force field, or color property for the output video frame."),
+    .priv_size   = sizeof(SetParamsContext),
+    .priv_class  = &setparams_class, /* no .init callback is installed here; defaults presumably come from setparams_options -- confirm */
+    .inputs      = inputs,
+    .outputs     = outputs,
+};
+
+#if CONFIG_SETRANGE_FILTER
+
+static const AVOption setrange_options[] = { /* option table of the "setrange" filter: only color_range is exposed */
+    {"range", "select color range", OFFSET(color_range), AV_OPT_TYPE_INT, {.i64=-1},-1, AVCOL_RANGE_NB-1, FLAGS, "range"}, /* default -1 is the "auto" constant below */
+    {"auto",  "keep the same color range",   0, AV_OPT_TYPE_CONST, {.i64=-1},                       0, 0, FLAGS, "range"},
+    {"unspecified",                  NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_UNSPECIFIED},  0, 0, FLAGS, "range"},
+    {"unknown",                      NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_UNSPECIFIED},  0, 0, FLAGS, "range"}, /* alias of "unspecified" */
+    {"limited",                      NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"},
+    {"tv",                           NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"}, /* alias of "limited" */
+    {"mpeg",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_MPEG},         0, 0, FLAGS, "range"}, /* alias of "limited" */
+    {"full",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"},
+    {"pc",                           NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"}, /* alias of "full" */
+    {"jpeg",                         NULL,   0, AV_OPT_TYPE_CONST, {.i64=AVCOL_RANGE_JPEG},         0, 0, FLAGS, "range"}, /* alias of "full" */
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(setrange);
+
+static av_cold int init_setrange(AVFilterContext *ctx)
+{
+    SetParamsContext *s = ctx->priv;
+    /* setrange_options only covers color_range, so every other SetParamsContext field is set to its "leave unchanged" value. */
+    s->field_mode = MODE_AUTO; /* filter_frame() does not touch the field flags in MODE_AUTO */
+    s->color_primaries = -1;   /* negative values are skipped by filter_frame() */
+    s->color_trc       = -1;
+    s->colorspace      = -1;
+    return 0; /* always succeeds */
+}
+
 AVFilter ff_vf_setrange = {
     .name        = "setrange",
     .description = NULL_IF_CONFIG_SMALL("Force color range for the output video frame."),
     .priv_size   = sizeof(SetParamsContext),
+    .init        = init_setrange, /* presets the SetParamsContext fields that setrange has no option for */
     .priv_class  = &setrange_class,
     .inputs      = inputs,
     .outputs     = outputs,
 };
+#endif /* CONFIG_SETRANGE_FILTER */
+
+#if CONFIG_SETFIELD_FILTER
+static const AVOption setfield_options[] = { /* option table of the "setfield" filter: only field_mode is exposed */
+    {"mode", "select interlace mode", OFFSET(field_mode), AV_OPT_TYPE_INT, {.i64=MODE_AUTO}, -1, MODE_PROG, FLAGS, "mode"}, /* defaults to MODE_AUTO */
+    {"auto", "keep the same input field",  0, AV_OPT_TYPE_CONST, {.i64=MODE_AUTO}, INT_MIN, INT_MAX, FLAGS, "mode"}, /* filter_frame() leaves the field flags alone */
+    {"bff",  "mark as bottom-field-first", 0, AV_OPT_TYPE_CONST, {.i64=MODE_BFF},  INT_MIN, INT_MAX, FLAGS, "mode"}, /* frame is also marked interlaced */
+    {"tff",  "mark as top-field-first",    0, AV_OPT_TYPE_CONST, {.i64=MODE_TFF},  INT_MIN, INT_MAX, FLAGS, "mode"}, /* frame is also marked interlaced */
+    {"prog", "mark as progressive",        0, AV_OPT_TYPE_CONST, {.i64=MODE_PROG}, INT_MIN, INT_MAX, FLAGS, "mode"}, /* clears interlaced_frame */
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(setfield);
+
+static av_cold int init_setfield(AVFilterContext *ctx)
+{
+    SetParamsContext *s = ctx->priv;
+    /* setfield_options only covers field_mode, so every color field is set to a negative "leave unchanged" value. */
+    s->color_range = -1;     /* negative values are skipped by filter_frame() */
+    s->color_primaries = -1;
+    s->color_trc       = -1;
+    s->colorspace      = -1;
+    return 0; /* always succeeds */
+}
+
+AVFilter ff_vf_setfield = { /* field-order-only variant sharing SetParamsContext with the other filters in this file */
+    .name        = "setfield",
+    .description = NULL_IF_CONFIG_SMALL("Force field for the output video frame."),
+    .priv_size   = sizeof(SetParamsContext),
+    .init        = init_setfield, /* presets the color fields that setfield has no option for */
+    .priv_class  = &setfield_class,
+    .inputs      = inputs,
+    .outputs     = outputs,
+};
+#endif /* CONFIG_SETFIELD_FILTER */
diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index d1d1415c0b791..e41c3309a08f9 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -28,15 +28,32 @@
 #include "libavutil/display.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
+#include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/spherical.h"
 #include "libavutil/stereo3d.h"
 #include "libavutil/timestamp.h"
+#include "libavutil/timecode.h"
 
 #include "avfilter.h"
 #include "internal.h"
 #include "video.h"
 
+typedef struct ShowInfoContext { /* private context of the showinfo filter */
+    const AVClass *class;
+    int calculate_checksums; /* "checksum" option; when 0, filter_frame() neither computes nor prints checksum/mean/stdev */
+} ShowInfoContext;
+
+#define OFFSET(x) offsetof(ShowInfoContext, x)
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption showinfo_options[] = { /* option table of the showinfo filter */
+    { "checksum", "calculate checksums", OFFSET(calculate_checksums), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, VF }, /* boolean, enabled by default */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(showinfo);
+
 static void dump_spherical(AVFilterContext *ctx, AVFrame *frame, AVFrameSideData *sd)
 {
     AVSphericalMapping *spherical = (AVSphericalMapping *)sd->data;
@@ -94,6 +111,39 @@ static void dump_stereo3d(AVFilterContext *ctx, AVFrameSideData *sd)
         av_log(ctx, AV_LOG_INFO, " (inverted)");
 }
 
+static void dump_color_property(AVFilterContext *ctx, AVFrame *frame)
+{
+    const char *color_range_str     = av_color_range_name(frame->color_range);
+    const char *colorspace_str      = av_color_space_name(frame->colorspace);
+    const char *color_primaries_str = av_color_primaries_name(frame->color_primaries);
+    const char *color_trc_str       = av_color_transfer_name(frame->color_trc);
+    /* Logs the frame's four color properties at INFO level; a NULL name or an UNSPECIFIED value is printed as "unknown". */
+    if (!color_range_str || frame->color_range == AVCOL_RANGE_UNSPECIFIED) {
+        av_log(ctx, AV_LOG_INFO, "color_range:unknown");
+    } else {
+        av_log(ctx, AV_LOG_INFO, "color_range:%s", color_range_str);
+    }
+
+    if (!colorspace_str || frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
+        av_log(ctx, AV_LOG_INFO, " color_space:unknown");
+    } else {
+        av_log(ctx, AV_LOG_INFO, " color_space:%s", colorspace_str);
+    }
+
+    if (!color_primaries_str || frame->color_primaries == AVCOL_PRI_UNSPECIFIED) {
+        av_log(ctx, AV_LOG_INFO, " color_primaries:unknown");
+    } else {
+        av_log(ctx, AV_LOG_INFO, " color_primaries:%s", color_primaries_str);
+    }
+
+    if (!color_trc_str || frame->color_trc == AVCOL_TRC_UNSPECIFIED) {
+        av_log(ctx, AV_LOG_INFO, " color_trc:unknown");
+    } else {
+        av_log(ctx, AV_LOG_INFO, " color_trc:%s", color_trc_str);
+    }
+    av_log(ctx, AV_LOG_INFO, "\n"); /* ends the line assembled by the four fragments above */
+}
+
 static void update_sample_stats(const uint8_t *src, int len, int64_t *sum, int64_t *sum2)
 {
     int i;
@@ -107,13 +157,14 @@ static void update_sample_stats(const uint8_t *src, int len, int64_t *sum, int64
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     AVFilterContext *ctx = inlink->dst;
+    ShowInfoContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     uint32_t plane_checksum[4] = {0}, checksum = 0;
     int64_t sum[4] = {0}, sum2[4] = {0};
     int32_t pixelcount[4] = {0};
     int i, plane, vsub = desc->log2_chroma_h;
 
-    for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++) {
+    for (plane = 0; plane < 4 && s->calculate_checksums && frame->data[plane] && frame->linesize[plane]; plane++) {
         uint8_t *data = frame->data[plane];
         int h = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
         int linesize = av_image_get_linesize(frame->format, frame->width, plane);
@@ -133,8 +184,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
     av_log(ctx, AV_LOG_INFO,
            "n:%4"PRId64" pts:%7s pts_time:%-7s pos:%9"PRId64" "
-           "fmt:%s sar:%d/%d s:%dx%d i:%c iskey:%d type:%c "
-           "checksum:%08"PRIX32" plane_checksum:[%08"PRIX32,
+           "fmt:%s sar:%d/%d s:%dx%d i:%c iskey:%d type:%c ",
            inlink->frame_count_out,
            av_ts2str(frame->pts), av_ts2timestr(frame->pts, &inlink->time_base), frame->pkt_pos,
            desc->name,
@@ -143,19 +193,25 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
            !frame->interlaced_frame ? 'P' :         /* Progressive  */
            frame->top_field_first   ? 'T' : 'B',    /* Top / Bottom */
            frame->key_frame,
-           av_get_picture_type_char(frame->pict_type),
-           checksum, plane_checksum[0]);
-
-    for (plane = 1; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
-        av_log(ctx, AV_LOG_INFO, " %08"PRIX32, plane_checksum[plane]);
-    av_log(ctx, AV_LOG_INFO, "] mean:[");
-    for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
-        av_log(ctx, AV_LOG_INFO, "%"PRId64" ", (sum[plane] + pixelcount[plane]/2) / pixelcount[plane]);
-    av_log(ctx, AV_LOG_INFO, "\b] stdev:[");
-    for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
-        av_log(ctx, AV_LOG_INFO, "%3.1f ",
-               sqrt((sum2[plane] - sum[plane]*(double)sum[plane]/pixelcount[plane])/pixelcount[plane]));
-    av_log(ctx, AV_LOG_INFO, "\b]\n");
+           av_get_picture_type_char(frame->pict_type));
+
+    if (s->calculate_checksums) {
+        av_log(ctx, AV_LOG_INFO,
+               "checksum:%08"PRIX32" plane_checksum:[%08"PRIX32,
+               checksum, plane_checksum[0]);
+
+        for (plane = 1; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
+            av_log(ctx, AV_LOG_INFO, " %08"PRIX32, plane_checksum[plane]);
+        av_log(ctx, AV_LOG_INFO, "] mean:[");
+        for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
+            av_log(ctx, AV_LOG_INFO, "%"PRId64" ", (sum[plane] + pixelcount[plane]/2) / pixelcount[plane]);
+        av_log(ctx, AV_LOG_INFO, "\b] stdev:[");
+        for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++)
+            av_log(ctx, AV_LOG_INFO, "%3.1f ",
+                   sqrt((sum2[plane] - sum[plane]*(double)sum[plane]/pixelcount[plane])/pixelcount[plane]));
+        av_log(ctx, AV_LOG_INFO, "\b]");
+    }
+    av_log(ctx, AV_LOG_INFO, "\n");
 
     for (i = 0; i < frame->nb_side_data; i++) {
         AVFrameSideData *sd = frame->side_data[i];
@@ -174,6 +230,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         case AV_FRAME_DATA_STEREO3D:
             dump_stereo3d(ctx, sd);
             break;
+        case AV_FRAME_DATA_S12M_TIMECODE: {
+            uint32_t *tc = (uint32_t*)sd->data;
+            for (int j = 1; j <= tc[0]; j++) {
+                char tcbuf[AV_TIMECODE_STR_SIZE];
+                av_timecode_make_smpte_tc_string(tcbuf, tc[j], 0);
+                av_log(ctx, AV_LOG_INFO, "timecode - %s%s", tcbuf, j != tc[0] ? ", " : "");
+            }
+            break;
+        }
         case AV_FRAME_DATA_DISPLAYMATRIX:
             av_log(ctx, AV_LOG_INFO, "displaymatrix: rotation of %.2f degrees",
                    av_display_rotation_get((int32_t *)sd->data));
@@ -190,6 +255,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         av_log(ctx, AV_LOG_INFO, "\n");
     }
 
+    dump_color_property(ctx, frame);
+
     return ff_filter_frame(inlink->dst->outputs[0], frame);
 }
 
@@ -240,4 +307,6 @@ AVFilter ff_vf_showinfo = {
     .description = NULL_IF_CONFIG_SMALL("Show textual information for each video frame."),
     .inputs      = avfilter_vf_showinfo_inputs,
     .outputs     = avfilter_vf_showinfo_outputs,
+    .priv_size   = sizeof(ShowInfoContext),
+    .priv_class  = &showinfo_class,
 };
diff --git a/libavfilter/vf_signalstats.c b/libavfilter/vf_signalstats.c
index 298881bc72b90..2b8c0de4c4d58 100644
--- a/libavfilter/vf_signalstats.c
+++ b/libavfilter/vf_signalstats.c
@@ -830,7 +830,7 @@ static int filter_frame16(AVFilterLink *link, AVFrame *in)
 
             masky |= yuv;
             histy[yuv]++;
-            dify += abs(yuv - AV_RN16(prev->data[0] + pw + i * 2));
+            dify += abs(yuv - (int)AV_RN16(prev->data[0] + pw + i * 2));
         }
         w  += in->linesize[0];
         pw += prev->linesize[0];
@@ -848,9 +848,9 @@ static int filter_frame16(AVFilterLink *link, AVFrame *in)
             masku |= yuvu;
             maskv |= yuvv;
             histu[yuvu]++;
-            difu += abs(yuvu - AV_RN16(prev->data[1] + cpw + i * 2));
+            difu += abs(yuvu - (int)AV_RN16(prev->data[1] + cpw + i * 2));
             histv[yuvv]++;
-            difv += abs(yuvv - AV_RN16(prev->data[2] + cpw + i * 2));
+            difv += abs(yuvv - (int)AV_RN16(prev->data[2] + cpw + i * 2));
 
             histsat[p_sat[i]]++;
             histhue[((int16_t*)p_hue)[i]]++;
diff --git a/libavfilter/vf_stack.c b/libavfilter/vf_stack.c
index b2b8c68041d21..8731674aa71b4 100644
--- a/libavfilter/vf_stack.c
+++ b/libavfilter/vf_stack.c
@@ -29,14 +29,23 @@
 #include "framesync.h"
 #include "video.h"
 
+typedef struct StackItem { /* per-input placement for xstack: filled in config_output(), consumed by process_frame() */
+    int x[4], y[4];  /* per-plane position in the output: x is added as a byte offset, y is multiplied by the output linesize */
+    int linesize[4]; /* per-plane byte width handed to av_image_copy_plane() */
+    int height[4];   /* per-plane row count handed to av_image_copy_plane() */
+} StackItem;
+
 typedef struct StackContext {
     const AVClass *class;
     const AVPixFmtDescriptor *desc;
     int nb_inputs;
+    char *layout;
     int shortest;
     int is_vertical;
+    int is_horizontal;
     int nb_planes;
 
+    StackItem *items;
     AVFrame **frames;
     FFFrameSync fs;
 } StackContext;
@@ -66,10 +75,19 @@ static av_cold int init(AVFilterContext *ctx)
     if (!strcmp(ctx->filter->name, "vstack"))
         s->is_vertical = 1;
 
+    if (!strcmp(ctx->filter->name, "hstack"))
+        s->is_horizontal = 1;
+
     s->frames = av_calloc(s->nb_inputs, sizeof(*s->frames));
     if (!s->frames)
         return AVERROR(ENOMEM);
 
+    if (!strcmp(ctx->filter->name, "xstack")) {
+        s->items = av_calloc(s->nb_inputs, sizeof(*s->items));
+        if (!s->items)
+            return AVERROR(ENOMEM);
+    }
+
     for (i = 0; i < s->nb_inputs; i++) {
         AVFilterPad pad = { 0 };
 
@@ -112,13 +130,15 @@ static int process_frame(FFFrameSync *fs)
         int linesize[4];
         int height[4];
 
-        if ((ret = av_image_fill_linesizes(linesize, inlink->format, inlink->w)) < 0) {
-            av_frame_free(&out);
-            return ret;
-        }
+        if (s->is_horizontal || s->is_vertical) {
+            if ((ret = av_image_fill_linesizes(linesize, inlink->format, inlink->w)) < 0) {
+                av_frame_free(&out);
+                return ret;
+            }
 
-        height[1] = height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
-        height[0] = height[3] = inlink->h;
+            height[1] = height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
+            height[0] = height[3] = inlink->h;
+        }
 
         for (p = 0; p < s->nb_planes; p++) {
             if (s->is_vertical) {
@@ -128,13 +148,21 @@ static int process_frame(FFFrameSync *fs)
                                     in[i]->linesize[p],
                                     linesize[p], height[p]);
                 offset[p] += height[p];
-            } else {
+            } else if (s->is_horizontal) {
                 av_image_copy_plane(out->data[p] + offset[p],
                                     out->linesize[p],
                                     in[i]->data[p],
                                     in[i]->linesize[p],
                                     linesize[p], height[p]);
                 offset[p] += linesize[p];
+            } else {
+                StackItem *item = &s->items[i];
+
+                av_image_copy_plane(out->data[p] + out->linesize[p] * item->y[p] + item->x[p],
+                                    out->linesize[p],
+                                    in[i]->data[p],
+                                    in[i]->linesize[p],
+                                    item->linesize[p], item->height[p]);
             }
         }
     }
@@ -154,6 +182,10 @@ static int config_output(AVFilterLink *outlink)
     FFFrameSyncIn *in;
     int i, ret;
 
+    s->desc = av_pix_fmt_desc_get(outlink->format);
+    if (!s->desc)
+        return AVERROR_BUG;
+
     if (s->is_vertical) {
         for (i = 1; i < s->nb_inputs; i++) {
             if (ctx->inputs[i]->w != width) {
@@ -162,7 +194,7 @@ static int config_output(AVFilterLink *outlink)
             }
             height += ctx->inputs[i]->h;
         }
-    } else {
+    } else if (s->is_horizontal) {
         for (i = 1; i < s->nb_inputs; i++) {
             if (ctx->inputs[i]->h != height) {
                 av_log(ctx, AV_LOG_ERROR, "Input %d height %d does not match input %d height %d.\n", i, ctx->inputs[i]->h, 0, height);
@@ -170,11 +202,81 @@ static int config_output(AVFilterLink *outlink)
             }
             width += ctx->inputs[i]->w;
         }
+    } else { /* xstack: position every input according to the "layout" option string */
+        char *arg, *p = s->layout, *saveptr = NULL; /* level 1: '|' separates one entry per input */
+        char *arg2, *p2, *saveptr2 = NULL;          /* level 2: '_' separates the x term from the y term */
+        char *arg3, *p3, *saveptr3 = NULL;          /* level 3: '+' separates the summands of one term */
+        int inw, inh, size;                         /* inw and inh accumulate the x and y offset of the current input */
+        /* NOTE(review): av_strtok() tokenises s->layout in place, so a second run of this parse presumably sees a truncated string -- confirm. */
+        for (i = 0; i < s->nb_inputs; i++) {
+            AVFilterLink *inlink = ctx->inputs[i];
+            StackItem *item = &s->items[i];
+
+            if (!(arg = av_strtok(p, "|", &saveptr)))
+                return AVERROR(EINVAL); /* fewer layout entries than inputs */
+
+            p = NULL; /* later calls continue in the same string */
+
+            if ((ret = av_image_fill_linesizes(item->linesize, inlink->format, inlink->w)) < 0) {
+                return ret;
+            }
+
+            item->height[1] = item->height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h); /* planes 1 and 2 use the chroma-shifted height */
+            item->height[0] = item->height[3] = inlink->h;
+
+            p2 = arg;
+            inw = inh = 0;
+
+            for (int j = 0; j < 2; j++) { /* j == 0 parses the x term, j == 1 the y term */
+                if (!(arg2 = av_strtok(p2, "_", &saveptr2)))
+                    return AVERROR(EINVAL);
+
+                p2 = NULL;
+                p3 = arg2;
+                while ((arg3 = av_strtok(p3, "+", &saveptr3))) {
+                    p3 = NULL;
+                    if (sscanf(arg3, "w%d", &size) == 1) { /* wN: width of input N */
+                        if (size == i || size < 0 || size >= s->nb_inputs) /* rejects self-reference and out-of-range indices */
+                            return AVERROR(EINVAL);
+
+                        if (!j)
+                            inw += ctx->inputs[size]->w;
+                        else
+                            inh += ctx->inputs[size]->w; /* a width may also be used in the y term */
+                    } else if (sscanf(arg3, "h%d", &size) == 1) { /* hN: height of input N */
+                        if (size == i || size < 0 || size >= s->nb_inputs) /* rejects self-reference and out-of-range indices */
+                            return AVERROR(EINVAL);
+
+                        if (!j)
+                            inw += ctx->inputs[size]->h; /* a height may also be used in the x term */
+                        else
+                            inh += ctx->inputs[size]->h;
+                    } else if (sscanf(arg3, "%d", &size) == 1) { /* plain integer offset */
+                        if (size < 0)
+                            return AVERROR(EINVAL);
+
+                        if (!j)
+                            inw += size;
+                        else
+                            inh += size;
+                    } else {
+                        return AVERROR(EINVAL); /* summand is none of wN, hN or an integer */
+                    }
+                }
+            }
+
+            if ((ret = av_image_fill_linesizes(item->x, inlink->format, inw)) < 0) { /* per-plane linesize of an inw-wide image, stored as the x offset */
+                return ret;
+            }
+
+            item->y[1] = item->y[2] = AV_CEIL_RSHIFT(inh, s->desc->log2_chroma_h); /* planes 1 and 2 use the chroma-shifted row offset */
+            item->y[0] = item->y[3] = inh;
+
+            width  = FFMAX(width,  inlink->w + inw); /* output grows to cover this input's right edge */
+            height = FFMAX(height, inlink->h + inh); /* output grows to cover this input's bottom edge */
+        }
     }
 
-    s->desc = av_pix_fmt_desc_get(outlink->format);
-    if (!s->desc)
-        return AVERROR_BUG;
     s->nb_planes = av_pix_fmt_count_planes(outlink->format);
 
     outlink->w          = width;
@@ -209,6 +311,7 @@ static av_cold void uninit(AVFilterContext *ctx)
 
     ff_framesync_uninit(&s->fs);
     av_freep(&s->frames);
+    av_freep(&s->items);
 
     for (i = 0; i < ctx->nb_inputs; i++)
         av_freep(&ctx->input_pads[i].name);
@@ -276,3 +379,29 @@ AVFilter ff_vf_vstack = {
 };
 
 #endif /* CONFIG_VSTACK_FILTER */
+
+#if CONFIG_XSTACK_FILTER
+
+static const AVOption xstack_options[] = { /* option table of the "xstack" filter */
+    { "inputs", "set number of inputs", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=2}, 2, INT_MAX, .flags = FLAGS }, /* at least 2, default 2 */
+    { "layout", "set custom layout", OFFSET(layout), AV_OPT_TYPE_STRING, {.str="0_0|w0_0"}, 0, 0, .flags = FLAGS }, /* '|'-separated x_y entries, one per input; each term is a '+'-joined sum of integers, wN or hN */
+    { "shortest", "force termination when the shortest input terminates", OFFSET(shortest), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS }, /* off by default */
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(xstack);
+
+AVFilter ff_vf_xstack = { /* custom-layout stacking filter; reuses the init/uninit/activate callbacks of this file */
+    .name          = "xstack",
+    .description   = NULL_IF_CONFIG_SMALL("Stack video inputs into custom layout."),
+    .priv_size     = sizeof(StackContext),
+    .priv_class    = &xstack_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .init          = init,   /* allocates s->items when the filter name is "xstack" */
+    .uninit        = uninit, /* frees s->items again */
+    .activate      = activate,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS, /* no static .inputs: init() sets up one pad per nb_inputs */
+};
+
+#endif /* CONFIG_XSTACK_FILTER */
diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c
index 09377ca7f400c..0c06815643756 100644
--- a/libavfilter/vf_thumbnail_cuda.c
+++ b/libavfilter/vf_thumbnail_cuda.c
@@ -20,16 +20,17 @@
 * DEALINGS IN THE SOFTWARE.
 */
 
-#include <cuda.h>
-
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
 #include "avfilter.h"
 #include "internal.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
+
 #define HIST_SIZE (3*256)
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
 #define BLOCKX 32
@@ -57,6 +58,7 @@ typedef struct ThumbnailCudaContext {
     AVRational tb;              ///< copy of the input timebase to ease access
 
     AVBufferRef *hw_frames_ctx;
+    AVCUDADeviceContext *hwctx;
 
     CUmodule    cu_module;
 
@@ -64,12 +66,10 @@ typedef struct ThumbnailCudaContext {
     CUfunction  cu_func_uchar2;
     CUfunction  cu_func_ushort;
     CUfunction  cu_func_ushort2;
-    CUtexref    cu_tex_uchar;
-    CUtexref    cu_tex_uchar2;
-    CUtexref    cu_tex_ushort;
-    CUtexref    cu_tex_ushort2;
+    CUstream    cu_stream;
 
     CUdeviceptr data;
+
 } ThumbnailCudaContext;
 
 #define OFFSET(x) offsetof(ThumbnailCudaContext, x)
@@ -154,27 +154,44 @@ static AVFrame *get_best_frame(AVFilterContext *ctx)
     return picref;
 }
 
-static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels,
+static int thumbnail_kernel(AVFilterContext *ctx, CUfunction func, int channels,
     int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
 {
-    CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
-    void *args[] = { &histogram, &src_width, &src_height };
-    CUDA_ARRAY_DESCRIPTOR desc;
-
-    desc.Width = src_width;
-    desc.Height = src_height;
-    desc.NumChannels = channels;
-    if (pixel_size == 1) {
-        desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
-    }
-    else {
-        desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
-    }
+    int ret;
+    ThumbnailCudaContext *s = ctx->priv; /* ctx and s are also referenced by the CHECK_CU macro */
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl; /* CUDA entry points are called through this function table */
+    CUtexObject tex = 0; /* initialised to 0 so the cleanup at exit can test whether it was created */
+    void *args[] = { &tex, &histogram, &src_width, &src_height }; /* kernel arguments; the texture object now comes first */
 
-    cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch);
-    cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL);
+    CUDA_TEXTURE_DESC tex_desc = { /* same filter mode and flag that the removed cuTexRef setup applied */
+        .filterMode = CU_TR_FILTER_MODE_LINEAR,
+        .flags = CU_TRSF_READ_AS_INTEGER,
+    };
 
-    return 0;
+    CUDA_RESOURCE_DESC res_desc = { /* describes the pitched source plane the texture object reads */
+        .resType = CU_RESOURCE_TYPE_PITCH2D,
+        .res.pitch2D.format = pixel_size == 1 ?
+                              CU_AD_FORMAT_UNSIGNED_INT8 :
+                              CU_AD_FORMAT_UNSIGNED_INT16, /* any pixel_size other than 1 selects the 16-bit format */
+        .res.pitch2D.numChannels = channels,
+        .res.pitch2D.width = src_width,
+        .res.pitch2D.height = src_height,
+        .res.pitch2D.pitchInBytes = src_pitch,
+        .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
+    };
+
+    ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuLaunchKernel(func,
+                                      DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, /* grid: enough BLOCKX x BLOCKY blocks to cover the plane */
+                                      BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
+exit: /* reached on success and on failure */
+    if (tex)
+        CHECK_CU(cu->cuTexObjectDestroy(tex)); /* result is not stored, so ret keeps the create/launch status */
+
+    return ret;
 }
 
 static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
@@ -184,40 +201,40 @@ static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
 
     switch (in_frames_ctx->sw_format) {
     case AV_PIX_FMT_NV12:
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
-        thumbnail_kernel(s, s->cu_func_uchar2, s->cu_tex_uchar2, 2,
+        thumbnail_kernel(ctx, s->cu_func_uchar2, 2,
             histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
         break;
     case AV_PIX_FMT_YUV420P:
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram + 512, in->data[2], in->width / 2, in->height / 2, in->linesize[2], 1);
         break;
     case AV_PIX_FMT_YUV444P:
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 1);
-        thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_uchar, 1,
             histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 1);
         break;
     case AV_PIX_FMT_P010LE:
     case AV_PIX_FMT_P016LE:
-        thumbnail_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
+        thumbnail_kernel(ctx, s->cu_func_ushort, 1,
             histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
-        thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
+        thumbnail_kernel(ctx, s->cu_func_ushort2, 2,
             histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 2);
         break;
     case AV_PIX_FMT_YUV444P16:
-        thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
             histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
-        thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
             histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 2);
-        thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1,
+        thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
             histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 2);
         break;
     default:
@@ -231,11 +248,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     AVFilterContext *ctx  = inlink->dst;
     ThumbnailCudaContext *s   = ctx->priv;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
     AVFilterLink *outlink = ctx->outputs[0];
     int *hist = s->frames[s->n].histogram;
     AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
-    AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
-    CUresult err;
     CUcontext dummy;
     CUDA_MEMCPY2D cpy = { 0 };
     int ret = 0;
@@ -243,11 +259,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     // keep a reference of each frame
     s->frames[s->n].buf = frame;
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int));
+    CHECK_CU(cu->cuMemsetD8Async(s->data, 0, HIST_SIZE * sizeof(int), s->cu_stream));
 
     thumbnail(ctx, (int*)s->data, frame);
 
@@ -260,11 +276,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     cpy.WidthInBytes = HIST_SIZE * sizeof(int);
     cpy.Height = 1;
 
-    err = cuMemcpy2D(&cpy);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, s->cu_stream));
+    if (ret < 0)
+        return ret;
 
     if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P ||
         hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE)
@@ -274,7 +288,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
             hist[i] = 4 * hist[i];
     }
 
-    cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         return ret;
 
@@ -290,14 +304,15 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     int i;
     ThumbnailCudaContext *s = ctx->priv;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
 
     if (s->data) {
-        cuMemFree(s->data);
+        CHECK_CU(cu->cuMemFree(s->data));
         s->data = 0;
     }
 
     if (s->cu_module) {
-        cuModuleUnload(s->cu_module);
+        CHECK_CU(cu->cuModuleUnload(s->cu_module));
         s->cu_module = NULL;
     }
 
@@ -340,49 +355,43 @@ static int config_props(AVFilterLink *inlink)
     AVHWFramesContext     *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
-    CUresult err;
+    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
+    int ret;
 
     extern char vf_thumbnail_cuda_ptx[];
 
-    err = cuCtxPushCurrent(cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
-        return AVERROR_UNKNOWN;
-    }
+    s->hwctx = device_hwctx;
+    s->cu_stream = s->hwctx->stream;
 
-    err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
+    if (ret < 0)
+        return ret;
 
-    cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar");
-    cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2");
-    cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort");
-    cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2");
-
-    cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex");
-    cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex");
-    cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex");
-
-    cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER);
-    cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER);
-
-    cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR);
-    cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR);
-
-    err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int));
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));
+    if (ret < 0)
+        return ret;
 
-    cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;
 
diff --git a/libavfilter/vf_thumbnail_cuda.cu b/libavfilter/vf_thumbnail_cuda.cu
index 98fad4303a74a..c73e49fbc6904 100644
--- a/libavfilter/vf_thumbnail_cuda.cu
+++ b/libavfilter/vf_thumbnail_cuda.cu
@@ -22,55 +22,54 @@
 
 extern "C" {
 
-texture<unsigned char, 2> uchar_tex;
-texture<uchar2, 2>  uchar2_tex;
-texture<unsigned short, 2> ushort_tex;
-texture<ushort2, 2>  ushort2_tex;
-
-__global__ void Thumbnail_uchar(int *histogram, int src_width, int src_height)
+__global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex,
+                                int *histogram, int src_width, int src_height)
 {
     int x = blockIdx.x * blockDim.x + threadIdx.x;
     int y = blockIdx.y * blockDim.y + threadIdx.y;
     if (y < src_height && x < src_width)
     {
-        unsigned char pixel = tex2D(uchar_tex, x, y);
+        unsigned char pixel = tex2D<unsigned char>(uchar_tex, x, y);
         atomicAdd(&histogram[pixel], 1);
     }
 }
 
-__global__ void Thumbnail_uchar2(int *histogram, int src_width, int src_height)
+__global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex,
+                                 int *histogram, int src_width, int src_height)
 {
     int x = blockIdx.x * blockDim.x + threadIdx.x;
     int y = blockIdx.y * blockDim.y + threadIdx.y;
 
     if (y < src_height && x < src_width)
     {
-        uchar2 pixel = tex2D(uchar2_tex, x, y);
+        uchar2 pixel = tex2D<uchar2>(uchar2_tex, x, y);
         atomicAdd(&histogram[pixel.x], 1);
         atomicAdd(&histogram[256 + pixel.y], 1);
     }
 }
 
-__global__ void Thumbnail_ushort(int *histogram, int src_width, int src_height)
+__global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex,
+                                 int *histogram, int src_width, int src_height)
 {
     int x = blockIdx.x * blockDim.x + threadIdx.x;
     int y = blockIdx.y * blockDim.y + threadIdx.y;
 
     if (y < src_height && x < src_width)
     {
-        unsigned short pixel = (tex2D(ushort_tex, x, y) + 128) >> 8;
+        unsigned short pixel = (tex2D<unsigned short>(ushort_tex, x, y) + 128) >> 8;
         atomicAdd(&histogram[pixel], 1);
     }
 }
 
-__global__ void Thumbnail_ushort2(int *histogram, int src_width, int src_height)
+__global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex,
+                                  int *histogram, int src_width, int src_height)
 {
     int x = blockIdx.x * blockDim.x + threadIdx.x;
     int y = blockIdx.y * blockDim.y + threadIdx.y;
 
     if (y < src_height && x < src_width)
     {
-        ushort2 pixel = tex2D(ushort2_tex, x, y);
+        ushort2 pixel = tex2D<ushort2>(ushort2_tex, x, y);
         atomicAdd(&histogram[(pixel.x + 128) >> 8], 1);
-        atomicAdd(&histogram[256 + (pixel.y + 128) >> 8], 1);
+        atomicAdd(&histogram[256 + min((pixel.y + 128) >> 8, 255)], 1); // '+' binds tighter than '>>': round to 8 bits first, then offset; clamp keeps the bin in 256..511 like Thumbnail_uchar2
     }
diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index 98a2c4bd23ca4..efd4af5466c0f 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -287,13 +287,7 @@ static const AVOption tonemap_options[] = {
     { NULL }
 };
 
-static const AVClass tonemap_class = {
-    .class_name       = "tonemap",
-    .item_name        = av_default_item_name,
-    .option           = tonemap_options,
-    .version          = LIBAVUTIL_VERSION_INT,
-    .category         = AV_CLASS_CATEGORY_FILTER,
-};
+AVFILTER_DEFINE_CLASS(tonemap);
 
 static const AVFilterPad tonemap_inputs[] = {
     {
diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
index cd293c2522395..ae3f98d817007 100644
--- a/libavfilter/vf_tonemap_opencl.c
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -98,12 +98,12 @@ static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
     [AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
 };
 
-static struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = {
+static const struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = {
     [AVCOL_PRI_BT709]  = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
     [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
 };
 
-static struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = {
+static const struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = {
     [AVCOL_PRI_BT709]  = { 0.3127, 0.3290 },
     [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 },
 };
diff --git a/libavfilter/vf_tpad.c b/libavfilter/vf_tpad.c
new file mode 100644
index 0000000000000..86e063090bf71
--- /dev/null
+++ b/libavfilter/vf_tpad.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "filters.h"
+#include "internal.h"
+#include "formats.h"
+#include "drawutils.h"
+
+typedef struct TPadContext {
+    const AVClass *class;
+    int pad_start;          ///< "start" option: padding frames still to emit before the input; counted down in activate()
+    int pad_stop;           ///< "stop" option: padding frames still to emit after EOF; only decremented while > 0, so -1 never reaches 0
+    int start_mode;         ///< 0 = "add" (solid-color frames), 1 = "clone" (repeat the first input frame)
+    int stop_mode;          ///< 0 = "add" (solid-color frames), 1 = "clone" (repeat the last input frame)
+    int64_t start_duration; ///< "start_duration" option; when non-zero, config_input() converts it into pad_start
+    int64_t stop_duration;  ///< "stop_duration" option; when non-zero, config_input() converts it into pad_stop
+    uint8_t rgba_color[4];  ///< color for the padding area
+
+    FFDrawContext draw;     ///< drawutils state set up by ff_draw_init() in config_input()
+    FFDrawColor color;      ///< rgba_color converted by ff_draw_color(); used to fill added frames
+    int64_t pts;            ///< pts given to the next padding frame; also added to the pts of every forwarded input frame
+    int eof;                ///< set once input EOF was acknowledged while pad_stop is non-zero
+    AVFrame *cache_start;   ///< first input frame as returned by ff_inlink_peek_frame(); reset to NULL, never freed here
+    AVFrame *cache_stop;    ///< clone of the most recently consumed input frame (clone stop mode); freed in uninit()
+} TPadContext;
+
+#define OFFSET(x) offsetof(TPadContext, x)
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption tpad_options[] = {
+    { "start", "set the number of frames to delay input",              OFFSET(pad_start),  AV_OPT_TYPE_INT,   {.i64=0},        0,   INT_MAX, VF },
+    { "stop",  "set the number of frames to add after input finished", OFFSET(pad_stop),   AV_OPT_TYPE_INT,   {.i64=0},       -1,   INT_MAX, VF },
+    { "start_mode", "set the mode of added frames to start",           OFFSET(start_mode), AV_OPT_TYPE_INT,   {.i64=0},        0,         1, VF, "mode" },
+    { "add",   "add solid-color frames",                               0,                  AV_OPT_TYPE_CONST, {.i64=0},        0,         0, VF, "mode" },
+    { "clone", "clone first/last frame",                               0,                  AV_OPT_TYPE_CONST, {.i64=1},        0,         0, VF, "mode" },
+    { "stop_mode",  "set the mode of added frames to end",             OFFSET(stop_mode),  AV_OPT_TYPE_INT,   {.i64=0},        0,         1, VF, "mode" },
+    { "start_duration", "set the duration to delay input",             OFFSET(start_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT64_MAX, VF },
+    { "stop_duration",  "set the duration to pad input",               OFFSET(stop_duration),  AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT64_MAX, VF },
+    { "color", "set the color of the added frames",                    OFFSET(rgba_color), AV_OPT_TYPE_COLOR, {.str="black"},  0,         0, VF },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tpad);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    return ff_set_common_formats(ctx, ff_draw_supported_pixel_formats(0)); /* negotiate only the formats listed by the drawutils helper, since padding frames are filled with ff_fill_rectangle() */
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    TPadContext *s = ctx->priv;
+    AVFrame *frame = NULL;
+    int ret, status;
+    int64_t pts;
+    /* activate callback: emit start padding, then forward input frames, then emit stop padding after input EOF */
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+    /* start padding, "add" mode (0): one solid-color frame per call while pad_start > 0 and a frame is wanted */
+    if (s->start_mode == 0 && s->pad_start > 0 && ff_outlink_frame_wanted(outlink)) {
+        frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        ff_fill_rectangle(&s->draw, &s->color,
+                          frame->data, frame->linesize,
+                          0, 0, frame->width, frame->height);
+        frame->pts = s->pts;
+        s->pts += av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
+        s->pad_start--;
+        return ff_filter_frame(outlink, frame);
+    }
+    /* start padding, "clone" mode (1): repeat the first queued input frame; it is peeked here and consumed later */
+    if (s->start_mode == 1 && s->pad_start > 0) {
+        if (!s->cache_start && ff_inlink_queued_frames(inlink))
+            s->cache_start = ff_inlink_peek_frame(inlink, 0);
+        if (!s->cache_start) { /* nothing to clone yet: ask upstream if wanted, never pass NULL to av_frame_clone() */
+            FF_FILTER_FORWARD_WANTED(outlink, inlink);
+            return FFERROR_NOT_READY;
+        }
+        frame = av_frame_clone(s->cache_start);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        frame->pts = s->pts;
+        s->pts += av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
+        s->pad_start--;
+        if (s->pad_start == 0)
+            s->cache_start = NULL;
+        return ff_filter_frame(outlink, frame);
+    }
+    /* regular operation: forward input frames, shifted by the pts accumulated during start padding */
+    if (!s->eof && !s->pad_start) {
+        ret = ff_inlink_consume_frame(inlink, &frame);
+        if (ret < 0)
+            return ret;
+        if (ret > 0) {
+            if (s->stop_mode == 1 && s->pad_stop != 0) {
+                av_frame_free(&s->cache_stop);
+                s->cache_stop = av_frame_clone(frame);
+            }
+            frame->pts += s->pts;
+            return ff_filter_frame(outlink, frame);
+        }
+    }
+    /* input EOF: finish right away without stop padding, otherwise remember EOF and continue timestamps from it */
+    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        if (status == AVERROR_EOF) {
+            if (!s->pad_stop) {
+                ff_outlink_set_status(outlink, status, pts);
+                return 0;
+            }
+            s->eof = 1;
+            s->pts += pts;
+        }
+    }
+    /* stop padding; in clone mode with no cached frame (e.g. input ended before any frame) finish instead of cloning NULL */
+    if (s->eof) {
+        if (!s->pad_stop || (s->stop_mode == 1 && !s->cache_stop)) {
+            ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
+            return 0;
+        }
+        if (s->stop_mode == 0) {
+            frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+            if (!frame)
+                return AVERROR(ENOMEM);
+            ff_fill_rectangle(&s->draw, &s->color,
+                              frame->data, frame->linesize,
+                              0, 0, frame->width, frame->height);
+        } else if (s->stop_mode == 1) {
+            frame = av_frame_clone(s->cache_stop);
+            if (!frame)
+                return AVERROR(ENOMEM);
+        }
+        frame->pts = s->pts;
+        s->pts += av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
+        if (s->pad_stop > 0)
+            s->pad_stop--;
+        return ff_filter_frame(outlink, frame);
+    }
+    /* idle: once start padding is done, pass a pending downstream request on to the input */
+    if (!s->pad_start)
+        FF_FILTER_FORWARD_WANTED(outlink, inlink);
+    return FFERROR_NOT_READY;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    TPadContext *s = ctx->priv;
+    /* Input link setup: prepare the fill color and derive the pad frame counts from the duration options. */
+    ff_draw_init(&s->draw, inlink->format, 0);
+    ff_draw_color(&s->draw, &s->color, s->rgba_color);
+    /* A non-zero duration (AV_TIME_BASE units) replaces the frame count: frames = duration * frame_rate / AV_TIME_BASE. */
+    if (s->start_duration)
+        s->pad_start = av_rescale_q(s->start_duration, inlink->frame_rate, av_inv_q(AV_TIME_BASE_Q));
+    if (s->stop_duration)
+        s->pad_stop = av_rescale_q(s->stop_duration, inlink->frame_rate, av_inv_q(AV_TIME_BASE_Q));
+    /* NOTE(review): the rescaled value is stored into int fields; presumably very long durations could be narrowed -- confirm. */
+    return 0;
+}
+
+static void uninit(AVFilterContext *ctx)
+{
+    TPadContext *s = ctx->priv;
+    /* cache_stop is the only frame this filter clones and keeps; cache_start comes from ff_inlink_peek_frame() and is only reset to NULL in activate(). */
+    av_frame_free(&s->cache_stop);
+}
+
+static const AVFilterPad tpad_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad tpad_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_tpad = {
+    .name          = "tpad",
+    .description   = NULL_IF_CONFIG_SMALL("Temporally pad video frames."),
+    .priv_size     = sizeof(TPadContext),
+    .priv_class    = &tpad_class,
+    .query_formats = query_formats,
+    .activate      = activate,      /* all frame handling happens here; the input pad defines no filter_frame */
+    .uninit        = uninit,        /* frees the cached last frame */
+    .inputs        = tpad_inputs,
+    .outputs       = tpad_outputs,
+};
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 74a4bbcf58b47..dd54947bd96b9 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -38,19 +38,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
-
-typedef enum {
-    TRANSPOSE_PT_TYPE_NONE,
-    TRANSPOSE_PT_TYPE_LANDSCAPE,
-    TRANSPOSE_PT_TYPE_PORTRAIT,
-} PassthroughType;
-
-enum TransposeDir {
-    TRANSPOSE_CCLOCK_FLIP,
-    TRANSPOSE_CLOCK,
-    TRANSPOSE_CCLOCK,
-    TRANSPOSE_CLOCK_FLIP,
-};
+#include "transpose.h"
 
 typedef struct TransVtable {
     void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c
index 1b3a5c0c69178..3ea031667caf4 100644
--- a/libavfilter/vf_transpose_npp.c
+++ b/libavfilter/vf_transpose_npp.c
@@ -23,6 +23,7 @@
 #include "libavutil/common.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -32,6 +33,8 @@
 #include "internal.h"
 #include "video.h"
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
+
 static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_YUV444P
@@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
     AVFrame *out = NULL;
-    CUresult err;
     CUcontext dummy;
     int ret = 0;
 
@@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        ret = AVERROR_UNKNOWN;
+    ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
+    if (ret < 0)
         goto fail;
-    }
 
     ret = npptranspose_filter(ctx, out, in);
 
-    device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy);
+    CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
     if (ret < 0)
         goto fail;
 
diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c
new file mode 100644
index 0000000000000..dd678e91cd4bb
--- /dev/null
+++ b/libavfilter/vf_transpose_opencl.c
@@ -0,0 +1,288 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "transpose.h"
+
+typedef struct TransposeOpenCLContext {
+    OpenCLFilterContext ocf;             ///< shared OpenCL filter state (hwctx, program, output_width/output_height are used in this file)
+    int                   initialised;   ///< set to 1 by transpose_opencl_init() once queue and kernel exist
+    int passthrough;    ///< PassthroughType, landscape passthrough mode enabled
+    int dir;            ///< TransposeDir
+    cl_kernel             kernel;        ///< "transpose" kernel created in transpose_opencl_init()
+    cl_command_queue      command_queue; ///< queue created in transpose_opencl_init(); one kernel launch per plane is enqueued on it
+} TransposeOpenCLContext;
+
+static int transpose_opencl_init(AVFilterContext *avctx)
+{
+    TransposeOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+    /* Lazy setup, called from the filter_frame callback on first use: load the program, create the queue and the kernel. */
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, "transpose", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+fail: /* reset each handle after releasing it: transpose_opencl_uninit() releases every non-NULL handle again */
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    ctx->command_queue = NULL;
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    ctx->kernel = NULL;
+    return err;
+}
+
+static int transpose_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    TransposeOpenCLContext *s = avctx->priv;
+    AVFilterLink *inlink = avctx->inputs[0];
+    const AVPixFmtDescriptor *desc_in  = av_pix_fmt_desc_get(inlink->format);
+    int ret;
+    /* Output link setup: reuse the input frames context in passthrough mode, otherwise swap width/height and invert the sample aspect ratio. */
+    if ((inlink->w >= inlink->h &&
+         s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
+        (inlink->w <= inlink->h &&
+         s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
+        if (inlink->hw_frames_ctx) {
+            outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
+            if (!outlink->hw_frames_ctx)
+                return AVERROR(ENOMEM);
+        }
+        av_log(avctx, AV_LOG_VERBOSE,
+               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
+               inlink->w, inlink->h, inlink->w, inlink->h);
+
+        return 0;
+    } else {
+        s->passthrough = TRANSPOSE_PT_TYPE_NONE;
+    }
+    /* From here on passthrough is NONE (0); the frame callbacks test it as a plain boolean. */
+    if (desc_in->log2_chroma_w != desc_in->log2_chroma_h) {
+        av_log(avctx, AV_LOG_ERROR, "Input format %s not supported.\n",
+               desc_in->name);
+        return AVERROR(EINVAL);
+    }
+    /* The output has the input's height as width and the input's width as height. */
+    s->ocf.output_width = inlink->h;
+    s->ocf.output_height = inlink->w;
+    ret = ff_opencl_filter_config_output(outlink);
+    if (ret < 0)
+        return ret;
+    /* A known sample aspect ratio is inverted (1 / SAR); an unknown one (num == 0) is copied unchanged. */
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 },
+                                                inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    av_log(avctx, AV_LOG_VERBOSE,
+           "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
+           inlink->w, inlink->h, s->dir, outlink->w, outlink->h,
+           s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",
+           s->dir == 0 || s->dir == 3);
+    return 0;
+}
+
+static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+    const TransposeOpenCLContext *priv = inlink->dst->priv;
+    /* Passthrough mode allocates through the null helper, every other mode through the default one. */
+    if (priv->passthrough)
+        return ff_null_get_video_buffer(inlink, w, h);
+    return ff_default_get_video_buffer(inlink, w, h);
+}
+
+static int transpose_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    TransposeOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    size_t global_work[2];
+    cl_mem src, dst;
+    cl_int cle;
+    int err, p;
+    /* Transposes every plane of input into a new output frame with the OpenCL kernel, or forwards input unchanged in passthrough mode. */
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx) {
+        av_frame_free(&input); /* input is freed on every other error path too; do not leak it here */
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->passthrough)
+        return ff_filter_frame(outlink, input);
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    if (input->sample_aspect_ratio.num == 0) {
+        output->sample_aspect_ratio = input->sample_aspect_ratio;
+    } else {
+        output->sample_aspect_ratio.num = input->sample_aspect_ratio.den;
+        output->sample_aspect_ratio.den = input->sample_aspect_ratio.num;
+    }
+
+    if (!ctx->initialised) {
+        err = transpose_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+    }
+    /* One kernel launch per plane; the first NULL output plane ends the loop. */
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem) output->data[p];
+        if (!dst)
+            break;
+        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
+        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
+        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dir);
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+                                                    p, 16);
+        if (err < 0) /* never enqueue with a work size the helper failed to compute */
+            goto fail;
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, NULL,
+                                     0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+    }
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void transpose_opencl_uninit(AVFilterContext *avctx)
+{
+    TransposeOpenCLContext *priv = avctx->priv;
+    cl_int status;
+
+    /* Release the kernel, then the command queue. A failed release is only
+     * logged; teardown continues with the common OpenCL filter cleanup. */
+    if (priv->kernel) {
+        status = clReleaseKernel(priv->kernel);
+        if (status != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release kernel: %d.\n", status);
+    }
+    if (priv->command_queue) {
+        status = clReleaseCommandQueue(priv->command_queue);
+        if (status != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR,
+                   "Failed to release command queue: %d.\n", status);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TransposeOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption transpose_opencl_options[] = {
+    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 3, FLAGS, "dir" },
+        { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
+        { "clock",       "rotate clockwise",                            0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK       }, .flags=FLAGS, .unit = "dir" },
+        { "cclock",      "rotate counter-clockwise",                    0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK      }, .flags=FLAGS, .unit = "dir" },
+        { "clock_flip",  "rotate clockwise with vertical flip",         0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP  }, .flags=FLAGS, .unit = "dir" },
+
+    { "passthrough", "do not apply transposition if the input matches the specified geometry",
+      OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE},  0, INT_MAX, FLAGS, "passthrough" },
+        { "none",      "always apply transposition",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE},      INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "portrait",  "preserve portrait geometry",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT},  INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "landscape", "preserve landscape geometry",  0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(transpose_opencl);
+
+static const AVFilterPad transpose_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .get_video_buffer = get_video_buffer,
+        .filter_frame = &transpose_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad transpose_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &transpose_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_transpose_opencl = {
+    .name           = "transpose_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Transpose input video"),
+    .priv_size      = sizeof(TransposeOpenCLContext),
+    .priv_class     = &transpose_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &transpose_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = transpose_opencl_inputs,
+    .outputs        = transpose_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
diff --git a/libavfilter/vf_transpose_vaapi.c b/libavfilter/vf_transpose_vaapi.c
new file mode 100644
index 0000000000000..0e2acc9983c2d
--- /dev/null
+++ b/libavfilter/vf_transpose_vaapi.c
@@ -0,0 +1,319 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "transpose.h"
+#include "vaapi_vpp.h"
+
+typedef struct TransposeVAAPIContext {
+    VAAPIVPPContext vpp_ctx; // must be the first field: avctx->priv is also read as a VAAPIVPPContext
+    int passthrough;         // PassthroughType option; config_output resets it to TRANSPOSE_PT_TYPE_NONE when not taken
+    int dir;                 // TransposeDir option selecting the rotation/flip
+    // VA_ROTATION_* and VA_MIRROR_* values derived from dir in transpose_vaapi_build_filter_params()
+    int rotation_state;
+    int mirror_state;
+} TransposeVAAPIContext;
+
+/**
+ * Query the driver's pipeline caps and map the dir option to VA rotation/mirror states.
+ *
+ * @return 0 on success, AVERROR(EIO) if the caps query fails, AVERROR(EINVAL) if unsupported
+ */
+static int transpose_vaapi_build_filter_params(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *vpp_ctx   = avctx->priv;
+    TransposeVAAPIContext *ctx = avctx->priv;
+    VAStatus vas;
+    VAProcPipelineCaps pipeline_caps;
+
+    memset(&pipeline_caps, 0, sizeof(pipeline_caps));
+    vas = vaQueryVideoProcPipelineCaps(vpp_ctx->hwctx->display,
+                                       vpp_ctx->va_context,
+                                       NULL, 0,
+                                       &pipeline_caps);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query pipeline "
+               "caps: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR(EIO);
+    }
+    /* Give up early only when neither capability exists: hflip/vflip need no rotation. */
+    if (!pipeline_caps.rotation_flags && !pipeline_caps.mirror_flags) {
+        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support transpose\n");
+        return AVERROR(EINVAL);
+    }
+    /* Translate the dir option into a rotation plus an optional mirror. */
+    switch (ctx->dir) {
+    case TRANSPOSE_CCLOCK_FLIP:
+        ctx->rotation_state = VA_ROTATION_270;
+        ctx->mirror_state   = VA_MIRROR_VERTICAL;
+        break;
+    case TRANSPOSE_CLOCK:
+        ctx->rotation_state = VA_ROTATION_90;
+        ctx->mirror_state   = VA_MIRROR_NONE;
+        break;
+    case TRANSPOSE_CCLOCK:
+        ctx->rotation_state = VA_ROTATION_270;
+        ctx->mirror_state   = VA_MIRROR_NONE;
+        break;
+    case TRANSPOSE_CLOCK_FLIP:
+        ctx->rotation_state = VA_ROTATION_90;
+        ctx->mirror_state   = VA_MIRROR_VERTICAL;
+        break;
+    case TRANSPOSE_REVERSAL:
+        ctx->rotation_state = VA_ROTATION_180;
+        ctx->mirror_state   = VA_MIRROR_NONE;
+        break;
+    case TRANSPOSE_HFLIP:
+        ctx->rotation_state = VA_ROTATION_NONE;
+        ctx->mirror_state   = VA_MIRROR_HORIZONTAL;
+        break;
+    case TRANSPOSE_VFLIP:
+        ctx->rotation_state = VA_ROTATION_NONE;
+        ctx->mirror_state   = VA_MIRROR_VERTICAL;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Failed to set direction to %d\n", ctx->dir);
+        return AVERROR(EINVAL);
+    }
+    /* Each state actually requested must be advertised by the driver. */
+    if (ctx->rotation_state != VA_ROTATION_NONE &&
+        !(pipeline_caps.rotation_flags & (1 << ctx->rotation_state))) {
+        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support rotation %d\n",
+               ctx->rotation_state);
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->mirror_state != VA_MIRROR_NONE &&
+        !(pipeline_caps.mirror_flags & ctx->mirror_state)) {
+        av_log(avctx, AV_LOG_ERROR, "VAAPI driver doesn't support mirror %d\n",
+               ctx->mirror_state);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/* Transpose one VAAPI frame, or forward it untouched when passthrough is active.
+ * Takes ownership of input_frame; returns ff_filter_frame()'s result or a negative AVERROR. */
+static int transpose_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame)
+{
+    AVFilterContext *avctx     = inlink->dst;
+    AVFilterLink *outlink      = avctx->outputs[0];
+    VAAPIVPPContext *vpp_ctx   = avctx->priv;
+    TransposeVAAPIContext *ctx = avctx->priv;
+    AVFrame *output_frame      = NULL;
+    VASurfaceID input_surface, output_surface;
+    VARectangle input_region, output_region;
+    VAProcPipelineParameterBuffer params;
+    int err;
+
+    if (ctx->passthrough)
+        return ff_filter_frame(outlink, input_frame);
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input_frame->format),
+           input_frame->width, input_frame->height, input_frame->pts);
+
+    if (vpp_ctx->va_context == VA_INVALID_ID) {
+        err = AVERROR(EINVAL);
+        goto fail; /* not a bare return: input_frame must be released */
+    }
+
+    input_surface = (VASurfaceID)(uintptr_t)input_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for transpose vpp input.\n",
+           input_surface);
+
+    output_frame = ff_get_video_buffer(outlink, vpp_ctx->output_width,
+                                       vpp_ctx->output_height);
+    if (!output_frame) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    output_surface = (VASurfaceID)(uintptr_t)output_frame->data[3];
+    av_log(avctx, AV_LOG_DEBUG, "Using surface %#x for transpose vpp output.\n",
+           output_surface);
+    memset(&params, 0, sizeof(params));
+    input_region = (VARectangle) {
+        .x      = 0,
+        .y      = 0,
+        .width  = input_frame->width,
+        .height = input_frame->height,
+    };
+
+    output_region = (VARectangle) {
+        .x      = 0,
+        .y      = 0,
+        .width  = output_frame->width,
+        .height = output_frame->height,
+    };
+
+    /* Rotation/mirror states were chosen in transpose_vaapi_build_filter_params(). */
+    params.rotation_state = ctx->rotation_state;
+    params.mirror_state = ctx->mirror_state;
+    params.filters     = &vpp_ctx->filter_buffers[0];
+    params.num_filters = vpp_ctx->nb_filter_buffers;
+    params.surface = input_surface;
+    params.surface_region = &input_region;
+    params.surface_color_standard =
+        ff_vaapi_vpp_colour_standard(input_frame->colorspace);
+    params.output_region = &output_region;
+    params.output_background_color = VAAPI_VPP_BACKGROUND_BLACK;
+    params.output_color_standard = params.surface_color_standard;
+
+    err = ff_vaapi_vpp_render_picture(avctx, &params, output_surface);
+    if (err < 0)
+        goto fail;
+
+    err = av_frame_copy_props(output_frame, input_frame);
+    if (err < 0)
+        goto fail;
+    av_frame_free(&input_frame);
+
+    av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output_frame->format),
+           output_frame->width, output_frame->height, output_frame->pts);
+    return ff_filter_frame(outlink, output_frame);
+
+fail:
+    av_frame_free(&input_frame);
+    av_frame_free(&output_frame);
+    return err;
+}
+
+static av_cold int transpose_vaapi_init(AVFilterContext *avctx)
+{
+    VAAPIVPPContext *base = avctx->priv;
+
+    /* Run the shared VPP context setup first, then install this filter's hooks. */
+    ff_vaapi_vpp_ctx_init(avctx);
+    base->output_format       = AV_PIX_FMT_NONE;
+    base->build_filter_params = transpose_vaapi_build_filter_params;
+    base->pipeline_uninit     = ff_vaapi_vpp_pipeline_uninit;
+    return 0;
+}
+
+static int transpose_vaapi_vpp_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx     = outlink->src;
+    VAAPIVPPContext *vpp_ctx   = avctx->priv;
+    TransposeVAAPIContext *ctx = avctx->priv;
+    AVFilterLink *inlink       = avctx->inputs[0];
+    const int keep_landscape   = inlink->w >= inlink->h &&
+                                 ctx->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE;
+    const int keep_portrait    = inlink->w <= inlink->h &&
+                                 ctx->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT;
+
+    /* Input already has the geometry to preserve: share its frames context and stop. */
+    if (keep_landscape || keep_portrait) {
+        outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
+        if (!outlink->hw_frames_ctx)
+            return AVERROR(ENOMEM);
+        av_log(avctx, AV_LOG_VERBOSE,
+               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
+               inlink->w, inlink->h, inlink->w, inlink->h);
+        return 0;
+    }
+
+    /* Not taken: clear the option so the per-frame callbacks see passthrough as off. */
+    ctx->passthrough = TRANSPOSE_PT_TYPE_NONE;
+
+    /* Quarter-turn directions exchange the output dimensions. */
+    if (ctx->dir == TRANSPOSE_CCLOCK_FLIP || ctx->dir == TRANSPOSE_CCLOCK ||
+        ctx->dir == TRANSPOSE_CLOCK       || ctx->dir == TRANSPOSE_CLOCK_FLIP) {
+        vpp_ctx->output_width  = inlink->h;
+        vpp_ctx->output_height = inlink->w;
+        av_log(avctx, AV_LOG_DEBUG, "swap width and height for clock/cclock rotation\n");
+    }
+
+    return ff_vaapi_vpp_config_output(outlink);
+}
+
+static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+    const TransposeVAAPIContext *s = inlink->dst->priv;
+    /* Passthrough frames are forwarded unchanged, so they use the null allocator. */
+    if (s->passthrough)
+        return ff_null_get_video_buffer(inlink, w, h);
+    return ff_default_get_video_buffer(inlink, w, h);
+}
+
+#define OFFSET(x) offsetof(TransposeVAAPIContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+static const AVOption transpose_vaapi_options[] = { /* defaults: dir=cclock_flip, passthrough=none */
+    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, "dir" },
+        { "cclock_flip",   "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
+        { "clock",         "rotate clockwise",                            0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK       }, .flags=FLAGS, .unit = "dir" },
+        { "cclock",        "rotate counter-clockwise",                    0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK      }, .flags=FLAGS, .unit = "dir" },
+        { "clock_flip",    "rotate clockwise with vertical flip",         0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP  }, .flags=FLAGS, .unit = "dir" },
+        { "reversal",      "rotate by half-turn",                         0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL    }, .flags=FLAGS, .unit = "dir" },
+        { "hflip",         "flip horizontally",                           0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP       }, .flags=FLAGS, .unit = "dir" },
+        { "vflip",         "flip vertically",                             0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP       }, .flags=FLAGS, .unit = "dir" },
+    /* Geometry-conditional bypass; compared against the input size in the output config callback. */
+    { "passthrough", "do not apply transposition if the input matches the specified geometry",
+      OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE},  0, INT_MAX, FLAGS, "passthrough" },
+        { "none",      "always apply transposition",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE},      INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "portrait",  "preserve portrait geometry",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT},  INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "landscape", "preserve landscape geometry",  0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+
+    { NULL } /* end of option list */
+};
+
+
+AVFILTER_DEFINE_CLASS(transpose_vaapi);
+
+static const AVFilterPad transpose_vaapi_inputs[] = {
+    {   /* single video input */
+        .name             = "default",
+        .type             = AVMEDIA_TYPE_VIDEO,
+        .config_props     = ff_vaapi_vpp_config_input,
+        .get_video_buffer = get_video_buffer,
+        .filter_frame     = transpose_vaapi_filter_frame,
+    },
+    { NULL } /* end of pad list */
+};
+
+static const AVFilterPad transpose_vaapi_outputs[] = {
+    {   /* single video output; size is decided in transpose_vaapi_vpp_config_output */
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = transpose_vaapi_vpp_config_output,
+    },
+    { NULL } /* end of pad list */
+};
+
+AVFilter ff_vf_transpose_vaapi = { /* filter definition: callbacks, pads and private context */
+    .name           = "transpose_vaapi",
+    .description    = NULL_IF_CONFIG_SMALL("VAAPI VPP for transpose"),
+    .priv_class     = &transpose_vaapi_class,
+    .priv_size      = sizeof(TransposeVAAPIContext),
+    .query_formats  = ff_vaapi_vpp_query_formats,
+    .init           = transpose_vaapi_init,
+    .uninit         = ff_vaapi_vpp_ctx_uninit,
+    .inputs         = transpose_vaapi_inputs,
+    .outputs        = transpose_vaapi_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
diff --git a/libavfilter/vf_vibrance.c b/libavfilter/vf_vibrance.c
new file mode 100644
index 0000000000000..1e5bf26512f37
--- /dev/null
+++ b/libavfilter/vf_vibrance.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct VibranceContext {
+    const AVClass *class;
+
+    float intensity;   /* "intensity" option */
+    float balance[3];  /* per-channel balance options, indexed G, B, R */
+    float lcoeffs[3];  /* per-channel luma coefficient options, indexed G, B, R */
+
+    int depth;         /* bit depth of component 0 of the input format, set in config_input */
+    /* slice worker matching depth: vibrance_slice8 or vibrance_slice16 */
+    int (*do_slice)(AVFilterContext *s, void *arg,
+                    int jobnr, int nb_jobs);
+} VibranceContext;
+
+static inline float lerpf(float v0, float v1, float f) /* linear blend between v0 and v1 */
+{
+    return v0 + (v1 - v0) * f; /* f = 0 gives v0, f = 1 gives v1; the slice workers also pass f outside [0, 1] */
+}
+
+/* Slice worker for 8-bit planar GBR(A): adjusts rows [slice_start, slice_end) of arg in place. */
+static int vibrance_slice8(AVFilterContext *avctx, void *arg, int jobnr, int nb_jobs)
+{
+    VibranceContext *s = avctx->priv;
+    AVFrame *frame = arg;
+    const int width = frame->width;
+    const int height = frame->height;
+    const float gc = s->lcoeffs[0];
+    const float bc = s->lcoeffs[1];
+    const float rc = s->lcoeffs[2];
+    const float intensity = s->intensity;
+    const float gintensity = intensity * s->balance[0];
+    const float bintensity = intensity * s->balance[1];
+    const float rintensity = intensity * s->balance[2];
+    const float sg = FFSIGN(gintensity), sb = FFSIGN(bintensity), sr = FFSIGN(rintensity); /* loop-invariant */
+    const int slice_start = (height * jobnr) / nb_jobs;
+    const int slice_end = (height * (jobnr + 1)) / nb_jobs;
+    const int glinesize = frame->linesize[0];
+    const int blinesize = frame->linesize[1];
+    const int rlinesize = frame->linesize[2];
+    uint8_t *gptr = frame->data[0] + slice_start * glinesize;
+    uint8_t *bptr = frame->data[1] + slice_start * blinesize;
+    uint8_t *rptr = frame->data[2] + slice_start * rlinesize;
+
+    for (int y = slice_start; y < slice_end; y++) {
+        for (int x = 0; x < width; x++) {
+            float g = gptr[x] / 255.f;
+            float b = bptr[x] / 255.f;
+            float r = rptr[x] / 255.f;
+            float max_color = FFMAX3(r, g, b);
+            float min_color = FFMIN3(r, g, b);
+            float color_saturation = max_color - min_color;
+            float luma = g * gc + r * rc + b * bc;
+            const float cg = 1.f + gintensity * (1.f - sg * color_saturation);
+            const float cb = 1.f + bintensity * (1.f - sb * color_saturation);
+            const float cr = 1.f + rintensity * (1.f - sr * color_saturation);
+            g = lerpf(luma, g, cg);
+            b = lerpf(luma, b, cb);
+            r = lerpf(luma, r, cr);
+            /* Round to nearest: plain truncation can lose one level even when the gain is 1. */
+            gptr[x] = av_clip_uint8(g * 255.f + .5f);
+            bptr[x] = av_clip_uint8(b * 255.f + .5f);
+            rptr[x] = av_clip_uint8(r * 255.f + .5f);
+        }
+        gptr += glinesize;
+        bptr += blinesize;
+        rptr += rlinesize;
+    }
+
+    return 0;
+}
+
+/* Slice worker for planar GBR(A) above 8 bits: adjusts rows [slice_start, slice_end) of arg in place. */
+static int vibrance_slice16(AVFilterContext *avctx, void *arg, int jobnr, int nb_jobs)
+{
+    VibranceContext *s = avctx->priv;
+    AVFrame *frame = arg;
+    const int depth = s->depth;
+    const float max = (1 << depth) - 1;
+    const float gc = s->lcoeffs[0];
+    const float bc = s->lcoeffs[1];
+    const float rc = s->lcoeffs[2];
+    const int width = frame->width;
+    const int height = frame->height;
+    const float intensity = s->intensity;
+    const float gintensity = intensity * s->balance[0];
+    const float bintensity = intensity * s->balance[1];
+    const float rintensity = intensity * s->balance[2];
+    const float sg = FFSIGN(gintensity), sb = FFSIGN(bintensity), sr = FFSIGN(rintensity); /* loop-invariant */
+    const int slice_start = (height * jobnr) / nb_jobs;
+    const int slice_end = (height * (jobnr + 1)) / nb_jobs;
+    const int glinesize = frame->linesize[0] / 2;
+    const int blinesize = frame->linesize[1] / 2;
+    const int rlinesize = frame->linesize[2] / 2;
+    uint16_t *gptr = (uint16_t *)frame->data[0] + slice_start * glinesize;
+    uint16_t *bptr = (uint16_t *)frame->data[1] + slice_start * blinesize;
+    uint16_t *rptr = (uint16_t *)frame->data[2] + slice_start * rlinesize;
+
+    for (int y = slice_start; y < slice_end; y++) {
+        for (int x = 0; x < width; x++) {
+            float g = gptr[x] / max;
+            float b = bptr[x] / max;
+            float r = rptr[x] / max;
+            float max_color = FFMAX3(r, g, b);
+            float min_color = FFMIN3(r, g, b);
+            float color_saturation = max_color - min_color;
+            float luma = g * gc + r * rc + b * bc;
+            const float cg = 1.f + gintensity * (1.f - sg * color_saturation);
+            const float cb = 1.f + bintensity * (1.f - sb * color_saturation);
+            const float cr = 1.f + rintensity * (1.f - sr * color_saturation);
+            g = lerpf(luma, g, cg);
+            b = lerpf(luma, b, cb);
+            r = lerpf(luma, r, cr);
+            /* Round to nearest: plain truncation can lose one level even when the gain is 1. */
+            gptr[x] = av_clip_uintp2_c(g * max + .5f, depth);
+            bptr[x] = av_clip_uintp2_c(b * max + .5f, depth);
+            rptr[x] = av_clip_uintp2_c(r * max + .5f, depth);
+        }
+        gptr += glinesize;
+        bptr += blinesize;
+        rptr += rlinesize;
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *link, AVFrame *frame) /* adjusts frame in place, then forwards it */
+{
+    AVFilterContext *avctx = link->dst;
+    VibranceContext *s = avctx->priv;
+    int res = avctx->internal->execute(avctx, s->do_slice, frame, NULL, /* at most one job per row */
+                                       FFMIN(frame->height, ff_filter_get_nb_threads(avctx)));
+    if (res) {
+        av_frame_free(&frame); /* the frame is ours: do not leak it on failure */
+        return res;
+    }
+    return ff_filter_frame(avctx->outputs[0], frame);
+}
+
+static av_cold int query_formats(AVFilterContext *avctx)
+{
+    /* Only planar GBR(A) layouts are offered, 8 to 16 bits per component;
+     * the slice workers read G, B and R from planes 0, 1 and 2. */
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
+        AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmt_list = ff_make_format_list(pixel_fmts);
+
+    if (!fmt_list)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(avctx, fmt_list);
+}
+
+static av_cold int config_input(AVFilterLink *inlink)
+{
+    VibranceContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    /* Record the component depth and pick the slice worker that matches it. */
+    s->depth = desc->comp[0].depth;
+    s->do_slice = vibrance_slice8;
+    if (s->depth > 8)
+        s->do_slice = vibrance_slice16;
+    return 0;
+}
+
+static const AVFilterPad vibrance_inputs[] = {
+    {   /* single video input */
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .config_props   = config_input,
+        .filter_frame   = filter_frame,
+        .needs_writable = 1, /* the slice workers modify the frame in place */
+    },
+    { NULL } /* end of pad list */
+};
+
+static const AVFilterPad vibrance_outputs[] = {
+    {   /* single video output, no callbacks */
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL } /* end of pad list */
+};
+
+#define OFFSET(x) offsetof(VibranceContext, x)
+#define VF (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
+/* balance[] and lcoeffs[] are indexed G, B, R; the luma defaults are the BT.709 weights (R .2126, G .7152, B .0722). */
+static const AVOption vibrance_options[] = {
+    { "intensity", "set the intensity value",   OFFSET(intensity),  AV_OPT_TYPE_FLOAT, {.dbl=0},       -2,  2, VF },
+    { "rbal", "set the red balance value",      OFFSET(balance[2]), AV_OPT_TYPE_FLOAT, {.dbl=1},      -10, 10, VF },
+    { "gbal", "set the green balance value",    OFFSET(balance[0]), AV_OPT_TYPE_FLOAT, {.dbl=1},      -10, 10, VF },
+    { "bbal", "set the blue balance value",     OFFSET(balance[1]), AV_OPT_TYPE_FLOAT, {.dbl=1},      -10, 10, VF },
+    { "rlum", "set the red luma coefficient",   OFFSET(lcoeffs[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.212656}, 0,  1, VF },
+    { "glum", "set the green luma coefficient", OFFSET(lcoeffs[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.715158}, 0,  1, VF },
+    { "blum", "set the blue luma coefficient",  OFFSET(lcoeffs[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.072186}, 0,  1, VF },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vibrance);
+
+AVFilter ff_vf_vibrance = { /* filter definition: callbacks, pads and private context */
+    .name          = "vibrance",
+    .description   = NULL_IF_CONFIG_SMALL("Boost or alter saturation."),
+    .priv_class    = &vibrance_class,
+    .priv_size     = sizeof(VibranceContext),
+    .query_formats = query_formats,
+    .outputs       = vibrance_outputs,
+    .inputs        = vibrance_inputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
diff --git a/libavfilter/vf_waveform.c b/libavfilter/vf_waveform.c
index bcee57cf3b056..80336284070e6 100644
--- a/libavfilter/vf_waveform.c
+++ b/libavfilter/vf_waveform.c
@@ -102,6 +102,7 @@ typedef struct WaveformContext {
     int            shift_w[4], shift_h[4];
     GraticuleLines *glines;
     int            nb_glines;
+    int            rgb;
 
     int (*waveform_slice)(AVFilterContext *ctx, void *arg,
                           int jobnr, int nb_jobs);
@@ -2610,17 +2611,18 @@ static void graticule_row(WaveformContext *s, AVFrame *out)
     const float o1 = s->opacity;
     const float o2 = 1. - o1;
     const int height = s->display == PARADE ? out->height / s->acomp : out->height;
-    int k = 0, c, p, l, offset_x = 0, offset_y = 0;
+    int C, k = 0, c, p, l, offset_x = 0, offset_y = 0;
 
     for (c = 0; c < s->ncomp; c++) {
         if (!((1 << c) & s->pcomp) || (!s->display && k > 0))
             continue;
 
         k++;
+        C = s->rgb ? 0 : c;
         for (p = 0; p < s->ncomp; p++) {
             const int v = s->grat_yuva_color[p];
             for (l = 0; l < s->nb_glines; l++) {
-                const uint16_t pos = s->glines[l].line[c].pos;
+                const uint16_t pos = s->glines[l].line[C].pos;
                 int x = offset_x + (s->mirror ? s->size - 1 - pos : pos);
                 uint8_t *dst = out->data[p] + offset_y * out->linesize[p] + x;
 
@@ -2629,8 +2631,8 @@ static void graticule_row(WaveformContext *s, AVFrame *out)
         }
 
         for (l = 0; l < s->nb_glines && (s->flags & 1); l++) {
-            const char *name = s->glines[l].line[c].name;
-            const uint16_t pos = s->glines[l].line[c].pos;
+            const char *name = s->glines[l].line[C].name;
+            const uint16_t pos = s->glines[l].line[C].pos;
             int x = offset_x + (s->mirror ? s->size - 1 - pos : pos) - 10;
 
             if (x < 0)
@@ -2651,17 +2653,18 @@ static void graticule16_row(WaveformContext *s, AVFrame *out)
     const float o2 = 1. - o1;
     const int mult = s->max / 256;
     const int height = s->display == PARADE ? out->height / s->acomp : out->height;
-    int k = 0, c, p, l, offset_x = 0, offset_y = 0;
+    int C, k = 0, c, p, l, offset_x = 0, offset_y = 0;
 
     for (c = 0; c < s->ncomp; c++) {
         if (!((1 << c) & s->pcomp) || (!s->display && k > 0))
             continue;
 
         k++;
+        C = s->rgb ? 0 : c;
         for (p = 0; p < s->ncomp; p++) {
             const int v = s->grat_yuva_color[p] * mult;
             for (l = 0; l < s->nb_glines ; l++) {
-                const uint16_t pos = s->glines[l].line[c].pos;
+                const uint16_t pos = s->glines[l].line[C].pos;
                 int x = offset_x + (s->mirror ? s->size - 1 - pos : pos);
                 uint16_t *dst = (uint16_t *)(out->data[p] + offset_y * out->linesize[p]) + x;
 
@@ -2670,8 +2673,8 @@ static void graticule16_row(WaveformContext *s, AVFrame *out)
         }
 
         for (l = 0; l < s->nb_glines && (s->flags & 1); l++) {
-            const char *name = s->glines[l].line[c].name;
-            const uint16_t pos = s->glines[l].line[c].pos;
+            const char *name = s->glines[l].line[C].name;
+            const uint16_t pos = s->glines[l].line[C].pos;
             int x = offset_x + (s->mirror ? s->size - 1 - pos : pos) - 10;
 
             if (x < 0)
@@ -2691,17 +2694,18 @@ static void graticule_column(WaveformContext *s, AVFrame *out)
     const float o1 = s->opacity;
     const float o2 = 1. - o1;
     const int width = s->display == PARADE ? out->width / s->acomp : out->width;
-    int k = 0, c, p, l, offset_y = 0, offset_x = 0;
+    int C, k = 0, c, p, l, offset_y = 0, offset_x = 0;
 
     for (c = 0; c < s->ncomp; c++) {
         if ((!((1 << c) & s->pcomp) || (!s->display && k > 0)))
             continue;
 
         k++;
+        C = s->rgb ? 0 : c;
         for (p = 0; p < s->ncomp; p++) {
             const int v = s->grat_yuva_color[p];
             for (l = 0; l < s->nb_glines ; l++) {
-                const uint16_t pos = s->glines[l].line[c].pos;
+                const uint16_t pos = s->glines[l].line[C].pos;
                 int y = offset_y + (s->mirror ? s->size - 1 - pos : pos);
                 uint8_t *dst = out->data[p] + y * out->linesize[p] + offset_x;
 
@@ -2710,8 +2714,8 @@ static void graticule_column(WaveformContext *s, AVFrame *out)
         }
 
         for (l = 0; l < s->nb_glines && (s->flags & 1); l++) {
-            const char *name = s->glines[l].line[c].name;
-            const uint16_t pos = s->glines[l].line[c].pos;
+            const char *name = s->glines[l].line[C].name;
+            const uint16_t pos = s->glines[l].line[C].pos;
             int y = offset_y + (s->mirror ? s->size - 1 - pos : pos) - 10;
 
             if (y < 0)
@@ -2732,17 +2736,18 @@ static void graticule16_column(WaveformContext *s, AVFrame *out)
     const float o2 = 1. - o1;
     const int mult = s->max / 256;
     const int width = s->display == PARADE ? out->width / s->acomp : out->width;
-    int k = 0, c, p, l, offset_x = 0, offset_y = 0;
+    int C, k = 0, c, p, l, offset_x = 0, offset_y = 0;
 
     for (c = 0; c < s->ncomp; c++) {
         if ((!((1 << c) & s->pcomp) || (!s->display && k > 0)))
             continue;
 
         k++;
+        C = s->rgb ? 0 : c;
         for (p = 0; p < s->ncomp; p++) {
             const int v = s->grat_yuva_color[p] * mult;
             for (l = 0; l < s->nb_glines ; l++) {
-                const uint16_t pos = s->glines[l].line[c].pos;
+                const uint16_t pos = s->glines[l].line[C].pos;
                 int y = offset_y + (s->mirror ? s->size - 1 - pos : pos);
                 uint16_t *dst = (uint16_t *)(out->data[p] + y * out->linesize[p]) + offset_x;
 
@@ -2751,8 +2756,8 @@ static void graticule16_column(WaveformContext *s, AVFrame *out)
         }
 
         for (l = 0; l < s->nb_glines && (s->flags & 1); l++) {
-            const char *name = s->glines[l].line[c].name;
-            const uint16_t pos = s->glines[l].line[c].pos;
+            const char *name = s->glines[l].line[C].name;
+            const uint16_t pos = s->glines[l].line[C].pos;
             int y = offset_y + (s->mirror ? s->size - 1 - pos: pos) - 10;
 
             if (y < 0)
@@ -2996,8 +3001,8 @@ static int config_input(AVFilterLink *inlink)
     case AV_PIX_FMT_GBRP9:
     case AV_PIX_FMT_GBRP10:
     case AV_PIX_FMT_GBRP12:
+        s->rgb = 1;
         memcpy(s->bg_color, black_gbrp_color, sizeof(s->bg_color));
-        s->graticulef = graticule_none;
         break;
     default:
         memcpy(s->bg_color, black_yuva_color, sizeof(s->bg_color));
@@ -3020,6 +3025,9 @@ static int config_output(AVFilterLink *outlink)
             comp++;
     }
     s->acomp = comp;
+    if (s->acomp == 0)
+        return AVERROR(EINVAL);
+
     s->odesc = av_pix_fmt_desc_get(outlink->format);
     s->dcomp = s->odesc->nb_components;
 
diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index f58d8ac2bce32..3107924932a51 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -22,7 +22,6 @@
 #include "libavutil/avassert.h"
 #include "libavutil/cpu.h"
 #include "libavutil/common.h"
-#include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
 #include "avfilter.h"
@@ -254,166 +253,6 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
     emms_c();
 }
 
-static int return_frame(AVFilterContext *ctx, int is_second)
-{
-    YADIFContext *yadif = ctx->priv;
-    AVFilterLink *link  = ctx->outputs[0];
-    int tff, ret;
-
-    if (yadif->parity == -1) {
-        tff = yadif->cur->interlaced_frame ?
-              yadif->cur->top_field_first : 1;
-    } else {
-        tff = yadif->parity ^ 1;
-    }
-
-    if (is_second) {
-        yadif->out = ff_get_video_buffer(link, link->w, link->h);
-        if (!yadif->out)
-            return AVERROR(ENOMEM);
-
-        av_frame_copy_props(yadif->out, yadif->cur);
-        yadif->out->interlaced_frame = 0;
-    }
-
-    filter(ctx, yadif->out, tff ^ !is_second, tff);
-
-    if (is_second) {
-        int64_t cur_pts  = yadif->cur->pts;
-        int64_t next_pts = yadif->next->pts;
-
-        if (next_pts != AV_NOPTS_VALUE && cur_pts != AV_NOPTS_VALUE) {
-            yadif->out->pts = cur_pts + next_pts;
-        } else {
-            yadif->out->pts = AV_NOPTS_VALUE;
-        }
-    }
-    ret = ff_filter_frame(ctx->outputs[0], yadif->out);
-
-    yadif->frame_pending = (yadif->mode&1) && !is_second;
-    return ret;
-}
-
-static int checkstride(YADIFContext *yadif, const AVFrame *a, const AVFrame *b)
-{
-    int i;
-    for (i = 0; i < yadif->csp->nb_components; i++)
-        if (a->linesize[i] != b->linesize[i])
-            return 1;
-    return 0;
-}
-
-static void fixstride(AVFilterLink *link, AVFrame *f)
-{
-    AVFrame *dst = ff_default_get_video_buffer(link, f->width, f->height);
-    if(!dst)
-        return;
-    av_frame_copy_props(dst, f);
-    av_image_copy(dst->data, dst->linesize,
-                  (const uint8_t **)f->data, f->linesize,
-                  dst->format, dst->width, dst->height);
-    av_frame_unref(f);
-    av_frame_move_ref(f, dst);
-    av_frame_free(&dst);
-}
-
-static int filter_frame(AVFilterLink *link, AVFrame *frame)
-{
-    AVFilterContext *ctx = link->dst;
-    YADIFContext *yadif = ctx->priv;
-
-    av_assert0(frame);
-
-    if (yadif->frame_pending)
-        return_frame(ctx, 1);
-
-    if (yadif->prev)
-        av_frame_free(&yadif->prev);
-    yadif->prev = yadif->cur;
-    yadif->cur  = yadif->next;
-    yadif->next = frame;
-
-    if (!yadif->cur &&
-        !(yadif->cur = av_frame_clone(yadif->next)))
-        return AVERROR(ENOMEM);
-
-    if (checkstride(yadif, yadif->next, yadif->cur)) {
-        av_log(ctx, AV_LOG_VERBOSE, "Reallocating frame due to differing stride\n");
-        fixstride(link, yadif->next);
-    }
-    if (checkstride(yadif, yadif->next, yadif->cur))
-        fixstride(link, yadif->cur);
-    if (yadif->prev && checkstride(yadif, yadif->next, yadif->prev))
-        fixstride(link, yadif->prev);
-    if (checkstride(yadif, yadif->next, yadif->cur) || (yadif->prev && checkstride(yadif, yadif->next, yadif->prev))) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate frame\n");
-        return -1;
-    }
-
-    if (!yadif->prev)
-        return 0;
-
-    if ((yadif->deint && !yadif->cur->interlaced_frame) ||
-        ctx->is_disabled ||
-        (yadif->deint && !yadif->prev->interlaced_frame && yadif->prev->repeat_pict) ||
-        (yadif->deint && !yadif->next->interlaced_frame && yadif->next->repeat_pict)
-    ) {
-        yadif->out  = av_frame_clone(yadif->cur);
-        if (!yadif->out)
-            return AVERROR(ENOMEM);
-
-        av_frame_free(&yadif->prev);
-        if (yadif->out->pts != AV_NOPTS_VALUE)
-            yadif->out->pts *= 2;
-        return ff_filter_frame(ctx->outputs[0], yadif->out);
-    }
-
-    yadif->out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
-    if (!yadif->out)
-        return AVERROR(ENOMEM);
-
-    av_frame_copy_props(yadif->out, yadif->cur);
-    yadif->out->interlaced_frame = 0;
-
-    if (yadif->out->pts != AV_NOPTS_VALUE)
-        yadif->out->pts *= 2;
-
-    return return_frame(ctx, 0);
-}
-
-static int request_frame(AVFilterLink *link)
-{
-    AVFilterContext *ctx = link->src;
-    YADIFContext *yadif = ctx->priv;
-    int ret;
-
-    if (yadif->frame_pending) {
-        return_frame(ctx, 1);
-        return 0;
-    }
-
-    if (yadif->eof)
-        return AVERROR_EOF;
-
-    ret  = ff_request_frame(ctx->inputs[0]);
-
-    if (ret == AVERROR_EOF && yadif->cur) {
-        AVFrame *next = av_frame_clone(yadif->next);
-
-        if (!next)
-            return AVERROR(ENOMEM);
-
-        next->pts = yadif->next->pts * 2 - yadif->cur->pts;
-
-        filter_frame(ctx->inputs[0], next);
-        yadif->eof = 1;
-    } else if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
 static av_cold void uninit(AVFilterContext *ctx)
 {
     YADIFContext *yadif = ctx->priv;
@@ -492,6 +331,7 @@ static int config_props(AVFilterLink *link)
     }
 
     s->csp = av_pix_fmt_desc_get(link->format);
+    s->filter = filter;
     if (s->csp->comp[0].depth > 8) {
         s->filter_line  = filter_line_c_16bit;
         s->filter_edges = filter_edges_16bit;
@@ -507,37 +347,19 @@ static int config_props(AVFilterLink *link)
 }
 
 
-#define OFFSET(x) offsetof(YADIFContext, x)
-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
-
-#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
-
-static const AVOption yadif_options[] = {
-    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
-    CONST("send_frame",           "send one frame for each frame",                                     YADIF_MODE_SEND_FRAME,           "mode"),
-    CONST("send_field",           "send one frame for each field",                                     YADIF_MODE_SEND_FIELD,           "mode"),
-    CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"),
-    CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"),
-
-    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
-    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,  "parity"),
-    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,  "parity"),
-    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO, "parity"),
-
-    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
-    CONST("all",        "deinterlace all frames",                       YADIF_DEINT_ALL,         "deint"),
-    CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED,  "deint"),
-
-    { NULL }
+static const AVClass yadif_class = {
+    .class_name = "yadif",
+    .item_name  = av_default_item_name,
+    .option     = ff_yadif_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_FILTER,
 };
 
-AVFILTER_DEFINE_CLASS(yadif);
-
 static const AVFilterPad avfilter_vf_yadif_inputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .filter_frame  = filter_frame,
+        .filter_frame  = ff_yadif_filter_frame,
     },
     { NULL }
 };
@@ -546,7 +368,7 @@ static const AVFilterPad avfilter_vf_yadif_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
+        .request_frame = ff_yadif_request_frame,
         .config_props  = config_props,
     },
     { NULL }
diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c
new file mode 100644
index 0000000000000..141dcb17f7777
--- /dev/null
+++ b/libavfilter/vf_yadif_cuda.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
+#include "internal.h"
+#include "yadif.h"
+
+extern char vf_yadif_cuda_ptx[];
+
+typedef struct DeintCUDAContext {
+    YADIFContext yadif;  // shared yadif state; NOTE(review): presumably has to stay the first member for the common ff_yadif_* code - confirm
+    /* Hardware handles; both buffer references are released in deint_cuda_uninit() */
+    AVCUDADeviceContext *hwctx;            // hwctx of the device behind device_ref
+    AVBufferRef         *device_ref;       // own reference to input_frames->device_ref
+    AVBufferRef         *input_frames_ref; // own reference to the input link's hw_frames_ctx
+    AVHWFramesContext   *input_frames;     // input_frames_ref->data
+    /* CUDA objects, all set up in config_output() */
+    CUcontext   cu_ctx;          // hwctx->cuda_ctx
+    CUstream    stream;          // hwctx->stream
+    CUmodule    cu_module;       // module loaded from vf_yadif_cuda_ptx
+    CUfunction  cu_func_uchar;   // kernel "yadif_uchar"
+    CUfunction  cu_func_uchar2;  // kernel "yadif_uchar2"
+    CUfunction  cu_func_ushort;  // kernel "yadif_ushort"
+    CUfunction  cu_func_ushort2; // kernel "yadif_ushort2"
+} DeintCUDAContext;
+
+#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )        // integer division rounding up (for positive operands)
+#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1)) // NOTE(review): the mask form presumably expects b to be a power of two; not referenced in this file
+#define BLOCKX 32  // thread-block width used for the kernel launches
+#define BLOCKY 16  // thread-block height used for the kernel launches
+
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) // expects `ctx` and `s` to be in scope at the call site
+
+/* Run one yadif kernel over a single plane: wrap prev/cur/next in point-sampled
+ * integer texture objects, launch func on s->stream with one thread per
+ * destination element, then destroy the textures again.
+ * Returns the result of the last CHECK_CU call made here (negative on failure). */
+static int call_kernel(AVFilterContext *ctx, CUfunction func,
+                       CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
+                       CUarray_format format, int channels,
+                       int src_width,  // Width is pixels per channel
+                       int src_height, // Height is pixels per channel
+                       int src_pitch,  // Pitch is bytes
+                       CUdeviceptr dst,
+                       int dst_width,  // Width is pixels per channel
+                       int dst_height, // Height is pixels per channel
+                       int dst_pitch,  // Pitch is pixels per channel
+                       int parity, int tff)
+{
+    DeintCUDAContext *s = ctx->priv;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+    const CUdeviceptr src[3] = { prev, cur, next };
+    CUtexObject tex[3] = { 0 };
+    int i, ret;
+    // The kernels declare this parameter as bool, so pass a one-byte 0/1 value
+    // rather than an int holding 0 or 2.
+    unsigned char skip_spatial_check = !!(s->yadif.mode & 2);
+
+    // Listed in the order in which the kernels declare their parameters.
+    void *args[] = { &dst, &tex[0], &tex[1], &tex[2],
+                     &dst_width, &dst_height, &dst_pitch,
+                     &src_width, &src_height, &parity, &tff,
+                     &skip_spatial_check };
+
+    CUDA_TEXTURE_DESC tex_desc = {
+        .filterMode = CU_TR_FILTER_MODE_POINT,
+        .flags = CU_TRSF_READ_AS_INTEGER,
+    };
+
+    CUDA_RESOURCE_DESC res_desc = {
+        .resType = CU_RESOURCE_TYPE_PITCH2D,
+        .res.pitch2D.format = format,
+        .res.pitch2D.numChannels = channels,
+        .res.pitch2D.width = src_width,
+        .res.pitch2D.height = src_height,
+        .res.pitch2D.pitchInBytes = src_pitch,
+    };
+
+    // The three textures share one descriptor; only the base pointer differs.
+    for (i = 0; i < 3; i++) {
+        res_desc.res.pitch2D.devPtr = src[i];
+        ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL));
+        if (ret < 0)
+            goto exit;
+    }
+
+    ret = CHECK_CU(cu->cuLaunchKernel(func,
+                                      DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
+                                      BLOCKX, BLOCKY, 1,
+                                      0, s->stream, args, NULL));
+
+exit:
+    // Destroy only the textures that were actually created.
+    for (i = 0; i < 3; i++)
+        if (tex[i])
+            CHECK_CU(cu->cuTexObjectDestroy(tex[i]));
+
+    return ret;
+}
+
+/* yadif filter callback: deinterlace every plane of y->cur into dst on the GPU,
+ * reading y->prev and y->next as well, then wait for the stream to finish. */
+static void filter(AVFilterContext *ctx, AVFrame *dst,
+                   int parity, int tff)
+{
+    DeintCUDAContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+    CUcontext dummy;
+    int i, ret;
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
+    if (ret < 0)
+        return;
+
+    for (i = 0; i < y->csp->nb_components; i++) {
+        CUfunction func;
+        CUarray_format format;
+        int pixel_size, channels;
+        const AVComponentDescriptor *comp = &y->csp->comp[i];
+
+        if (comp->plane < i) {
+            // We process planes as a whole, so don't reprocess
+            // them for additional components
+            continue;
+        }
+
+        // pixel_size is the divisor on the line after next, so any value other
+        // than 1 or 2 is mapped to 0 channels and rejected without dividing.
+        pixel_size = (comp->depth + comp->shift) / 8;
+        channels = (pixel_size == 1 || pixel_size == 2) ? comp->step / pixel_size : 0;
+        if (channels < 1 || channels > 2) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+            break;
+        }
+        if (pixel_size == 1) {
+            func = channels == 1 ? s->cu_func_uchar : s->cu_func_uchar2;
+            format = CU_AD_FORMAT_UNSIGNED_INT8;
+        } else {
+            func = channels == 1 ? s->cu_func_ushort : s->cu_func_ushort2;
+            format = CU_AD_FORMAT_UNSIGNED_INT16;
+        }
+        av_log(ctx, AV_LOG_TRACE,
+               "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
+               comp->plane, pixel_size, channels);
+        // Planes after the first use the chroma-shifted dimensions; the
+        // destination pitch is converted from bytes by dividing by comp->step.
+        ret = call_kernel(ctx, func,
+                          (CUdeviceptr)y->prev->data[i],
+                          (CUdeviceptr)y->cur->data[i],
+                          (CUdeviceptr)y->next->data[i],
+                          format, channels,
+                          AV_CEIL_RSHIFT(y->cur->width, i ? y->csp->log2_chroma_w : 0),
+                          AV_CEIL_RSHIFT(y->cur->height, i ? y->csp->log2_chroma_h : 0),
+                          y->cur->linesize[i],
+                          (CUdeviceptr)dst->data[i],
+                          AV_CEIL_RSHIFT(dst->width, i ? y->csp->log2_chroma_w : 0),
+                          AV_CEIL_RSHIFT(dst->height, i ? y->csp->log2_chroma_h : 0),
+                          dst->linesize[i] / comp->step,
+                          parity, tff);
+        if (ret < 0)
+            break; // stop at the first plane that fails
+    }
+
+    // Reached on failure too, so kernels already queued for earlier planes
+    // complete before the context is popped.
+    CHECK_CU(cu->cuStreamSynchronize(s->stream));
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+}
+
+/* Filter teardown: unload the CUDA module if one was loaded, then release the
+ * cached frames and the device / frames-context references. */
+static av_cold void deint_cuda_uninit(AVFilterContext *ctx)
+{
+    DeintCUDAContext *s = ctx->priv;
+    YADIFContext *yadif = &s->yadif;
+    CUcontext popped;
+
+    if (s->hwctx && s->cu_module) {
+        CudaFunctions *cu = s->hwctx->internal->cuda_dl;
+        CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
+        CHECK_CU(cu->cuModuleUnload(s->cu_module));
+        CHECK_CU(cu->cuCtxPopCurrent(&popped));
+    }
+    av_frame_free(&yadif->prev);
+    av_frame_free(&yadif->cur);
+    av_frame_free(&yadif->next);
+    av_buffer_unref(&s->device_ref);
+    s->hwctx = NULL;        // was obtained through device_ref
+    av_buffer_unref(&s->input_frames_ref);
+    s->input_frames = NULL; // was input_frames_ref->data
+}
+
+/* Offer AV_PIX_FMT_CUDA as the only format on both the input and output link. */
+static int deint_cuda_query_formats(AVFilterContext *ctx)
+{
+    enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
+    };
+    int ret;
+
+    ret = ff_formats_ref(ff_make_format_list(pix_fmts),
+                         &ctx->inputs[0]->out_formats);
+    if (ret >= 0)
+        ret = ff_formats_ref(ff_make_format_list(pix_fmts),
+                             &ctx->outputs[0]->in_formats);
+
+    return ret < 0 ? ret : 0;
+}
+
+/* Input link setup: keep our own reference to the link's hw frames context. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    DeintCUDAContext *s  = ctx->priv;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the processing device.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
+    if (s->input_frames_ref) {
+        s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
+        return 0;
+    }
+    av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
+           "failed.\n");
+    return AVERROR(ENOMEM);
+}
+
+/* Output link setup: reference the input's CUDA device, create the output
+ * hardware frames context, set the output timing and load the yadif kernels. */
+static int config_output(AVFilterLink *link)
+{
+    AVHWFramesContext *output_frames;
+    AVFilterContext *ctx = link->src;
+    DeintCUDAContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    CudaFunctions *cu;
+    int ret = 0;
+    CUcontext dummy;
+
+    av_assert0(s->input_frames);
+    s->device_ref = av_buffer_ref(s->input_frames->device_ref);
+    if (!s->device_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A device reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    s->hwctx = ((AVHWDeviceContext*)s->device_ref->data)->hwctx;
+    s->cu_ctx = s->hwctx->cuda_ctx;
+    s->stream = s->hwctx->stream;
+    cu = s->hwctx->internal->cuda_dl;
+
+    // No CUDA context has been pushed yet, so failures up to the
+    // cuCtxPushCurrent() call below return directly rather than popping one.
+    link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
+    if (!link->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
+               "for output.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+    output_frames->format    = AV_PIX_FMT_CUDA;
+    output_frames->sw_format = s->input_frames->sw_format;
+    output_frames->width     = ctx->inputs[0]->w;
+    output_frames->height    = ctx->inputs[0]->h;
+    output_frames->initial_pool_size = 4;
+
+    ret = ff_filter_init_hw_frames(ctx, link, 10);
+    if (ret < 0)
+        return ret;
+
+    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialise CUDA frame "
+               "context for output: %d\n", ret);
+        return ret;
+    }
+
+    // The output time base is the input one with its denominator doubled
+    link->time_base.num = ctx->inputs[0]->time_base.num;
+    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
+    link->w             = ctx->inputs[0]->w;
+    link->h             = ctx->inputs[0]->h;
+
+    if(y->mode & 1)
+        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
+                                    (AVRational){2, 1});
+
+    if (link->w < 3 || link->h < 3) {
+        av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
+        return AVERROR(EINVAL);
+    }
+
+    y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
+    y->filter = filter;
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
+    if (ret < 0)
+        return ret;
+
+    ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
+    if (ret < 0)
+        goto exit;
+
+    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
+
+exit:
+    // Balances the successful cuCtxPushCurrent() above
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+
+    return ret;
+}
+
+static const AVClass yadif_cuda_class = {
+    .class_name = "yadif_cuda",
+    .item_name  = av_default_item_name,
+    .option     = ff_yadif_options,         // option table defined outside this file
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_FILTER,
+};
+
+static const AVFilterPad deint_cuda_inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = ff_yadif_filter_frame, // frame handler defined outside this file
+        .config_props  = config_input,          // stores a reference to the link's hw frames context
+    },
+    { NULL }
+};
+
+static const AVFilterPad deint_cuda_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = ff_yadif_request_frame, // request handler defined outside this file
+        .config_props  = config_output,          // creates the output frames context and loads the kernels
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_yadif_cuda = {
+    .name           = "yadif_cuda",
+    .description    = NULL_IF_CONFIG_SMALL("Deinterlace CUDA frames"),
+    .priv_size      = sizeof(DeintCUDAContext),  // private context embeds a YADIFContext as its first member
+    .priv_class     = &yadif_cuda_class,
+    .uninit         = deint_cuda_uninit,
+    .query_formats  = deint_cuda_query_formats,  // AV_PIX_FMT_CUDA only, on both links
+    .inputs         = deint_cuda_inputs,
+    .outputs        = deint_cuda_outputs,
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
diff --git a/libavfilter/vf_yadif_cuda.cu b/libavfilter/vf_yadif_cuda.cu
new file mode 100644
index 0000000000000..12e7e4a443cea
--- /dev/null
+++ b/libavfilter/vf_yadif_cuda.cu
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+template<typename T> // a..g and h..n: seven neighbouring samples from each of two lines (the callers pass the lines above and below the output pixel)
+__inline__ __device__ T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
+                                          T h, T i, T j, T k, T l, T m, T n)
+{
+    int spatial_pred = (d + k)/2;   // default candidate: average of the two centre samples
+    int spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
+    // First diagonal direction: upper line offset one sample to one side, lower line to the other
+    int score = abs(b - k) + abs(c - l) + abs(d - m);
+    if (score < spatial_score) {
+        spatial_pred = (c + l)/2;
+        spatial_score = score;
+        score = abs(a - l) + abs(b - m) + abs(c - n); // wider offset, tried only when the previous pairing won
+        if (score < spatial_score) {
+          spatial_pred = (b + m)/2;
+          spatial_score = score;
+        }
+    }
+    score = abs(d - i) + abs(e - j) + abs(f - k);     // opposite diagonal direction, compared against the best score so far
+    if (score < spatial_score) {
+        spatial_pred = (e + j)/2;
+        spatial_score = score;
+        score = abs(e - h) + abs(f - i) + abs(g - j); // wider offset in the opposite direction
+        if (score < spatial_score) {
+          spatial_pred = (f + i)/2;
+          spatial_score = score;
+        }
+    }
+    return spatial_pred; // the int result is converted back to T here
+}
+
+// Largest of three ints.
+__inline__ __device__ int max3(int a, int b, int c)
+{
+    return max(max(a, b), c);
+}
+
+// Smallest of three ints.
+__inline__ __device__ int min3(int a, int b, int c)
+{
+    return min(min(a, b), c);
+}
+
+template<typename T> // limits spatial_pred to within `diff` of p2; see the callers for which frame and line each of A..L comes from
+__inline__ __device__ T temporal_predictor(T A, T B, T C, T D, T E, T F,
+                                           T G, T H, T I, T J, T K, T L,
+                                           T spatial_pred, bool skip_check)
+{
+    int p0 = (C + H) / 2;
+    int p1 = F;
+    int p2 = (D + I) / 2; // centre of the range the result is clamped to
+    int p3 = G;
+    int p4 = (E + J) / 2;
+    // Three absolute-difference measures; the largest one is the initial allowed deviation
+    int tdiff0 = abs(D - I);
+    int tdiff1 = (abs(A - F) + abs(B - G)) / 2;
+    int tdiff2 = (abs(K - F) + abs(G - L)) / 2;
+
+    int diff = max3(tdiff0, tdiff1, tdiff2);
+    // Unless skip_check is set, diff may be raised further from the p0..p4 comparisons
+    if (!skip_check) {
+      int maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
+      int mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
+      diff = max3(diff, mini, -maxi);
+    }
+    // Clamp spatial_pred to [p2 - diff, p2 + diff]
+    if (spatial_pred > p2 + diff) {
+      spatial_pred = p2 + diff;
+    }
+    if (spatial_pred < p2 - diff) {
+      spatial_pred = p2 - diff;
+    }
+
+    return spatial_pred;
+}
+
+template<typename T> // per-thread body shared by the yadif_uchar / yadif_ushort kernels: writes one element of dst
+__inline__ __device__ void yadif_single(T *dst,
+                                        cudaTextureObject_t prev,
+                                        cudaTextureObject_t cur,
+                                        cudaTextureObject_t next,
+                                        int dst_width, int dst_height, int dst_pitch,
+                                        int src_width, int src_height, // not read in this function
+                                        int parity, int tff, bool skip_spatial_check)
+{
+    // Identify location
+    int xo = blockIdx.x * blockDim.x + threadIdx.x;
+    int yo = blockIdx.y * blockDim.y + threadIdx.y;
+    // Threads that fall outside the destination plane have nothing to write
+    if (xo >= dst_width || yo >= dst_height) {
+        return;
+    }
+
+    // Don't modify the primary field
+    if (yo % 2 == parity) {
+      dst[yo*dst_pitch+xo] = tex2D<T>(cur, xo, yo);
+      return;
+    }
+    // NOTE(review): the reads below span xo-3..xo+3 and yo-2..yo+2, so they leave the plane near its borders; presumably covered by the texture addressing mode - confirm
+    // Calculate spatial prediction
+    T a = tex2D<T>(cur, xo - 3, yo - 1);
+    T b = tex2D<T>(cur, xo - 2, yo - 1);
+    T c = tex2D<T>(cur, xo - 1, yo - 1);
+    T d = tex2D<T>(cur, xo - 0, yo - 1);
+    T e = tex2D<T>(cur, xo + 1, yo - 1);
+    T f = tex2D<T>(cur, xo + 2, yo - 1);
+    T g = tex2D<T>(cur, xo + 3, yo - 1);
+
+    T h = tex2D<T>(cur, xo - 3, yo + 1);
+    T i = tex2D<T>(cur, xo - 2, yo + 1);
+    T j = tex2D<T>(cur, xo - 1, yo + 1);
+    T k = tex2D<T>(cur, xo - 0, yo + 1);
+    T l = tex2D<T>(cur, xo + 1, yo + 1);
+    T m = tex2D<T>(cur, xo + 2, yo + 1);
+    T n = tex2D<T>(cur, xo + 3, yo + 1);
+
+    T spatial_pred =
+        spatial_predictor(a, b, c, d, e, f, g, h, i, j, k, l, m, n);
+
+    // Calculate temporal prediction
+    int is_second_field = !(parity ^ tff);
+    // prev2/next2 are always the prev/next frames; prev1/next1 are (cur, next) for the second field and (prev, cur) otherwise
+    cudaTextureObject_t prev2 = prev;
+    cudaTextureObject_t prev1 = is_second_field ? cur : prev;
+    cudaTextureObject_t next1 = is_second_field ? next : cur;
+    cudaTextureObject_t next2 = next;
+
+    T A = tex2D<T>(prev2, xo,  yo - 1);
+    T B = tex2D<T>(prev2, xo,  yo + 1);
+    T C = tex2D<T>(prev1, xo,  yo - 2);
+    T D = tex2D<T>(prev1, xo,  yo + 0);
+    T E = tex2D<T>(prev1, xo,  yo + 2);
+    T F = tex2D<T>(cur,   xo,  yo - 1);
+    T G = tex2D<T>(cur,   xo,  yo + 1);
+    T H = tex2D<T>(next1, xo,  yo - 2);
+    T I = tex2D<T>(next1, xo,  yo + 0);
+    T J = tex2D<T>(next1, xo,  yo + 2);
+    T K = tex2D<T>(next2, xo,  yo - 1);
+    T L = tex2D<T>(next2, xo,  yo + 1);
+
+    spatial_pred = temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
+                                      spatial_pred, skip_spatial_check);
+
+    dst[yo*dst_pitch+xo] = spatial_pred;
+}
+
+template <typename T> // per-thread body shared by the yadif_uchar2 / yadif_ushort2 kernels: same steps as yadif_single, applied to .x and .y separately
+__inline__ __device__ void yadif_double(T *dst,
+                                        cudaTextureObject_t prev,
+                                        cudaTextureObject_t cur,
+                                        cudaTextureObject_t next,
+                                        int dst_width, int dst_height, int dst_pitch,
+                                        int src_width, int src_height, // not read in this function
+                                        int parity, int tff, bool skip_spatial_check)
+{
+    int xo = blockIdx.x * blockDim.x + threadIdx.x;
+    int yo = blockIdx.y * blockDim.y + threadIdx.y;
+    // Threads that fall outside the destination plane have nothing to write
+    if (xo >= dst_width || yo >= dst_height) {
+        return;
+    }
+
+    if (yo % 2 == parity) {
+      // Don't modify the primary field
+      dst[yo*dst_pitch+xo] = tex2D<T>(cur, xo, yo);
+      return;
+    }
+    // Seven two-component samples from the line above (a..g) and from the line below (h..n)
+    T a = tex2D<T>(cur, xo - 3, yo - 1);
+    T b = tex2D<T>(cur, xo - 2, yo - 1);
+    T c = tex2D<T>(cur, xo - 1, yo - 1);
+    T d = tex2D<T>(cur, xo - 0, yo - 1);
+    T e = tex2D<T>(cur, xo + 1, yo - 1);
+    T f = tex2D<T>(cur, xo + 2, yo - 1);
+    T g = tex2D<T>(cur, xo + 3, yo - 1);
+
+    T h = tex2D<T>(cur, xo - 3, yo + 1);
+    T i = tex2D<T>(cur, xo - 2, yo + 1);
+    T j = tex2D<T>(cur, xo - 1, yo + 1);
+    T k = tex2D<T>(cur, xo - 0, yo + 1);
+    T l = tex2D<T>(cur, xo + 1, yo + 1);
+    T m = tex2D<T>(cur, xo + 2, yo + 1);
+    T n = tex2D<T>(cur, xo + 3, yo + 1);
+
+    T spatial_pred;
+    spatial_pred.x =
+        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x, i.x, j.x, k.x, l.x, m.x, n.x);
+    spatial_pred.y =
+        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y, i.y, j.y, k.y, l.y, m.y, n.y);
+
+    // Calculate temporal prediction
+    int is_second_field = !(parity ^ tff);
+    // prev2/next2 are always the prev/next frames; prev1/next1 are (cur, next) for the second field and (prev, cur) otherwise
+    cudaTextureObject_t prev2 = prev;
+    cudaTextureObject_t prev1 = is_second_field ? cur : prev;
+    cudaTextureObject_t next1 = is_second_field ? next : cur;
+    cudaTextureObject_t next2 = next;
+
+    T A = tex2D<T>(prev2, xo,  yo - 1);
+    T B = tex2D<T>(prev2, xo,  yo + 1);
+    T C = tex2D<T>(prev1, xo,  yo - 2);
+    T D = tex2D<T>(prev1, xo,  yo + 0);
+    T E = tex2D<T>(prev1, xo,  yo + 2);
+    T F = tex2D<T>(cur,   xo,  yo - 1);
+    T G = tex2D<T>(cur,   xo,  yo + 1);
+    T H = tex2D<T>(next1, xo,  yo - 2);
+    T I = tex2D<T>(next1, xo,  yo + 0);
+    T J = tex2D<T>(next1, xo,  yo + 2);
+    T K = tex2D<T>(next2, xo,  yo - 1);
+    T L = tex2D<T>(next2, xo,  yo + 1);
+
+    spatial_pred.x =
+        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x, G.x, H.x, I.x, J.x, K.x, L.x,
+                           spatial_pred.x, skip_spatial_check);
+    spatial_pred.y =
+        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y, G.y, H.y, I.y, J.y, K.y, L.y,
+                           spatial_pred.y, skip_spatial_check);
+
+    dst[yo*dst_pitch+xo] = spatial_pred;
+}
+
+extern "C" {
+// Kernel entry points; the host code looks them up by these exact names with cuModuleGetFunction(). All four only forward to the templates above.
+__global__ void yadif_uchar(unsigned char *dst, // single-channel plane, 8-bit elements
+                            cudaTextureObject_t prev,
+                            cudaTextureObject_t cur,
+                            cudaTextureObject_t next,
+                            int dst_width, int dst_height, int dst_pitch,
+                            int src_width, int src_height,
+                            int parity, int tff, bool skip_spatial_check) // NOTE(review): bool parameter - the host-side argument must match its size; confirm in call_kernel()
+{
+    yadif_single(dst, prev, cur, next,
+                 dst_width, dst_height, dst_pitch,
+                 src_width, src_height,
+                 parity, tff, skip_spatial_check);
+}
+
+__global__ void yadif_ushort(unsigned short *dst, // single-channel plane, 16-bit elements
+                            cudaTextureObject_t prev,
+                            cudaTextureObject_t cur,
+                            cudaTextureObject_t next,
+                            int dst_width, int dst_height, int dst_pitch,
+                            int src_width, int src_height,
+                            int parity, int tff, bool skip_spatial_check)
+{
+    yadif_single(dst, prev, cur, next,
+                 dst_width, dst_height, dst_pitch,
+                 src_width, src_height,
+                 parity, tff, skip_spatial_check);
+}
+
+__global__ void yadif_uchar2(uchar2 *dst, // two-channel plane, 8-bit components
+                            cudaTextureObject_t prev,
+                            cudaTextureObject_t cur,
+                            cudaTextureObject_t next,
+                            int dst_width, int dst_height, int dst_pitch,
+                            int src_width, int src_height,
+                            int parity, int tff, bool skip_spatial_check)
+{
+    yadif_double(dst, prev, cur, next,
+                 dst_width, dst_height, dst_pitch,
+                 src_width, src_height,
+                 parity, tff, skip_spatial_check);
+}
+
+__global__ void yadif_ushort2(ushort2 *dst, // two-channel plane, 16-bit components
+                            cudaTextureObject_t prev,
+                            cudaTextureObject_t cur,
+                            cudaTextureObject_t next,
+                            int dst_width, int dst_height, int dst_pitch,
+                            int src_width, int src_height,
+                            int parity, int tff, bool skip_spatial_check)
+{
+    yadif_double(dst, prev, cur, next,
+                 dst_width, dst_height, dst_pitch,
+                 src_width, src_height,
+                 parity, tff, skip_spatial_check);
+}
+
+} /* extern "C" */
diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 6e1d36cb4cf23..f0309272fa90f 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -859,13 +859,7 @@ static const AVOption zscale_options[] = {
     { NULL }
 };
 
-static const AVClass zscale_class = {
-    .class_name       = "zscale",
-    .item_name        = av_default_item_name,
-    .option           = zscale_options,
-    .version          = LIBAVUTIL_VERSION_INT,
-    .category         = AV_CLASS_CATEGORY_FILTER,
-};
+AVFILTER_DEFINE_CLASS(zscale);
 
 static const AVFilterPad avfilter_vf_zscale_inputs[] = {
     {
diff --git a/libavfilter/window_func.h b/libavfilter/window_func.h
index a94482c937942..1de8f1fbdb693 100644
--- a/libavfilter/window_func.h
+++ b/libavfilter/window_func.h
@@ -30,6 +30,7 @@ enum WindowFunc     { WFUNC_RECT, WFUNC_HANNING, WFUNC_HAMMING, WFUNC_BLACKMAN,
                       WFUNC_BHARRIS, WFUNC_BNUTTALL, WFUNC_SINE, WFUNC_NUTTALL,
                       WFUNC_BHANN, WFUNC_LANCZOS, WFUNC_GAUSS, WFUNC_TUKEY,
                       WFUNC_DOLPH, WFUNC_CAUCHY, WFUNC_PARZEN, WFUNC_POISSON,
+                      WFUNC_BOHMAN,
                       NB_WFUNC };
 
 static inline void generate_window_func(float *lut, int N, int win_func,
@@ -182,6 +183,14 @@ static inline void generate_window_func(float *lut, int N, int win_func,
         }
         *overlap = 0.75;
         break;
+    case WFUNC_BOHMAN:
+        for (n = 0; n < N; n++) {
+            double x = 2 * ((n / (double)(N - 1))) - 1.;
+
+            lut[n] = (1 - fabs(x)) * cos(M_PI*fabs(x)) + 1./M_PI*sin(M_PI*fabs(x));
+        }
+        *overlap = 0.75;
+        break;
     default:
         av_assert0(0);
     }
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index b484c8bd1c3ff..17499f14da5f0 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,4 +1,7 @@
+OBJS-$(CONFIG_SCENE_SAD)                     += x86/scene_sad_init.o
+
 OBJS-$(CONFIG_AFIR_FILTER)                   += x86/af_afir_init.o
+OBJS-$(CONFIG_ANLMDN_FILTER)                 += x86/af_anlmdn_init.o
 OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o
 OBJS-$(CONFIG_BWDIF_FILTER)                  += x86/vf_bwdif_init.o
 OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o
@@ -29,7 +32,10 @@ OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 
+X86ASM-OBJS-$(CONFIG_SCENE_SAD)              += x86/scene_sad.o
+
 X86ASM-OBJS-$(CONFIG_AFIR_FILTER)            += x86/af_afir.o
+X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER)          += x86/af_anlmdn.o
 X86ASM-OBJS-$(CONFIG_BLEND_FILTER)           += x86/vf_blend.o
 X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
 X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 849d85e70fba8..2cc09709a2772 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,10 +27,9 @@ SECTION .text
 ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
 ;------------------------------------------------------------------------------
 
-INIT_XMM sse3
+%macro FCMUL_ADD 0
 cglobal fcmul_add, 4,4,6, sum, t, c, len
     shl       lend, 3
-    add       lend, mmsize*2
     add         tq, lenq
     add         cq, lenq
     add       sumq, lenq
@@ -41,20 +40,30 @@ ALIGN 16
     movsldup  m3, [tq + lenq+mmsize]
     movaps    m1, [cq + lenq]
     movaps    m4, [cq + lenq+mmsize]
-    mulps     m0, m1
-    mulps     m3, m4
-    shufps    m1, m1, 0xb1
-    shufps    m4, m4, 0xb1
+    mulps     m0, m0, m1
+    mulps     m3, m3, m4
+    shufps    m1, m1, m1, 0xb1
+    shufps    m4, m4, m4, 0xb1
     movshdup  m2, [tq + lenq]
     movshdup  m5, [tq + lenq+mmsize]
-    mulps     m2, m1
-    mulps     m5, m4
-    addsubps  m0, m2
-    addsubps  m3, m5
-    addps     m0, [sumq + lenq]
-    addps     m3, [sumq + lenq+mmsize]
+    mulps     m2, m2, m1
+    mulps     m5, m5, m4
+    addsubps  m0, m0, m2
+    addsubps  m3, m3, m5
+    addps     m0, m0, [sumq + lenq]
+    addps     m3, m3, [sumq + lenq+mmsize]
     movaps    [sumq + lenq], m0
     movaps    [sumq + lenq+mmsize], m3
     add       lenq, mmsize*2
     jl .loop
-    REP_RET
+    movss xm0, [tq + lenq]
+    mulss xm0, [cq + lenq]
+    addss xm0, [sumq + lenq]
+    movss [sumq + lenq], xm0
+    RET
+%endmacro
+
+INIT_XMM sse3
+FCMUL_ADD
+INIT_YMM avx
+FCMUL_ADD
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 6a652b9b83529..c37212c3812f0 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -24,12 +24,17 @@
 
 void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
                        ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+                      ptrdiff_t len);
 
-av_cold void ff_afir_init_x86(AudioFIRContext *s)
+av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_SSE3(cpu_flags)) {
         s->fcmul_add = ff_fcmul_add_sse3;
     }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->fcmul_add = ff_fcmul_add_avx;
+    }
 }
diff --git a/libavfilter/x86/af_anlmdn.asm b/libavfilter/x86/af_anlmdn.asm
new file mode 100644
index 0000000000000..7986cf443cf9a
--- /dev/null
+++ b/libavfilter/x86/af_anlmdn.asm
@@ -0,0 +1,80 @@
+;*****************************************************************************
+;* x86-optimized functions for anlmdn filter
+;* Copyright (c) 2017 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+;------------------------------------------------------------------------------
+; float ff_compute_distance_ssd(const float *f1, const float *f2, ptrdiff_t len)
+;------------------------------------------------------------------------------
+
+INIT_XMM sse
+cglobal compute_distance_ssd, 3,5,3, f1, f2, len, r, x
+    mov       xq, lenq
+    shl       xq, 2                 ; x = len * 4 (bytes, 4 per float)
+    neg       xq
+    add       f1q, xq               ; f1 -= len floats
+    add       f2q, xq               ; f2 -= len floats
+    xor       xq, xq                ; x = 0: byte offset from the moved pointers
+    shl       lenq, 1
+    add       lenq, 1
+    shl       lenq, 2               ; len = (2 * len + 1) * 4 = total bytes to sum
+    mov       rq, lenq
+    and       rq, mmsize - 1        ; r = bytes left after the whole vectors
+    xorps     m0, m0                ; m0 = accumulated squared differences
+    cmp       lenq, mmsize
+    jl .loop1                       ; shorter than one vector: scalar loop only
+    sub       lenq, rq              ; len = end offset of the vector part
+ALIGN 16
+    .loop0:
+        movups    m1, [f1q + xq]    ; unaligned loads
+        movups    m2, [f2q + xq]
+        subps     m1, m2
+        mulps     m1, m1            ; (f1 - f2)^2 per lane
+        addps     m0, m1
+        add       xq, mmsize
+        cmp       xq, lenq
+        jl .loop0
+
+    movhlps   xmm1, xmm0            ; low half of xmm1 = high half of xmm0
+    addps     xmm0, xmm1            ; lanes 0 and 1 now hold pairwise sums
+    movss     xmm1, xmm0            ; keep lane 0
+    shufps    xmm0, xmm0, 1         ; move lane 1 down to lane 0
+    addss     xmm0, xmm1            ; lane 0 = sum of all four lanes
+
+    cmp       rq, 0
+    je .end                         ; nothing left over
+    add       lenq, rq              ; restore the full byte count
+    .loop1:                         ; scalar tail, one float per iteration
+        movss    xm1, [f1q + xq]
+        subss    xm1, [f2q + xq]
+        mulss    xm1, xm1
+        addss    xm0, xm1
+        add       xq, 4
+        cmp       xq, lenq
+        jl .loop1
+    .end:
+%if ARCH_X86_64 == 0
+    movss     r0m, xm0              ; store the result in argument 0's memory slot
+    fld dword r0m                   ; and load it onto the x87 stack for the return
+%endif
+    RET
diff --git a/libavfilter/x86/af_anlmdn_init.c b/libavfilter/x86/af_anlmdn_init.c
new file mode 100644
index 0000000000000..30eff6f644769
--- /dev/null
+++ b/libavfilter/x86/af_anlmdn_init.c
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/af_anlmdndsp.h"
+
+float ff_compute_distance_ssd_sse(const float *f1, const float *f2,
+                                  ptrdiff_t len);
+
+av_cold void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    /* Leave the existing function pointer alone unless SSE is usable. */
+    if (!EXTERNAL_SSE(flags))
+        return;
+    s->compute_distance_ssd = ff_compute_distance_ssd_sse;
+}
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
new file mode 100644
index 0000000000000..d38d71ccca1f1
--- /dev/null
+++ b/libavfilter/x86/scene_sad.asm
@@ -0,0 +1,74 @@
+;*****************************************************************************
+;* x86-optimized functions for scene SAD
+;*
+;* Copyright (C) 2018 Marton Balint
+;*
+;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+
+%macro SAD_INIT 0
+cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x
+    add     src1q, widthq            ; src1 += width
+    add     src2q, widthq            ; src2 += width
+    neg    widthq                    ; width = -width, so x can count up to 0
+    pxor       m1, m1                ; m1 = accumulator
+%endmacro
+
+; SAD_LOOP: sum |src1 - src2| over "end" rows of "width" bytes, mmsize at a time.
+%macro SAD_LOOP 0
+.nextrow:
+    mov        xq, widthq            ; restart each row at x = -width
+
+    .loop:
+        movu            m0, [src1q + xq]
+        psadbw          m0, [src2q + xq] ; NOTE(review): non-VEX psadbw needs an aligned memory operand - confirm src2 rows are aligned
+        paddq           m1, m0           ; accumulate as 64-bit lanes
+        add             xq, mmsize
+    jl .loop                         ; body always runs once per row, even if width is 0
+    add     src1q, stride1q
+    add     src2q, stride2q
+    sub      endd, 1                 ; one row done
+    jg .nextrow
+
+    mov         r0q, r6mp            ; 7th argument (x86inc r6): output pointer
+    movu      [r0q], m1      ; sum (all mmsize bytes; the C wrapper adds the lanes)
+REP_RET
+%endmacro
+
+
+%macro SAD_FRAMES 0
+    SAD_INIT
+    SAD_LOOP
+%endmacro
+
+
+INIT_XMM sse2
+SAD_FRAMES
+
+%if HAVE_AVX2_EXTERNAL
+
+INIT_YMM avx2
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
new file mode 100644
index 0000000000000..f8104dcb4f8ef
--- /dev/null
+++ b/libavfilter/x86/scene_sad_init.c
@@ -0,0 +1,60 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/scene_sad.h"
+
+#define SCENE_SAD_FUNC(FUNC_NAME, ASM_FUNC_NAME, MMSIZE)                      \
+void ASM_FUNC_NAME(SCENE_SAD_PARAMS);                                         \
+/* SIMD for the MMSIZE-aligned part of each row, C for the leftover bytes. */ \
+static void FUNC_NAME(SCENE_SAD_PARAMS) {                                     \
+    uint64_t sad[MMSIZE / 8] = {0};                                           \
+    ptrdiff_t awidth = width & ~(MMSIZE - 1);                                 \
+    *sum = 0;                                                                 \
+    if (awidth) /* the asm loop always runs at least one vector per row */    \
+        ASM_FUNC_NAME(src1, stride1, src2, stride2, awidth, height, sad);     \
+    for (int i = 0; i < MMSIZE / 8; i++)                                      \
+        *sum += sad[i];                                                       \
+    ff_scene_sad_c(src1 + awidth, stride1, src2 + awidth, stride2,            \
+                   width - awidth, height, sad);                              \
+    *sum += sad[0];                                                           \
+}
+
+#if HAVE_X86ASM
+SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16);
+#if HAVE_AVX2_EXTERNAL
+SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32);
+#endif
+#endif
+
+ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
+{
+    ff_scene_sad_fn fn = NULL;
+#if HAVE_X86ASM
+    const int flags = av_get_cpu_flags();
+    /* Only 8-bit input is accelerated; a later match overrides an earlier one. */
+    if (depth == 8 && EXTERNAL_SSE2(flags))
+        fn = scene_sad_sse2;
+#if HAVE_AVX2_EXTERNAL
+    if (depth == 8 && EXTERNAL_AVX2_FAST(flags))
+        fn = scene_sad_avx2;
+#endif
+#endif
+    return fn;
+}
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index acf28559ff7be..84bc55cc87735 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -100,11 +100,11 @@ BLEND_FUNC(xor_16, sse2)
 BLEND_FUNC(xor_16, avx2)
 #endif /* ARCH_X86_64 */
 
-av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
+av_cold void ff_blend_init_x86(FilterParams *param, int depth)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (!is_16bit) {
+    if (depth == 8) {
         if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
             switch (param->mode) {
             case BLEND_ADDITION:     param->blend = ff_blend_addition_sse2;     break;
@@ -156,7 +156,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
             case BLEND_NEGATION:     param->blend = ff_blend_negation_avx2;     break;
             }
         }
-    } else { /* is_16_bit */
+    } else if (depth == 16) {
 #if ARCH_X86_64
         if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
             switch (param->mode) {
diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
index 1cb8438e5f6f3..b1e70b3bc680d 100644
--- a/libavfilter/x86/vf_bwdif_init.c
+++ b/libavfilter/x86/vf_bwdif_init.c
@@ -53,8 +53,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
 
 av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
 {
+    YADIFContext *yadif = &bwdif->yadif;
     int cpu_flags = av_get_cpu_flags();
-    int bit_depth = (!bwdif->csp) ? 8 : bwdif->csp->comp[0].depth;
+    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
 
     if (bit_depth <= 8) {
 #if ARCH_X86_32
diff --git a/libavfilter/yadif.h b/libavfilter/yadif.h
index d23d1380d0089..c928911b357b7 100644
--- a/libavfilter/yadif.h
+++ b/libavfilter/yadif.h
@@ -19,6 +19,7 @@
 #ifndef AVFILTER_YADIF_H
 #define AVFILTER_YADIF_H
 
+#include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 
@@ -40,6 +41,12 @@ enum YADIFDeint {
     YADIF_DEINT_INTERLACED = 1, ///< only deinterlace frames marked as interlaced
 };
 
+enum YADIFCurrentField {
+    YADIF_FIELD_BACK_END = -1, ///< The last frame in a sequence
+    YADIF_FIELD_END      =  0, ///< The first or last field in a sequence
+    YADIF_FIELD_NORMAL   =  1, ///< A normal field in the middle of a sequence
+};
+
 typedef struct YADIFContext {
     const AVClass *class;
 
@@ -54,6 +61,8 @@ typedef struct YADIFContext {
     AVFrame *prev;
     AVFrame *out;
 
+    void (*filter)(AVFilterContext *ctx, AVFrame *dstpic, int parity, int tff);
+
     /**
      * Required alignment for filter_line
      */
@@ -67,8 +76,22 @@ typedef struct YADIFContext {
     int eof;
     uint8_t *temp_line;
     int temp_line_size;
+
+    /*
+     * An algorithm that treats first and/or last fields in a sequence
+     * differently can use this to detect those cases. It is the algorithm's
+     * responsibility to set the value to YADIF_FIELD_NORMAL after processing
+     * the first field.
+     */
+    int current_field;  ///< YADIFCurrentField
 } YADIFContext;
 
 void ff_yadif_init_x86(YADIFContext *yadif);
 
+int ff_yadif_filter_frame(AVFilterLink *link, AVFrame *frame);
+
+int ff_yadif_request_frame(AVFilterLink *link);
+
+extern const AVOption ff_yadif_options[];
+
 #endif /* AVFILTER_YADIF_H */
diff --git a/libavfilter/yadif_common.c b/libavfilter/yadif_common.c
new file mode 100644
index 0000000000000..a10cf7a17fd8a
--- /dev/null
+++ b/libavfilter/yadif_common.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2006-2011 Michael Niedermayer <michaelni@gmx.at>
+ *               2010      James Darnley <james.darnley@gmail.com>
+
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "internal.h"
+#include "yadif.h"
+
+/* Filter one field of yadif->cur into yadif->out and send it downstream. */
+static int return_frame(AVFilterContext *ctx, int is_second)
+{
+    YADIFContext *yadif   = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int tff, ret;
+
+    /* Field order: forced by the parity option, else taken from the frame. */
+    if (yadif->parity != -1)
+        tff = yadif->parity ^ 1;
+    else if (yadif->cur->interlaced_frame)
+        tff = yadif->cur->top_field_first;
+    else
+        tff = 1;
+
+    if (is_second) {
+        yadif->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!yadif->out)
+            return AVERROR(ENOMEM);
+
+        av_frame_copy_props(yadif->out, yadif->cur);
+        yadif->out->interlaced_frame = 0;
+        if (yadif->current_field == YADIF_FIELD_BACK_END)
+            yadif->current_field = YADIF_FIELD_END;
+    }
+
+    yadif->filter(ctx, yadif->out, tff ^ !is_second, tff);
+
+    if (is_second) {
+        const int64_t cur_pts  = yadif->cur->pts;
+        const int64_t next_pts = yadif->next->pts;
+
+        yadif->out->pts = AV_NOPTS_VALUE;
+        if (next_pts != AV_NOPTS_VALUE && cur_pts != AV_NOPTS_VALUE)
+            yadif->out->pts = cur_pts + next_pts;
+    }
+
+    ret = ff_filter_frame(outlink, yadif->out);
+    yadif->frame_pending = (yadif->mode & 1) && !is_second;
+    return ret;
+}
+
+/* Return 1 if a and b differ in linesize[i] for any i < csp->nb_components. */
+static int checkstride(YADIFContext *yadif, const AVFrame *a, const AVFrame *b)
+{
+    int plane, differs = 0;
+    for (plane = 0; !differs && plane < yadif->csp->nb_components; plane++)
+        differs = a->linesize[plane] != b->linesize[plane];
+    return differs;
+}
+
+/* Swap f's contents for a copy in a default buffer; no-op if none is obtained. */
+static void fixstride(AVFilterLink *link, AVFrame *f)
+{
+    AVFrame *copy = ff_default_get_video_buffer(link, f->width, f->height);
+    if (copy) {
+        av_frame_copy_props(copy, f);
+        av_image_copy(copy->data, copy->linesize, (const uint8_t **)f->data,
+                      f->linesize, copy->format, copy->width, copy->height);
+        av_frame_unref(f);
+        av_frame_move_ref(f, copy);
+        av_frame_free(&copy);
+    }
+}
+
+/* Input callback: stores frame as yadif->next, shifting the prev/cur/next
+ * window, then outputs either a pass-through clone of cur or its first
+ * filtered field. Returns 0 or a negative AVERROR code. */
+int ff_yadif_filter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    YADIFContext *yadif = ctx->priv;
+
+    av_assert0(frame);
+    if (yadif->frame_pending)
+        return_frame(ctx, 1);
+
+    av_frame_free(&yadif->prev); /* safe when prev is NULL, no check needed */
+    yadif->prev = yadif->cur;
+    yadif->cur  = yadif->next;
+    yadif->next = frame;
+
+    if (!yadif->cur) {
+        yadif->cur = av_frame_clone(yadif->next);
+        if (!yadif->cur)
+            return AVERROR(ENOMEM);
+        yadif->current_field = YADIF_FIELD_END;
+    }
+
+    if (checkstride(yadif, yadif->next, yadif->cur)) {
+        av_log(ctx, AV_LOG_VERBOSE, "Reallocating frame due to differing stride\n");
+        fixstride(link, yadif->next);
+    }
+    if (checkstride(yadif, yadif->next, yadif->cur))
+        fixstride(link, yadif->cur);
+    if (yadif->prev && checkstride(yadif, yadif->next, yadif->prev))
+        fixstride(link, yadif->prev);
+    if (checkstride(yadif, yadif->next, yadif->cur) || (yadif->prev && checkstride(yadif, yadif->next, yadif->prev))) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate frame\n");
+        return AVERROR(ENOMEM); /* fixstride() only gives up when allocation fails */
+    }
+
+    if (!yadif->prev)
+        return 0;
+
+    if ((yadif->deint && !yadif->cur->interlaced_frame) ||
+        ctx->is_disabled ||
+        (yadif->deint && !yadif->prev->interlaced_frame && yadif->prev->repeat_pict) ||
+        (yadif->deint && !yadif->next->interlaced_frame && yadif->next->repeat_pict)
+    ) {
+        yadif->out  = av_frame_clone(yadif->cur);
+        if (!yadif->out)
+            return AVERROR(ENOMEM);
+
+        av_frame_free(&yadif->prev);
+        if (yadif->out->pts != AV_NOPTS_VALUE)
+            yadif->out->pts *= 2;
+        return ff_filter_frame(ctx->outputs[0], yadif->out);
+    }
+
+    yadif->out = ff_get_video_buffer(ctx->outputs[0], link->w, link->h);
+    if (!yadif->out)
+        return AVERROR(ENOMEM);
+
+    av_frame_copy_props(yadif->out, yadif->cur);
+    yadif->out->interlaced_frame = 0;
+    if (yadif->out->pts != AV_NOPTS_VALUE)
+        yadif->out->pts *= 2;
+
+    return return_frame(ctx, 0);
+}
+
+/* Output-side request callback: emit a pending second field, else pull input;
+ * at input EOF, flush the last frame by feeding a clone of it as "next". */
+int ff_yadif_request_frame(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    YADIFContext *yadif = ctx->priv;
+    AVFrame *next;
+    int ret;
+
+    if (yadif->frame_pending)
+        return return_frame(ctx, 1); /* report failures instead of hiding them */
+    if (yadif->eof)
+        return AVERROR_EOF;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+    if (ret != AVERROR_EOF || !yadif->cur)
+        return ret < 0 ? ret : 0;
+
+    next = av_frame_clone(yadif->next);
+    if (!next)
+        return AVERROR(ENOMEM);
+
+    yadif->current_field = YADIF_FIELD_BACK_END;
+    /* Extrapolate one frame interval; never do arithmetic on an unknown pts. */
+    if (yadif->next->pts != AV_NOPTS_VALUE && yadif->cur->pts != AV_NOPTS_VALUE)
+        next->pts = yadif->next->pts * 2 - yadif->cur->pts;
+    else
+        next->pts = AV_NOPTS_VALUE;
+
+    ret = ff_yadif_filter_frame(ctx->inputs[0], next);
+    yadif->eof = 1;
+    return ret;
+}
+
+#define OFFSET(x) offsetof(YADIFContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* CONST(): an AV_OPT_TYPE_CONST entry giving a name to one value within option group "unit". */
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+/* Option table; each non-CONST entry targets a YADIFContext field through OFFSET(). */
+const AVOption ff_yadif_options[] = {
+    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
+    CONST("send_frame",           "send one frame for each frame",                                     YADIF_MODE_SEND_FRAME,           "mode"),
+    CONST("send_field",           "send one frame for each field",                                     YADIF_MODE_SEND_FIELD,           "mode"),
+    CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"),
+    CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"),
+
+    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
+    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,  "parity"),
+    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,  "parity"),
+    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO, "parity"),
+
+    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
+    CONST("all",        "deinterlace all frames",                       YADIF_DEINT_ALL,         "deint"),
+    CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED,  "deint"),
+
+    { NULL }
+};
diff --git a/libavformat/Makefile b/libavformat/Makefile
index e99e9150d586f..c010fc83f977a 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -149,6 +149,7 @@ OBJS-$(CONFIG_DAUD_DEMUXER)              += dauddec.o
 OBJS-$(CONFIG_DAUD_MUXER)                += daudenc.o
 OBJS-$(CONFIG_DCSTR_DEMUXER)             += dcstr.o
 OBJS-$(CONFIG_DFA_DEMUXER)               += dfa.o
+OBJS-$(CONFIG_DHAV_DEMUXER)              += dhav.o
 OBJS-$(CONFIG_DIRAC_DEMUXER)             += diracdec.o rawdec.o
 OBJS-$(CONFIG_DIRAC_MUXER)               += rawenc.o
 OBJS-$(CONFIG_DNXHD_DEMUXER)             += dnxhddec.o rawdec.o
@@ -161,8 +162,8 @@ OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
 OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_DV_DEMUXER)                += dv.o
 OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
-OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o
-OBJS-$(CONFIG_DVBTXT_DEMUXER)            += dvbtxt.o
+OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
+OBJS-$(CONFIG_DVBTXT_DEMUXER)            += dvbtxt.o rawdec.o
 OBJS-$(CONFIG_DXA_DEMUXER)               += dxa.o
 OBJS-$(CONFIG_EA_CDATA_DEMUXER)          += eacdata.o
 OBJS-$(CONFIG_EA_DEMUXER)                += electronicarts.o
@@ -218,6 +219,7 @@ OBJS-$(CONFIG_H263_MUXER)                += rawenc.o
 OBJS-$(CONFIG_H264_DEMUXER)              += h264dec.o rawdec.o
 OBJS-$(CONFIG_H264_MUXER)                += rawenc.o
 OBJS-$(CONFIG_HASH_MUXER)                += hashenc.o
+OBJS-$(CONFIG_HCOM_DEMUXER)              += hcom.o
 OBJS-$(CONFIG_HDS_MUXER)                 += hdsenc.o
 OBJS-$(CONFIG_HEVC_DEMUXER)              += hevcdec.o rawdec.o
 OBJS-$(CONFIG_HEVC_MUXER)                += rawenc.o
@@ -241,6 +243,7 @@ OBJS-$(CONFIG_IMAGE_BMP_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_DDS_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_DPX_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_EXR_PIPE_DEMUXER)     += img2dec.o img2.o
+OBJS-$(CONFIG_IMAGE_GIF_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_J2K_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_JPEG_PIPE_DEMUXER)    += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_JPEGLS_PIPE_DEMUXER)  += img2dec.o img2.o
@@ -411,6 +414,8 @@ OBJS-$(CONFIG_PCM_U32LE_DEMUXER)         += pcmdec.o pcm.o
 OBJS-$(CONFIG_PCM_U32LE_MUXER)           += pcmenc.o rawenc.o
 OBJS-$(CONFIG_PCM_U8_DEMUXER)            += pcmdec.o pcm.o
 OBJS-$(CONFIG_PCM_U8_MUXER)              += pcmenc.o rawenc.o
+OBJS-$(CONFIG_PCM_VIDC_DEMUXER)          += pcmdec.o pcm.o
+OBJS-$(CONFIG_PCM_VIDC_MUXER)            += pcmenc.o rawenc.o
 OBJS-$(CONFIG_PJS_DEMUXER)               += pjsdec.o subtitles.o
 OBJS-$(CONFIG_PMP_DEMUXER)               += pmpdec.o
 OBJS-$(CONFIG_PVA_DEMUXER)               += pva.o
@@ -518,6 +523,7 @@ OBJS-$(CONFIG_VC1_DEMUXER)               += rawdec.o vc1dec.o
 OBJS-$(CONFIG_VC1_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_VC1T_DEMUXER)              += vc1test.o
 OBJS-$(CONFIG_VC1T_MUXER)                += vc1testenc.o
+OBJS-$(CONFIG_VIVIDAS_DEMUXER)           += vividas.o
 OBJS-$(CONFIG_VIVO_DEMUXER)              += vivo.o
 OBJS-$(CONFIG_VMD_DEMUXER)               += sierravmd.o
 OBJS-$(CONFIG_VOBSUB_DEMUXER)            += subtitles.o # mpeg demuxer is in the dependencies
diff --git a/libavformat/ac3dec.c b/libavformat/ac3dec.c
index 6f423ff7eb2d7..2718061bdc570 100644
--- a/libavformat/ac3dec.c
+++ b/libavformat/ac3dec.c
@@ -47,7 +47,7 @@ static int ac3_eac3_probe(AVProbeData *p, enum AVCodecID expected_codec_id)
             uint16_t frame_size;
             int i, ret;
 
-            if(!memcmp(buf2, "\x1\x10\0\0\0\0\0\0", 8)) {
+            if(!memcmp(buf2, "\x1\x10", 2)) {
                 if (buf2 + 16 > end)
                     break;
                 buf2+=16;
diff --git a/libavformat/acm.c b/libavformat/acm.c
index 08dd9282fa4a6..dcf8778dc792f 100644
--- a/libavformat/acm.c
+++ b/libavformat/acm.c
@@ -44,12 +44,9 @@ static int acm_read_header(AVFormatContext *s)
     st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
     st->codecpar->codec_id   = AV_CODEC_ID_INTERPLAY_ACM;
 
-    ff_alloc_extradata(st->codecpar, 14);
-    if (!st->codecpar->extradata)
-        return AVERROR(ENOMEM);
-    ret = avio_read(s->pb, st->codecpar->extradata, 14);
-    if (ret < 10)
-        return ret < 0 ? ret : AVERROR_EOF;
+    ret = ff_get_extradata(s, st->codecpar, s->pb, 14);
+    if (ret < 0)
+        return ret;
 
     st->codecpar->channels    = AV_RL16(st->codecpar->extradata +  8);
     st->codecpar->sample_rate = AV_RL16(st->codecpar->extradata + 10);
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 9e41718685239..06844986f335a 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -110,6 +110,7 @@ extern AVInputFormat  ff_daud_demuxer;
 extern AVOutputFormat ff_daud_muxer;
 extern AVInputFormat  ff_dcstr_demuxer;
 extern AVInputFormat  ff_dfa_demuxer;
+extern AVInputFormat  ff_dhav_demuxer;
 extern AVInputFormat  ff_dirac_demuxer;
 extern AVOutputFormat ff_dirac_muxer;
 extern AVInputFormat  ff_dnxhd_demuxer;
@@ -175,6 +176,7 @@ extern AVOutputFormat ff_h263_muxer;
 extern AVInputFormat  ff_h264_demuxer;
 extern AVOutputFormat ff_h264_muxer;
 extern AVOutputFormat ff_hash_muxer;
+extern AVInputFormat  ff_hcom_demuxer;
 extern AVOutputFormat ff_hds_muxer;
 extern AVInputFormat  ff_hevc_demuxer;
 extern AVOutputFormat ff_hevc_muxer;
@@ -289,6 +291,8 @@ extern AVInputFormat  ff_pcm_alaw_demuxer;
 extern AVOutputFormat ff_pcm_alaw_muxer;
 extern AVInputFormat  ff_pcm_mulaw_demuxer;
 extern AVOutputFormat ff_pcm_mulaw_muxer;
+extern AVInputFormat  ff_pcm_vidc_demuxer;
+extern AVOutputFormat ff_pcm_vidc_muxer;
 extern AVInputFormat  ff_pcm_f64be_demuxer;
 extern AVOutputFormat ff_pcm_f64be_muxer;
 extern AVInputFormat  ff_pcm_f64le_demuxer;
@@ -419,6 +423,7 @@ extern AVInputFormat  ff_vc1_demuxer;
 extern AVOutputFormat ff_vc1_muxer;
 extern AVInputFormat  ff_vc1t_demuxer;
 extern AVOutputFormat ff_vc1t_muxer;
+extern AVInputFormat  ff_vividas_demuxer;
 extern AVInputFormat  ff_vivo_demuxer;
 extern AVInputFormat  ff_vmd_demuxer;
 extern AVInputFormat  ff_vobsub_demuxer;
@@ -460,6 +465,7 @@ extern AVInputFormat  ff_image_bmp_pipe_demuxer;
 extern AVInputFormat  ff_image_dds_pipe_demuxer;
 extern AVInputFormat  ff_image_dpx_pipe_demuxer;
 extern AVInputFormat  ff_image_exr_pipe_demuxer;
+extern AVInputFormat  ff_image_gif_pipe_demuxer;
 extern AVInputFormat  ff_image_j2k_pipe_demuxer;
 extern AVInputFormat  ff_image_jpeg_pipe_demuxer;
 extern AVInputFormat  ff_image_jpegls_pipe_demuxer;
diff --git a/libavformat/anm.c b/libavformat/anm.c
index b31757ab0adf3..a5ad2fd0ef250 100644
--- a/libavformat/anm.c
+++ b/libavformat/anm.c
@@ -132,12 +132,7 @@ static int read_header(AVFormatContext *s)
     avio_skip(pb, 58);
 
     /* color cycling and palette data */
-    st->codecpar->extradata_size = 16*8 + 4*256;
-    st->codecpar->extradata      = av_mallocz(st->codecpar->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
-    if (!st->codecpar->extradata) {
-        return AVERROR(ENOMEM);
-    }
-    ret = avio_read(pb, st->codecpar->extradata, st->codecpar->extradata_size);
+    ret = ff_get_extradata(s, st->codecpar, s->pb, 16*8 + 4*256);
     if (ret < 0)
         return ret;
 
diff --git a/libavformat/apngdec.c b/libavformat/apngdec.c
index f9a97e56813e1..53cdd4538e010 100644
--- a/libavformat/apngdec.c
+++ b/libavformat/apngdec.c
@@ -342,6 +342,10 @@ static int apng_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     len = avio_rb32(pb);
     tag = avio_rl32(pb);
+
+    if (avio_feof(pb))
+        return AVERROR_EOF;
+
     switch (tag) {
     case MKTAG('f', 'c', 'T', 'L'):
         if (len != 26)
diff --git a/libavformat/asf.h b/libavformat/asf.h
index 1a5b33840f4c0..a30d3bb23b504 100644
--- a/libavformat/asf.h
+++ b/libavformat/asf.h
@@ -166,6 +166,4 @@ extern const AVMetadataConv ff_asf_metadata_conv[];
 
 #define ASF_PL_FLAG_KEY_FRAME 0x80 //1000 0000
 
-extern AVInputFormat ff_asf_demuxer;
-
 #endif /* AVFORMAT_ASF_H */
diff --git a/libavformat/async.c b/libavformat/async.c
index 54dbd2312a24c..4e295b5e10fb0 100644
--- a/libavformat/async.c
+++ b/libavformat/async.c
@@ -142,7 +142,7 @@ static int ring_size_of_read_back(RingBuffer *ring)
 static int ring_drain(RingBuffer *ring, int offset)
 {
     av_assert2(offset >= -ring_size_of_read_back(ring));
-    av_assert2(offset <= -ring_size(ring));
+    av_assert2(offset <= ring_size(ring));
     ring->read_pos += offset;
     return 0;
 }
diff --git a/libavformat/au.c b/libavformat/au.c
index 520824fc123ac..0b2b7eac15ac1 100644
--- a/libavformat/au.c
+++ b/libavformat/au.c
@@ -140,7 +140,7 @@ static int au_read_header(AVFormatContext *s)
     unsigned int tag;
     AVIOContext *pb = s->pb;
     unsigned int id, channels, rate;
-    int bps;
+    int bps, ba = 0;
     enum AVCodecID codec;
     AVStream *st;
 
@@ -178,6 +178,7 @@ static int au_read_header(AVFormatContext *s)
         } else {
             const uint8_t bpcss[] = {4, 0, 3, 5};
             av_assert0(id >= 23 && id < 23 + 4);
+            ba = bpcss[id - 23];
             bps = bpcss[id - 23];
         }
     } else if (!bps) {
@@ -205,7 +206,7 @@ static int au_read_header(AVFormatContext *s)
     st->codecpar->sample_rate = rate;
     st->codecpar->bits_per_coded_sample = bps;
     st->codecpar->bit_rate    = channels * rate * bps;
-    st->codecpar->block_align = FFMAX(bps * st->codecpar->channels / 8, 1);
+    st->codecpar->block_align = ba ? ba : FFMAX(bps * st->codecpar->channels / 8, 1);
     if (data_size != AU_UNKNOWN_SIZE)
         st->duration = (((int64_t)data_size)<<3) / (st->codecpar->channels * (int64_t)bps);
 
diff --git a/libavformat/avio.h b/libavformat/avio.h
index 75912ce6bed97..dcb8dcdf93ade 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h
@@ -236,7 +236,7 @@ typedef struct AVIOContext {
     int (*write_packet)(void *opaque, uint8_t *buf, int buf_size);
     int64_t (*seek)(void *opaque, int64_t offset, int whence);
     int64_t pos;            /**< position in the file of the current buffer */
+    int eof_reached;        /**< true if unable to read due to an error or eof */
+    int eof_reached;        /**< true if was unable to read due to error or eof */
     int write_flag;         /**< true if open for writing */
     int max_packet_size;
     unsigned long checksum;
@@ -566,8 +566,8 @@ static av_always_inline int64_t avio_tell(AVIOContext *s)
 int64_t avio_size(AVIOContext *s);
 
 /**
- * feof() equivalent for AVIOContext.
- * @return non zero if and only if end of file
+ * Similar to feof() but also returns nonzero on read errors.
+ * @return non zero if and only if at end of file or a read error happened when reading.
  */
 int avio_feof(AVIOContext *s);
 
diff --git a/libavformat/bfi.c b/libavformat/bfi.c
index 6c98e33ab4a48..0dad658f959a2 100644
--- a/libavformat/bfi.c
+++ b/libavformat/bfi.c
@@ -54,7 +54,7 @@ static int bfi_read_header(AVFormatContext * s)
     AVIOContext *pb = s->pb;
     AVStream *vstream;
     AVStream *astream;
-    int fps, chunk_header;
+    int ret, fps, chunk_header;
 
     /* Initialize the video codec... */
     vstream = avformat_new_stream(s, NULL);
@@ -80,12 +80,9 @@ static int bfi_read_header(AVFormatContext * s)
 
     /*Load the palette to extradata */
     avio_skip(pb, 8);
-    vstream->codecpar->extradata      = av_malloc(768);
-    if (!vstream->codecpar->extradata)
-        return AVERROR(ENOMEM);
-    vstream->codecpar->extradata_size = 768;
-    avio_read(pb, vstream->codecpar->extradata,
-               vstream->codecpar->extradata_size);
+    ret = ff_get_extradata(s, vstream->codecpar, pb, 768);
+    if (ret < 0)
+        return ret;
 
     astream->codecpar->sample_rate = avio_rl32(pb);
     if (astream->codecpar->sample_rate <= 0) {
diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c
index 7652d9e238597..20956bcf977b9 100644
--- a/libavformat/cafdec.c
+++ b/libavformat/cafdec.c
@@ -310,6 +310,8 @@ static int read_header(AVFormatContext *s)
                    "skipping CAF chunk: %08"PRIX32" (%s), size %"PRId64"\n",
                    tag, av_fourcc2str(av_bswap32(tag)), size);
         case MKBETAG('f','r','e','e'):
+            if (size < 0 && found_data)
+                goto found_data;
             if (size < 0)
                 return AVERROR_INVALIDDATA;
             break;
@@ -325,6 +327,7 @@ static int read_header(AVFormatContext *s)
     if (!found_data)
         return AVERROR_INVALIDDATA;
 
+found_data:
     if (caf->bytes_per_packet > 0 && caf->frames_per_packet > 0) {
         if (caf->data_size > 0)
             st->nb_frames = (caf->data_size / caf->bytes_per_packet) * caf->frames_per_packet;
diff --git a/libavformat/concatdec.c b/libavformat/concatdec.c
index bbe13136fa297..d65da553e145a 100644
--- a/libavformat/concatdec.c
+++ b/libavformat/concatdec.c
@@ -45,6 +45,7 @@ typedef struct {
     int64_t file_start_time;
     int64_t file_inpoint;
     int64_t duration;
+    int64_t user_duration;
     int64_t next_dts;
     ConcatStream *streams;
     int64_t inpoint;
@@ -154,6 +155,7 @@ static int add_file(AVFormatContext *avf, char *filename, ConcatFile **rfile,
     file->next_dts   = AV_NOPTS_VALUE;
     file->inpoint    = AV_NOPTS_VALUE;
     file->outpoint   = AV_NOPTS_VALUE;
+    file->user_duration = AV_NOPTS_VALUE;
 
     return 0;
 
@@ -314,6 +316,19 @@ static int match_streams(AVFormatContext *avf)
     return 0;
 }
 
+/* Duration of file from the first source that is known: user_duration, outpoint,
+ * the demuxer duration or next_dts; AV_NOPTS_VALUE when none of them is set. */
+static int64_t get_best_effort_duration(ConcatFile *file, AVFormatContext *avf)
+{
+    const int64_t inpoint = file->file_inpoint;
+
+    return file->user_duration != AV_NOPTS_VALUE ? file->user_duration :
+           file->outpoint      != AV_NOPTS_VALUE ? file->outpoint - inpoint :
+           avf->duration > 0                     ? avf->duration - (inpoint - file->file_start_time) :
+           file->next_dts      != AV_NOPTS_VALUE ? file->next_dts - inpoint :
+                                                   AV_NOPTS_VALUE;
+}
+
 static int open_file(AVFormatContext *avf, unsigned fileno)
 {
     ConcatContext *cat = avf->priv_data;
@@ -340,14 +355,12 @@ static int open_file(AVFormatContext *avf, unsigned fileno)
         return ret;
     }
     cat->cur_file = file;
-    if (file->start_time == AV_NOPTS_VALUE)
-        file->start_time = !fileno ? 0 :
-                           cat->files[fileno - 1].start_time +
-                           cat->files[fileno - 1].duration;
+    file->start_time = !fileno ? 0 :
+                       cat->files[fileno - 1].start_time +
+                       cat->files[fileno - 1].duration;
     file->file_start_time = (cat->avf->start_time == AV_NOPTS_VALUE) ? 0 : cat->avf->start_time;
     file->file_inpoint = (file->inpoint == AV_NOPTS_VALUE) ? file->file_start_time : file->inpoint;
-    if (file->duration == AV_NOPTS_VALUE && file->outpoint != AV_NOPTS_VALUE)
-        file->duration = file->outpoint - file->file_inpoint;
+    file->duration = get_best_effort_duration(file, cat->avf);
 
     if (cat->segment_time_metadata) {
         av_dict_set_int(&file->metadata, "lavf.concatdec.start_time", file->start_time, 0);
@@ -425,7 +438,7 @@ static int concat_read_header(AVFormatContext *avf)
                 goto fail;
             }
             if (!strcmp(keyword, "duration"))
-                file->duration = dur;
+                file->user_duration = dur;
             else if (!strcmp(keyword, "inpoint"))
                 file->inpoint = dur;
             else if (!strcmp(keyword, "outpoint"))
@@ -484,12 +497,13 @@ static int concat_read_header(AVFormatContext *avf)
             cat->files[i].start_time = time;
         else
             time = cat->files[i].start_time;
-        if (cat->files[i].duration == AV_NOPTS_VALUE) {
+        if (cat->files[i].user_duration == AV_NOPTS_VALUE) {
             if (cat->files[i].inpoint == AV_NOPTS_VALUE || cat->files[i].outpoint == AV_NOPTS_VALUE)
                 break;
-            cat->files[i].duration = cat->files[i].outpoint - cat->files[i].inpoint;
+            cat->files[i].user_duration = cat->files[i].outpoint - cat->files[i].inpoint;
         }
-        time += cat->files[i].duration;
+        cat->files[i].duration = cat->files[i].user_duration;
+        time += cat->files[i].user_duration;
     }
     if (i == cat->nb_files) {
         avf->duration = time;
@@ -514,14 +528,7 @@ static int open_next_file(AVFormatContext *avf)
     ConcatContext *cat = avf->priv_data;
     unsigned fileno = cat->cur_file - cat->files;
 
-    if (cat->cur_file->duration == AV_NOPTS_VALUE) {
-        if (cat->avf->duration > 0 || cat->cur_file->next_dts == AV_NOPTS_VALUE) {
-            cat->cur_file->duration = cat->avf->duration;
-        } else {
-            cat->cur_file->duration = cat->cur_file->next_dts;
-        }
-        cat->cur_file->duration -= (cat->cur_file->file_inpoint - cat->cur_file->file_start_time);
-    }
+    cat->cur_file->duration = get_best_effort_duration(cat->cur_file, cat->avf);
 
     if (++fileno >= cat->nb_files) {
         cat->eof = 1;
@@ -692,6 +699,13 @@ static int real_seek(AVFormatContext *avf, int stream,
 
     left  = 0;
     right = cat->nb_files;
+
+    /* Always support seek to start */
+    if (ts <= 0)
+        right = 1;
+    else if (!cat->seekable)
+        return AVERROR(ESPIPE); /* XXX: can we use it? */
+
     while (right - left > 1) {
         int mid = (left + right) / 2;
         if (ts < cat->files[mid].start_time)
@@ -728,8 +742,6 @@ static int concat_seek(AVFormatContext *avf, int stream,
     AVFormatContext *cur_avf_saved = cat->avf;
     int ret;
 
-    if (!cat->seekable)
-        return AVERROR(ESPIPE); /* XXX: can we use it? */
     if (flags & (AVSEEK_FLAG_BYTE | AVSEEK_FLAG_FRAME))
         return AVERROR(ENOSYS);
     cat->avf = NULL;
diff --git a/libavformat/dashdec.c b/libavformat/dashdec.c
index 497e7e469cbe8..89acd5807d422 100644
--- a/libavformat/dashdec.c
+++ b/libavformat/dashdec.c
@@ -140,6 +140,8 @@ typedef struct DASHContext {
     struct representation **videos;
     int n_audios;
     struct representation **audios;
+    int n_subtitles;
+    struct representation **subtitles;
 
     /* MediaPresentationDescription Attribute */
     uint64_t media_presentation_duration;
@@ -394,6 +396,17 @@ static void free_audio_list(DASHContext *c)
     c->n_audios = 0;
 }
 
+/* Hand every subtitle representation to free_representation(), free the array, zero the count. */
+static void free_subtitle_list(DASHContext *c)
+{
+    int idx = 0;
+
+    while (idx < c->n_subtitles)
+        free_representation(c->subtitles[idx++]);
+    av_freep(&c->subtitles);
+    c->n_subtitles = 0;
+}
+
 static int open_url(AVFormatContext *s, AVIOContext **pb, const char *url,
                     AVDictionary *opts, AVDictionary *opts2, int *is_http)
 {
@@ -493,7 +506,7 @@ static char *get_content_url(xmlNodePtr *baseurl_nodes,
     }
 
     if (val)
-        av_strlcat(tmp_str, (const char*)val, max_url_size);
+        ff_make_absolute_url(tmp_str, max_url_size, tmp_str, val);
 
     if (rep_id_val) {
         url = av_strireplace(tmp_str, "$RepresentationID$", (const char*)rep_id_val);
@@ -565,6 +578,8 @@ static enum AVMediaType get_content_type(xmlNodePtr node)
                     type = AVMEDIA_TYPE_VIDEO;
                 } else if (av_stristr((const char *)val, "audio")) {
                     type = AVMEDIA_TYPE_AUDIO;
+                } else if (av_stristr((const char *)val, "text")) {
+                    type = AVMEDIA_TYPE_SUBTITLE;
                 }
                 xmlFree(val);
             }
@@ -818,6 +833,7 @@ static int parse_manifest_representation(AVFormatContext *s, const char *url,
                                          xmlNodePtr adaptionset_supplementalproperty_node)
 {
     int32_t ret = 0;
+    int32_t subtitle_rep_idx = 0;
     int32_t audio_rep_idx = 0;
     int32_t video_rep_idx = 0;
     DASHContext *c = s->priv_data;
@@ -854,7 +870,7 @@ static int parse_manifest_representation(AVFormatContext *s, const char *url,
         type = get_content_type(adaptionset_node);
     if (type == AVMEDIA_TYPE_UNKNOWN) {
         av_log(s, AV_LOG_VERBOSE, "Parsing '%s' - skipp not supported representation type\n", url);
-    } else if (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO) {
+    } else if (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO || type == AVMEDIA_TYPE_SUBTITLE) {
         // convert selected representation to our internal struct
         rep = av_mallocz(sizeof(struct representation));
         if (!rep) {
@@ -1048,18 +1064,29 @@ static int parse_manifest_representation(AVFormatContext *s, const char *url,
                     av_log(s, AV_LOG_VERBOSE, "Ignoring invalid frame rate '%s'\n", rep_framerate_val);
             }
 
-            if (type == AVMEDIA_TYPE_VIDEO) {
-                rep->rep_idx = video_rep_idx;
-                dynarray_add(&c->videos, &c->n_videos, rep);
-            } else {
-                rep->rep_idx = audio_rep_idx;
-                dynarray_add(&c->audios, &c->n_audios, rep);
+            switch (type) {
+                case AVMEDIA_TYPE_VIDEO:
+                    rep->rep_idx = video_rep_idx;
+                    dynarray_add(&c->videos, &c->n_videos, rep);
+                    break;
+                case AVMEDIA_TYPE_AUDIO:
+                    rep->rep_idx = audio_rep_idx;
+                    dynarray_add(&c->audios, &c->n_audios, rep);
+                    break;
+                case AVMEDIA_TYPE_SUBTITLE:
+                    rep->rep_idx = subtitle_rep_idx;
+                    dynarray_add(&c->subtitles, &c->n_subtitles, rep);
+                    break;
+                default:
+                    av_log(s, AV_LOG_WARNING, "Unsupported the stream type %d\n", type);
+                    break;
             }
         }
     }
 
     video_rep_idx += type == AVMEDIA_TYPE_VIDEO;
     audio_rep_idx += type == AVMEDIA_TYPE_AUDIO;
+    subtitle_rep_idx += type == AVMEDIA_TYPE_SUBTITLE;
 
 end:
     if (rep_id_val)
@@ -1441,6 +1468,8 @@ static int refresh_manifest(AVFormatContext *s)
     struct representation **videos = c->videos;
     int n_audios = c->n_audios;
     struct representation **audios = c->audios;
+    int n_subtitles = c->n_subtitles;
+    struct representation **subtitles = c->subtitles;
     char *base_url = c->base_url;
 
     c->base_url = NULL;
@@ -1448,6 +1477,8 @@ static int refresh_manifest(AVFormatContext *s)
     c->videos = NULL;
     c->n_audios = 0;
     c->audios = NULL;
+    c->n_subtitles = 0;
+    c->subtitles = NULL;
     ret = parse_manifest(s, s->url, NULL);
     if (ret)
         goto finish;
@@ -1464,6 +1495,12 @@ static int refresh_manifest(AVFormatContext *s)
                n_audios, c->n_audios);
         return AVERROR_INVALIDDATA;
     }
+    if (c->n_subtitles != n_subtitles) {
+        av_log(c, AV_LOG_ERROR,
+               "new manifest has mismatched no. of subtitles representations, %d -> %d\n",
+               n_subtitles, c->n_subtitles);
+        return AVERROR_INVALIDDATA;
+    }
 
     for (i = 0; i < n_videos; i++) {
         struct representation *cur_video = videos[i];
@@ -1504,10 +1541,16 @@ static int refresh_manifest(AVFormatContext *s)
         av_free(base_url);
     else
         c->base_url  = base_url;
+
+    if (c->subtitles)
+        free_subtitle_list(c);
     if (c->audios)
         free_audio_list(c);
     if (c->videos)
         free_video_list(c);
+
+    c->n_subtitles = n_subtitles;
+    c->subtitles = subtitles;
     c->n_audios = n_audios;
     c->audios = audios;
     c->n_videos = n_videos;
@@ -1957,8 +2000,6 @@ static int dash_read_header(AVFormatContext *s)
     if ((ret = save_avio_options(s)) < 0)
         goto fail;
 
-    av_dict_set(&c->avio_opts, "seekable", "0", 0);
-
     if ((ret = parse_manifest(s, s->url, s->pb)) < 0)
         goto fail;
 
@@ -1966,6 +2007,8 @@ static int dash_read_header(AVFormatContext *s)
      * stream. */
     if (!c->is_live) {
         s->duration = (int64_t) c->media_presentation_duration * AV_TIME_BASE;
+    } else {
+        av_dict_set(&c->avio_opts, "seekable", "0", 0);
     }
 
     if(c->n_videos)
@@ -2001,6 +2044,23 @@ static int dash_read_header(AVFormatContext *s)
         ++stream_index;
     }
 
+    if (c->n_subtitles)
+        c->is_init_section_common_audio = is_common_init_section_exist(c->subtitles, c->n_subtitles);
+
+    for (i = 0; i < c->n_subtitles; i++) {
+        struct representation *cur_subtitle = c->subtitles[i];
+        if (i > 0 && c->is_init_section_common_audio) {
+            copy_init_section(cur_subtitle,c->subtitles[0]);
+        }
+        ret = open_demux_for_component(s, cur_subtitle);
+
+        if (ret)
+            goto fail;
+        cur_subtitle->stream_index = stream_index;
+        ++stream_index;
+    }
+
+
     if (!stream_index) {
         ret = AVERROR_INVALIDDATA;
         goto fail;
@@ -2034,6 +2094,14 @@ static int dash_read_header(AVFormatContext *s)
             if (pls->id[0])
                 av_dict_set(&pls->assoc_stream->metadata, "id", pls->id, 0);
         }
+        for (i = 0; i < c->n_subtitles; i++) {
+            struct representation *pls = c->subtitles[i];
+            av_program_add_stream_index(s, 0, pls->stream_index);
+            pls->assoc_stream = s->streams[pls->stream_index];
+            if (pls->id[0])
+                av_dict_set(&pls->assoc_stream->metadata, "id", pls->id, 0);
+        }
+
     }
 
     return 0;
@@ -2076,6 +2144,7 @@ static int dash_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     recheck_discard_flags(s, c->videos, c->n_videos);
     recheck_discard_flags(s, c->audios, c->n_audios);
+    recheck_discard_flags(s, c->subtitles, c->n_subtitles);
 
     for (i = 0; i < c->n_videos; i++) {
         struct representation *pls = c->videos[i];
@@ -2096,6 +2165,16 @@ static int dash_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
     }
 
+    for (i = 0; i < c->n_subtitles; i++) {
+        struct representation *pls = c->subtitles[i];
+        if (!pls->ctx)
+            continue;
+        if (!cur || pls->cur_timestamp < mints) {
+            cur = pls;
+            mints = pls->cur_timestamp;
+        }
+    }
+
     if (!cur) {
         return AVERROR_INVALIDDATA;
     }
@@ -2215,6 +2294,10 @@ static int dash_read_seek(AVFormatContext *s, int stream_index, int64_t timestam
         if (!ret)
             ret = dash_seek(s, c->audios[i], seek_pos_msec, flags, !c->audios[i]->ctx);
     }
+    for (i = 0; i < c->n_subtitles; i++) {
+        if (!ret)
+            ret = dash_seek(s, c->subtitles[i], seek_pos_msec, flags, !c->subtitles[i]->ctx);
+    }
 
     return ret;
 }
@@ -2242,7 +2325,7 @@ static int dash_probe(AVProbeData *p)
 static const AVOption dash_options[] = {
     {"allowed_extensions", "List of file extensions that dash is allowed to access",
         OFFSET(allowed_extensions), AV_OPT_TYPE_STRING,
-        {.str = "aac,m4a,m4s,m4v,mov,mp4"},
+        {.str = "aac,m4a,m4s,m4v,mov,mp4,webm"},
         INT_MIN, INT_MAX, FLAGS},
     {NULL}
 };
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 3f5f290e255f5..1b74bce060fc6 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -32,6 +32,7 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/rational.h"
+#include "libavutil/time.h"
 #include "libavutil/time_internal.h"
 
 #include "avc.h"
@@ -49,7 +50,8 @@
 #include "dash.h"
 
 typedef enum {
-    SEGMENT_TYPE_MP4 = 0,
+    SEGMENT_TYPE_AUTO = 0,
+    SEGMENT_TYPE_MP4,
     SEGMENT_TYPE_WEBM,
     SEGMENT_TYPE_NB
 } SegmentType;
@@ -59,7 +61,8 @@ typedef struct Segment {
     int64_t start_pos;
     int range_length, index_length;
     int64_t time;
-    int duration;
+    double prog_date_time;
+    int64_t duration;
     int n;
 } Segment;
 
@@ -84,6 +87,11 @@ typedef struct OutputStream {
     int64_t first_pts, start_pts, max_pts;
     int64_t last_dts, last_pts;
     int bit_rate;
+    SegmentType segment_type;  /* segment type selected for this particular stream */
+    const char *format_name;
+    const char *single_file_name;  /* file names selected for this particular stream */
+    const char *init_seg_name;
+    const char *media_seg_name;
 
     char codec_str[100];
     int written_len;
@@ -115,8 +123,9 @@ typedef struct DASHContext {
     int64_t last_duration;
     int64_t total_duration;
     char availability_start_time[100];
+    time_t start_time_s;
     char dirname[1024];
-    const char *single_file_name;
+    const char *single_file_name;  /* file names as specified in options */
     const char *init_seg_name;
     const char *media_seg_name;
     const char *utc_timing_url;
@@ -131,8 +140,10 @@ typedef struct DASHContext {
     int64_t timeout;
     int index_correction;
     char *format_options_str;
-    SegmentType segment_type;
-    const char *format_name;
+    SegmentType segment_type_option;  /* segment type as specified in options */
+    int ignore_io_errors;
+    int lhls;
+    int master_publish_rate;
 } DASHContext;
 
 static struct codec_string {
@@ -143,6 +154,7 @@ static struct codec_string {
     { AV_CODEC_ID_VP9, "vp9" },
     { AV_CODEC_ID_VORBIS, "vorbis" },
     { AV_CODEC_ID_OPUS, "opus" },
+    { AV_CODEC_ID_FLAC, "flac" },
     { 0, NULL }
 };
 
@@ -150,6 +162,7 @@ static struct format_string {
     SegmentType segment_type;
     const char *str;
 } formats[] = {
+    { SEGMENT_TYPE_AUTO, "auto" },
     { SEGMENT_TYPE_MP4, "mp4" },
     { SEGMENT_TYPE_WEBM, "webm" },
     { 0, NULL }
@@ -167,6 +180,8 @@ static int dashenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
         URLContext *http_url_context = ffio_geturlcontext(*pb);
         av_assert0(http_url_context);
         err = ff_http_do_new_request(http_url_context, filename);
+        if (err < 0)
+            ff_format_io_close(s, pb);
 #endif
     }
     return err;
@@ -176,6 +191,9 @@ static void dashenc_io_close(AVFormatContext *s, AVIOContext **pb, char *filenam
     DASHContext *c = s->priv_data;
     int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
 
+    if (!*pb)
+        return;
+
     if (!http_base_proto || !c->http_persistent) {
         ff_format_io_close(s, pb);
 #if CONFIG_HTTP_PROTOCOL
@@ -196,6 +214,54 @@ static const char *get_format_str(SegmentType segment_type) {
     return NULL;
 }
 
+/* Log a failed open of url; yields 0 when ignore_io_errors is set, err otherwise. */
+static int handle_io_open_error(AVFormatContext *s, int err, char *url) {
+    const int tolerated = ((DASHContext *)s->priv_data)->ignore_io_errors;
+    char msg[AV_ERROR_MAX_STRING_SIZE];
+    av_strerror(err, msg, sizeof(msg));
+    av_log(s, tolerated ? AV_LOG_WARNING : AV_LOG_ERROR, "Unable to open %s for writing: %s\n", url, msg);
+    return tolerated ? 0 : err;
+}
+
+/*
+ * Resolve the segment type for one stream: an explicit type is returned as is,
+ * "auto" becomes WebM for Opus, Vorbis, VP8 and VP9 and MP4 for everything else.
+ */
+static inline SegmentType select_segment_type(SegmentType segment_type, enum AVCodecID codec_id)
+{
+    int webm_codec = codec_id == AV_CODEC_ID_OPUS || codec_id == AV_CODEC_ID_VORBIS ||
+                     codec_id == AV_CODEC_ID_VP8  || codec_id == AV_CODEC_ID_VP9;
+
+    if (segment_type != SEGMENT_TYPE_AUTO)
+        return segment_type;
+    return webm_codec ? SEGMENT_TYPE_WEBM : SEGMENT_TYPE_MP4;
+}
+
+/* Pick segment_type/format_name for each stream from the segment type option and codec;
+ * HLS playlist generation is turned off, with a warning, if no stream uses mp4 segments. */
+static int init_segment_types(AVFormatContext *s)
+{
+    DASHContext *c = s->priv_data;
+    int idx, mp4_seen = 0;
+
+    for (idx = 0; idx < s->nb_streams; idx++) {
+        OutputStream *os = &c->streams[idx];
+        os->segment_type = select_segment_type(c->segment_type_option,
+                                               s->streams[idx]->codecpar->codec_id);
+        os->format_name = get_format_str(os->segment_type);
+        if (!os->format_name) {
+            av_log(s, AV_LOG_ERROR, "Could not select DASH segment type for stream %d\n", idx);
+            return AVERROR_MUXER_NOT_FOUND;
+        }
+        mp4_seen |= os->segment_type == SEGMENT_TYPE_MP4;
+    }
+    if (!mp4_seen && c->hls_playlist) {
+        av_log(s, AV_LOG_WARNING, "No mp4 streams, disabling HLS manifest generation\n");
+        c->hls_playlist = 0;
+    }
+    return 0;
+}
+
 static int check_file_extension(const char *filename, const char *extension) {
     char *dot;
     if (!filename || !extension)
@@ -317,7 +383,8 @@ static int flush_dynbuf(OutputStream *os, int *range_length)
     // write out to file
     *range_length = avio_close_dyn_buf(os->ctx->pb, &buffer);
     os->ctx->pb = NULL;
-    avio_write(os->out, buffer + os->written_len, *range_length - os->written_len);
+    if (os->out)
+        avio_write(os->out, buffer + os->written_len, *range_length - os->written_len);
     os->written_len = 0;
     av_free(buffer);
 
@@ -345,6 +412,97 @@ static void get_hls_playlist_name(char *playlist_name, int string_size,
         snprintf(playlist_name, string_size, "media_%d.m3u8", id);
 }
 
+/*
+ * Compute the index of the first listed segment and its segment number: 0 and 1 without
+ * a window_size, else nb_segments and segment_index minus window_size, floored at 0 and 1.
+ */
+static void get_start_index_number(OutputStream *os, DASHContext *c,
+                                   int *start_index, int *start_number) {
+    *start_index  = c->window_size ? FFMAX(os->nb_segments   - c->window_size, 0) : 0;
+    *start_number = c->window_size ? FFMAX(os->segment_index - c->window_size, 1) : 1;
+}
+
+static void write_hls_media_playlist(OutputStream *os, AVFormatContext *s,
+                                     int representation_id, int final,
+                                     char *prefetch_url) {
+    DASHContext *c = s->priv_data;
+    int timescale = os->ctx->streams[0]->time_base.den; /* divisor applied to seg->duration below */
+    char temp_filename_hls[1024];
+    char filename_hls[1024];
+    AVDictionary *http_opts = NULL;
+    int target_duration = 0;
+    int ret = 0;
+    const char *proto = avio_find_protocol_name(c->dirname);
+    int use_rename = proto && !strcmp(proto, "file");
+    int i, start_index, start_number;
+    double prog_date_time = 0;
+    /* Write the HLS media playlist (.m3u8) listing the segments of stream os. */
+    get_start_index_number(os, c, &start_index, &start_number);
+    /* Skip when HLS output is off, no segment is in range or the stream is not mp4. */
+    if (!c->hls_playlist || start_index >= os->nb_segments ||
+        os->segment_type != SEGMENT_TYPE_MP4)
+        return;
+
+    get_hls_playlist_name(filename_hls, sizeof(filename_hls),
+                          c->dirname, representation_id);
+    /* With the "file" protocol, write to a .tmp name that is renamed into place at the end. */
+    snprintf(temp_filename_hls, sizeof(temp_filename_hls), use_rename ? "%s.tmp" : "%s", filename_hls);
+
+    set_http_options(&http_opts, c);
+    ret = dashenc_io_open(s, &c->m3u8_out, temp_filename_hls, &http_opts);
+    av_dict_free(&http_opts);
+    if (ret < 0) {
+        handle_io_open_error(s, ret, temp_filename_hls); /* only logs; result dropped as this function is void */
+        return;
+    }
+    for (i = start_index; i < os->nb_segments; i++) { /* target duration follows the longest listed segment */
+        Segment *seg = os->segments[i];
+        double duration = (double) seg->duration / timescale;
+        if (target_duration <= duration)
+            target_duration = lrint(duration);
+    }
+
+    ff_hls_write_playlist_header(c->m3u8_out, 6, -1, target_duration,
+                                 start_number, PLAYLIST_TYPE_NONE);
+
+    ff_hls_write_init_file(c->m3u8_out, os->initfile, c->single_file,
+                           os->init_range_length, os->init_start_pos);
+
+    for (i = start_index; i < os->nb_segments; i++) {
+        Segment *seg = os->segments[i];
+        /* Seed the date while it is still 0: start_time_s for a single segment, else the stored value. */
+        if (prog_date_time == 0) {
+            if (os->nb_segments == 1)
+                prog_date_time = c->start_time_s;
+            else
+                prog_date_time = seg->prog_date_time;
+        }
+        seg->prog_date_time = prog_date_time; /* remember the date used for this entry */
+
+        ret = ff_hls_write_file_entry(c->m3u8_out, 0, c->single_file,
+                                (double) seg->duration / timescale, 0,
+                                seg->range_length, seg->start_pos, NULL,
+                                c->single_file ? os->initfile : seg->file,
+                                &prog_date_time); /* NOTE(review): passed by pointer, presumably advanced per entry - confirm */
+        if (ret < 0) {
+            av_log(os->ctx, AV_LOG_WARNING, "ff_hls_write_file_entry get error\n");
+        }
+    }
+
+    if (prefetch_url)
+        avio_printf(c->m3u8_out, "#EXT-X-PREFETCH:%s\n", prefetch_url);
+
+    if (final)
+        ff_hls_write_end_list(c->m3u8_out);
+
+    dashenc_io_close(s, &c->m3u8_out, temp_filename_hls);
+
+    if (use_rename)
+        if (avpriv_io_move(temp_filename_hls, filename_hls) < 0) { /* a failed rename is only warned about */
+            av_log(os->ctx, AV_LOG_WARNING, "renaming file %s to %s failed\n\n", temp_filename_hls, filename_hls);
+        }
+}
+
 static int flush_init_segment(AVFormatContext *s, OutputStream *os)
 {
     DASHContext *c = s->priv_data;
@@ -355,8 +513,11 @@ static int flush_init_segment(AVFormatContext *s, OutputStream *os)
         return ret;
 
     os->pos = os->init_range_length = range_length;
-    if (!c->single_file)
-        ff_format_io_close(s, &os->out);
+    if (!c->single_file) {
+        char filename[1024];
+        snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
+        dashenc_io_close(s, &os->out, filename);
+    }
     return 0;
 }
 
@@ -376,8 +537,6 @@ static void dash_free(AVFormatContext *s)
         return;
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
-        if (os->ctx && os->ctx_inited)
-            av_write_trailer(os->ctx);
         if (os->ctx && os->ctx->pb)
             ffio_free_dyn_buf(&os->ctx->pb);
         ff_format_io_close(s, &os->out);
@@ -386,6 +545,9 @@ static void dash_free(AVFormatContext *s)
         for (j = 0; j < os->nb_segments; j++)
             av_free(os->segments[j]);
         av_free(os->segments);
+        av_freep(&os->single_file_name);
+        av_freep(&os->init_seg_name);
+        av_freep(&os->media_seg_name);
     }
     av_freep(&c->streams);
 
@@ -397,11 +559,8 @@ static void output_segment_list(OutputStream *os, AVIOContext *out, AVFormatCont
                                 int representation_id, int final)
 {
     DASHContext *c = s->priv_data;
-    int i, start_index = 0, start_number = 1;
-    if (c->window_size) {
-        start_index  = FFMAX(os->nb_segments   - c->window_size, 0);
-        start_number = FFMAX(os->segment_index - c->window_size, 1);
-    }
+    int i, start_index, start_number;
+    get_start_index_number(os, c, &start_index, &start_number);
 
     if (c->use_template) {
         int timescale = c->use_timeline ? os->ctx->streams[0]->time_base.den : AV_TIME_BASE;
@@ -412,7 +571,7 @@ static void output_segment_list(OutputStream *os, AVIOContext *out, AVFormatCont
                 avio_printf(out, "availabilityTimeOffset=\"%.3f\" ",
                             os->availability_time_offset);
         }
-        avio_printf(out, "initialization=\"%s\" media=\"%s\" startNumber=\"%d\">\n", c->init_seg_name, c->media_seg_name, c->use_timeline ? start_number : 1);
+        avio_printf(out, "initialization=\"%s\" media=\"%s\" startNumber=\"%d\">\n", os->init_seg_name, os->media_seg_name, c->use_timeline ? start_number : 1);
         if (c->use_timeline) {
             int64_t cur_time = 0;
             avio_printf(out, "\t\t\t\t\t<SegmentTimeline>\n");
@@ -424,7 +583,7 @@ static void output_segment_list(OutputStream *os, AVIOContext *out, AVFormatCont
                     cur_time = seg->time;
                     avio_printf(out, "t=\"%"PRId64"\" ", seg->time);
                 }
-                avio_printf(out, "d=\"%d\" ", seg->duration);
+                avio_printf(out, "d=\"%"PRId64"\" ", seg->duration);
                 while (i + repeat + 1 < os->nb_segments &&
                        os->segments[i + repeat + 1]->duration == seg->duration &&
                        os->segments[i + repeat + 1]->time == os->segments[i + repeat]->time + os->segments[i + repeat]->duration)
@@ -459,59 +618,8 @@ static void output_segment_list(OutputStream *os, AVIOContext *out, AVFormatCont
         }
         avio_printf(out, "\t\t\t\t</SegmentList>\n");
     }
-    if (c->hls_playlist && start_index < os->nb_segments)
-    {
-        int timescale = os->ctx->streams[0]->time_base.den;
-        char temp_filename_hls[1024];
-        char filename_hls[1024];
-        AVDictionary *http_opts = NULL;
-        int target_duration = 0;
-        int ret = 0;
-        const char *proto = avio_find_protocol_name(c->dirname);
-        int use_rename = proto && !strcmp(proto, "file");
-
-        get_hls_playlist_name(filename_hls, sizeof(filename_hls),
-                              c->dirname, representation_id);
-
-        snprintf(temp_filename_hls, sizeof(temp_filename_hls), use_rename ? "%s.tmp" : "%s", filename_hls);
-
-        set_http_options(&http_opts, c);
-        dashenc_io_open(s, &c->m3u8_out, temp_filename_hls, &http_opts);
-        av_dict_free(&http_opts);
-        for (i = start_index; i < os->nb_segments; i++) {
-            Segment *seg = os->segments[i];
-            double duration = (double) seg->duration / timescale;
-            if (target_duration <= duration)
-                target_duration = lrint(duration);
-        }
-
-        ff_hls_write_playlist_header(c->m3u8_out, 6, -1, target_duration,
-                                     start_number, PLAYLIST_TYPE_NONE);
-
-        ff_hls_write_init_file(c->m3u8_out, os->initfile, c->single_file,
-                               os->init_range_length, os->init_start_pos);
-
-        for (i = start_index; i < os->nb_segments; i++) {
-            Segment *seg = os->segments[i];
-            ret = ff_hls_write_file_entry(c->m3u8_out, 0, c->single_file,
-                                    (double) seg->duration / timescale, 0,
-                                    seg->range_length, seg->start_pos, NULL,
-                                    c->single_file ? os->initfile : seg->file,
-                                    NULL);
-            if (ret < 0) {
-                av_log(os->ctx, AV_LOG_WARNING, "ff_hls_write_file_entry get error\n");
-            }
-        }
-
-        if (final)
-            ff_hls_write_end_list(c->m3u8_out);
-
-        dashenc_io_close(s, &c->m3u8_out, temp_filename_hls);
-
-        if (use_rename)
-            if (avpriv_io_move(temp_filename_hls, filename_hls) < 0) {
-                av_log(os->ctx, AV_LOG_WARNING, "renaming file %s to %s failed\n\n", temp_filename_hls, filename_hls);
-            }
+    if (!c->lhls || final) {
+        write_hls_media_playlist(os, s, representation_id, final, NULL);
     }
 
 }
@@ -574,12 +682,20 @@ static void write_time(AVIOContext *out, int64_t time)
 
 static void format_date_now(char *buf, int size)
 {
-    time_t t = time(NULL);
     struct tm *ptm, tmbuf;
-    ptm = gmtime_r(&t, &tmbuf);
+    int64_t time_us = av_gettime(); /* current time; NOTE(review): microseconds per the variable name - confirm */
+    int64_t time_ms = time_us / 1000;
+    const time_t time_s = time_ms / 1000;
+    int millisec = time_ms - (time_s * 1000); /* what is left of time_ms after the whole seconds */
+    ptm = gmtime_r(&time_s, &tmbuf);
     if (ptm) {
-        if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%SZ", ptm))
+        int len;
+        if (!strftime(buf, size, "%Y-%m-%dT%H:%M:%S", ptm)) { /* date and time only; fraction and 'Z' follow */
             buf[0] = '\0';
+            return; /* leave the empty string when strftime() produced nothing */
+        }
+        len = strlen(buf);
+        snprintf(buf + len, size - len, ".%03dZ", millisec); /* append the 3-digit fraction and 'Z' */
     }
 }
 
@@ -618,13 +734,13 @@ static int write_adaptation_set(AVFormatContext *s, AVIOContext *out, int as_ind
         if (as->media_type == AVMEDIA_TYPE_VIDEO) {
             AVStream *st = s->streams[i];
             avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/%s\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"",
-                i, c->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
+                i, os->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->width, s->streams[i]->codecpar->height);
             if (st->avg_frame_rate.num)
                 avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
             avio_printf(out, ">\n");
         } else {
             avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"audio/%s\" codecs=\"%s\"%s audioSamplingRate=\"%d\">\n",
-                i, c->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->sample_rate);
+                i, os->format_name, os->codec_str, bandwidth_str, s->streams[i]->codecpar->sample_rate);
             avio_printf(out, "\t\t\t\t<AudioChannelConfiguration schemeIdUri=\"urn:mpeg:dash:23003:3:audio_channel_configuration:2011\" value=\"%d\" />\n",
                 s->streams[i]->codecpar->channels);
         }
@@ -788,12 +904,11 @@ static int write_manifest(AVFormatContext *s, int final)
     snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", s->url);
     set_http_options(&opts, c);
     ret = dashenc_io_open(s, &c->mpd_out, temp_filename, &opts);
+    av_dict_free(&opts);
     if (ret < 0) {
-        av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
-        return ret;
+        return handle_io_open_error(s, ret, temp_filename);
     }
     out = c->mpd_out;
-    av_dict_free(&opts);
     avio_printf(out, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
     avio_printf(out, "<MPD xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
                 "\txmlns=\"urn:mpeg:dash:schema:mpd:2011\"\n"
@@ -863,13 +978,18 @@ static int write_manifest(AVFormatContext *s, int final)
             return ret;
     }
 
-    if (c->hls_playlist && !c->master_playlist_created) {
+    if (c->hls_playlist) {
         char filename_hls[1024];
         const char *audio_group = "A1";
         char audio_codec_str[128] = "\0";
         int is_default = 1;
         int max_audio_bitrate = 0;
 
+        // Publish the master playlist only at the configured rate
+        if (c->master_playlist_created && (!c->master_publish_rate ||
+             c->streams[0].segment_index % c->master_publish_rate))
+            return 0;
+
         if (*c->dirname)
             snprintf(filename_hls, sizeof(filename_hls), "%smaster.m3u8", c->dirname);
         else
@@ -878,14 +998,13 @@ static int write_manifest(AVFormatContext *s, int final)
         snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", filename_hls);
 
         set_http_options(&opts, c);
-        ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE, NULL, &opts);
+        ret = dashenc_io_open(s, &c->m3u8_out, temp_filename, &opts);
+        av_dict_free(&opts);
         if (ret < 0) {
-            av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
-            return ret;
+            return handle_io_open_error(s, ret, temp_filename);
         }
-        av_dict_free(&opts);
 
-        ff_hls_write_playlist_version(out, 7);
+        ff_hls_write_playlist_version(c->m3u8_out, 7);
 
         for (i = 0; i < s->nb_streams; i++) {
             char playlist_file[64];
@@ -893,9 +1012,11 @@ static int write_manifest(AVFormatContext *s, int final)
             OutputStream *os = &c->streams[i];
             if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
                 continue;
+            if (os->segment_type != SEGMENT_TYPE_MP4)
+                continue;
             get_hls_playlist_name(playlist_file, sizeof(playlist_file), NULL, i);
-            ff_hls_write_audio_rendition(out, (char *)audio_group,
-                                         playlist_file, i, is_default);
+            ff_hls_write_audio_rendition(c->m3u8_out, (char *)audio_group,
+                                         playlist_file, NULL, i, is_default);
             max_audio_bitrate = FFMAX(st->codecpar->bit_rate +
                                       os->muxer_overhead, max_audio_bitrate);
             if (!av_strnstr(audio_codec_str, os->codec_str, sizeof(audio_codec_str))) {
@@ -912,9 +1033,12 @@ static int write_manifest(AVFormatContext *s, int final)
             AVStream *st = s->streams[i];
             OutputStream *os = &c->streams[i];
             char *agroup = NULL;
+            char *codec_str_ptr = NULL;
             int stream_bitrate = st->codecpar->bit_rate + os->muxer_overhead;
             if (st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
                 continue;
+            if (os->segment_type != SEGMENT_TYPE_MP4)
+                continue;
             av_strlcpy(codec_str, os->codec_str, sizeof(codec_str));
             if (max_audio_bitrate) {
                 agroup = (char *)audio_group;
@@ -922,11 +1046,15 @@ static int write_manifest(AVFormatContext *s, int final)
                 av_strlcat(codec_str, ",", sizeof(codec_str));
                 av_strlcat(codec_str, audio_codec_str, sizeof(codec_str));
             }
+            if (st->codecpar->codec_id != AV_CODEC_ID_HEVC) {
+                codec_str_ptr = codec_str;
+            }
             get_hls_playlist_name(playlist_file, sizeof(playlist_file), NULL, i);
-            ff_hls_write_stream_info(st, out, stream_bitrate, playlist_file, agroup,
-                                     codec_str, NULL);
+            ff_hls_write_stream_info(st, c->m3u8_out, stream_bitrate,
+                                     playlist_file, agroup,
+                                     codec_str_ptr, NULL);
         }
-        avio_close(out);
+        dashenc_io_close(s, &c->m3u8_out, temp_filename);
         if (use_rename)
             if ((ret = avpriv_io_move(temp_filename, filename_hls)) < 0)
                 return ret;
@@ -962,6 +1090,21 @@ static int dash_init(AVFormatContext *s)
         c->seg_duration = c->min_seg_duration;
     }
 #endif
+    if (c->lhls && s->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        av_log(s, AV_LOG_ERROR,
+               "LHLS is experimental, Please set -strict experimental in order to enable it.\n");
+        return AVERROR_EXPERIMENTAL;
+    }
+
+    if (c->lhls && !c->streaming) {
+        av_log(s, AV_LOG_WARNING, "LHLS option will be ignored as streaming is not enabled\n");
+        c->lhls = 0;
+    }
+
+    if (c->lhls && !c->hls_playlist) {
+        av_log(s, AV_LOG_WARNING, "LHLS option will be ignored as hls_playlist is not enabled\n");
+        c->lhls = 0;
+    }
 
     av_strlcpy(c->dirname, s->url, sizeof(c->dirname));
     ptr = strrchr(c->dirname, '/');
@@ -984,6 +1127,9 @@ static int dash_init(AVFormatContext *s)
     if ((ret = parse_adaptation_sets(s)) < 0)
         return ret;
 
+    if ((ret = init_segment_types(s)) < 0)
+        return ret;
+
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
         AdaptationSet *as = &c->as[os->as_idx - 1];
@@ -1009,13 +1155,26 @@ static int dash_init(AVFormatContext *s)
         if (!ctx)
             return AVERROR(ENOMEM);
 
-        c->format_name = get_format_str(c->segment_type);
-        if (!c->format_name)
-            return AVERROR_MUXER_NOT_FOUND;
-        if (c->segment_type == SEGMENT_TYPE_WEBM) {
-            if ((!c->single_file && check_file_extension(c->init_seg_name, c->format_name) != 0) ||
-                (!c->single_file && check_file_extension(c->media_seg_name, c->format_name) != 0) ||
-                (c->single_file && check_file_extension(c->single_file_name, c->format_name) != 0)) {
+        if (c->init_seg_name) {
+            os->init_seg_name = av_strireplace(c->init_seg_name, "$ext$", os->format_name);
+            if (!os->init_seg_name)
+                return AVERROR(ENOMEM);
+        }
+        if (c->media_seg_name) {
+            os->media_seg_name = av_strireplace(c->media_seg_name, "$ext$", os->format_name);
+            if (!os->media_seg_name)
+                return AVERROR(ENOMEM);
+        }
+        if (c->single_file_name) {
+            os->single_file_name = av_strireplace(c->single_file_name, "$ext$", os->format_name);
+            if (!os->single_file_name)
+                return AVERROR(ENOMEM);
+        }
+
+        if (os->segment_type == SEGMENT_TYPE_WEBM) {
+            if ((!c->single_file && check_file_extension(os->init_seg_name, os->format_name) != 0) ||
+                (!c->single_file && check_file_extension(os->media_seg_name, os->format_name) != 0) ||
+                (c->single_file && check_file_extension(os->single_file_name, os->format_name) != 0)) {
                 av_log(s, AV_LOG_WARNING,
                        "One or many segment file names doesn't end with .webm. "
                        "Override -init_seg_name and/or -media_seg_name and/or "
@@ -1023,7 +1182,7 @@ static int dash_init(AVFormatContext *s)
             }
         }
 
-        ctx->oformat = av_guess_format(c->format_name, NULL, NULL);
+        ctx->oformat = av_guess_format(os->format_name, NULL, NULL);
         if (!ctx->oformat)
             return AVERROR_MUXER_NOT_FOUND;
         os->ctx = ctx;
@@ -1046,19 +1205,19 @@ static int dash_init(AVFormatContext *s)
             return ret;
 
         if (c->single_file) {
-            if (c->single_file_name)
-                ff_dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->single_file_name, i, 0, os->bit_rate, 0);
+            if (os->single_file_name)
+                ff_dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), os->single_file_name, i, 0, os->bit_rate, 0);
             else
-                snprintf(os->initfile, sizeof(os->initfile), "%s-stream%d.m4s", basename, i);
+                snprintf(os->initfile, sizeof(os->initfile), "%s-stream%d.%s", basename, i, os->format_name);
         } else {
-            ff_dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->init_seg_name, i, 0, os->bit_rate, 0);
+            ff_dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), os->init_seg_name, i, 0, os->bit_rate, 0);
         }
         snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
         set_http_options(&opts, c);
         ret = s->io_open(s, &os->out, filename, AVIO_FLAG_WRITE, &opts);
+        av_dict_free(&opts);
         if (ret < 0)
             return ret;
-        av_dict_free(&opts);
         os->init_start_pos = 0;
 
         if (c->format_options_str) {
@@ -1067,9 +1226,12 @@ static int dash_init(AVFormatContext *s)
                 return ret;
         }
 
-        if (c->segment_type == SEGMENT_TYPE_MP4) {
+        if (os->segment_type == SEGMENT_TYPE_MP4) {
             if (c->streaming)
-                av_dict_set(&opts, "movflags", "frag_every_frame+dash+delay_moov+global_sidx", 0);
+                // frag_every_frame : Allows lower latency streaming
+                // skip_sidx : Reduce bitrate overhead
+                // skip_trailer : Avoids growing memory usage with time
+                av_dict_set(&opts, "movflags", "frag_every_frame+dash+delay_moov+skip_sidx+skip_trailer", 0);
             else
                 av_dict_set(&opts, "movflags", "frag_custom+dash+delay_moov", 0);
         } else {
@@ -1079,11 +1241,12 @@ static int dash_init(AVFormatContext *s)
             av_dict_set_int(&opts, "dash_track_number", i + 1, 0);
             av_dict_set_int(&opts, "live", 1, 0);
         }
-        if ((ret = avformat_init_output(ctx, &opts)) < 0)
+        ret = avformat_init_output(ctx, &opts);
+        av_dict_free(&opts);
+        if (ret < 0)
             return ret;
         os->ctx_inited = 1;
         avio_flush(ctx->pb);
-        av_dict_free(&opts);
 
         av_log(s, AV_LOG_VERBOSE, "Representation %d init segment will be written to: %s\n", i, filename);
 
@@ -1132,7 +1295,7 @@ static int dash_write_header(AVFormatContext *s)
         // Flush init segment
         // Only for WebM segment, since for mp4 delay_moov is set and
         // the init segment is thus flushed after the first packets.
-        if (c->segment_type == SEGMENT_TYPE_WEBM &&
+        if (os->segment_type == SEGMENT_TYPE_WEBM &&
             (ret = flush_init_segment(s, os)) < 0)
             return ret;
     }
@@ -1140,7 +1303,7 @@ static int dash_write_header(AVFormatContext *s)
 }
 
 static int add_segment(OutputStream *os, const char *file,
-                       int64_t time, int duration,
+                       int64_t time, int64_t duration,
                        int64_t start_pos, int64_t range_length,
                        int64_t index_length, int next_exp_index)
 {
@@ -1252,12 +1415,58 @@ static void dashenc_delete_file(AVFormatContext *s, char *filename) {
         }
 
         av_dict_free(&http_opts);
-        dashenc_io_close(s, &out, filename);
-    } else if (unlink(filename) < 0) {
-        av_log(s, AV_LOG_ERROR, "failed to delete %s: %s\n", filename, strerror(errno));
+        ff_format_io_close(s, &out);
+    } else {
+        int res = avpriv_io_delete(filename);
+        if (res < 0) {
+            char errbuf[AV_ERROR_MAX_STRING_SIZE];
+            av_strerror(res, errbuf, sizeof(errbuf));
+            av_log(s, (res == AVERROR(ENOENT) ? AV_LOG_WARNING : AV_LOG_ERROR), "failed to delete %s: %s\n", filename, errbuf);
+        }
     }
 }
 
+static int dashenc_delete_segment_file(AVFormatContext *s, const char* file)
+{
+    /* Delete "<c->dirname><file>"; returns 0, or AVERROR(ENAMETOOLONG) if the path cannot fit. */
+    DASHContext *c = s->priv_data;
+    char path[1024];
+    const size_t dir_len = strlen(c->dirname);
+    size_t name_len;
+
+    if (dir_len >= sizeof(path)) {
+        av_log(s, AV_LOG_WARNING, "Cannot delete segments as the directory path is too long: %"PRIu64" characters: %s\n",
+            (uint64_t)dir_len, c->dirname);
+        return AVERROR(ENAMETOOLONG);
+    }
+    memcpy(path, c->dirname, dir_len);
+
+    name_len = strlen(file);
+    if (dir_len + name_len >= sizeof(path)) {
+        av_log(s, AV_LOG_WARNING, "Cannot delete segments as the path is too long: %"PRIu64" characters: %s%s\n",
+            (uint64_t)(dir_len + name_len), c->dirname, file);
+        return AVERROR(ENAMETOOLONG);
+    }
+
+    /* name_len + 1 bytes: the terminating NUL of file ends the joined path */
+    memcpy(path + dir_len, file, name_len + 1);
+    dashenc_delete_file(s, path);
+    return 0;
+}
+
+static inline void dashenc_delete_media_segments(AVFormatContext *s, OutputStream *os, int remove_count)
+{
+    /* Remove the first remove_count entries of os->segments together with their files. */
+    int idx = 0;
+    while (idx < remove_count) {
+        Segment *seg = os->segments[idx++];
+        dashenc_delete_segment_file(s, seg->file);
+        av_free(seg); /* freed whether or not the file itself could be deleted */
+    }
+    os->nb_segments -= remove_count;
+    memmove(os->segments, os->segments + remove_count, os->nb_segments * sizeof(*os->segments));
+}
+
 static int dash_flush(AVFormatContext *s, int final, int stream)
 {
     DASHContext *c = s->priv_data;
@@ -1303,7 +1512,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
         }
 
         if (!c->single_file) {
-            if (c->segment_type == SEGMENT_TYPE_MP4 && !os->written_len)
+            if (os->segment_type == SEGMENT_TYPE_MP4 && !os->written_len)
                 write_styp(os->ctx->pb);
         } else {
             snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname, os->initfile);
@@ -1347,23 +1556,12 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
         os->pos += range_length;
     }
 
-    if (c->window_size || (final && c->remove_at_exit)) {
+    if (c->window_size) {
         for (i = 0; i < s->nb_streams; i++) {
             OutputStream *os = &c->streams[i];
-            int j;
-            int remove = os->nb_segments - c->window_size - c->extra_window_size;
-            if (final && c->remove_at_exit)
-                remove = os->nb_segments;
-            if (remove > 0) {
-                for (j = 0; j < remove; j++) {
-                    char filename[1024];
-                    snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->segments[j]->file);
-                    dashenc_delete_file(s, filename);
-                    av_free(os->segments[j]);
-                }
-                os->nb_segments -= remove;
-                memmove(os->segments, os->segments + remove, os->nb_segments * sizeof(*os->segments));
-            }
+            int remove_count = os->nb_segments - c->window_size - c->extra_window_size;
+            if (remove_count > 0)
+                dashenc_delete_media_segments(s, os, remove_count);
         }
     }
 
@@ -1406,9 +1604,12 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
         os->first_pts = pkt->pts;
     os->last_pts = pkt->pts;
 
-    if (!c->availability_start_time[0])
+    if (!c->availability_start_time[0]) {
+        int64_t start_time_us = av_gettime();
+        c->start_time_s = start_time_us / 1000000;
         format_date_now(c->availability_start_time,
                         sizeof(c->availability_start_time));
+    }
 
     if (!os->availability_time_offset && pkt->duration) {
         int64_t frame_duration = av_rescale_q(pkt->duration, st->time_base,
@@ -1479,7 +1680,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
         int use_rename = proto && !strcmp(proto, "file");
         os->filename[0] = os->full_path[0] = os->temp_path[0] = '\0';
         ff_dash_fill_tmpl_params(os->filename, sizeof(os->filename),
-                                 c->media_seg_name, pkt->stream_index,
+                                 os->media_seg_name, pkt->stream_index,
                                  os->segment_index, os->bit_rate, os->start_pts);
         snprintf(os->full_path, sizeof(os->full_path), "%s%s", c->dirname,
                  os->filename);
@@ -1487,22 +1688,29 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
                  use_rename ? "%s.tmp" : "%s", os->full_path);
         set_http_options(&opts, c);
         ret = dashenc_io_open(s, &os->out, os->temp_path, &opts);
-        if (ret < 0)
-            return ret;
         av_dict_free(&opts);
+        if (ret < 0) {
+            return handle_io_open_error(s, ret, os->temp_path);
+        }
+        if (c->lhls) {
+            char *prefetch_url = use_rename ? NULL : os->filename;
+            write_hls_media_playlist(os, s, pkt->stream_index, 0, prefetch_url);
+        }
     }
 
     //write out the data immediately in streaming mode
-    if (c->streaming && c->segment_type == SEGMENT_TYPE_MP4) {
+    if (c->streaming && os->segment_type == SEGMENT_TYPE_MP4) {
         int len = 0;
         uint8_t *buf = NULL;
         if (!os->written_len)
             write_styp(os->ctx->pb);
         avio_flush(os->ctx->pb);
         len = avio_get_dyn_buf (os->ctx->pb, &buf);
-        avio_write(os->out, buf + os->written_len, len - os->written_len);
+        if (os->out) {
+            avio_write(os->out, buf + os->written_len, len - os->written_len);
+            avio_flush(os->out);
+        }
         os->written_len = len;
-        avio_flush(os->out);
     }
 
     return ret;
@@ -1511,6 +1719,7 @@ static int dash_write_packet(AVFormatContext *s, AVPacket *pkt)
 static int dash_write_trailer(AVFormatContext *s)
 {
     DASHContext *c = s->priv_data;
+    int i;
 
     if (s->nb_streams > 0) {
         OutputStream *os = &c->streams[0];
@@ -1526,15 +1735,33 @@ static int dash_write_trailer(AVFormatContext *s)
     }
     dash_flush(s, 1, -1);
 
+    for (i = 0; i < s->nb_streams; ++i) {
+        OutputStream *os = &c->streams[i];
+        if (os->ctx && os->ctx_inited) {
+            av_write_trailer(os->ctx);
+        }
+
+        if (c->remove_at_exit) {
+            dashenc_delete_media_segments(s, os, os->nb_segments);
+            dashenc_delete_segment_file(s, os->initfile);
+        }
+    }
+
     if (c->remove_at_exit) {
-        char filename[1024];
-        int i;
-        for (i = 0; i < s->nb_streams; i++) {
-            OutputStream *os = &c->streams[i];
-            snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
+        dashenc_delete_file(s, s->url);
+
+        if (c->hls_playlist && c->master_playlist_created) {
+            char filename[1024];
+            for (i = 0; i < s->nb_streams; ++i) {
+                OutputStream *os = &c->streams[i];
+                if (os->segment_type == SEGMENT_TYPE_MP4) {
+                    get_hls_playlist_name(filename, sizeof(filename), c->dirname, i);
+                    dashenc_delete_file(s, filename);
+                }
+            }
+            snprintf(filename, sizeof(filename), "%smaster.m3u8", c->dirname);
             dashenc_delete_file(s, filename);
         }
-        dashenc_delete_file(s, s->url);
     }
 
     return 0;
@@ -1578,8 +1805,8 @@ static const AVOption options[] = {
     { "use_timeline", "Use SegmentTimeline in SegmentTemplate", OFFSET(use_timeline), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
     { "single_file", "Store all segments in one file, accessed using byte ranges", OFFSET(single_file), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { "single_file_name", "DASH-templated name to be used for baseURL. Implies storing all segments in one file, accessed using byte ranges", OFFSET(single_file_name), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
-    { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.m4s"}, 0, 0, E },
-    { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.m4s"}, 0, 0, E },
+    { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.$ext$"}, 0, 0, E },
+    { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.$ext$"}, 0, 0, E },
     { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
     { "method", "set the HTTP method", OFFSET(method), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, E },
     { "http_user_agent", "override User-Agent field in HTTP header", OFFSET(user_agent), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, E},
@@ -1589,9 +1816,13 @@ static const AVOption options[] = {
     { "timeout", "set timeout for socket I/O operations", OFFSET(timeout), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT_MAX, .flags = E },
     { "index_correction", "Enable/Disable segment index correction logic", OFFSET(index_correction), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { "format_options","set list of options for the container format (mp4/webm) used for dash", OFFSET(format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0, E},
-    { "dash_segment_type", "set dash segment files type", OFFSET(segment_type), AV_OPT_TYPE_INT, {.i64 = SEGMENT_TYPE_MP4 }, 0, SEGMENT_TYPE_NB - 1, E, "segment_type"},
+    { "dash_segment_type", "set dash segment files type", OFFSET(segment_type_option), AV_OPT_TYPE_INT, {.i64 = SEGMENT_TYPE_AUTO }, 0, SEGMENT_TYPE_NB - 1, E, "segment_type"},
+    { "auto", "select segment file format based on codec", 0, AV_OPT_TYPE_CONST, {.i64 = SEGMENT_TYPE_AUTO }, 0, UINT_MAX,   E, "segment_type"},
     { "mp4", "make segment file in ISOBMFF format", 0, AV_OPT_TYPE_CONST, {.i64 = SEGMENT_TYPE_MP4 }, 0, UINT_MAX,   E, "segment_type"},
     { "webm", "make segment file in WebM format", 0, AV_OPT_TYPE_CONST, {.i64 = SEGMENT_TYPE_WEBM }, 0, UINT_MAX,   E, "segment_type"},
+    { "ignore_io_errors", "Ignore IO errors during open and write. Useful for long-duration runs with network output", OFFSET(ignore_io_errors), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "lhls", "Enable Low-latency HLS(Experimental). Adds #EXT-X-PREFETCH tag with current segment's URI", OFFSET(lhls), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "master_m3u8_publish_rate", "Publish master playlist every after this many segment intervals", OFFSET(master_publish_rate), AV_OPT_TYPE_INT, {.i64 = 0}, 0, UINT_MAX, E},
     { NULL },
 };
 
diff --git a/libavformat/dhav.c b/libavformat/dhav.c
new file mode 100644
index 0000000000000..9948783c0b702
--- /dev/null
+++ b/libavformat/dhav.c
@@ -0,0 +1,384 @@
+/*
+ * DHAV demuxer
+ *
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/parseutils.h"
+#include "avio_internal.h"
+#include "avformat.h"
+#include "internal.h"
+
+typedef struct DHAVContext { /* demuxer state; the header fields are overwritten by every chunk header parsed */
+    unsigned type;            /* chunk type byte: 0xfd / 0xf0 trigger video / audio stream creation, 0xf1 chunks are skipped */
+    unsigned subtype;         /* byte following type in the chunk header */
+    unsigned channel;         /* third byte after the "DHAV" tag */
+    unsigned frame_subnumber; /* fourth byte after the "DHAV" tag */
+    unsigned frame_number;    /* 32-bit little-endian value from the chunk header */
+    unsigned date;            /* 32-bit little-endian value from the chunk header */
+    unsigned timestamp;       /* 16-bit little-endian value from the chunk header; get_pts() derives pts from it */
+    int width, height;        /* set by extension records 0x80 (units of 8) or 0x82 (16-bit values) */
+    int video_codec;          /* codec byte from extension record 0x81 */
+    int frame_rate;           /* frame-rate byte from extension record 0x81 */
+    int audio_channels;       /* from extension record 0x83 or 0x8c */
+    int audio_codec;          /* from extension record 0x83 or 0x8c */
+    int sample_rate;          /* looked up in sample_rates[]; 8000 when the index is out of range */
+
+    int video_stream_index;   /* -1 until the video stream has been created */
+    int audio_stream_index;   /* -1 until the audio stream has been created */
+} DHAVContext;
+
+typedef struct DHAVStream { /* per-stream timing state used by get_pts() */
+    int64_t last_timestamp; /* header timestamp seen on the previous get_pts() call; AV_NOPTS_VALUE before the first */
+    int64_t pts;            /* running sum of timestamp differences, returned by get_pts() */
+} DHAVStream;
+
+static int dhav_probe(AVProbeData *p)
+{
+    /* Max score for a "DAHUA" prefix, or for "DHAV" followed by one of four type bytes. */
+    if (memcmp(p->buf, "DAHUA", 5) == 0)
+        return AVPROBE_SCORE_MAX;
+    if (memcmp(p->buf, "DHAV", 4) != 0)
+        return 0;
+    switch (p->buf[4]) {
+    case 0xf0: case 0xf1:
+    case 0xfc: case 0xfd:
+        return AVPROBE_SCORE_MAX;
+    default:
+        return 0;
+    }
+}
+
+static int dhav_read_header(AVFormatContext *s)
+{
+    /* Skip the optional 0x400-byte "DAHUA" file header; streams are added while reading packets. */
+    DHAVContext *dhav = s->priv_data;
+    uint8_t signature[5];
+    int ret = ffio_ensure_seekback(s->pb, sizeof(signature));
+    /* Fail on I/O errors rather than comparing an unfilled signature buffer. */
+    if (ret < 0 || (ret = avio_read(s->pb, signature, sizeof(signature))) < 0)
+        return ret;
+    if (ret == (int)sizeof(signature) && !memcmp(signature, "DAHUA", 5))
+        avio_skip(s->pb, 0x400 - 5);
+    else
+        avio_seek(s->pb, -ret, SEEK_CUR); /* no file header: rewind exactly what was consumed */
+
+    s->ctx_flags |= AVFMTCTX_NOHEADER;
+    dhav->video_stream_index = dhav->audio_stream_index = -1;
+    return 0;
+}
+
+static int64_t get_pts(AVFormatContext *s, DHAVStream *st)
+{
+    /*
+     * Advance and return the running pts of one stream, using the 16-bit
+     * timestamp of the chunk header parsed most recently (dhav->timestamp).
+     *
+     * The pts grows by the difference between this timestamp and the one
+     * seen on the previous call for the same stream; a smaller value than
+     * last time is treated as a counter wrap-around.
+     *
+     * dhav->date is not used here. An earlier, disabled draft of this
+     * function decoded it as a packed calendar time:
+     *   bits  0- 5  second
+     *   bits  6-11  minute
+     *   bits 12-16  hour
+     *   bits 17-21  day of month
+     *   bits 22-25  month (1-based)
+     *   bits 26-31  year - 2000
+     */
+    DHAVContext *dhav = s->priv_data;
+    const int64_t now = dhav->timestamp;
+    int64_t delta;
+
+    /* First call for this stream: start counting from the current value. */
+    if (st->last_timestamp == AV_NOPTS_VALUE)
+        st->last_timestamp = now;
+    delta = now - st->last_timestamp;
+    /* NOTE(review): a 16-bit counter would wrap modulo 65536, not 65535 - confirm. */
+    if (delta < 0)
+        delta += 65535;
+
+    st->pts += delta;
+    st->last_timestamp = now;
+    return st->pts;
+}
+
+static const uint32_t sample_rates[] = { /* indexed by the sample-rate byte of extension records 0x83 / 0x8c */
+    8000, 4000, 8000, 11025, 16000,
+    20000, 22050, 32000, 44100, 48000,
+    96000, 192000, 64000,
+}; /* parse_ext() falls back to 8000 for indices past the end of this table */
+
+static int parse_ext(AVFormatContext *s, int length)
+{
+    /*
+     * Read the extension records that follow a chunk header. `length` is the
+     * number of bytes they are supposed to occupy. Every record starts with a
+     * type byte; the handled ones take 4 or 8 bytes in total and either fill
+     * in DHAVContext fields or are skipped. An unhandled type makes the rest
+     * of the area be skipped. Always returns 0.
+     */
+    DHAVContext *dhav = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int index;
+
+    while (length > 0) {
+        int type = avio_r8(pb);
+        int used = 4; /* size of this record, type byte included */
+
+        switch (type) {
+        case 0x80: /* width and height, stored divided by 8 */
+            avio_skip(pb, 1);
+            dhav->width  = 8 * avio_r8(pb);
+            dhav->height = 8 * avio_r8(pb);
+            break;
+        case 0x81: /* video codec and frame rate */
+            avio_skip(pb, 1);
+            dhav->video_codec = avio_r8(pb);
+            dhav->frame_rate = avio_r8(pb);
+            break;
+        case 0x82: /* width and height as 16-bit little-endian values */
+            avio_skip(pb, 3);
+            dhav->width  = avio_rl16(pb);
+            dhav->height = avio_rl16(pb);
+            used = 8;
+            break;
+        case 0x83: /* audio channels, codec and sample-rate index */
+            dhav->audio_channels = avio_r8(pb);
+            dhav->audio_codec = avio_r8(pb);
+            index = avio_r8(pb);
+            dhav->sample_rate = index < FF_ARRAY_ELEMS(sample_rates) ? sample_rates[index] : 8000;
+            break;
+        case 0x8c: /* same audio fields inside an 8-byte record */
+            avio_skip(pb, 1);
+            dhav->audio_channels = avio_r8(pb);
+            dhav->audio_codec = avio_r8(pb);
+            index = avio_r8(pb);
+            dhav->sample_rate = index < FF_ARRAY_ELEMS(sample_rates) ? sample_rates[index] : 8000;
+            avio_skip(pb, 3);
+            used = 8;
+            break;
+        /* 8-byte records whose contents are ignored */
+        case 0x88:
+        case 0x91:
+        case 0x92:
+        case 0x93:
+        case 0x95:
+        case 0x9a:
+        case 0x9b: // sample aspect ratio
+        case 0xb3:
+            avio_skip(pb, 7);
+            used = 8;
+            break;
+        /* 4-byte records whose contents are ignored */
+        case 0x84:
+        case 0x85:
+        case 0x8b:
+        case 0x94:
+        case 0x96:
+        case 0xa0:
+        case 0xb2:
+        case 0xb4:
+            avio_skip(pb, 3);
+            break;
+        /* anything else: give up on the remaining records */
+        default:
+            av_log(s, AV_LOG_INFO, "Unknown type: %X, skipping rest of header.\n", type);
+            avio_skip(pb, length - 1);
+            used = length; /* nothing is left to parse */
+            break;
+        }
+
+        length -= used;
+    }
+
+    return 0;
+}
+
+static int read_chunk(AVFormatContext *s)
+{
+    /* Parse one chunk header into the DHAVContext. Returns 0 for a skipped 0xf1 chunk, a negative
+     * AVERROR on failure, else frame_length - 8 - (bytes consumed since the start of the chunk). */
+    DHAVContext *dhav = s->priv_data;
+    unsigned frame_length, ext_length;
+    int64_t start, end, size;
+    int ret;
+
+    start = avio_tell(s->pb);
+    if (avio_feof(s->pb))
+        return AVERROR_EOF;
+    if (avio_rl32(s->pb) != MKTAG('D','H','A','V'))
+        return AVERROR_INVALIDDATA;
+
+    dhav->type = avio_r8(s->pb);
+    dhav->subtype = avio_r8(s->pb);
+    dhav->channel = avio_r8(s->pb);
+    dhav->frame_subnumber = avio_r8(s->pb);
+    dhav->frame_number = avio_rl32(s->pb);
+    frame_length = avio_rl32(s->pb);
+
+    if (frame_length < 24)
+        return AVERROR_INVALIDDATA;
+    if (dhav->type == 0xf1) { /* 16 header bytes are consumed; skip the rest of the chunk */
+        avio_skip(s->pb, frame_length - 16);
+        return 0;
+    }
+
+    dhav->date = avio_rl32(s->pb);
+    dhav->timestamp = avio_rl16(s->pb);
+    ext_length = avio_r8(s->pb);
+    avio_skip(s->pb, 1); // checksum
+    ret = parse_ext(s, ext_length);
+    if (ret < 0)
+        return ret;
+    end = avio_tell(s->pb);
+    size = (int64_t)frame_length - 8 - (end - start);
+    if (size < 0 || size > INT_MAX) /* header overran the frame, or the size does not fit in int */
+        return AVERROR_INVALIDDATA;
+    return size;
+}
+
+/* Return the payload of the next data chunk as a packet; a stream is created the first time its chunk type is seen. */
+static int dhav_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DHAVContext *dhav = s->priv_data;
+    int64_t start;
+    int ret;
+
+    start = avio_tell(s->pb); // offset before any chunk is parsed; becomes pkt->pos
+    while ((ret = read_chunk(s)) == 0) // 0 means nothing to deliver from that chunk, so parse the next one
+        ;
+    if (ret < 0)
+        return ret;
+
+    if (dhav->type == 0xfd && dhav->video_stream_index == -1) { // first 0xfd chunk: create the video stream
+        AVStream *st = avformat_new_stream(s, NULL);
+        DHAVStream *dst;
+
+        if (!st)
+            return AVERROR(ENOMEM);
+
+        st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+        switch (dhav->video_codec) {
+        case 0x1: st->codecpar->codec_id = AV_CODEC_ID_MPEG4; break;
+        case 0x3: st->codecpar->codec_id = AV_CODEC_ID_MJPEG; break;
+        case 0x2:
+        case 0x4:
+        case 0x8: st->codecpar->codec_id = AV_CODEC_ID_H264;  break;
+        case 0xc: st->codecpar->codec_id = AV_CODEC_ID_HEVC;  break;
+        default: avpriv_request_sample(s, "Unknown video codec %X\n", dhav->video_codec); // codec_id is left untouched
+        }
+        st->codecpar->width      = dhav->width;
+        st->codecpar->height     = dhav->height;
+        st->avg_frame_rate.num   = dhav->frame_rate;
+        st->avg_frame_rate.den   = 1;
+        st->priv_data = dst = av_mallocz(sizeof(DHAVStream));
+        if (!st->priv_data)
+            return AVERROR(ENOMEM);
+        dst->last_timestamp = AV_NOPTS_VALUE;
+        dhav->video_stream_index = st->index;
+
+        avpriv_set_pts_info(st, 64, 1, 1000);
+    } else if (dhav->type == 0xf0 && dhav->audio_stream_index == -1) { // first 0xf0 chunk: create the audio stream
+        AVStream *st = avformat_new_stream(s, NULL);
+        DHAVStream *dst;
+
+        if (!st)
+            return AVERROR(ENOMEM);
+
+        st->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
+        switch (dhav->audio_codec) {
+        case 0x07: st->codecpar->codec_id = AV_CODEC_ID_PCM_S8;    break;
+        case 0x0c: st->codecpar->codec_id = AV_CODEC_ID_PCM_S16LE; break;
+        case 0x10: st->codecpar->codec_id = AV_CODEC_ID_PCM_S16LE; break;
+        case 0x0a: st->codecpar->codec_id = AV_CODEC_ID_PCM_MULAW; break;
+        case 0x16: st->codecpar->codec_id = AV_CODEC_ID_PCM_MULAW; break;
+        case 0x0e: st->codecpar->codec_id = AV_CODEC_ID_PCM_ALAW;  break;
+        case 0x1a: st->codecpar->codec_id = AV_CODEC_ID_AAC;       break;
+        case 0x1f: st->codecpar->codec_id = AV_CODEC_ID_MP2;       break;
+        case 0x21: st->codecpar->codec_id = AV_CODEC_ID_MP3;       break;
+        case 0x0d: st->codecpar->codec_id = AV_CODEC_ID_ADPCM_MS;  break;
+        default: avpriv_request_sample(s, "Unknown audio codec %X\n", dhav->audio_codec); // codec_id is left untouched
+        }
+        st->codecpar->channels    = dhav->audio_channels;
+        st->codecpar->sample_rate = dhav->sample_rate;
+        st->priv_data = dst = av_mallocz(sizeof(DHAVStream));
+        if (!st->priv_data)
+            return AVERROR(ENOMEM);
+        dst->last_timestamp = AV_NOPTS_VALUE;
+        dhav->audio_stream_index  = st->index;
+
+        avpriv_set_pts_info(st, 64, 1, 1000);
+    }
+
+    ret = av_get_packet(s->pb, pkt, ret); // ret holds the payload size computed by read_chunk()
+    if (ret < 0)
+        return ret;
+    pkt->stream_index = dhav->type == 0xf0 ? dhav->audio_stream_index : dhav->video_stream_index; // NOTE(review): stays -1 if that stream was never created -- confirm callers cope
+    if (dhav->type != 0xfc) // every chunk type except 0xfc is flagged as a keyframe
+        pkt->flags   |= AV_PKT_FLAG_KEY;
+    if (pkt->stream_index >= 0)
+        pkt->pts = get_pts(s, s->streams[pkt->stream_index]->priv_data);
+    pkt->duration = 1;
+    pkt->pos = start;
+    if (avio_rl32(s->pb) != MKTAG('d','h','a','v')) { // the payload must be followed by the lower-case trailer tag
+        av_packet_unref(pkt); // release the payload read above instead of leaking it on this error path
+        return AVERROR_INVALIDDATA;
+    }
+    avio_skip(s->pb, 4); // remaining 4 trailer bytes are not interpreted
+    return ret;
+}
+
+static int dhav_read_seek(AVFormatContext *s, int stream_index, // seek using the stream's index entries; returns 0 on success, -1 on failure
+                          int64_t timestamp, int flags)
+{
+    AVStream *st = s->streams[stream_index];
+    int index = av_index_search_timestamp(st, timestamp, flags);
+    int64_t pts;
+
+    if (index < 0) // no index entry satisfies the request
+        return -1;
+    if (avio_seek(s->pb, st->index_entries[index].pos, SEEK_SET) < 0)
+        return -1;
+
+    pts = st->index_entries[index].timestamp;
+
+    for (int n = 0; n < s->nb_streams; n++) { // reset the timestamp state of every stream, not only the requested one
+        AVStream *st = s->streams[n]; // shadows the outer st
+        DHAVStream *dst = st->priv_data;
+
+        dst->pts = pts;
+        dst->last_timestamp = AV_NOPTS_VALUE; // same value the streams receive when dhav_read_packet() creates them
+    }
+
+    return 0;
+}
+
+AVInputFormat ff_dhav_demuxer = { // demuxer descriptor wiring up the dhav_* callbacks
+    .name           = "dhav",
+    .long_name      = NULL_IF_CONFIG_SMALL("Video DAV"),
+    .priv_data_size = sizeof(DHAVContext),
+    .read_probe     = dhav_probe,
+    .read_header    = dhav_read_header,
+    .read_packet    = dhav_read_packet,
+    .read_seek      = dhav_read_seek,
+    .extensions     = "dav", // file extension associated with this demuxer
+    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NO_BYTE_SEEK,
+};
diff --git a/libavformat/dump.c b/libavformat/dump.c
index bc0f401550a70..bb8c72fc6088e 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -293,7 +293,7 @@ static void dump_audioservicetype(void *ctx, AVPacketSideData *sd)
         av_log(ctx, AV_LOG_INFO, "dialogue");
         break;
     case AV_AUDIO_SERVICE_TYPE_COMMENTARY:
-        av_log(ctx, AV_LOG_INFO, "comentary");
+        av_log(ctx, AV_LOG_INFO, "commentary");
         break;
     case AV_AUDIO_SERVICE_TYPE_EMERGENCY:
         av_log(ctx, AV_LOG_INFO, "emergency");
diff --git a/libavformat/ffmetadec.c b/libavformat/ffmetadec.c
index 3290b3b7bca0e..6f7133e389525 100644
--- a/libavformat/ffmetadec.c
+++ b/libavformat/ffmetadec.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/bprint.h"
 #include "libavutil/mathematics.h"
 #include "avformat.h"
 #include "ffmeta.h"
@@ -32,6 +33,48 @@ static int probe(AVProbeData *p)
     return 0;
 }
 
+static int64_t read_line_to_bprint_escaped(AVIOContext *s, AVBPrint *bp)
+{
+    int len, end;
+    int64_t read = 0;
+    char tmp[1024];
+    char c;
+    char prev = ' ';
+
+    do {
+        len = 0;
+        do {
+            c = avio_r8(s);
+            end = prev != '\\' && (c == '\r' || c == '\n' || c == '\0');
+            if (!end)
+                tmp[len++] = c;
+            prev = c;
+        } while (!end && len < sizeof(tmp));
+        av_bprint_append_data(bp, tmp, len);
+        read += len;
+    } while (!end);
+
+    if (c == '\r' && avio_r8(s) != '\n' && !avio_feof(s))
+        avio_skip(s, -1);
+
+    if (!c && s->error)
+        return s->error;
+
+    if (!c && !read && avio_feof(s))
+        return AVERROR_EOF;
+
+    return read;
+}
+
+static void get_bprint_line(AVIOContext *s, AVBPrint *bp)
+{
+    /* Read lines into bp until one is neither empty nor starts with ';' or '#', or until EOF. */
+    do {
+        av_bprint_clear(bp); // discard the previously read line
+        read_line_to_bprint_escaped(s, bp); // its return value (length or error) is not checked here
+    } while (!avio_feof(s) && (bp->str[0] == ';' || bp->str[0] == '#' || bp->str[0] == 0));
+}
+
 static void get_line(AVIOContext *s, uint8_t *buf, int size)
 {
     do {
@@ -128,12 +171,14 @@ static int read_tag(const uint8_t *line, AVDictionary **m)
 static int read_header(AVFormatContext *s)
 {
     AVDictionary **m = &s->metadata;
-    uint8_t line[1024];
+    AVBPrint bp;
+
+    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     while(!avio_feof(s->pb)) {
-        get_line(s->pb, line, sizeof(line));
+        get_bprint_line(s->pb, &bp);
 
-        if (!memcmp(line, ID_STREAM, strlen(ID_STREAM))) {
+        if (!memcmp(bp.str, ID_STREAM, strlen(ID_STREAM))) {
             AVStream *st = avformat_new_stream(s, NULL);
 
             if (!st)
@@ -143,7 +188,7 @@ static int read_header(AVFormatContext *s)
             st->codecpar->codec_id   = AV_CODEC_ID_FFMETADATA;
 
             m = &st->metadata;
-        } else if (!memcmp(line, ID_CHAPTER, strlen(ID_CHAPTER))) {
+        } else if (!memcmp(bp.str, ID_CHAPTER, strlen(ID_CHAPTER))) {
             AVChapter *ch = read_chapter(s);
 
             if (!ch)
@@ -151,9 +196,11 @@ static int read_header(AVFormatContext *s)
 
             m = &ch->metadata;
         } else
-            read_tag(line, m);
+            read_tag(bp.str, m);
     }
 
+    av_bprint_finalize(&bp, NULL);
+
     s->start_time = 0;
     if (s->nb_chapters)
         s->duration = av_rescale_q(s->chapters[s->nb_chapters - 1]->end,
diff --git a/libavformat/file.c b/libavformat/file.c
index 1d321c42050e3..e613b91010281 100644
--- a/libavformat/file.c
+++ b/libavformat/file.c
@@ -173,7 +173,11 @@ static int file_delete(URLContext *h)
     av_strstart(filename, "file:", &filename);
 
     ret = rmdir(filename);
-    if (ret < 0 && errno == ENOTDIR)
+    if (ret < 0 && (errno == ENOTDIR
+#   ifdef _WIN32
+        || errno == EINVAL
+#   endif
+        ))
         ret = unlink(filename);
     if (ret < 0)
         return AVERROR(errno);
diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
index 38982b960de44..8317ab2fa64b8 100644
--- a/libavformat/flac_picture.c
+++ b/libavformat/flac_picture.c
@@ -20,6 +20,8 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/png.h"
 #include "avformat.h"
 #include "flac_picture.h"
 #include "id3v2.h"
@@ -119,6 +121,9 @@ int ff_flac_parse_picture(AVFormatContext *s, uint8_t *buf, int buf_size)
         goto fail;
     }
 
+    if (AV_RB64(data->data) == PNGSIG)
+        id = AV_CODEC_ID_PNG;
+
     st = avformat_new_stream(s, NULL);
     if (!st) {
         RETURN_ERROR(AVERROR(ENOMEM));
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index 617bccdc84a88..a07260f426837 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -65,7 +65,7 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
 
     ff_metadata_conv(m, ff_vorbiscomment_metadata_conv, NULL);
 
-    len = ff_vorbiscomment_length(*m, vendor);
+    len = ff_vorbiscomment_length(*m, vendor, NULL, 0);
     if (len >= ((1<<24) - 4))
         return AVERROR(EINVAL);
     p0 = av_malloc(len+4);
@@ -75,7 +75,7 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
 
     bytestream_put_byte(&p, last_block ? 0x84 : 0x04);
     bytestream_put_be24(&p, len);
-    ff_vorbiscomment_write(&p, m, vendor);
+    ff_vorbiscomment_write(&p, m, vendor, NULL, 0);
 
     avio_write(pb, p0, len+4);
     av_freep(&p0);
diff --git a/libavformat/flv.h b/libavformat/flv.h
index df5ce3d17f8c7..3571b90279c7d 100644
--- a/libavformat/flv.h
+++ b/libavformat/flv.h
@@ -65,6 +65,7 @@ enum FlvTagType {
 enum {
     FLV_STREAM_TYPE_VIDEO,
     FLV_STREAM_TYPE_AUDIO,
+    FLV_STREAM_TYPE_SUBTITLE,
     FLV_STREAM_TYPE_DATA,
     FLV_STREAM_TYPE_NB,
 };
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index a2dea464e3697..972e3333133f5 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -72,6 +72,9 @@ typedef struct FLVContext {
     int64_t *keyframe_filepositions;
     int missing_streams;
     AVRational framerate;
+    int64_t last_ts;
+    int64_t time_offset;
+    int64_t time_pos;
 } FLVContext;
 
 static int probe(AVProbeData *p, int live)
@@ -143,7 +146,9 @@ static AVStream *create_stream(AVFormatContext *s, int codec_type)
     st->codecpar->codec_type = codec_type;
     if (s->nb_streams>=3 ||(   s->nb_streams==2
                            && s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE
-                           && s->streams[1]->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE))
+                           && s->streams[1]->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE
+                           && s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_DATA
+                           && s->streams[1]->codecpar->codec_type != AVMEDIA_TYPE_DATA))
         s->ctx_flags &= ~AVFMTCTX_NOHEADER;
     if (codec_type == AVMEDIA_TYPE_AUDIO) {
         st->codecpar->bit_rate = flv->audio_bit_rate;
@@ -915,6 +920,18 @@ static int resync(AVFormatContext *s)
         flv->resync_buffer[j ] =
         flv->resync_buffer[j1] = avio_r8(s->pb);
 
+        if (i >= 8 && pos) {
+            uint8_t *d = flv->resync_buffer + j1 - 8;
+            if (d[0] == 'F' &&
+                d[1] == 'L' &&
+                d[2] == 'V' &&
+                d[3] < 5 && d[5] == 0) {
+                av_log(s, AV_LOG_WARNING, "Concatenated FLV detected, might fail to demux, decode and seek %"PRId64"\n", flv->last_ts);
+                flv->time_offset = flv->last_ts + 1;
+                flv->time_pos    = avio_tell(s->pb);
+            }
+        }
+
         if (i > 22) {
             unsigned lsize2 = AV_RB32(flv->resync_buffer + j1 - 4);
             if (lsize2 >= 11 && lsize2 + 8LL < FFMIN(i, RESYNC_BUFFER_SIZE)) {
@@ -996,12 +1013,12 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_VIDEO_INFO_CMD)
                 goto skip;
         } else if (type == FLV_TAG_TYPE_META) {
-            stream_type=FLV_STREAM_TYPE_DATA;
+            stream_type=FLV_STREAM_TYPE_SUBTITLE;
             if (size > 13 + 1 + 4) { // Header-type metadata stuff
                 int type;
                 meta_pos = avio_tell(s->pb);
                 type = flv_read_metabody(s, next);
-                if (type == 0 && dts == 0 || type < 0 || type == TYPE_UNKNOWN) {
+                if (type == 0 && dts == 0 || type < 0) {
                     if (type < 0 && flv->validate_count &&
                         flv->validate_index[0].pos     > next &&
                         flv->validate_index[0].pos - 4 < next
@@ -1015,6 +1032,8 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                     return flv_data_packet(s, pkt, dts, next);
                 } else if (type == TYPE_ONCAPTION) {
                     return flv_data_packet(s, pkt, dts, next);
+                } else if (type == TYPE_UNKNOWN) {
+                    stream_type = FLV_STREAM_TYPE_DATA;
                 }
                 avio_seek(s->pb, meta_pos, SEEK_SET);
             }
@@ -1051,13 +1070,16 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                 if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
                     (s->video_codec_id || flv_same_video_codec(st->codecpar, flags)))
                     break;
-            } else if (stream_type == FLV_STREAM_TYPE_DATA) {
+            } else if (stream_type == FLV_STREAM_TYPE_SUBTITLE) {
                 if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE)
                     break;
+            } else if (stream_type == FLV_STREAM_TYPE_DATA) {
+                if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA)
+                    break;
             }
         }
         if (i == s->nb_streams) {
-            static const enum AVMediaType stream_types[] = {AVMEDIA_TYPE_VIDEO, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_SUBTITLE};
+            static const enum AVMediaType stream_types[] = {AVMEDIA_TYPE_VIDEO, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_SUBTITLE, AVMEDIA_TYPE_DATA};
             st = create_stream(s, stream_types[stream_type]);
             if (!st)
                 return AVERROR(ENOMEM);
@@ -1065,6 +1087,10 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
         av_log(s, AV_LOG_TRACE, "%d %X %d \n", stream_type, flags, st->discard);
 
+        if (flv->time_pos <= pos) {
+            dts += flv->time_offset;
+        }
+
         if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
             ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||
               stream_type == FLV_STREAM_TYPE_AUDIO))
@@ -1151,8 +1177,10 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (ret < 0)
             return ret;
         size -= ret;
-    } else if (stream_type == FLV_STREAM_TYPE_DATA) {
+    } else if (stream_type == FLV_STREAM_TYPE_SUBTITLE) {
         st->codecpar->codec_id = AV_CODEC_ID_TEXT;
+    } else if (stream_type == FLV_STREAM_TYPE_DATA) {
+        st->codecpar->codec_id = AV_CODEC_ID_NONE; // Opaque AMF data
     }
 
     if (st->codecpar->codec_id == AV_CODEC_ID_AAC ||
@@ -1253,6 +1281,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (    stream_type == FLV_STREAM_TYPE_AUDIO ||
             ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY) ||
+            stream_type == FLV_STREAM_TYPE_SUBTITLE ||
             stream_type == FLV_STREAM_TYPE_DATA)
         pkt->flags |= AV_PKT_FLAG_KEY;
 
@@ -1272,6 +1301,10 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
     }
+
+    if (ret >= 0)
+        flv->last_ts = pkt->dts;
+
     return ret;
 }
 
diff --git a/libavformat/ftp.c b/libavformat/ftp.c
index 676f1c6b4c46b..3adc04ee1fbbf 100644
--- a/libavformat/ftp.c
+++ b/libavformat/ftp.c
@@ -389,7 +389,7 @@ static int ftp_file_size(FTPContext *s)
     static const int size_codes[] = {213, 0};
 
     snprintf(command, sizeof(command), "SIZE %s\r\n", s->path);
-    if (ftp_send_command(s, command, size_codes, &res) == 213 && res) {
+    if (ftp_send_command(s, command, size_codes, &res) == 213 && res && strlen(res) > 4) {
         s->filesize = strtoll(&res[4], NULL, 10);
     } else {
         s->filesize = -1;
@@ -513,7 +513,7 @@ static int ftp_features(FTPContext *s)
     static const char *feat_command        = "FEAT\r\n";
     static const char *enable_utf8_command = "OPTS UTF8 ON\r\n";
     static const int feat_codes[] = {211, 0};
-    static const int opts_codes[] = {200, 451, 0};
+    static const int opts_codes[] = {200, 202, 451, 0};
 
     av_freep(&s->features);
     if (ftp_send_command(s, feat_command, feat_codes, &s->features) != 211) {
@@ -521,7 +521,8 @@ static int ftp_features(FTPContext *s)
     }
 
     if (ftp_has_feature(s, "UTF8")) {
-        if (ftp_send_command(s, enable_utf8_command, opts_codes, NULL) == 200)
+        int ret = ftp_send_command(s, enable_utf8_command, opts_codes, NULL);
+        if (ret == 200 || ret == 202)
             s->utf8 = 1;
     }
 
@@ -781,13 +782,13 @@ static int ftp_read(URLContext *h, unsigned char *buf, int size)
     if (s->state == DISCONNECTED) {
         /* optimization */
         if (s->position >= s->filesize)
-            return 0;
+            return AVERROR_EOF;
         if ((err = ftp_connect_data_connection(h)) < 0)
             return err;
     }
     if (s->state == READY) {
         if (s->position >= s->filesize)
-            return 0;
+            return AVERROR_EOF;
         if ((err = ftp_retrieve(s)) < 0)
             return err;
     }
diff --git a/libavformat/gdv.c b/libavformat/gdv.c
index a69c349cab3c4..3ead383892fa8 100644
--- a/libavformat/gdv.c
+++ b/libavformat/gdv.c
@@ -86,6 +86,9 @@ static int gdv_read_header(AVFormatContext *ctx)
     vst->nb_frames         = avio_rl16(pb);
 
     fps = avio_rl16(pb);
+    if (!fps)
+        return AVERROR_INVALIDDATA;
+
     snd_flags = avio_rl16(pb);
     if (snd_flags & 1) {
         ast = avformat_new_stream(ctx, 0);
diff --git a/libavformat/gif.c b/libavformat/gif.c
index 31e60bb2ff13c..7916ba1e6b2a9 100644
--- a/libavformat/gif.c
+++ b/libavformat/gif.c
@@ -27,94 +27,21 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
-
-/* XXX: random value that shouldn't be taken into effect if there is no
- * transparent color in the palette (the transparency bit will be set to 0) */
-#define DEFAULT_TRANSPARENCY_INDEX 0x1f
-
-static int get_palette_transparency_index(const uint32_t *palette)
-{
-    int transparent_color_index = -1;
-    unsigned i, smallest_alpha = 0xff;
-
-    if (!palette)
-        return -1;
-
-    for (i = 0; i < AVPALETTE_COUNT; i++) {
-        const uint32_t v = palette[i];
-        if (v >> 24 < smallest_alpha) {
-            smallest_alpha = v >> 24;
-            transparent_color_index = i;
-        }
-    }
-    return smallest_alpha < 128 ? transparent_color_index : -1;
-}
-
-static int gif_image_write_header(AVIOContext *pb, AVStream *st,
-                                  int loop_count, uint32_t *palette)
-{
-    int i;
-    int64_t aspect = 0;
-    const AVRational sar = st->sample_aspect_ratio;
-
-    if (sar.num > 0 && sar.den > 0) {
-        aspect = sar.num * 64LL / sar.den - 15;
-        if (aspect < 0 || aspect > 255)
-            aspect = 0;
-    }
-
-    avio_write(pb, "GIF", 3);
-    avio_write(pb, "89a", 3);
-    avio_wl16(pb, st->codecpar->width);
-    avio_wl16(pb, st->codecpar->height);
-
-    if (palette) {
-        const int bcid = get_palette_transparency_index(palette);
-
-        avio_w8(pb, 0xf7); /* flags: global clut, 256 entries */
-        avio_w8(pb, bcid < 0 ? DEFAULT_TRANSPARENCY_INDEX : bcid); /* background color index */
-        avio_w8(pb, aspect);
-        for (i = 0; i < 256; i++) {
-            const uint32_t v = palette[i] & 0xffffff;
-            avio_wb24(pb, v);
-        }
-    } else {
-        avio_w8(pb, 0); /* flags */
-        avio_w8(pb, 0); /* background color index */
-        avio_w8(pb, aspect);
-    }
-
-
-    if (loop_count >= 0 ) {
-        /* "NETSCAPE EXTENSION" for looped animation GIF */
-        avio_w8(pb, 0x21); /* GIF Extension code */
-        avio_w8(pb, 0xff); /* Application Extension Label */
-        avio_w8(pb, 0x0b); /* Length of Application Block */
-        avio_write(pb, "NETSCAPE2.0", sizeof("NETSCAPE2.0") - 1);
-        avio_w8(pb, 0x03); /* Length of Data Sub-Block */
-        avio_w8(pb, 0x01);
-        avio_wl16(pb, (uint16_t)loop_count);
-        avio_w8(pb, 0x00); /* Data Sub-block Terminator */
-    }
-
-    avio_flush(pb);
-    return 0;
-}
+#include "libavcodec/bytestream.h"
+#include "libavcodec/gif.h"
 
 typedef struct GIFContext {
     AVClass *class;
     int loop;
     int last_delay;
-    AVPacket *prev_pkt;
     int duration;
+    int64_t last_pos;
+    int have_end;
+    AVPacket *prev_pkt;
 } GIFContext;
 
 static int gif_write_header(AVFormatContext *s)
 {
-    GIFContext *gif = s->priv_data;
-    AVCodecParameters *video_par;
-    uint32_t palette[AVPALETTE_COUNT];
-
     if (s->nb_streams != 1 ||
         s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO ||
         s->streams[0]->codecpar->codec_id   != AV_CODEC_ID_GIF) {
@@ -123,106 +50,123 @@ static int gif_write_header(AVFormatContext *s)
         return AVERROR(EINVAL);
     }
 
-    video_par = s->streams[0]->codecpar;
-
     avpriv_set_pts_info(s->streams[0], 64, 1, 100);
-    if (avpriv_set_systematic_pal2(palette, video_par->format) < 0) {
-        av_assert0(video_par->format == AV_PIX_FMT_PAL8);
-        /* delay header writing: we wait for the first palette to put it
-         * globally */
-    } else {
-        gif_image_write_header(s->pb, s->streams[0], gif->loop, palette);
-    }
 
     return 0;
 }
 
-static int flush_packet(AVFormatContext *s, AVPacket *new)
+static int gif_parse_packet(AVFormatContext *s, uint8_t *data, int size)
 {
-    GIFContext *gif = s->priv_data;
-    int size, bcid;
-    AVIOContext *pb = s->pb;
-    const uint32_t *palette;
-    AVPacket *pkt = gif->prev_pkt;
-    uint8_t *disposal;
-    uint8_t packed;
-
-    if (!pkt)
-        return 0;
-
-    /* Mark one colour as transparent if the input palette contains at least
-     * one colour that is more than 50% transparent. */
-    palette = (uint32_t*)av_packet_get_side_data(pkt, AV_PKT_DATA_PALETTE, &size);
-    if (palette && size != AVPALETTE_SIZE) {
-        av_log(s, AV_LOG_ERROR, "Invalid palette extradata\n");
-        return AVERROR_INVALIDDATA;
-    }
-    bcid = get_palette_transparency_index(palette);
+    GetByteContext gb;
+    int x;
+
+    bytestream2_init(&gb, data, size);
+
+    while (bytestream2_get_bytes_left(&gb) > 0) {
+        x = bytestream2_get_byte(&gb);
+        if (x != GIF_EXTENSION_INTRODUCER)
+            return 0;
+
+        x = bytestream2_get_byte(&gb);
+        while (x != GIF_GCE_EXT_LABEL && bytestream2_get_bytes_left(&gb) > 0) {
+            int block_size = bytestream2_get_byte(&gb);
+            if (!block_size)
+                break;
+            bytestream2_skip(&gb, block_size);
+        }
 
-    disposal = av_packet_get_side_data(pkt, AV_PKT_DATA_GIF_FRAME_DISPOSAL, &size);
-    if (disposal && size != 1) {
-        av_log(s, AV_LOG_ERROR, "Invalid gif frame disposal extradata\n");
-        return AVERROR_INVALIDDATA;
+        if (x == GIF_GCE_EXT_LABEL)
+            return bytestream2_tell(&gb) + 2;
     }
 
+    return 0;
+}
+
+static int gif_get_delay(GIFContext *gif, AVPacket *prev, AVPacket *new)
+{
     if (new && new->pts != AV_NOPTS_VALUE)
-        gif->duration = av_clip_uint16(new->pts - gif->prev_pkt->pts);
+        gif->duration = av_clip_uint16(new->pts - prev->pts);
     else if (!new && gif->last_delay >= 0)
         gif->duration = gif->last_delay;
 
-    /* graphic control extension block */
-    if (disposal) {
-        packed = (0xff & (*disposal)<<2) | (bcid >= 0 ? 1 : 0);
-    } else {
-        packed = 1<<2 | (bcid >= 0 ? 1 : 0);
-    }
-
-    avio_w8(pb, 0x21);
-    avio_w8(pb, 0xf9);
-    avio_w8(pb, 0x04); /* block size */
-    avio_w8(pb, packed);
-    avio_wl16(pb, gif->duration);
-    avio_w8(pb, bcid < 0 ? DEFAULT_TRANSPARENCY_INDEX : bcid);
-    avio_w8(pb, 0x00);
-
-    avio_write(pb, pkt->data, pkt->size);
-
-    av_packet_unref(gif->prev_pkt);
-    if (new)
-        av_packet_ref(gif->prev_pkt, new);
-
-    return 0;
+    return gif->duration;
 }
 
-static int gif_write_packet(AVFormatContext *s, AVPacket *pkt)
+static int gif_write_packet(AVFormatContext *s, AVPacket *new_pkt)
 {
     GIFContext *gif = s->priv_data;
-    AVStream *video_st = s->streams[0];
+    AVIOContext *pb = s->pb;
+    AVPacket *pkt = gif->prev_pkt;
 
     if (!gif->prev_pkt) {
         gif->prev_pkt = av_packet_alloc();
         if (!gif->prev_pkt)
             return AVERROR(ENOMEM);
+        return av_packet_ref(gif->prev_pkt, new_pkt);
+    }
 
-        /* Write the first palette as global palette */
-        if (video_st->codecpar->format == AV_PIX_FMT_PAL8) {
-            int size;
-            void *palette = av_packet_get_side_data(pkt, AV_PKT_DATA_PALETTE, &size);
-
-            if (!palette) {
-                av_log(s, AV_LOG_ERROR, "PAL8 packet is missing palette in extradata\n");
-                return AVERROR_INVALIDDATA;
-            }
-            if (size != AVPALETTE_SIZE) {
-                av_log(s, AV_LOG_ERROR, "Invalid palette extradata\n");
-                return AVERROR_INVALIDDATA;
-            }
-            gif_image_write_header(s->pb, video_st, gif->loop, palette);
+    gif->last_pos = avio_tell(pb);
+    if (pkt->size > 0)
+        gif->have_end = pkt->data[pkt->size - 1] == GIF_TRAILER;
+
+    if (!gif->last_pos) {
+        int delay_pos;
+        int off = 13;
+
+        if (pkt->size < 13)
+            return AVERROR(EINVAL);
+
+        if (pkt->data[10] & 0x80)
+            off += 3 * (1 << ((pkt->data[10] & 0x07) + 1));
+
+        if (pkt->size < off + 2)
+            return AVERROR(EINVAL);
+
+        avio_write(pb, pkt->data, off);
+
+        if (pkt->data[off] == GIF_EXTENSION_INTRODUCER && pkt->data[off + 1] == 0xff)
+            off += 19;
+
+        if (pkt->size <= off)
+            return AVERROR(EINVAL);
+
+        /* "NETSCAPE EXTENSION" for looped animation GIF */
+        if (gif->loop >= 0) {
+            avio_w8(pb, GIF_EXTENSION_INTRODUCER); /* GIF Extension code */
+            avio_w8(pb, GIF_APP_EXT_LABEL); /* Application Extension Label */
+            avio_w8(pb, 0x0b); /* Length of Application Block */
+            avio_write(pb, "NETSCAPE2.0", sizeof("NETSCAPE2.0") - 1);
+            avio_w8(pb, 0x03); /* Length of Data Sub-Block */
+            avio_w8(pb, 0x01);
+            avio_wl16(pb, (uint16_t)gif->loop);
+            avio_w8(pb, 0x00); /* Data Sub-block Terminator */
         }
 
-        return av_packet_ref(gif->prev_pkt, pkt);
+        delay_pos = gif_parse_packet(s, pkt->data + off, pkt->size - off);
+        if (delay_pos > 0 && delay_pos < pkt->size - off - 2) {
+            avio_write(pb, pkt->data + off, delay_pos);
+            avio_wl16(pb, gif_get_delay(gif, pkt, new_pkt));
+            avio_write(pb, pkt->data + off + delay_pos + 2, pkt->size - off - delay_pos - 2);
+        } else {
+            avio_write(pb, pkt->data + off, pkt->size - off);
+        }
+    } else {
+        int delay_pos = gif_parse_packet(s, pkt->data, pkt->size);
+
+        if (delay_pos > 0 && delay_pos < pkt->size - 2) {
+            avio_write(pb, pkt->data, delay_pos);
+            avio_wl16(pb, gif_get_delay(gif, pkt, new_pkt));
+            avio_write(pb, pkt->data + delay_pos + 2, pkt->size - delay_pos - 2);
+        } else {
+            avio_write(pb, pkt->data, pkt->size);
+        }
     }
-    return flush_packet(s, pkt);
+
+    av_packet_unref(gif->prev_pkt);
+    if (new_pkt)
+        return av_packet_ref(gif->prev_pkt, new_pkt);
+
+    return 0;
 }
 
 static int gif_write_trailer(AVFormatContext *s)
@@ -230,9 +174,11 @@ static int gif_write_trailer(AVFormatContext *s)
     GIFContext *gif = s->priv_data;
     AVIOContext *pb = s->pb;
 
-    flush_packet(s, NULL);
-    av_freep(&gif->prev_pkt);
-    avio_w8(pb, 0x3b);
+    gif_write_packet(s, NULL);
+
+    if (!gif->have_end)
+        avio_w8(pb, GIF_TRAILER);
+    av_packet_free(&gif->prev_pkt);
 
     return 0;
 }
@@ -256,7 +202,7 @@ static const AVClass gif_muxer_class = {
 
 AVOutputFormat ff_gif_muxer = {
     .name           = "gif",
-    .long_name      = NULL_IF_CONFIG_SMALL("GIF Animation"),
+    .long_name      = NULL_IF_CONFIG_SMALL("CompuServe Graphics Interchange Format (GIF)"),
     .mime_type      = "image/gif",
     .extensions     = "gif",
     .priv_data_size = sizeof(GIFContext),
diff --git a/libavformat/gifdec.c b/libavformat/gifdec.c
index 8993ca615c054..7dc67ba87537b 100644
--- a/libavformat/gifdec.c
+++ b/libavformat/gifdec.c
@@ -25,6 +25,7 @@
  */
 
 #include "avformat.h"
+#include "libavutil/bprint.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
 #include "internal.h"
@@ -94,12 +95,25 @@ static int resync(AVIOContext *pb)
     return 0;
 }
 
+static int gif_skip_subblocks(AVIOContext *pb) // skip size-prefixed sub-blocks up to and including the zero-size terminator
+{
+    int sb_size, ret = 0;
+
+    while (0x00 != (sb_size = avio_r8(pb))) { // each sub-block starts with a one-byte size; a size of 0 ends the sequence
+        if ((ret = avio_skip(pb, sb_size)) < 0)
+            return ret; // propagate the first negative avio_skip() result
+    }
+
+    return ret; // result of the last avio_skip() call, or 0 when no sub-block was skipped
+}
+
 static int gif_read_header(AVFormatContext *s)
 {
     GIFDemuxContext *gdc = s->priv_data;
     AVIOContext     *pb  = s->pb;
     AVStream        *st;
-    int width, height, ret;
+    int type, width, height, ret, n, flags;
+    int64_t nb_frames = 0, duration = 0;
 
     if ((ret = resync(pb)) < 0)
         return ret;
@@ -107,6 +121,9 @@ static int gif_read_header(AVFormatContext *s)
     gdc->delay  = gdc->default_delay;
     width  = avio_rl16(pb);
     height = avio_rl16(pb);
+    flags = avio_r8(pb);
+    avio_skip(pb, 1);
+    n      = avio_r8(pb);
 
     if (width == 0 || height == 0)
         return AVERROR_INVALIDDATA;
@@ -115,6 +132,57 @@ static int gif_read_header(AVFormatContext *s)
     if (!st)
         return AVERROR(ENOMEM);
 
+    if (flags & 0x80)
+        avio_skip(pb, 3 * (1 << ((flags & 0x07) + 1)));
+
+    while ((type = avio_r8(pb)) != GIF_TRAILER) {
+        if (avio_feof(pb))
+            break;
+        if (type == GIF_EXTENSION_INTRODUCER) {
+            int subtype = avio_r8(pb);
+            if (subtype == GIF_COM_EXT_LABEL) {
+                AVBPrint bp;
+                int block_size;
+
+                av_bprint_init(&bp, 0, -1);
+                while ((block_size = avio_r8(pb)) != 0) {
+                    avio_read_to_bprint(pb, &bp, block_size);
+                }
+                av_dict_set(&s->metadata, "comment", bp.str, 0);
+                av_bprint_finalize(&bp, NULL);
+            } else if (subtype == GIF_GCE_EXT_LABEL) {
+                int block_size = avio_r8(pb);
+
+                if (block_size == 4) {
+                    int delay;
+
+                    avio_skip(pb, 1);
+                    delay = avio_rl16(pb);
+                    if (delay < gdc->min_delay)
+                        delay = gdc->default_delay;
+                    delay = FFMIN(delay, gdc->max_delay);
+                    duration += delay;
+                    avio_skip(pb, 1);
+                } else {
+                    avio_skip(pb, block_size);
+                }
+                gif_skip_subblocks(pb);
+            } else {
+                gif_skip_subblocks(pb);
+            }
+        } else if (type == GIF_IMAGE_SEPARATOR) {
+            avio_skip(pb, 8);
+            flags = avio_r8(pb);
+            if (flags & 0x80)
+                avio_skip(pb, 3 * (1 << ((flags & 0x07) + 1)));
+            avio_skip(pb, 1);
+            gif_skip_subblocks(pb);
+            nb_frames++;
+        } else {
+            break;
+        }
+    }
+
     /* GIF format operates with time in "hundredths of second",
      * therefore timebase is 1/100 */
     avpriv_set_pts_info(st, 64, 1, 100);
@@ -122,6 +190,13 @@ static int gif_read_header(AVFormatContext *s)
     st->codecpar->codec_id   = AV_CODEC_ID_GIF;
     st->codecpar->width      = width;
     st->codecpar->height     = height;
+    st->start_time           = 0;
+    st->duration             = duration;
+    st->nb_frames            = nb_frames;
+    if (n) {
+        st->codecpar->sample_aspect_ratio.num = n + 15;
+        st->codecpar->sample_aspect_ratio.den = 64;
+    }
 
     /* jump to start because gif decoder needs header data too */
     if (avio_seek(pb, 0, SEEK_SET) != 0)
@@ -130,18 +205,6 @@ static int gif_read_header(AVFormatContext *s)
     return 0;
 }
 
-static int gif_skip_subblocks(AVIOContext *pb)
-{
-    int sb_size, ret = 0;
-
-    while (0x00 != (sb_size = avio_r8(pb))) {
-        if ((ret = avio_skip(pb, sb_size)) < 0)
-            return ret;
-    }
-
-    return ret;
-}
-
 static int gif_read_ext(AVFormatContext *s)
 {
     GIFDemuxContext *gdc = s->priv_data;
diff --git a/libavformat/hcom.c b/libavformat/hcom.c
new file mode 100644
index 0000000000000..35515cc5b277e
--- /dev/null
+++ b/libavformat/hcom.c
@@ -0,0 +1,91 @@
+/*
+ * HCOM demuxer
+ * Copyright (c) 2019 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/internal.h"
+#include "avformat.h"
+#include "internal.h"
+#include "pcm.h"
+
+static int hcom_probe(AVProbeData *p) /* probe callback: recognise HCOM by two fixed-offset tags */
+{
+    if (p->buf_size < 132) /* the tag checks below read bytes up to index 131 */
+        return 0;
+    if (!memcmp(p->buf+65, "FSSD", 4) && /* "FSSD" must sit at offset 65 ... */
+        !memcmp(p->buf+128, "HCOM", 4)) /* ... and "HCOM" at offset 128 */
+        return AVPROBE_SCORE_MAX;
+    return 0; /* either tag missing: report no match */
+}
+
+static int hcom_read_header(AVFormatContext *s) /* parse the header, create one audio stream and fill its extradata */
+{
+    AVStream *st;
+    unsigned data_size, rsrc_size, huffcount; /* assigned below but never read again in this function */
+    unsigned compresstype, divisor;
+    unsigned dict_entries;
+    int ret;
+
+    avio_skip(s->pb, 83); /* skip 83 bytes before the two 32-bit big-endian size fields */
+    data_size = avio_rb32(s->pb);
+    rsrc_size = avio_rb32(s->pb);
+    avio_skip(s->pb, 128-91+4); /* 91 bytes consumed so far; skip to 128 plus 4 more (assumes reading began at offset 0, where the probe expects "HCOM" at 128) */
+    huffcount = avio_rb32(s->pb);
+    avio_skip(s->pb, 4); /* 4 bytes not needed here */
+    compresstype = avio_rb32(s->pb);
+    if (compresstype > 1) /* only 0 and 1 are accepted */
+        return AVERROR_INVALIDDATA;
+    divisor = avio_rb32(s->pb);
+    if (divisor == 0 || divisor > 4) /* accept 1..4 only; also protects the division below */
+        return AVERROR_INVALIDDATA;
+    dict_entries = avio_rb16(s->pb); /* 16-bit count, so dict_entries * 4 + 7 cannot overflow */
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codecpar->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->channels    = 1;
+    st->codecpar->sample_rate = 22050 / divisor; /* integer division of the 22050 base rate */
+    st->codecpar->codec_id    = AV_CODEC_ID_HCOM;
+    st->codecpar->bits_per_coded_sample = 8;
+    st->codecpar->block_align = 4;
+
+    ret = ff_alloc_extradata(st->codecpar, dict_entries * 4 + 7); /* 2 (count) + 4 (compresstype) + 4 per entry + 1 trailing byte */
+    if (ret < 0)
+        return ret;
+    AV_WB16(st->codecpar->extradata, dict_entries); /* extradata bytes 0-1: entry count */
+    AV_WB32(st->codecpar->extradata + 2, compresstype); /* extradata bytes 2-5: compresstype */
+    avio_read(s->pb, st->codecpar->extradata + 6, dict_entries * 4); /* NOTE(review): return value unchecked, a short read goes unnoticed */
+    avio_skip(s->pb, 1); /* one byte skipped before the final extradata byte */
+    st->codecpar->extradata[dict_entries * 4 + 6] = avio_r8(s->pb); /* last extradata byte comes straight from the stream */
+
+    avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); /* time base is 1/sample_rate */
+
+    return 0;
+}
+
+AVInputFormat ff_hcom_demuxer = { /* demuxer descriptor wiring the HCOM callbacks defined above */
+    .name           = "hcom",
+    .long_name      = NULL_IF_CONFIG_SMALL("Macintosh HCOM"),
+    .read_probe     = hcom_probe,
+    .read_header    = hcom_read_header,
+    .read_packet    = ff_pcm_read_packet, /* NOTE(review): presumably the shared reader declared in pcm.h; confirm */
+};
diff --git a/libavformat/hls.c b/libavformat/hls.c
index 8ad08baaed41c..8975a87153b40 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -223,9 +223,9 @@ static void free_segment_dynarray(struct segment **segments, int n_segments)
 
 static void free_segment_list(struct playlist *pls)
 {
-        free_segment_dynarray(pls->segments, pls->n_segments);
-        av_freep(&pls->segments);
-        pls->n_segments = 0;
+    free_segment_dynarray(pls->segments, pls->n_segments); /* NOTE(review): presumably frees each entry, not the array; confirm */
+    av_freep(&pls->segments); /* release the array itself and clear the pointer */
+    pls->n_segments = 0; /* keep the count in step with the cleared array */
 }
 
 static void free_init_section_list(struct playlist *pls)
@@ -931,6 +931,7 @@ static int parse_playlist(HLSContext *c, const char *url,
                    prev_start_seq_no, pls->start_seq_no);
         }
         free_segment_dynarray(prev_segments, prev_n_segments);
+        av_freep(&prev_segments);
     }
     if (pls)
         pls->last_load_time = av_gettime_relative();
diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c
index 28c2dd62fcae3..258d0628bab27 100644
--- a/libavformat/hlsenc.c
+++ b/libavformat/hlsenc.c
@@ -147,13 +147,14 @@ typedef struct VariantStream {
 
     char *fmp4_init_filename;
     char *base_output_dirname;
-    int fmp4_init_mode;
 
     AVStream **streams;
     char codec_attr[128];
     CodecAttributeStatus attr_status;
     unsigned int nb_streams;
     int m3u8_created; /* status of media play-list creation */
+    int is_default; /* default status of audio group */
+    char *language; /* audio language name */
     char *agroup; /* audio group name */
     char *ccgroup; /* closed caption group name */
     char *baseurl;
@@ -228,6 +229,9 @@ typedef struct HLSContext {
     AVIOContext *m3u8_out;
     AVIOContext *sub_m3u8_out;
     int64_t timeout;
+    int ignore_io_errors;
+    int has_default_key; /* has DEFAULT field of var_stream_map */
+    int has_video_m3u8; /* has video stream m3u8 list */
 } HLSContext;
 
 static int hlsenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
@@ -242,6 +246,9 @@ static int hlsenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
         URLContext *http_url_context = ffio_geturlcontext(*pb);
         av_assert0(http_url_context);
         err = ff_http_do_new_request(http_url_context, filename);
+        if (err < 0)
+            ff_format_io_close(s, pb);
+
 #endif
     }
     return err;
@@ -250,6 +257,8 @@ static int hlsenc_io_open(AVFormatContext *s, AVIOContext **pb, char *filename,
 static void hlsenc_io_close(AVFormatContext *s, AVIOContext **pb, char *filename) {
     HLSContext *hls = s->priv_data;
     int http_base_proto = filename ? ff_is_http_proto(filename) : 0;
+    if (!*pb)
+        return;
     if (!http_base_proto || !hls->http_persistent || hls->key_info_file || hls->encrypt) {
         ff_format_io_close(s, pb);
 #if CONFIG_HTTP_PROTOCOL
@@ -416,6 +425,7 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
     int segment_cnt = 0;
     char *dirname = NULL, *p, *sub_path;
     char *path = NULL;
+    char *vtt_dirname = NULL;
     AVDictionary *options = NULL;
     AVIOContext *out = NULL;
     const char *proto = NULL;
@@ -462,7 +472,7 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
         char * r_dirname = dirname;
 
         /* if %v is present in the file's directory */
-        if (av_stristr(dirname, "%v")) {
+        if (dirname && av_stristr(dirname, "%v")) {
 
             if (replace_int_data_in_filename(&r_dirname, dirname, 'v', segment->var_stream_idx) < 1) {
                 ret = AVERROR(EINVAL);
@@ -491,8 +501,11 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
         proto = avio_find_protocol_name(s->url);
         if (hls->method || (proto && !av_strcasecmp(proto, "http"))) {
             av_dict_set(&options, "method", "DELETE", 0);
-            if ((ret = vs->avf->io_open(vs->avf, &out, path, AVIO_FLAG_WRITE, &options)) < 0)
+            if ((ret = vs->avf->io_open(vs->avf, &out, path, AVIO_FLAG_WRITE, &options)) < 0) {
+                if (hls->ignore_io_errors)
+                    ret = 0;
                 goto fail;
+            }
             ff_format_io_close(vs->avf, &out);
         } else if (unlink(path) < 0) {
             av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
@@ -500,23 +513,32 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
         }
 
         if ((segment->sub_filename[0] != '\0')) {
-            sub_path_size = strlen(segment->sub_filename) + 1 + (dirname ? strlen(dirname) : 0);
+            vtt_dirname = av_strdup(vs->vtt_avf->url);
+            if (!vtt_dirname) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            p = (char *)av_basename(vtt_dirname);
+            *p = '\0';
+            sub_path_size = strlen(segment->sub_filename) + 1 + strlen(vtt_dirname);
             sub_path = av_malloc(sub_path_size);
             if (!sub_path) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }
 
-            av_strlcpy(sub_path, dirname, sub_path_size);
+            av_strlcpy(sub_path, vtt_dirname, sub_path_size);
             av_strlcat(sub_path, segment->sub_filename, sub_path_size);
 
             if (hls->method || (proto && !av_strcasecmp(proto, "http"))) {
                 av_dict_set(&options, "method", "DELETE", 0);
-                if ((ret = vs->avf->io_open(vs->avf, &out, sub_path, AVIO_FLAG_WRITE, &options)) < 0) {
+                if ((ret = vs->vtt_avf->io_open(vs->vtt_avf, &out, sub_path, AVIO_FLAG_WRITE, &options)) < 0) {
+                    if (hls->ignore_io_errors)
+                        ret = 0;
                     av_free(sub_path);
                     goto fail;
                 }
-                ff_format_io_close(vs->avf, &out);
+                ff_format_io_close(vs->vtt_avf, &out);
             } else if (unlink(sub_path) < 0) {
                 av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
                                          sub_path, strerror(errno));
@@ -532,6 +554,7 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
 fail:
     av_free(path);
     av_free(dirname);
+    av_free(vtt_dirname);
 
     return ret;
 }
@@ -733,7 +756,6 @@ static int hls_mux_init(AVFormatContext *s, VariantStream *vs)
     vs->packets_written = 1;
     vs->start_pos = 0;
     vs->new_start = 1;
-    vs->fmp4_init_mode = 0;
 
     if (hls->segment_type == SEGMENT_TYPE_FMP4) {
         if (hls->max_seg_size > 0) {
@@ -743,7 +765,6 @@ static int hls_mux_init(AVFormatContext *s, VariantStream *vs)
 
         vs->packets_written = 0;
         vs->init_range_length = 0;
-        vs->fmp4_init_mode = !byterange_mode;
         set_http_options(s, &options, hls);
         if ((ret = avio_open_dyn_buf(&oc->pb)) < 0)
             return ret;
@@ -770,7 +791,7 @@ static int hls_mux_init(AVFormatContext *s, VariantStream *vs)
 
         av_dict_copy(&options, hls->format_options, 0);
         av_dict_set(&options, "fflags", "-autobsf", 0);
-        av_dict_set(&options, "movflags", "frag_custom+dash+delay_moov", 0);
+        av_dict_set(&options, "movflags", "+frag_custom+dash+delay_moov", AV_DICT_APPEND);
         ret = avformat_init_output(oc, &options);
         if (ret < 0)
             return ret;
@@ -1241,7 +1262,7 @@ static int create_master_playlist(AVFormatContext *s,
             goto fail;
         }
 
-        ff_hls_write_audio_rendition(hls->m3u8_out, vs->agroup, m3u8_rel_name, 0, 1);
+        ff_hls_write_audio_rendition(hls->m3u8_out, vs->agroup, m3u8_rel_name, vs->language, i, hls->has_default_key ? vs->is_default : 1);
 
         av_freep(&m3u8_rel_name);
     }
@@ -1319,8 +1340,15 @@ static int create_master_playlist(AVFormatContext *s,
                         vs->ccgroup);
         }
 
-        ff_hls_write_stream_info(vid_st, hls->m3u8_out, bandwidth, m3u8_rel_name,
-                aud_st ? vs->agroup : NULL, vs->codec_attr, ccgroup);
+        if (!hls->has_default_key || !hls->has_video_m3u8) {
+            ff_hls_write_stream_info(vid_st, hls->m3u8_out, bandwidth, m3u8_rel_name,
+                    aud_st ? vs->agroup : NULL, vs->codec_attr, ccgroup);
+        } else {
+            if (vid_st) {
+                ff_hls_write_stream_info(vid_st, hls->m3u8_out, bandwidth, m3u8_rel_name,
+                                         aud_st ? vs->agroup : NULL, vs->codec_attr, ccgroup);
+            }
+        }
 
         av_freep(&m3u8_rel_name);
     }
@@ -1340,8 +1368,9 @@ static int hls_window(AVFormatContext *s, int last, VariantStream *vs)
     int ret = 0;
     char temp_filename[1024];
     int64_t sequence = FFMAX(hls->start_sequence, vs->sequence - vs->nb_entries);
-    const char *proto = avio_find_protocol_name(s->url);
-    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    const char *proto = avio_find_protocol_name(vs->m3u8_name);
+    int is_file_proto = proto && !strcmp(proto, "file");
+    int use_temp_file = is_file_proto && ((hls->flags & HLS_TEMP_FILE) || !(hls->pl_type == PLAYLIST_TYPE_VOD));
     static unsigned warned_non_file;
     char *key_uri = NULL;
     char *iv_string = NULL;
@@ -1364,13 +1393,16 @@ static int hls_window(AVFormatContext *s, int last, VariantStream *vs)
         hls->version = 7;
     }
 
-    if (!use_temp_file && !warned_non_file++)
+    if (!is_file_proto && (hls->flags & HLS_TEMP_FILE) && !warned_non_file++)
         av_log(s, AV_LOG_ERROR, "Cannot use rename on non file protocol, this may lead to races and temporary partial files\n");
 
     set_http_options(s, &options, hls);
     snprintf(temp_filename, sizeof(temp_filename), use_temp_file ? "%s.tmp" : "%s", vs->m3u8_name);
-    if ((ret = hlsenc_io_open(s, &hls->m3u8_out, temp_filename, &options)) < 0)
+    if ((ret = hlsenc_io_open(s, &hls->m3u8_out, temp_filename, &options)) < 0) {
+        if (hls->ignore_io_errors)
+            ret = 0;
         goto fail;
+    }
 
     for (en = vs->segments; en; en = en->next) {
         if (target_duration <= en->duration)
@@ -1417,8 +1449,11 @@ static int hls_window(AVFormatContext *s, int last, VariantStream *vs)
         ff_hls_write_end_list(hls->m3u8_out);
 
     if( vs->vtt_m3u8_name ) {
-        if ((ret = hlsenc_io_open(s, &hls->sub_m3u8_out, vs->vtt_m3u8_name, &options)) < 0)
+        if ((ret = hlsenc_io_open(s, &hls->sub_m3u8_out, vs->vtt_m3u8_name, &options)) < 0) {
+            if (hls->ignore_io_errors)
+                ret = 0;
             goto fail;
+        }
         ff_hls_write_playlist_header(hls->sub_m3u8_out, hls->version, hls->allowcache,
                                      target_duration, sequence, PLAYLIST_TYPE_NONE);
         for (en = vs->segments; en; en = en->next) {
@@ -1441,7 +1476,6 @@ static int hls_window(AVFormatContext *s, int last, VariantStream *vs)
     hlsenc_io_close(s, &hls->sub_m3u8_out, vs->vtt_m3u8_name);
     if (use_temp_file)
         ff_rename(temp_filename, vs->m3u8_name, s);
-
     if (ret >= 0 && hls->master_pl_name)
         if (create_master_playlist(s, vs) < 0)
             av_log(s, AV_LOG_WARNING, "Master playlist creation failed\n");
@@ -1455,8 +1489,8 @@ static int hls_start(AVFormatContext *s, VariantStream *vs)
     AVFormatContext *oc = vs->avf;
     AVFormatContext *vtt_oc = vs->vtt_avf;
     AVDictionary *options = NULL;
-    const char *proto = avio_find_protocol_name(s->url);
-    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    const char *proto = NULL;
+    int use_temp_file = 0;
     char *filename, iv_string[KEYSIZE*2 + 1];
     int err = 0;
 
@@ -1552,6 +1586,9 @@ static int hls_start(AVFormatContext *s, VariantStream *vs)
 
     set_http_options(s, &options, c);
 
+    proto = avio_find_protocol_name(oc->url);
+    use_temp_file = proto && !strcmp(proto, "file") && (c->flags & HLS_TEMP_FILE);
+
     if (use_temp_file) {
         char *new_name = av_asprintf("%s.tmp", oc->url);
         if (!new_name)
@@ -1600,13 +1637,19 @@ static int hls_start(AVFormatContext *s, VariantStream *vs)
         if (err < 0)
             return err;
     } else if (c->segment_type != SEGMENT_TYPE_FMP4) {
-        if ((err = hlsenc_io_open(s, &oc->pb, oc->url, &options)) < 0)
+        if ((err = hlsenc_io_open(s, &oc->pb, oc->url, &options)) < 0) {
+            if (c->ignore_io_errors)
+                err = 0;
             goto fail;
+        }
     }
     if (vs->vtt_basename) {
         set_http_options(s, &options, c);
-        if ((err = hlsenc_io_open(s, &vtt_oc->pb, vtt_oc->url, &options)) < 0)
+        if ((err = hlsenc_io_open(s, &vtt_oc->pb, vtt_oc->url, &options)) < 0) {
+            if (c->ignore_io_errors)
+                err = 0;
             goto fail;
+        }
     }
     av_dict_free(&options);
 
@@ -1787,7 +1830,7 @@ static int parse_variant_stream_mapstring(AVFormatContext *s)
     /**
      * Expected format for var_stream_map string is as below:
      * "a:0,v:0 a:1,v:1"
-     * "a:0,agroup:a0 a:1,agroup:a1 v:0,agroup:a0  v:1,agroup:a1"
+     * "a:0,agroup:a0,default:1,language:ENG a:1,agroup:a1,default:0 v:0,agroup:a0  v:1,agroup:a1"
      * This string specifies how to group the audio, video and subtitle streams
      * into different variant streams. The variant stream groups are separated
      * by space.
@@ -1818,6 +1861,7 @@ static int parse_variant_stream_mapstring(AVFormatContext *s)
         if (nb_varstreams < hls->nb_varstreams) {
             vs = &(hls->var_streams[nb_varstreams]);
             vs->var_stream_idx = nb_varstreams;
+            vs->is_default = 0;
             nb_varstreams++;
         } else
             return AVERROR(EINVAL);
@@ -1836,8 +1880,17 @@ static int parse_variant_stream_mapstring(AVFormatContext *s)
         nb_streams = 0;
         while (keyval = av_strtok(varstr, ",", &saveptr2)) {
             varstr = NULL;
-
-            if (av_strstart(keyval, "agroup:", &val)) {
+            if (av_strstart(keyval, "language:", &val)) {
+                vs->language = av_strdup(val);
+                if (!vs->language)
+                    return AVERROR(ENOMEM);
+                continue;
+            } else if (av_strstart(keyval, "default:", &val)) {
+                vs->is_default = (!av_strncasecmp(val, "YES", strlen("YES")) ||
+                                  (!av_strncasecmp(val, "1", strlen("1"))));
+                hls->has_default_key = 1;
+                continue;
+            } else if (av_strstart(keyval, "agroup:", &val)) {
                 vs->agroup = av_strdup(val);
                 if (!vs->agroup)
                     return AVERROR(ENOMEM);
@@ -1849,6 +1902,7 @@ static int parse_variant_stream_mapstring(AVFormatContext *s)
                 continue;
             } else if (av_strstart(keyval, "v:", &val)) {
                 codec_type = AVMEDIA_TYPE_VIDEO;
+                hls->has_video_m3u8 = 1;
             } else if (av_strstart(keyval, "a:", &val)) {
                 codec_type = AVMEDIA_TYPE_AUDIO;
             } else if (av_strstart(keyval, "s:", &val)) {
@@ -2045,21 +2099,14 @@ static int hls_write_header(AVFormatContext *s)
 {
     HLSContext *hls = s->priv_data;
     int ret, i, j;
-    AVDictionary *options = NULL;
     VariantStream *vs = NULL;
 
     for (i = 0; i < hls->nb_varstreams; i++) {
         vs = &hls->var_streams[i];
 
-        av_dict_copy(&options, hls->format_options, 0);
-        ret = avformat_write_header(vs->avf, &options);
-        if (av_dict_count(options)) {
-            av_log(s, AV_LOG_ERROR, "Some of provided format options in '%s' are not recognized\n", hls->format_options_str);
-            ret = AVERROR(EINVAL);
-            av_dict_free(&options);
-            goto fail;
-        }
-        av_dict_free(&options);
+        ret = avformat_write_header(vs->avf, NULL);
+        if (ret < 0)
+            return ret;
         //av_assert0(s->nb_streams == hls->avf->nb_streams);
         for (j = 0; j < vs->nb_streams; j++) {
             AVStream *inner_st;
@@ -2099,7 +2146,6 @@ static int hls_write_header(AVFormatContext *s)
             }
         }
     }
-fail:
 
     return ret;
 }
@@ -2114,8 +2160,8 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
     int ret = 0, can_split = 1, i, j;
     int stream_index = 0;
     int range_length = 0;
-    const char *proto = avio_find_protocol_name(s->url);
-    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    const char *proto = NULL;
+    int use_temp_file = 0;
     uint8_t *buffer = NULL;
     VariantStream *vs = NULL;
     AVDictionary *options = NULL;
@@ -2205,6 +2251,7 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
                 avio_flush(oc->pb);
                 range_length = avio_close_dyn_buf(oc->pb, &buffer);
                 avio_write(vs->out, buffer, range_length);
+                av_free(buffer);
                 vs->init_range_length = range_length;
                 avio_open_dyn_buf(&oc->pb);
                 vs->packets_written = 0;
@@ -2225,23 +2272,22 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
 
+        if (oc->url[0]) {
+            proto = avio_find_protocol_name(oc->url);
+            use_temp_file = proto && !strcmp(proto, "file") && (hls->flags & HLS_TEMP_FILE);
+        }
+
         // look to rename the asset name
-        if (use_temp_file && oc->url[0]) {
+        if (use_temp_file) {
             if (!(hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size <= 0))
-                if ((vs->avf->oformat->priv_class && vs->avf->priv_data) && hls->segment_type != SEGMENT_TYPE_FMP4) {
+                if ((vs->avf->oformat->priv_class && vs->avf->priv_data) && hls->segment_type != SEGMENT_TYPE_FMP4)
                     av_opt_set(vs->avf->priv_data, "mpegts_flags", "resend_headers", 0);
-                }
-        }
-
-        if (vs->fmp4_init_mode) {
-            vs->number--;
         }
 
         if (hls->segment_type == SEGMENT_TYPE_FMP4) {
             if (hls->flags & HLS_SINGLE_FILE) {
                 ret = flush_dynbuf(vs, &range_length);
                 if (ret < 0) {
-                    av_free(old_filename);
                     return ret;
                 }
                 vs->size = range_length;
@@ -2249,9 +2295,9 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
                 set_http_options(s, &options, hls);
                 ret = hlsenc_io_open(s, &vs->out, vs->avf->url, &options);
                 if (ret < 0) {
-                    av_log(s, AV_LOG_ERROR, "Failed to open file '%s'\n",
-                           vs->avf->url);
-                    return ret;
+                    av_log(s, hls->ignore_io_errors ? AV_LOG_WARNING : AV_LOG_ERROR,
+                           "Failed to open file '%s'\n", vs->avf->url);
+                    return hls->ignore_io_errors ? 0 : ret;
                 }
                 write_styp(vs->out);
                 ret = flush_dynbuf(vs, &range_length);
@@ -2259,20 +2305,13 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
                     return ret;
                 }
                 ff_format_io_close(s, &vs->out);
-
-                // rename that segment from .tmp to the real one
-                if (use_temp_file && oc->url[0]) {
-                    hls_rename_temp_file(s, oc);
-                    av_free(old_filename);
-                    old_filename = av_strdup(vs->avf->url);
-
-                    if (!old_filename) {
-                        return AVERROR(ENOMEM);
-                    }
-                }
             }
         }
 
+        if (use_temp_file && !(hls->flags & HLS_SINGLE_FILE)) {
+            hls_rename_temp_file(s, oc);
+        }
+
         old_filename = av_strdup(vs->avf->url);
         if (!old_filename) {
             return AVERROR(ENOMEM);
@@ -2294,7 +2333,6 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
             vs->start_pos += vs->size;
         }
 
-        vs->fmp4_init_mode = 0;
         if (hls->flags & HLS_SINGLE_FILE) {
             vs->number++;
         } else if (hls->max_seg_size > 0) {
@@ -2327,19 +2365,52 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     vs->packets_written++;
-    ret = ff_write_chained(oc, stream_index, pkt, s, 0);
+    if (oc->pb) {
+        ret = ff_write_chained(oc, stream_index, pkt, s, 0);
+        if (hls->ignore_io_errors)
+            ret = 0;
+    }
 
     return ret;
 }
 
+static void hls_free_variant_streams(struct HLSContext *hls) /* free the strings, stream array and segment lists of every variant stream */
+{
+    int i = 0;
+    AVFormatContext *vtt_oc = NULL;
+    VariantStream *vs = NULL;
+
+    for (i = 0; i < hls->nb_varstreams; i++) {
+        vs = &hls->var_streams[i];
+        vtt_oc = vs->vtt_avf; /* only compared against NULL below, never dereferenced here */
+
+        av_freep(&vs->basename);
+        av_freep(&vs->base_output_dirname);
+        av_freep(&vs->fmp4_init_filename);
+        if (vtt_oc) { /* the vtt_* names are released only when a vtt context pointer is set */
+            av_freep(&vs->vtt_basename);
+            av_freep(&vs->vtt_m3u8_name);
+        }
+
+        hls_free_segments(vs->segments); /* NOTE(review): list pointers are passed by value and not reset here; confirm this runs once */
+        hls_free_segments(vs->old_segments);
+        av_freep(&vs->m3u8_name);
+        av_freep(&vs->streams);
+        av_freep(&vs->agroup);
+        av_freep(&vs->language);
+        av_freep(&vs->ccgroup);
+        av_freep(&vs->baseurl);
+    }
+}
+
 static int hls_write_trailer(struct AVFormatContext *s)
 {
     HLSContext *hls = s->priv_data;
     AVFormatContext *oc = NULL;
     AVFormatContext *vtt_oc = NULL;
     char *old_filename = NULL;
-    const char *proto = avio_find_protocol_name(s->url);
-    int use_temp_file = proto && !strcmp(proto, "file") && (s->flags & HLS_TEMP_FILE);
+    const char *proto = NULL;
+    int use_temp_file = 0;
     int i;
     int ret = 0;
     VariantStream *vs = NULL;
@@ -2350,12 +2421,32 @@ static int hls_write_trailer(struct AVFormatContext *s)
         oc = vs->avf;
         vtt_oc = vs->vtt_avf;
         old_filename = av_strdup(vs->avf->url);
+        use_temp_file = 0;
 
         if (!old_filename) {
             return AVERROR(ENOMEM);
         }
         if ( hls->segment_type == SEGMENT_TYPE_FMP4) {
             int range_length = 0;
+            if (!vs->init_range_length) {
+                uint8_t *buffer = NULL;
+                int range_length, byterange_mode;
+                av_write_frame(vs->avf, NULL); /* Flush any buffered data */
+                avio_flush(oc->pb);
+
+                range_length = avio_close_dyn_buf(oc->pb, &buffer);
+                avio_write(vs->out, buffer, range_length);
+                av_free(buffer);
+                vs->init_range_length = range_length;
+                avio_open_dyn_buf(&oc->pb);
+                vs->packets_written = 0;
+                vs->start_pos = range_length;
+                byterange_mode = (hls->flags & HLS_SINGLE_FILE) || (hls->max_seg_size > 0);
+                if (!byterange_mode) {
+                    ff_format_io_close(s, &vs->out);
+                    hlsenc_io_close(s, &vs->out, vs->base_output_dirname);
+                }
+            }
             if (!(hls->flags & HLS_SINGLE_FILE)) {
                 ret = hlsenc_io_open(s, &vs->out, vs->avf->url, NULL);
                 if (ret < 0) {
@@ -2368,22 +2459,26 @@ static int hls_write_trailer(struct AVFormatContext *s)
             if (ret < 0) {
                 goto failed;
             }
+            vs->size = range_length;
             ff_format_io_close(s, &vs->out);
         }
 
 failed:
         av_write_trailer(oc);
+
+        if (oc->url[0]) {
+            proto = avio_find_protocol_name(oc->url);
+            use_temp_file = proto && !strcmp(proto, "file") && (hls->flags & HLS_TEMP_FILE);
+        }
+
         if (oc->pb) {
             if (hls->segment_type != SEGMENT_TYPE_FMP4) {
                 vs->size = avio_tell(vs->avf->pb) - vs->start_pos;
-            } else {
-                vs->size = avio_tell(vs->avf->pb);
-            }
-            if (hls->segment_type != SEGMENT_TYPE_FMP4)
                 ff_format_io_close(s, &oc->pb);
+            }
 
             // rename that segment from .tmp to the real one
-            if (use_temp_file && oc->url[0] && !(hls->flags & HLS_SINGLE_FILE)) {
+            if (use_temp_file && !(hls->flags & HLS_SINGLE_FILE)) {
                 hls_rename_temp_file(s, oc);
                 av_free(old_filename);
                 old_filename = av_strdup(vs->avf->url);
@@ -2404,31 +2499,17 @@ static int hls_write_trailer(struct AVFormatContext *s)
                 av_write_trailer(vtt_oc);
             vs->size = avio_tell(vs->vtt_avf->pb) - vs->start_pos;
             ff_format_io_close(s, &vtt_oc->pb);
+            avformat_free_context(vtt_oc);
         }
-        av_freep(&vs->basename);
-        av_freep(&vs->base_output_dirname);
         avformat_free_context(oc);
 
         vs->avf = NULL;
         hls_window(s, 1, vs);
-
-        av_freep(&vs->fmp4_init_filename);
-        if (vtt_oc) {
-            av_freep(&vs->vtt_basename);
-            av_freep(&vs->vtt_m3u8_name);
-            avformat_free_context(vtt_oc);
-        }
-
-        hls_free_segments(vs->segments);
-        hls_free_segments(vs->old_segments);
         av_free(old_filename);
-        av_freep(&vs->m3u8_name);
-        av_freep(&vs->streams);
-        av_freep(&vs->agroup);
-        av_freep(&vs->ccgroup);
-        av_freep(&vs->baseurl);
     }
 
+    hls_free_variant_streams(hls);
+
     for (i = 0; i < hls->nb_ccstreams; i++) {
         ClosedCaptionsStream *ccs = &hls->cc_streams[i];
         av_freep(&ccs->ccgroup);
@@ -2461,6 +2542,8 @@ static int hls_init(AVFormatContext *s)
     int vtt_basename_size = 0;
     int fmp4_init_filename_len = strlen(hls->fmp4_init_filename) + 1;
 
+    hls->has_default_key = 0;
+    hls->has_video_m3u8 = 0;
     ret = update_variant_stream_info(s);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Variant stream info update failed with status %x\n",
@@ -2657,7 +2740,11 @@ static int hls_init(AVFormatContext *s)
                 av_strlcpy(vs->fmp4_init_filename, hls->fmp4_init_filename,
                            fmp4_init_filename_len);
                 if (hls->nb_varstreams > 1) {
-                    ret = append_postfix(vs->fmp4_init_filename, fmp4_init_filename_len, i);
+                    if (av_stristr(vs->fmp4_init_filename, "%v")) {
+                        format_name(vs->fmp4_init_filename, fmp4_init_filename_len, i);
+                    } else {
+                        ret = append_postfix(vs->fmp4_init_filename, fmp4_init_filename_len, i);
+                    }
                     if (ret < 0)
                         goto fail;
                 }
@@ -2766,6 +2853,7 @@ static int hls_init(AVFormatContext *s)
             av_freep(&vs->m3u8_name);
             av_freep(&vs->vtt_m3u8_name);
             av_freep(&vs->streams);
+            av_freep(&vs->language);
             av_freep(&vs->agroup);
             av_freep(&vs->ccgroup);
             av_freep(&vs->baseurl);
@@ -2853,6 +2941,7 @@ static const AVOption options[] = {
     {"master_pl_publish_rate", "Publish master play list every after this many segment intervals", OFFSET(master_publish_rate), AV_OPT_TYPE_INT, {.i64 = 0}, 0, UINT_MAX, E},
     {"http_persistent", "Use persistent HTTP connections", OFFSET(http_persistent), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
     {"timeout", "set timeout for socket I/O operations", OFFSET(timeout), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT_MAX, .flags = E },
+    {"ignore_io_errors", "Ignore IO errors for stable long-duration runs with network output", OFFSET(ignore_io_errors), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { NULL },
 };
 
diff --git a/libavformat/hlsplaylist.c b/libavformat/hlsplaylist.c
index efcbff0009db7..0537049a97851 100644
--- a/libavformat/hlsplaylist.c
+++ b/libavformat/hlsplaylist.c
@@ -36,13 +36,16 @@ void ff_hls_write_playlist_version(AVIOContext *out, int version) {
 }
 
 void ff_hls_write_audio_rendition(AVIOContext *out, char *agroup,
-                                  char *filename, int name_id, int is_default) {
+                                  char *filename, char *language, int name_id, int is_default) {
     if (!out || !agroup || !filename)
         return;
 
     avio_printf(out, "#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID=\"group_%s\"", agroup);
-    avio_printf(out, ",NAME=\"audio_%d\",DEFAULT=%s,URI=\"%s\"\n", name_id,
-                     is_default ? "YES" : "NO", filename);
+    avio_printf(out, ",NAME=\"audio_%d\",DEFAULT=%s,", name_id, is_default ? "YES" : "NO"); /* tag line is now written in pieces so LANGUAGE can be inserted */
+    if (language) { /* language is optional: it is not part of the NULL guard above, and the attribute is skipped when it is NULL */
+        avio_printf(out, "LANGUAGE=\"%s\",", language);
+    }
+    avio_printf(out, "URI=\"%s\"\n", filename); /* URI is written last and ends the tag line with the newline */
 }
 
 void ff_hls_write_stream_info(AVStream *st, AVIOContext *out,
diff --git a/libavformat/hlsplaylist.h b/libavformat/hlsplaylist.h
index 5054b01c8fc53..54c93a39639aa 100644
--- a/libavformat/hlsplaylist.h
+++ b/libavformat/hlsplaylist.h
@@ -38,7 +38,7 @@ typedef enum {
 
 void ff_hls_write_playlist_version(AVIOContext *out, int version);
 void ff_hls_write_audio_rendition(AVIOContext *out, char *agroup,
-                                  char *filename, int name_id, int is_default);
+                                  char *filename, char *language, int name_id, int is_default);
 void ff_hls_write_stream_info(AVStream *st, AVIOContext *out,
                               int bandwidth, char *filename, char *agroup,
                               char *codecs, char *ccgroup);
diff --git a/libavformat/hlsproto.c b/libavformat/hlsproto.c
index e7ef2d88ea8d0..e5673e5e03510 100644
--- a/libavformat/hlsproto.c
+++ b/libavformat/hlsproto.c
@@ -295,7 +295,7 @@ static int hls_read(URLContext *h, uint8_t *buf, int size)
         }
         goto retry;
     }
-    url = s->segments[s->cur_seq_no - s->start_seq_no]->url,
+    url = s->segments[s->cur_seq_no - s->start_seq_no]->url;
     av_log(h, AV_LOG_DEBUG, "opening %s\n", url);
     ret = ffurl_open_whitelist(&s->seg_hd, url, AVIO_FLAG_READ,
                                &h->interrupt_callback, NULL,
diff --git a/libavformat/http.c b/libavformat/http.c
index 3a35bc7eacb8f..ed0eb1c875da1 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -541,7 +541,7 @@ static int http_open(URLContext *h, const char *uri, int flags,
         int len = strlen(s->headers);
         if (len < 2 || strcmp("\r\n", s->headers + len - 2)) {
             av_log(h, AV_LOG_WARNING,
-                   "No trailing CRLF found in HTTP header.\n");
+                   "No trailing CRLF found in HTTP header. Adding it.\n");
             ret = av_reallocp(&s->headers, len + 3);
             if (ret < 0)
                 return ret;
@@ -915,7 +915,7 @@ static int process_line(URLContext *h, char *line, int line_count,
             while (av_isspace(*p))
                 p++;
             resource = p;
-            while (!av_isspace(*p))
+            while (*p && !av_isspace(*p))
                 p++;
             *(p++) = '\0';
             av_log(h, AV_LOG_TRACE, "Requested resource: %s\n", resource);
@@ -1650,7 +1650,7 @@ static int http_close(URLContext *h)
     av_freep(&s->inflate_buffer);
 #endif /* CONFIG_ZLIB */
 
-    if (!s->end_chunked_post)
+    if (s->hd && !s->end_chunked_post)
         /* Close the write direction by sending the end of chunked encoding. */
         ret = http_shutdown(h, h->flags);
 
@@ -1691,6 +1691,13 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo
     if (s->off && h->is_streamed)
         return AVERROR(ENOSYS);
 
+    /* do not try to make a new connection if seeking past the end of the file */
+    if (s->end_off || s->filesize != UINT64_MAX) {
+        uint64_t end_pos = s->end_off ? s->end_off : s->filesize;
+        if (s->off >= end_pos)
+            return s->off;
+    }
+
     /* we save the old context in case the seek fails */
     old_buf_size = s->buf_end - s->buf_ptr;
     memcpy(old_buf, s->buf_ptr, old_buf_size);
diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index f7de26a1d8e54..b43ab1745f27d 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -36,6 +36,7 @@
 #include "libavutil/bprint.h"
 #include "libavutil/dict.h"
 #include "libavutil/intreadwrite.h"
+#include "libavcodec/png.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "id3v1.h"
@@ -590,7 +591,7 @@ static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen,
                       int isv34)
 {
     int enc, pic_type;
-    char mimetype[64];
+    char mimetype[64] = {0};
     const CodecMime *mime     = ff_id3v2_mime_tags;
     enum AVCodecID id         = AV_CODEC_ID_NONE;
     ID3v2ExtraMetaAPIC *apic  = NULL;
@@ -612,7 +613,9 @@ static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen,
     if (isv34) {
         taglen -= avio_get_str(pb, taglen, mimetype, sizeof(mimetype));
     } else {
-        avio_read(pb, mimetype, 3);
+        if (avio_read(pb, mimetype, 3) < 0)
+            goto fail;
+
         mimetype[3] = 0;
         taglen    -= 3;
     }
@@ -1156,7 +1159,7 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
         st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
         st->codecpar->codec_id   = apic->id;
 
-        if (AV_RB64(apic->buf->data) == 0x89504e470d0a1a0a)
+        if (AV_RB64(apic->buf->data) == PNGSIG)
             st->codecpar->codec_id = AV_CODEC_ID_PNG;
 
         if (apic->description[0])
diff --git a/libavformat/img2dec.c b/libavformat/img2dec.c
index ff4757e5322ee..ecec4988b22fb 100644
--- a/libavformat/img2dec.c
+++ b/libavformat/img2dec.c
@@ -29,6 +29,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/intreadwrite.h"
+#include "libavcodec/gif.h"
 #include "avformat.h"
 #include "avio_internal.h"
 #include "internal.h"
@@ -563,29 +564,29 @@ static int img_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp
 
 #define OFFSET(x) offsetof(VideoDemuxData, x)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
-const AVOption ff_img_options[] = {
-    { "framerate",    "set the video framerate",             OFFSET(framerate),    AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT_MAX,   DEC },
-    { "loop",         "force loop over input file sequence", OFFSET(loop),         AV_OPT_TYPE_BOOL,   {.i64 = 0   }, 0, 1,       DEC },
+#define COMMON_OPTIONS \
+    { "framerate",    "set the video framerate", OFFSET(framerate),    AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, INT_MAX, DEC }, \
+    { "pixel_format", "set video pixel format",  OFFSET(pixel_format), AV_OPT_TYPE_STRING,     {.str = NULL}, 0, 0,       DEC }, \
+    { "video_size",   "set video size",          OFFSET(width),        AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0,       DEC }, \
+    { "loop",         "force loop over input file sequence", OFFSET(loop), AV_OPT_TYPE_BOOL,   {.i64 = 0   }, 0, 1,       DEC }, \
+    { NULL }, /* the macro carries the table terminator, so it must be the last item of any option array that uses it */
 
+#if CONFIG_IMAGE2_DEMUXER
+const AVOption ff_img_options[] = { /* image2 option table; now only compiled when CONFIG_IMAGE2_DEMUXER is set */
     { "pattern_type", "set pattern type",                    OFFSET(pattern_type), AV_OPT_TYPE_INT,    {.i64=PT_DEFAULT}, 0,       INT_MAX, DEC, "pattern_type"},
     { "glob_sequence","select glob/sequence pattern type",   0, AV_OPT_TYPE_CONST,  {.i64=PT_GLOB_SEQUENCE}, INT_MIN, INT_MAX, DEC, "pattern_type" },
     { "glob",         "select glob pattern type",            0, AV_OPT_TYPE_CONST,  {.i64=PT_GLOB         }, INT_MIN, INT_MAX, DEC, "pattern_type" },
     { "sequence",     "select sequence pattern type",        0, AV_OPT_TYPE_CONST,  {.i64=PT_SEQUENCE     }, INT_MIN, INT_MAX, DEC, "pattern_type" },
     { "none",         "disable pattern matching",            0, AV_OPT_TYPE_CONST,  {.i64=PT_NONE         }, INT_MIN, INT_MAX, DEC, "pattern_type" },
-
-    { "pixel_format", "set video pixel format",              OFFSET(pixel_format), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0,       DEC },
     { "start_number", "set first number in the sequence",    OFFSET(start_number), AV_OPT_TYPE_INT,    {.i64 = 0   }, INT_MIN, INT_MAX, DEC },
     { "start_number_range", "set range for looking at the first sequence number", OFFSET(start_number_range), AV_OPT_TYPE_INT, {.i64 = 5}, 1, INT_MAX, DEC },
-    { "video_size",   "set video size",                      OFFSET(width),        AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0,   DEC },
-    { "frame_size",   "force frame size in bytes",           OFFSET(frame_size),   AV_OPT_TYPE_INT,    {.i64 = 0   }, 0, INT_MAX, DEC },
     { "ts_from_file", "set frame timestamp from file's one", OFFSET(ts_from_file), AV_OPT_TYPE_INT,    {.i64 = 0   }, 0, 2,       DEC, "ts_type" },
     { "none", "none",                   0, AV_OPT_TYPE_CONST,    {.i64 = 0   }, 0, 2,       DEC, "ts_type" },
     { "sec",  "second precision",       0, AV_OPT_TYPE_CONST,    {.i64 = 1   }, 0, 2,       DEC, "ts_type" },
     { "ns",   "nano second precision",  0, AV_OPT_TYPE_CONST,    {.i64 = 2   }, 0, 2,       DEC, "ts_type" },
-    { NULL },
+    COMMON_OPTIONS /* framerate, pixel_format, video_size, loop and the { NULL } terminator; frame_size is no longer in this table */
 };
 
-#if CONFIG_IMAGE2_DEMUXER
 static const AVClass img2_class = {
     .class_name = "image2 demuxer",
     .item_name  = av_default_item_name,
@@ -605,11 +606,17 @@ AVInputFormat ff_image2_demuxer = {
     .priv_class     = &img2_class,
 };
 #endif
+
+const AVOption ff_img2pipe_options[] = { /* option table used by img2pipe_class and by the classes generated by IMAGEAUTO_DEMUXER */
+    { "frame_size", "force frame size in bytes", OFFSET(frame_size), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, DEC },
+    COMMON_OPTIONS /* shared entries; the macro also supplies the terminating { NULL } */
+};
+
 #if CONFIG_IMAGE2PIPE_DEMUXER
 static const AVClass img2pipe_class = {
     .class_name = "image2pipe demuxer",
     .item_name  = av_default_item_name,
-    .option     = ff_img_options,
+    .option     = ff_img2pipe_options, /* frame_size plus COMMON_OPTIONS; the pattern/start_number/ts_from_file entries stay in ff_img_options */
     .version    = LIBAVUTIL_VERSION_INT,
 };
 AVInputFormat ff_image2pipe_demuxer = {
@@ -1005,11 +1012,24 @@ static int xwd_probe(AVProbeData *p)
     return AVPROBE_SCORE_MAX / 2 + 1;
 }
 
+static int gif_probe(AVProbeData *p)
+{
+    /* check magic: the first 6 bytes must match gif87a_sig or gif89a_sig */
+    if (memcmp(p->buf, gif87a_sig, 6) && memcmp(p->buf, gif89a_sig, 6))
+        return 0;
+
+    /* reject the buffer if the 16-bit width (offset 6) or height (offset 8) is zero */
+    if (!AV_RL16(&p->buf[6]) || !AV_RL16(&p->buf[8]))
+        return 0;
+
+    return AVPROBE_SCORE_MAX - 1;
+}
+
 #define IMAGEAUTO_DEMUXER(imgname, codecid)\
 static const AVClass imgname ## _class = {\
     .class_name = AV_STRINGIFY(imgname) " demuxer",\
     .item_name  = av_default_item_name,\
-    .option     = ff_img_options,\
+    .option     = ff_img2pipe_options,\
     .version    = LIBAVUTIL_VERSION_INT,\
 };\
 AVInputFormat ff_image_ ## imgname ## _pipe_demuxer = {\
@@ -1028,6 +1048,7 @@ IMAGEAUTO_DEMUXER(bmp,     AV_CODEC_ID_BMP)
 IMAGEAUTO_DEMUXER(dds,     AV_CODEC_ID_DDS)
 IMAGEAUTO_DEMUXER(dpx,     AV_CODEC_ID_DPX)
 IMAGEAUTO_DEMUXER(exr,     AV_CODEC_ID_EXR)
+IMAGEAUTO_DEMUXER(gif,     AV_CODEC_ID_GIF)
 IMAGEAUTO_DEMUXER(j2k,     AV_CODEC_ID_JPEG2000)
 IMAGEAUTO_DEMUXER(jpeg,    AV_CODEC_ID_MJPEG)
 IMAGEAUTO_DEMUXER(jpegls,  AV_CODEC_ID_JPEGLS)
diff --git a/libavformat/img2enc.c b/libavformat/img2enc.c
index a09cc8ec501e5..bec4bf81dde19 100644
--- a/libavformat/img2enc.c
+++ b/libavformat/img2enc.c
@@ -110,7 +110,8 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
                                           AV_FRAME_FILENAME_FLAGS_MULTIPLE) < 0 &&
                    img->img_number > 1) {
             av_log(s, AV_LOG_ERROR,
-                   "Could not get frame filename number %d from pattern '%s' (either set update or use a pattern like %%03d within the filename pattern)\n",
+                   "Could not get frame filename number %d from pattern '%s'. "
+                   "Use '-frames:v 1' for a single image, or '-update' option, or use a pattern such as %%03d within the filename.\n",
                    img->img_number, img->path);
             return AVERROR(EINVAL);
         }
diff --git a/libavformat/isom.c b/libavformat/isom.c
index ca9d22e4f7411..0a4d901be5597 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -163,6 +163,8 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
 
     { AV_CODEC_ID_HEVC, MKTAG('h', 'e', 'v', '1') }, /* HEVC/H.265 which indicates parameter sets may be in ES */
     { AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which indicates parameter sets shall not be in ES */
+    { AV_CODEC_ID_HEVC, MKTAG('d', 'v', 'h', 'e') }, /* HEVC-based Dolby Vision derived from hev1 */
+                                                     /* dvh1 is handled within mov.c */
 
     { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') }, /* AVC-1/H.264 */
     { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '2') },
@@ -185,6 +187,8 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
     { AV_CODEC_ID_H264, MKTAG('r', 'v', '6', '4') }, /* X-Com Radvision */
     { AV_CODEC_ID_H264, MKTAG('x', 'a', 'l', 'g') }, /* XAVC-L HD422 produced by FCP */
     { AV_CODEC_ID_H264, MKTAG('a', 'v', 'l', 'g') }, /* Panasonic P2 AVC-LongG */
+    { AV_CODEC_ID_H264, MKTAG('d', 'v', 'a', '1') }, /* AVC-based Dolby Vision derived from avc1 */
+    { AV_CODEC_ID_H264, MKTAG('d', 'v', 'a', 'v') }, /* AVC-based Dolby Vision derived from avc3 */
 
     { AV_CODEC_ID_VP8,  MKTAG('v', 'p', '0', '8') }, /* VP8 */
     { AV_CODEC_ID_VP9,  MKTAG('v', 'p', '0', '9') }, /* VP9 */
diff --git a/libavformat/isom.h b/libavformat/isom.h
index e629663949659..69452cae8e58f 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -87,6 +87,7 @@ typedef struct MOVAtom {
 struct MOVParseTableEntry;
 
 typedef struct MOVFragment {
+    int found_tfhd;
     unsigned track_id;
     uint64_t base_data_offset;
     uint64_t moof_offset;
diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index 66441a2a43304..adf72117e9343 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c
@@ -97,6 +97,8 @@ static int ivf_check_bitstream(struct AVFormatContext *s, const AVPacket *pkt)
 
     if (st->codecpar->codec_id == AV_CODEC_ID_VP9)
         ret = ff_stream_add_bitstream_filter(st, "vp9_superframe", NULL);
+    else if (st->codecpar->codec_id == AV_CODEC_ID_AV1)
+        ret = ff_stream_add_bitstream_filter(st, "av1_metadata", "td=insert");
 
     return ret;
 }
diff --git a/libavformat/jacosubdec.c b/libavformat/jacosubdec.c
index 520c435cc5afc..f6be5df2d7d7a 100644
--- a/libavformat/jacosubdec.c
+++ b/libavformat/jacosubdec.c
@@ -127,7 +127,7 @@ static const char *read_ts(JACOsubContext *jacosub, const char *buf,
     ts_start  = (ts_start + jacosub->shift) * 100 / jacosub->timeres;
     ts_end    = (ts_end   + jacosub->shift) * 100 / jacosub->timeres;
     *start    = ts_start;
-    *duration = ts_start + ts_end;
+    *duration = ts_end - ts_start;
     return buf + len;
 }
 
diff --git a/libavformat/libopenmpt.c b/libavformat/libopenmpt.c
index 0fff702a36f46..a3342708477da 100644
--- a/libavformat/libopenmpt.c
+++ b/libavformat/libopenmpt.c
@@ -259,7 +259,7 @@ static int read_probe_openmpt(AVProbeData *p)
                 } else {
                     /* The file extension is unknown and we have very few data
                      * bytes available. libopenmpt cannot decide anything here,
-                     * and returning any score > 0 would result in successfull
+                     * and returning any score > 0 would result in successful
                      * probing of random data.
                      */
                     return 0;
diff --git a/libavformat/libsrt.c b/libavformat/libsrt.c
index fbfd6ace83845..b5568089fa0cc 100644
--- a/libavformat/libsrt.c
+++ b/libavformat/libsrt.c
@@ -76,6 +76,14 @@ typedef struct SRTContext {
     int64_t rcvlatency;
     int64_t peerlatency;
     enum SRTMode mode;
+    int sndbuf;
+    int rcvbuf;
+    int lossmaxttl;
+    int minversion;
+    char *streamid;
+    char *smoother;
+    int messageapi;
+    SRT_TRANSTYPE transtype;
 } SRTContext;
 
 #define D AV_OPT_FLAG_DECODING_PARAM
@@ -110,6 +118,16 @@ static const AVOption libsrt_options[] = {
     { "caller",         NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_CALLER },     INT_MIN, INT_MAX, .flags = D|E, "mode" },
     { "listener",       NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_LISTENER },   INT_MIN, INT_MAX, .flags = D|E, "mode" },
     { "rendezvous",     NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRT_MODE_RENDEZVOUS }, INT_MIN, INT_MAX, .flags = D|E, "mode" },
+    { "sndbuf",         "Send buffer size (in bytes)",                                          OFFSET(sndbuf),           AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "rcvbuf",         "Receive buffer size (in bytes)",                                       OFFSET(rcvbuf),           AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "lossmaxttl",     "Maximum possible packet reorder tolerance",                            OFFSET(lossmaxttl),       AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "minversion",     "The minimum SRT version that is required from the peer",               OFFSET(minversion),       AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, INT_MAX,   .flags = D|E },
+    { "streamid",       "A string of up to 512 characters that an Initiator can pass to a Responder",  OFFSET(streamid),  AV_OPT_TYPE_STRING,   { .str = NULL },              .flags = D|E },
+    { "smoother",       "The type of Smoother used for the transmission for that socket",       OFFSET(smoother),         AV_OPT_TYPE_STRING,   { .str = NULL },              .flags = D|E },
+    { "messageapi",     "Enable message API",                                                   OFFSET(messageapi),       AV_OPT_TYPE_INT,      { .i64 = -1 }, -1, 1,         .flags = D|E },
+    { "transtype",      "The transmission type for the socket",                                 OFFSET(transtype),        AV_OPT_TYPE_INT,      { .i64 = SRTT_INVALID }, SRTT_LIVE, SRTT_INVALID, .flags = D|E, "transtype" },
+    { "live",           NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRTT_LIVE }, INT_MIN, INT_MAX, .flags = D|E, "transtype" },
+    { "file",           NULL, 0, AV_OPT_TYPE_CONST,  { .i64 = SRTT_FILE }, INT_MIN, INT_MAX, .flags = D|E, "transtype" },
     { NULL }
 };
 
@@ -297,6 +315,7 @@ static int libsrt_set_options_pre(URLContext *h, int fd)
     int connect_timeout = s->connect_timeout;
 
     if ((s->mode == SRT_MODE_RENDEZVOUS && libsrt_setsockopt(h, fd, SRTO_RENDEZVOUS, "SRTO_RENDEZVOUS", &yes, sizeof(yes)) < 0) ||
+        (s->transtype != SRTT_INVALID && libsrt_setsockopt(h, fd, SRTO_TRANSTYPE, "SRTO_TRANSTYPE", &s->transtype, sizeof(s->transtype)) < 0) ||
         (s->maxbw >= 0 && libsrt_setsockopt(h, fd, SRTO_MAXBW, "SRTO_MAXBW", &s->maxbw, sizeof(s->maxbw)) < 0) ||
         (s->pbkeylen >= 0 && libsrt_setsockopt(h, fd, SRTO_PBKEYLEN, "SRTO_PBKEYLEN", &s->pbkeylen, sizeof(s->pbkeylen)) < 0) ||
         (s->passphrase && libsrt_setsockopt(h, fd, SRTO_PASSPHRASE, "SRTO_PASSPHRASE", s->passphrase, strlen(s->passphrase)) < 0) ||
@@ -310,7 +329,15 @@ static int libsrt_set_options_pre(URLContext *h, int fd)
         (s->tlpktdrop >= 0 && libsrt_setsockopt(h, fd, SRTO_TLPKTDROP, "SRTO_TLPKDROP", &s->tlpktdrop, sizeof(s->tlpktdrop)) < 0) ||
         (s->nakreport >= 0 && libsrt_setsockopt(h, fd, SRTO_NAKREPORT, "SRTO_NAKREPORT", &s->nakreport, sizeof(s->nakreport)) < 0) ||
         (connect_timeout >= 0 && libsrt_setsockopt(h, fd, SRTO_CONNTIMEO, "SRTO_CONNTIMEO", &connect_timeout, sizeof(connect_timeout)) <0 ) ||
-        (s->payload_size >= 0 && libsrt_setsockopt(h, fd, SRTO_PAYLOADSIZE, "SRTO_PAYLOADSIZE", &s->payload_size, sizeof(s->payload_size)) < 0)) {
+        (s->sndbuf >= 0 && libsrt_setsockopt(h, fd, SRTO_SNDBUF, "SRTO_SNDBUF", &s->sndbuf, sizeof(s->sndbuf)) < 0) ||
+        (s->rcvbuf >= 0 && libsrt_setsockopt(h, fd, SRTO_RCVBUF, "SRTO_RCVBUF", &s->rcvbuf, sizeof(s->rcvbuf)) < 0) ||
+        (s->lossmaxttl >= 0 && libsrt_setsockopt(h, fd, SRTO_LOSSMAXTTL, "SRTO_LOSSMAXTTL", &s->lossmaxttl, sizeof(s->lossmaxttl)) < 0) ||
+        (s->minversion >= 0 && libsrt_setsockopt(h, fd, SRTO_MINVERSION, "SRTO_MINVERSION", &s->minversion, sizeof(s->minversion)) < 0) ||
+        (s->streamid && libsrt_setsockopt(h, fd, SRTO_STREAMID, "SRTO_STREAMID", s->streamid, strlen(s->streamid)) < 0) ||
+        (s->smoother && libsrt_setsockopt(h, fd, SRTO_SMOOTHER, "SRTO_SMOOTHER", s->smoother, strlen(s->smoother)) < 0) ||
+        (s->messageapi >= 0 && libsrt_setsockopt(h, fd, SRTO_MESSAGEAPI, "SRTO_MESSAGEAPI", &s->messageapi, sizeof(s->messageapi)) < 0) ||
+        (s->payload_size >= 0 && libsrt_setsockopt(h, fd, SRTO_PAYLOADSIZE, "SRTO_PAYLOADSIZE", &s->payload_size, sizeof(s->payload_size)) < 0) ||
+        ((h->flags & AVIO_FLAG_WRITE) && libsrt_setsockopt(h, fd, SRTO_SENDER, "SRTO_SENDER", &yes, sizeof(yes)) < 0)) {
         return AVERROR(EIO);
     }
     return 0;
@@ -522,6 +549,38 @@ static int libsrt_open(URLContext *h, const char *uri, int flags)
                 return AVERROR(EIO);
             }
         }
+        if (av_find_info_tag(buf, sizeof(buf), "sndbuf", p)) {
+            s->sndbuf = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "rcvbuf", p)) {
+            s->rcvbuf = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "lossmaxttl", p)) {
+            s->lossmaxttl = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "minversion", p)) {
+            s->minversion = strtol(buf, NULL, 0);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "streamid", p)) {
+            av_freep(&s->streamid);
+            s->streamid = av_strdup(buf);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "smoother", p)) {
+            av_freep(&s->smoother);
+            s->smoother = av_strdup(buf);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "messageapi", p)) {
+            s->messageapi = strtol(buf, NULL, 10);
+        }
+        if (av_find_info_tag(buf, sizeof(buf), "transtype", p)) {
+            if (!strcmp(buf, "live")) {
+                s->transtype = SRTT_LIVE;
+            } else if (!strcmp(buf, "file")) {
+                s->transtype = SRTT_FILE;
+            } else {
+                return AVERROR(EINVAL);
+            }
+        }
     }
     return libsrt_setup(h, uri, flags);
 }
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index e6793988e1a1d..0e3a6890c10c0 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -68,6 +68,8 @@
 
 #include "qtpalette.h"
 
+#define EBML_UNKNOWN_LENGTH  UINT64_MAX /* EBML unknown length, in uint64_t */
+
 typedef enum {
     EBML_NONE,
     EBML_UINT,
@@ -869,7 +871,7 @@ static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
 {
     int res = ebml_read_num(matroska, pb, 8, number);
     if (res > 0 && *number + 1 == 1ULL << (7 * res))
-        *number = 0xffffffffffffffULL;
+        *number = EBML_UNKNOWN_LENGTH;
     return res;
 }
 
@@ -1049,7 +1051,7 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
             break;
     if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
         matroska->num_levels > 0                   &&
-        matroska->levels[matroska->num_levels - 1].length == 0xffffffffffffff)
+        matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
         return 0;  // we reached the end of an unknown size cluster
     if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
         av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
@@ -1197,6 +1199,18 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                    length, max_lengths[syntax->type], syntax->type);
             return AVERROR_INVALIDDATA;
         }
+        if (matroska->num_levels > 0) {
+            MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
+            AVIOContext *pb = matroska->ctx->pb;
+            int64_t pos = avio_tell(pb);
+            if (level->length != EBML_UNKNOWN_LENGTH &&
+                (pos + length) > (level->start + level->length)) {
+                av_log(matroska->ctx, AV_LOG_ERROR,
+                       "Invalid length 0x%"PRIx64" > 0x%"PRIx64" in parent\n",
+                       length, level->start + level->length);
+                return AVERROR_INVALIDDATA;
+            }
+        }
     }
 
     switch (syntax->type) {
@@ -1598,7 +1612,7 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
             ret = AVERROR_INVALIDDATA;
         } else {
             level.start  = 0;
-            level.length = (uint64_t) -1;
+            level.length = EBML_UNKNOWN_LENGTH;
             matroska->levels[matroska->num_levels] = level;
             matroska->num_levels++;
             matroska->current_id                   = 0;
@@ -1608,7 +1622,7 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
             /* remove dummy level */
             while (matroska->num_levels) {
                 uint64_t length = matroska->levels[--matroska->num_levels].length;
-                if (length == (uint64_t) -1)
+                if (length == EBML_UNKNOWN_LENGTH)
                     break;
             }
         }
@@ -3541,7 +3555,7 @@ static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
             ret = matroska_resync(matroska, pos);
     }
 
-    return ret;
+    return 0;
 }
 
 static int matroska_read_seek(AVFormatContext *s, int stream_index,
@@ -3931,22 +3945,19 @@ static int webm_dash_manifest_cues(AVFormatContext *s, int64_t init_range)
 
     // store cue point timestamps as a comma separated list for checking subsegment alignment in
     // the muxer. assumes that each timestamp cannot be more than 20 characters long.
-    buf = av_malloc_array(s->streams[0]->nb_index_entries, 20 * sizeof(char));
+    buf = av_malloc_array(s->streams[0]->nb_index_entries, 20);
     if (!buf) return -1;
     strcpy(buf, "");
     for (i = 0; i < s->streams[0]->nb_index_entries; i++) {
-        int ret = snprintf(buf + end, 20 * sizeof(char),
-                           "%" PRId64, s->streams[0]->index_entries[i].timestamp);
+        int ret = snprintf(buf + end, 20,
+                           "%" PRId64"%s", s->streams[0]->index_entries[i].timestamp,
+                           i != s->streams[0]->nb_index_entries - 1 ? "," : "");
         if (ret <= 0 || (ret == 20 && i ==  s->streams[0]->nb_index_entries - 1)) {
             av_log(s, AV_LOG_ERROR, "timestamp too long.\n");
             av_free(buf);
             return AVERROR_INVALIDDATA;
         }
         end += ret;
-        if (i != s->streams[0]->nb_index_entries - 1) {
-            strncat(buf, ",", sizeof(char));
-            end++;
-        }
     }
     av_dict_set(&s->streams[0]->metadata, CUE_TIMESTAMPS, buf, 0);
     av_free(buf);
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index aed83aef70c14..b9f99c4463925 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -693,7 +693,7 @@ static int put_flac_codecpriv(AVFormatContext *s,
         snprintf(buf, sizeof(buf), "0x%"PRIx64, par->channel_layout);
         av_dict_set(&dict, "WAVEFORMATEXTENSIBLE_CHANNEL_MASK", buf, 0);
 
-        len = ff_vorbiscomment_length(dict, vendor);
+        len = ff_vorbiscomment_length(dict, vendor, NULL, 0);
         if (len >= ((1<<24) - 4))
             return AVERROR(EINVAL);
 
@@ -707,7 +707,7 @@ static int put_flac_codecpriv(AVFormatContext *s,
         AV_WB24(data + 1, len);
 
         p = data + 4;
-        ff_vorbiscomment_write(&p, &dict, vendor);
+        ff_vorbiscomment_write(&p, &dict, vendor, NULL, 0);
 
         avio_write(pb, data, len + 4);
 
@@ -2781,6 +2781,7 @@ static const AVCodecTag additional_video_tags[] = {
 
 static const AVCodecTag additional_subtitle_tags[] = {
     { AV_CODEC_ID_DVB_SUBTITLE,      0xFFFFFFFF },
+    { AV_CODEC_ID_DVD_SUBTITLE,      0xFFFFFFFF },
     { AV_CODEC_ID_HDMV_PGS_SUBTITLE, 0xFFFFFFFF },
     { AV_CODEC_ID_NONE,              0xFFFFFFFF }
 };
diff --git a/libavformat/mov.c b/libavformat/mov.c
index ec57a05803301..a7d444b0eeea1 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1234,16 +1234,12 @@ static int search_frag_moof_offset(MOVFragmentIndex *frag_index, int64_t offset)
 
 static int64_t get_stream_info_time(MOVFragmentStreamInfo * frag_stream_info)
 {
-
-    if (frag_stream_info) {
-        if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE)
-            return frag_stream_info->sidx_pts;
-        if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE)
-            return frag_stream_info->first_tfra_pts;
-        if (frag_stream_info->tfdt_dts != AV_NOPTS_VALUE)
-            return frag_stream_info->tfdt_dts;
-    }
-    return AV_NOPTS_VALUE;
+    av_assert0(frag_stream_info); /* a NULL entry is now an assertion failure instead of yielding AV_NOPTS_VALUE */
+    if (frag_stream_info->sidx_pts != AV_NOPTS_VALUE) /* first choice: sidx_pts */
+        return frag_stream_info->sidx_pts;
+    if (frag_stream_info->first_tfra_pts != AV_NOPTS_VALUE) /* second choice: first_tfra_pts */
+        return frag_stream_info->first_tfra_pts;
+    return frag_stream_info->tfdt_dts; /* last fallback; returned as-is, so an unset tfdt_dts still yields AV_NOPTS_VALUE */
 }
 
 static int64_t get_frag_time(MOVFragmentIndex *frag_index,
@@ -1270,7 +1266,7 @@ static int64_t get_frag_time(MOVFragmentIndex *frag_index,
 static int search_frag_timestamp(MOVFragmentIndex *frag_index,
                                  AVStream *st, int64_t timestamp)
 {
-    int a, b, m;
+    int a, b, m, m0;
     int64_t frag_time;
     int id = -1;
 
@@ -1286,15 +1282,18 @@ static int search_frag_timestamp(MOVFragmentIndex *frag_index,
     b = frag_index->nb_items;
 
     while (b - a > 1) {
-        m = (a + b) >> 1;
-        frag_time = get_frag_time(frag_index, m, id);
-        if (frag_time != AV_NOPTS_VALUE) {
-            if (frag_time >= timestamp)
-                b = m;
-            if (frag_time <= timestamp)
-                a = m;
-        }
+        m0 = m = (a + b) >> 1;
+
+        while (m < b &&
+               (frag_time = get_frag_time(frag_index, m, id)) == AV_NOPTS_VALUE)
+            m++;
+
+        if (m < b && frag_time <= timestamp)
+            a = m;
+        else
+            b = m0;
     }
+
     return a;
 }
 
@@ -1326,6 +1325,10 @@ static int update_frag_index(MOVContext *c, int64_t offset)
         return -1;
 
     for (i = 0; i < c->fc->nb_streams; i++) {
+        // Avoid building frag index if streams lack track id.
+        if (c->fc->streams[i]->id < 0)
+            return AVERROR_INVALIDDATA;
+
         frag_stream_info[i].id = c->fc->streams[i]->id;
         frag_stream_info[i].sidx_pts = AV_NOPTS_VALUE;
         frag_stream_info[i].tfdt_dts = AV_NOPTS_VALUE;
@@ -1366,6 +1369,9 @@ static void fix_frag_index_entries(MOVFragmentIndex *frag_index, int index,
 
 static int mov_read_moof(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
+    // Set by mov_read_tfhd(). mov_read_trun() will reject files missing tfhd.
+    c->fragment.found_tfhd = 0;
+
     if (!c->has_looked_for_mfra && c->use_mfra_for > 0) {
         c->has_looked_for_mfra = 1;
         if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
@@ -1903,6 +1909,11 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (ret < 0)
         return ret;
     if (atom.type == MKTAG('h','v','c','C') && st->codecpar->codec_tag == MKTAG('d','v','h','1'))
+        /* HEVC-based Dolby Vision derived from hvc1.
+           Happens to match with an identifier
+           previously utilized for DV. Thus, if we have
+           the hvcC extradata box available as specified,
+           set codec to HEVC */
         st->codecpar->codec_id = AV_CODEC_ID_HEVC;
 
     return 0;
@@ -2689,8 +2700,11 @@ static inline int64_t mov_get_stsc_samples(MOVStreamContext *sc, unsigned int in
 
     if (mov_stsc_index_valid(index, sc->stsc_count))
         chunk_count = sc->stsc_data[index + 1].first - sc->stsc_data[index].first;
-    else
+    else {
+        // Validation for stsc / stco  happens earlier in mov_read_stsc + mov_read_trak.
+        av_assert0(sc->stsc_data[index].first <= sc->chunk_count);
         chunk_count = sc->chunk_count - (sc->stsc_data[index].first - 1);
+    }
 
     return sc->stsc_data[index].count * (int64_t)chunk_count;
 }
@@ -2918,12 +2932,6 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         av_log(c->fc, AV_LOG_TRACE, "sample_count=%d, sample_duration=%d\n",
                 sample_count, sample_duration);
 
-        if (   i+1 == entries
-            && i
-            && sample_count == 1
-            && total_sample_count > 100
-            && sample_duration/10 > duration / total_sample_count)
-            sample_duration = duration / total_sample_count;
         duration+=(int64_t)sample_duration*(uint64_t)sample_count;
         total_sample_count+=sample_count;
     }
@@ -3438,6 +3446,7 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
     MOVIndexRange *current_index_range;
     int i;
     int found_keyframe_after_edit = 0;
+    int found_non_empty_edit = 0;
 
     if (!msc->elst_data || msc->elst_count <= 0 || nb_old <= 0) {
         return;
@@ -3484,10 +3493,11 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
         edit_list_dts_counter = edit_list_dts_entry_end;
         edit_list_dts_entry_end += edit_list_duration;
         num_discarded_begin = 0;
-        if (edit_list_media_time == -1) {
+        if (!found_non_empty_edit && edit_list_media_time == -1) {
             empty_edits_sum_duration += edit_list_duration;
             continue;
         }
+        found_non_empty_edit = 1;
 
         // If we encounter a non-negative edit list reset the skip_samples/start_pad fields and set them
         // according to the edit list below.
@@ -4154,7 +4164,7 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     st = avformat_new_stream(c->fc, NULL);
     if (!st) return AVERROR(ENOMEM);
-    st->id = c->fc->nb_streams;
+    st->id = -1;
     sc = av_mallocz(sizeof(MOVStreamContext));
     if (!sc) return AVERROR(ENOMEM);
 
@@ -4168,6 +4178,13 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     c->trak_index = -1;
 
+    // Here stsc refers to a chunk not described in stco. This is technically invalid,
+    // but we can overlook it (clearing stsc) whenever stts_count == 0 (indicating no samples).
+    if (!sc->chunk_count && !sc->stts_count && sc->stsc_count) {
+        sc->stsc_count = 0;
+        av_freep(&sc->stsc_data);
+    }
+
     /* sanity checks */
     if ((sc->chunk_count && (!sc->stts_count || !sc->stsc_count ||
                             (!sc->sample_size && !sc->sample_count))) ||
@@ -4176,7 +4193,7 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                st->index);
         return 0;
     }
-    if (sc->chunk_count && sc->stsc_count && sc->stsc_data[ sc->stsc_count - 1 ].first > sc->chunk_count) {
+    if (sc->stsc_count && sc->stsc_data[ sc->stsc_count - 1 ].first > sc->chunk_count) {
         av_log(c->fc, AV_LOG_ERROR, "stream %d, contradictionary STSC and STCO\n",
                st->index);
         return AVERROR_INVALIDDATA;
@@ -4438,6 +4455,11 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     st = c->fc->streams[c->fc->nb_streams-1];
     sc = st->priv_data;
 
+    // Each stream (trak) should have exactly 1 tkhd. This catches bad files and
+    // avoids corrupting AVStreams mapped to an earlier tkhd.
+    if (st->id != -1)
+        return AVERROR_INVALIDDATA;
+
     version = avio_r8(pb);
     flags = avio_rb24(pb);
     st->disposition |= (flags & MOV_TKHD_FLAG_ENABLED) ? AV_DISPOSITION_DEFAULT : 0;
@@ -4540,23 +4562,25 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     MOVTrackExt *trex = NULL;
     int flags, track_id, i;
 
+    c->fragment.found_tfhd = 1;
+
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
 
     track_id = avio_rb32(pb);
     if (!track_id)
         return AVERROR_INVALIDDATA;
-    frag->track_id = track_id;
-    set_frag_stream(&c->frag_index, track_id);
     for (i = 0; i < c->trex_count; i++)
-        if (c->trex_data[i].track_id == frag->track_id) {
+        if (c->trex_data[i].track_id == track_id) {
             trex = &c->trex_data[i];
             break;
         }
     if (!trex) {
-        av_log(c->fc, AV_LOG_ERROR, "could not find corresponding trex\n");
-        return AVERROR_INVALIDDATA;
+        av_log(c->fc, AV_LOG_WARNING, "could not find corresponding trex (id %u)\n", track_id);
+        return 0;
     }
+    frag->track_id = track_id;
+    set_frag_stream(&c->frag_index, track_id);
 
     frag->base_data_offset = flags & MOV_TFHD_BASE_DATA_OFFSET ?
                              avio_rb64(pb) : flags & MOV_TFHD_DEFAULT_BASE_IS_MOOF ?
@@ -4635,8 +4659,8 @@ static int mov_read_tfdt(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         }
     }
     if (!st) {
-        av_log(c->fc, AV_LOG_ERROR, "could not find corresponding track id %u\n", frag->track_id);
-        return AVERROR_INVALIDDATA;
+        av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
+        return 0;
     }
     sc = st->priv_data;
     if (sc->pseudo_stream_id + 1 != frag->stsd_id && sc->pseudo_stream_id != -1)
@@ -4675,6 +4699,11 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     AVIndexEntry *new_entries;
     MOVFragmentStreamInfo * frag_stream_info;
 
+    if (!frag->found_tfhd) {
+        av_log(c->fc, AV_LOG_ERROR, "trun track id unknown, no tfhd was found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     for (i = 0; i < c->fc->nb_streams; i++) {
         if (c->fc->streams[i]->id == frag->track_id) {
             st = c->fc->streams[i];
@@ -4682,8 +4711,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         }
     }
     if (!st) {
-        av_log(c->fc, AV_LOG_ERROR, "could not find corresponding track id %u\n", frag->track_id);
-        return AVERROR_INVALIDDATA;
+        av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", frag->track_id);
+        return 0;
     }
     sc = st->priv_data;
     if (sc->pseudo_stream_id+1 != frag->stsd_id && sc->pseudo_stream_id != -1)
@@ -4704,6 +4733,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             break;
         }
     }
+    av_assert0(index_entry_pos <= st->nb_index_entries);
 
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
@@ -5017,7 +5047,7 @@ static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 }
             }
         }
-        for (i = 0; i < c->fc->nb_streams; i++) {
+        if (ref_st) for (i = 0; i < c->fc->nb_streams; i++) {
             st = c->fc->streams[i];
             sc = st->priv_data;
             if (!sc->has_sidx) {
@@ -5263,9 +5293,7 @@ static int mov_read_vpcc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 static int mov_read_smdm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     MOVStreamContext *sc;
-    const int chroma_den = 50000;
-    const int luma_den = 10000;
-    int i, j, version;
+    int i, version;
 
     if (c->fc->nb_streams < 1)
         return AVERROR_INVALIDDATA;
@@ -5288,17 +5316,15 @@ static int mov_read_smdm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (!sc->mastering)
         return AVERROR(ENOMEM);
 
-    for (i = 0; i < 3; i++)
-        for (j = 0; j < 2; j++)
-            sc->mastering->display_primaries[i][j] =
-                av_make_q(lrint(((double)avio_rb16(pb) / (1 << 16)) * chroma_den), chroma_den);
-    for (i = 0; i < 2; i++)
-        sc->mastering->white_point[i] =
-            av_make_q(lrint(((double)avio_rb16(pb) / (1 << 16)) * chroma_den), chroma_den);
-    sc->mastering->max_luminance =
-        av_make_q(lrint(((double)avio_rb32(pb) / (1 <<  8)) * luma_den), luma_den);
-    sc->mastering->min_luminance =
-        av_make_q(lrint(((double)avio_rb32(pb) / (1 << 14)) * luma_den), luma_den);
+    for (i = 0; i < 3; i++) {
+        sc->mastering->display_primaries[i][0] = av_make_q(avio_rb16(pb), 1 << 16);
+        sc->mastering->display_primaries[i][1] = av_make_q(avio_rb16(pb), 1 << 16);
+    }
+    sc->mastering->white_point[0] = av_make_q(avio_rb16(pb), 1 << 16);
+    sc->mastering->white_point[1] = av_make_q(avio_rb16(pb), 1 << 16);
+
+    sc->mastering->max_luminance = av_make_q(avio_rb32(pb), 1 << 8);
+    sc->mastering->min_luminance = av_make_q(avio_rb32(pb), 1 << 14);
 
     sc->mastering->has_primaries = 1;
     sc->mastering->has_luminance = 1;
@@ -6546,14 +6572,14 @@ static int cenc_decrypt(MOVContext *c, MOVStreamContext *sc, AVEncryptionInfo *s
     return 0;
 }
 
-static int cenc_filter(MOVContext *mov, MOVStreamContext *sc, AVPacket *pkt, int current_index)
+static int cenc_filter(MOVContext *mov, AVStream* st, MOVStreamContext *sc, AVPacket *pkt, int current_index)
 {
     MOVFragmentStreamInfo *frag_stream_info;
     MOVEncryptionIndex *encryption_index;
     AVEncryptionInfo *encrypted_sample;
     int encrypted_index, ret;
 
-    frag_stream_info = get_current_frag_stream_info(&mov->frag_index);
+    frag_stream_info = get_frag_stream_info(&mov->frag_index, mov->frag_index.current, st->id);
     encrypted_index = current_index;
     encryption_index = NULL;
     if (frag_stream_info) {
@@ -7783,7 +7809,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (mov->aax_mode)
         aax_filter(pkt->data, pkt->size, mov);
 
-    ret = cenc_filter(mov, sc, pkt, current_index);
+    ret = cenc_filter(mov, st, sc, pkt, current_index);
     if (ret < 0)
         return ret;
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 33978ee1b0249..77943304b5a52 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -75,6 +75,7 @@ static const AVOption options[] = {
     { "frag_discont", "Signal that the next fragment is discontinuous from earlier ones", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_FRAG_DISCONT}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "delay_moov", "Delay writing the initial moov until the first fragment is cut, or until the first fragment flush", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_DELAY_MOOV}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "global_sidx", "Write a global sidx index at the start of the file", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_GLOBAL_SIDX}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    { "skip_sidx", "Skip writing of sidx atom", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_SKIP_SIDX}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "write_colr", "Write colr atom (Experimental, may be renamed or changed, do not use from scripts)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_WRITE_COLR}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "write_gama", "Write deprecated gama atom", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_WRITE_GAMA}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "use_metadata_tags", "Use mdta atom for metadata.", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_USE_MDTA}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
@@ -319,8 +320,12 @@ static int mov_write_ac3_tag(AVIOContext *pb, MOVTrack *track)
     uint8_t buf[3];
     int fscod, bsid, bsmod, acmod, lfeon, frmsizecod;
 
-    if (track->vos_len < 7)
-        return -1;
+    if (track->vos_len < 7) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before AC3 packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
 
     avio_wb32(pb, 11);
     ffio_wfourcc(pb, "dac3");
@@ -537,11 +542,14 @@ static int mov_write_eac3_tag(AVIOContext *pb, MOVTrack *track)
     struct eac3_info *info;
     int size, i;
 
-    if (!track->eac3_priv)
+    if (!track->eac3_priv) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before EAC3 packets parsed.\n");
         return AVERROR(EINVAL);
+    }
 
     info = track->eac3_priv;
-    size = 2 + 4 * (info->num_ind_sub + 1);
+    size = 2 + ((34 * (info->num_ind_sub + 1) + 7) >> 3);
     buf = av_malloc(size);
     if (!buf) {
         size = AVERROR(ENOMEM);
@@ -563,12 +571,12 @@ static int mov_write_eac3_tag(AVIOContext *pb, MOVTrack *track)
         put_bits(&pbc, 4, info->substream[i].num_dep_sub);
         if (!info->substream[i].num_dep_sub) {
             put_bits(&pbc, 1, 0); /* reserved */
-            size--;
         } else {
             put_bits(&pbc, 9, info->substream[i].chan_loc);
         }
     }
     flush_put_bits(&pbc);
+    size = put_bits_count(&pbc) >> 3;
 
     avio_wb32(pb, size + 8);
     ffio_wfourcc(pb, "dec3");
@@ -1021,6 +1029,7 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
     int64_t pos = avio_tell(pb);
     int version = 0;
     uint32_t tag = track->tag;
+    int ret = 0;
 
     if (track->mode == MODE_MOV) {
         if (track->timescale > UINT16_MAX || !track->par->channels) {
@@ -1078,12 +1087,14 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
             avio_wb16(pb, track->audio_vbr ? -2 : 0); /* compression ID */
         } else { /* reserved for mp4/3gp */
             if (track->par->codec_id == AV_CODEC_ID_FLAC ||
+                track->par->codec_id == AV_CODEC_ID_ALAC ||
                 track->par->codec_id == AV_CODEC_ID_OPUS) {
                 avio_wb16(pb, track->par->channels);
             } else {
                 avio_wb16(pb, 2);
             }
-            if (track->par->codec_id == AV_CODEC_ID_FLAC) {
+            if (track->par->codec_id == AV_CODEC_ID_FLAC ||
+                track->par->codec_id == AV_CODEC_ID_ALAC) {
                 avio_wb16(pb, track->par->bits_per_raw_sample);
             } else {
                 avio_wb16(pb, 16);
@@ -1122,34 +1133,41 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
          track->par->codec_id == AV_CODEC_ID_QDM2          ||
          (mov_pcm_le_gt16(track->par->codec_id) && version==1) ||
          (mov_pcm_be_gt16(track->par->codec_id) && version==1)))
-        mov_write_wave_tag(s, pb, track);
+        ret = mov_write_wave_tag(s, pb, track);
     else if (track->tag == MKTAG('m','p','4','a'))
-        mov_write_esds_tag(pb, track);
+        ret = mov_write_esds_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_AMR_NB)
-        mov_write_amr_tag(pb, track);
+        ret = mov_write_amr_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_AC3)
-        mov_write_ac3_tag(pb, track);
+        ret = mov_write_ac3_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_EAC3)
-        mov_write_eac3_tag(pb, track);
+        ret = mov_write_eac3_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_ALAC)
-        mov_write_extradata_tag(pb, track);
+        ret = mov_write_extradata_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_WMAPRO)
-        mov_write_wfex_tag(s, pb, track);
+        ret = mov_write_wfex_tag(s, pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_FLAC)
-        mov_write_dfla_tag(pb, track);
+        ret = mov_write_dfla_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_OPUS)
-        mov_write_dops_tag(pb, track);
+        ret = mov_write_dops_tag(pb, track);
     else if (track->vos_len > 0)
-        mov_write_glbl_tag(pb, track);
+        ret = mov_write_glbl_tag(pb, track);
 
-    if (track->mode == MODE_MOV && track->par->codec_type == AVMEDIA_TYPE_AUDIO)
-        mov_write_chan_tag(s, pb, track);
+    if (ret < 0)
+        return ret;
 
-    if (mov->encryption_scheme != MOV_ENC_NONE) {
-        ff_mov_cenc_write_sinf_tag(track, pb, mov->encryption_kid);
+    if (track->mode == MODE_MOV && track->par->codec_type == AVMEDIA_TYPE_AUDIO
+            && ((ret = mov_write_chan_tag(s, pb, track)) < 0)) {
+        return ret;
     }
 
-    return update_size(pb, pos);
+    if (mov->encryption_scheme != MOV_ENC_NONE
+            && ((ret = ff_mov_cenc_write_sinf_tag(track, pb, mov->encryption_kid)) < 0)) {
+        return ret;
+    }
+
+    ret = update_size(pb, pos);
+    return ret;
 }
 
 static int mov_write_d263_tag(AVIOContext *pb)
@@ -2214,22 +2232,27 @@ static int mov_write_gpmd_tag(AVIOContext *pb, const MOVTrack *track)
 static int mov_write_stsd_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
 {
     int64_t pos = avio_tell(pb);
+    int ret = 0;
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "stsd");
     avio_wb32(pb, 0); /* version & flags */
     avio_wb32(pb, 1); /* entry count */
     if (track->par->codec_type == AVMEDIA_TYPE_VIDEO)
-        mov_write_video_tag(pb, mov, track);
+        ret = mov_write_video_tag(pb, mov, track);
     else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO)
-        mov_write_audio_tag(s, pb, mov, track);
+        ret = mov_write_audio_tag(s, pb, mov, track);
     else if (track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)
-        mov_write_subtitle_tag(pb, track);
+        ret = mov_write_subtitle_tag(pb, track);
     else if (track->par->codec_tag == MKTAG('r','t','p',' '))
-        mov_write_rtp_tag(pb, track);
+        ret = mov_write_rtp_tag(pb, track);
     else if (track->par->codec_tag == MKTAG('t','m','c','d'))
-        mov_write_tmcd_tag(pb, track);
+        ret = mov_write_tmcd_tag(pb, track);
     else if (track->par->codec_tag == MKTAG('g','p','m','d'))
-        mov_write_gpmd_tag(pb, track);
+        ret = mov_write_gpmd_tag(pb, track);
+
+    if (ret < 0)
+        return ret;
+
     return update_size(pb, pos);
 }
 
@@ -2432,11 +2455,12 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
 static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
 {
     int64_t pos = avio_tell(pb);
-    int ret;
+    int ret = 0;
 
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "stbl");
-    mov_write_stsd_tag(s, pb, mov, track);
+    if ((ret = mov_write_stsd_tag(s, pb, mov, track)) < 0)
+        return ret;
     mov_write_stts_tag(pb, track);
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
@@ -2738,8 +2762,8 @@ static int mov_write_mdhd_tag(AVIOContext *pb, MOVMuxContext *mov,
     if (version != 0 && track->mode == MODE_MOV) {
         av_log(NULL, AV_LOG_ERROR,
                "FATAL error, file duration too long for timebase, this file will not be\n"
-               "playable with quicktime. Choose a different timebase or a different\n"
-               "container format\n");
+               "playable with QuickTime. Choose a different timebase with "
+               "-video_track_timescale or a different container format\n");
     }
 
     return 32;
@@ -4603,7 +4627,8 @@ static int mov_write_moof_tag(AVIOContext *pb, MOVMuxContext *mov, int tracks,
     mov_write_moof_tag_internal(avio_buf, mov, tracks, 0);
     moof_size = ffio_close_null_buf(avio_buf);
 
-    if (mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_GLOBAL_SIDX))
+    if (mov->flags & FF_MOV_FLAG_DASH &&
+        !(mov->flags & (FF_MOV_FLAG_GLOBAL_SIDX | FF_MOV_FLAG_SKIP_SIDX)))
         mov_write_sidx_tags(pb, mov, tracks, moof_size + 8 + mdat_size);
 
     if (mov->write_prft > MOV_PRFT_NONE && mov->write_prft < MOV_PRFT_NB)
@@ -5422,7 +5447,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
              * the next fragment. This means the cts of the first sample must
              * be the same in all fragments, unless end_pts was updated by
              * the packet causing the fragment to be written. */
-            if ((mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)) ||
+            if ((mov->flags & FF_MOV_FLAG_DASH &&
+                !(mov->flags & (FF_MOV_FLAG_GLOBAL_SIDX | FF_MOV_FLAG_SKIP_SIDX))) ||
                 mov->mode == MODE_ISM)
                 pkt->pts = pkt->dts + trk->end_pts - trk->cluster[trk->entry].dts;
         } else {
@@ -6067,6 +6093,11 @@ static int mov_init(AVFormatContext *s)
         s->flags &= ~AVFMT_FLAG_AUTO_BSF;
     }
 
+    if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX && mov->flags & FF_MOV_FLAG_SKIP_SIDX) {
+        av_log(s, AV_LOG_WARNING, "Global SIDX enabled; Ignoring skip_sidx option\n");
+        mov->flags &= ~FF_MOV_FLAG_SKIP_SIDX;
+    }
+
     if (mov->flags & FF_MOV_FLAG_FASTSTART) {
         mov->reserved_moov_size = -1;
     }
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index fe605d1ad2fac..68d6f23a5a659 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -257,6 +257,7 @@ typedef struct MOVMuxContext {
 #define FF_MOV_FLAG_SKIP_TRAILER          (1 << 18)
 #define FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS  (1 << 19)
 #define FF_MOV_FLAG_FRAG_EVERY_FRAME      (1 << 20)
+#define FF_MOV_FLAG_SKIP_SIDX             (1 << 21)
 
 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
 
diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index d4369b49c2c11..c147fa72ed0e2 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -525,34 +525,34 @@ static int mpegps_read_packet(AVFormatContext *s,
     }
 
     es_type = m->psm_es_type[startcode & 0xff];
-        if (es_type == STREAM_TYPE_VIDEO_MPEG1) {
-            codec_id = AV_CODEC_ID_MPEG2VIDEO;
-            type     = AVMEDIA_TYPE_VIDEO;
-        } else if (es_type == STREAM_TYPE_VIDEO_MPEG2) {
-            codec_id = AV_CODEC_ID_MPEG2VIDEO;
-            type     = AVMEDIA_TYPE_VIDEO;
-        } else if (es_type == STREAM_TYPE_AUDIO_MPEG1 ||
-                   es_type == STREAM_TYPE_AUDIO_MPEG2) {
-            codec_id = AV_CODEC_ID_MP3;
-            type     = AVMEDIA_TYPE_AUDIO;
-        } else if (es_type == STREAM_TYPE_AUDIO_AAC) {
-            codec_id = AV_CODEC_ID_AAC;
-            type     = AVMEDIA_TYPE_AUDIO;
-        } else if (es_type == STREAM_TYPE_VIDEO_MPEG4) {
-            codec_id = AV_CODEC_ID_MPEG4;
-            type     = AVMEDIA_TYPE_VIDEO;
-        } else if (es_type == STREAM_TYPE_VIDEO_H264) {
-            codec_id = AV_CODEC_ID_H264;
-            type     = AVMEDIA_TYPE_VIDEO;
-        } else if (es_type == STREAM_TYPE_VIDEO_HEVC) {
-            codec_id = AV_CODEC_ID_HEVC;
-            type     = AVMEDIA_TYPE_VIDEO;
-        } else if (es_type == STREAM_TYPE_AUDIO_AC3) {
-            codec_id = AV_CODEC_ID_AC3;
-            type     = AVMEDIA_TYPE_AUDIO;
-        } else if (m->imkh_cctv && es_type == 0x91) {
-            codec_id = AV_CODEC_ID_PCM_MULAW;
-            type     = AVMEDIA_TYPE_AUDIO;
+    if (es_type == STREAM_TYPE_VIDEO_MPEG1) {
+        codec_id = AV_CODEC_ID_MPEG2VIDEO;
+        type     = AVMEDIA_TYPE_VIDEO;
+    } else if (es_type == STREAM_TYPE_VIDEO_MPEG2) {
+        codec_id = AV_CODEC_ID_MPEG2VIDEO;
+        type     = AVMEDIA_TYPE_VIDEO;
+    } else if (es_type == STREAM_TYPE_AUDIO_MPEG1 ||
+               es_type == STREAM_TYPE_AUDIO_MPEG2) {
+        codec_id = AV_CODEC_ID_MP3;
+        type     = AVMEDIA_TYPE_AUDIO;
+    } else if (es_type == STREAM_TYPE_AUDIO_AAC) {
+        codec_id = AV_CODEC_ID_AAC;
+        type     = AVMEDIA_TYPE_AUDIO;
+    } else if (es_type == STREAM_TYPE_VIDEO_MPEG4) {
+        codec_id = AV_CODEC_ID_MPEG4;
+        type     = AVMEDIA_TYPE_VIDEO;
+    } else if (es_type == STREAM_TYPE_VIDEO_H264) {
+        codec_id = AV_CODEC_ID_H264;
+        type     = AVMEDIA_TYPE_VIDEO;
+    } else if (es_type == STREAM_TYPE_VIDEO_HEVC) {
+        codec_id = AV_CODEC_ID_HEVC;
+        type     = AVMEDIA_TYPE_VIDEO;
+    } else if (es_type == STREAM_TYPE_AUDIO_AC3) {
+        codec_id = AV_CODEC_ID_AC3;
+        type     = AVMEDIA_TYPE_AUDIO;
+    } else if (m->imkh_cctv && es_type == 0x91) {
+        codec_id = AV_CODEC_ID_PCM_MULAW;
+        type     = AVMEDIA_TYPE_AUDIO;
     } else if (startcode >= 0x1e0 && startcode <= 0x1ef) {
         static const unsigned char avs_seqh[4] = { 0, 0, 1, 0xb0 };
         unsigned char buf[8];
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 4c6fa67fb83c1..1389288b7f9a2 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -364,12 +364,7 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
                 stream->id = ac3_id++;
             } else if (st->codecpar->codec_id == AV_CODEC_ID_DTS) {
                 stream->id = dts_id++;
-            } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE ||
-                       st->codecpar->codec_id == AV_CODEC_ID_PCM_DVD) {
-                if (st->codecpar->bits_per_coded_sample != 16) {
-                    av_log(ctx, AV_LOG_ERROR, "Only 16 bit LPCM streams can be muxed.\n");
-                    goto fail;
-                }
+            } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S16BE) {
                 stream->id = lpcm_id++;
                 for (j = 0; j < 4; j++) {
                     if (lpcm_freq_tab[j] == st->codecpar->sample_rate)
@@ -392,6 +387,26 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
                 stream->lpcm_header[1] = (st->codecpar->channels - 1) | (j << 4);
                 stream->lpcm_header[2] = 0x80;
                 stream->lpcm_align     = st->codecpar->channels * 2;
+            } else if (st->codecpar->codec_id == AV_CODEC_ID_PCM_DVD) {
+                int freq;
+
+                switch (st->codecpar->sample_rate) {
+                case 48000: freq = 0; break;
+                case 96000: freq = 1; break;
+                case 44100: freq = 2; break;
+                case 32000: freq = 3; break;
+                default:
+                    av_log(ctx, AV_LOG_ERROR, "Unsupported sample rate.\n");
+                    return AVERROR(EINVAL);
+                }
+
+                stream->lpcm_header[0] = 0x0c;
+                stream->lpcm_header[1] = (freq << 4) |
+                                         (((st->codecpar->bits_per_coded_sample - 16) / 4) << 6) |
+                                         st->codecpar->channels - 1;
+                stream->lpcm_header[2] = 0x80;
+                stream->id = lpcm_id++;
+                stream->lpcm_align = st->codecpar->channels * st->codecpar->bits_per_coded_sample / 8;
             } else {
                 stream->id = mpa_id++;
             }
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index edf6b5701df48..8f686393886e5 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -37,6 +37,9 @@
 #include "avio_internal.h"
 #include "mpeg.h"
 #include "isom.h"
+#if CONFIG_ICONV
+#include <iconv.h>
+#endif
 
 /* maximum size in which we look for synchronization if
  * synchronization is lost */
@@ -53,6 +56,9 @@
         (prev_dividend) = (dividend);                                          \
     } while (0)
 
+#define PROBE_PACKET_MAX_BUF 8192
+#define PROBE_PACKET_MARGIN 5
+
 enum MpegTSFilterType {
     MPEGTS_PES,
     MPEGTS_SECTION,
@@ -91,6 +97,7 @@ struct MpegTSFilter {
     int es_id;
     int last_cc; /* last cc code (-1 if first packet) */
     int64_t last_pcr;
+    int discard;
     enum MpegTSFilterType type;
     union {
         MpegTSPESFilter pes_filter;
@@ -590,28 +597,42 @@ static int analyze(const uint8_t *buf, int size, int packet_size,
     return best_score - FFMAX(stat_all - 10*best_score, 0)/10;
 }
 
-/* autodetect fec presence. Must have at least 1024 bytes  */
-static int get_packet_size(const uint8_t *buf, int size)
+/* autodetect fec presence */
+static int get_packet_size(AVFormatContext* s)
 {
     int score, fec_score, dvhs_score;
+    int margin;
+    int ret;
 
-    if (size < (TS_FEC_PACKET_SIZE * 5 + 1))
-        return AVERROR_INVALIDDATA;
+    /* init buffer to store stream data for probing */
+    uint8_t buf[PROBE_PACKET_MAX_BUF] = {0};
+    int buf_size = 0;
 
-    score      = analyze(buf, size, TS_PACKET_SIZE,      0);
-    dvhs_score = analyze(buf, size, TS_DVHS_PACKET_SIZE, 0);
-    fec_score  = analyze(buf, size, TS_FEC_PACKET_SIZE,  0);
-    av_log(NULL, AV_LOG_TRACE, "score: %d, dvhs_score: %d, fec_score: %d \n",
-            score, dvhs_score, fec_score);
-
-    if (score > fec_score && score > dvhs_score)
-        return TS_PACKET_SIZE;
-    else if (dvhs_score > score && dvhs_score > fec_score)
-        return TS_DVHS_PACKET_SIZE;
-    else if (score < fec_score && dvhs_score < fec_score)
-        return TS_FEC_PACKET_SIZE;
-    else
-        return AVERROR_INVALIDDATA;
+    while (buf_size < PROBE_PACKET_MAX_BUF) {
+        ret = avio_read_partial(s->pb, buf + buf_size, PROBE_PACKET_MAX_BUF - buf_size);
+        if (ret < 0)
+            return AVERROR_INVALIDDATA;
+        buf_size += ret;
+
+        score      = analyze(buf, buf_size, TS_PACKET_SIZE,      0);
+        dvhs_score = analyze(buf, buf_size, TS_DVHS_PACKET_SIZE, 0);
+        fec_score  = analyze(buf, buf_size, TS_FEC_PACKET_SIZE,  0);
+        av_log(s, AV_LOG_TRACE, "Probe: %d, score: %d, dvhs_score: %d, fec_score: %d \n",
+            buf_size, score, dvhs_score, fec_score);
+
+        margin = mid_pred(score, fec_score, dvhs_score);
+
+        if (buf_size < PROBE_PACKET_MAX_BUF)
+            margin += PROBE_PACKET_MARGIN; /*if buffer not filled */
+
+        if (score > margin)
+            return TS_PACKET_SIZE;
+        else if (dvhs_score > margin)
+            return TS_DVHS_PACKET_SIZE;
+        else if (fec_score > margin)
+            return TS_FEC_PACKET_SIZE;
+    }
+    return AVERROR_INVALIDDATA;
 }
 
 typedef struct SectionHeader {
@@ -673,6 +694,51 @@ static char *getstr8(const uint8_t **pp, const uint8_t *p_end)
         return NULL;
     if (len > p_end - p)
         return NULL;
+#if CONFIG_ICONV
+    if (len) {
+        const char *encodings[] = {
+            "ISO6937", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
+            "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-11",
+            "", "ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "", "", "", "",
+            "", "UCS-2BE", "KSC_5601", "GB2312", "UCS-2BE", "UTF-8", "", "",
+            "", "", "", "", "", "", "", ""
+        };
+        iconv_t cd;
+        char *in, *out;
+        size_t inlen = len, outlen = inlen * 6 + 1;
+        if (len >= 3 && p[0] == 0x10 && !p[1] && p[2] && p[2] <= 0xf && p[2] != 0xc) {
+            char iso8859[12];
+            snprintf(iso8859, sizeof(iso8859), "ISO-8859-%d", p[2]);
+            inlen -= 3;
+            in = (char *)p + 3;
+            cd = iconv_open("UTF-8", iso8859);
+        } else if (p[0] < 0x20) {
+            inlen -= 1;
+            in = (char *)p + 1;
+            cd = iconv_open("UTF-8", encodings[*p]);
+        } else {
+            in = (char *)p;
+            cd = iconv_open("UTF-8", encodings[0]);
+        }
+        if (cd == (iconv_t)-1)
+            goto no_iconv;
+        str = out = av_malloc(outlen);
+        if (!str) {
+            iconv_close(cd);
+            return NULL;
+        }
+        if (iconv(cd, &in, &inlen, &out, &outlen) == -1) {
+            iconv_close(cd);
+            av_freep(&str);
+            goto no_iconv;
+        }
+        iconv_close(cd);
+        *out = 0;
+        *pp = p + len;
+        return str;
+    }
+no_iconv:
+#endif
     str = av_malloc(len + 1);
     if (!str)
         return NULL;
@@ -918,7 +984,7 @@ static void new_data_packet(const uint8_t *buffer, int len, AVPacket *pkt)
 
 static int new_pes_packet(PESContext *pes, AVPacket *pkt)
 {
-    char *sd;
+    uint8_t *sd;
 
     av_init_packet(pkt);
 
@@ -1219,6 +1285,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
                         || pes->st->codecpar->codec_id == AV_CODEC_ID_DVB_SUBTITLE)
                     ) {
                     AVProgram *p = NULL;
+                    int pcr_found = 0;
                     while ((p = av_find_program_from_stream(pes->stream, p, pes->st->index))) {
                         if (p->pcr_pid != -1 && p->discard != AVDISCARD_ALL) {
                             MpegTSFilter *f = pes->ts->pids[p->pcr_pid];
@@ -1242,6 +1309,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
                                     // and the pcr error to this packet should be no more than 100 ms.
                                     // TODO: we should interpolate the PCR, not just use the last one
                                     int64_t pcr = f->last_pcr / 300;
+                                    pcr_found = 1;
                                     pes->st->pts_wrap_reference = st->pts_wrap_reference;
                                     pes->st->pts_wrap_behavior = st->pts_wrap_behavior;
                                     if (pes->dts == AV_NOPTS_VALUE || pes->dts < pcr) {
@@ -1258,6 +1326,15 @@ static int mpegts_push_data(MpegTSFilter *filter,
                             }
                         }
                     }
+
+                    if (!pcr_found) {
+                        av_log(pes->stream, AV_LOG_VERBOSE,
+                               "Forcing DTS/PTS to be unset for a "
+                               "non-trustworthy PES packet for PID %d as "
+                               "PCR hasn't been received yet.\n",
+                               pes->pid);
+                        pes->dts = pes->pts = AV_NOPTS_VALUE;
+                    }
                 }
             }
             break;
@@ -2002,6 +2079,50 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
             }
         }
         break;
+    case 0xfd: /* ARIB data coding type descriptor */
+        // STD-B24, fascicle 3, chapter 4 defines private_stream_1
+        // for captions
+        if (stream_type == STREAM_TYPE_PRIVATE_DATA) {
+            // This structure is defined in STD-B10 (part 1, listing 5.4 and
+            // part 2, 6.2.20).
+            // Listing of data_component_ids is in STD-B10, part 2, Annex J.
+            // Component tag limits are documented in TR-B14, fascicle 2,
+            // Vol. 3, Section 2, 4.2.8.1
+            int actual_component_tag = st->stream_identifier - 1; // NOTE(review): presumably undoes a +1 applied where stream_identifier is stored - confirm
+            int picked_profile = FF_PROFILE_UNKNOWN;
+            int data_component_id = get16(pp, desc_end);
+            if (data_component_id < 0)
+                return AVERROR_INVALIDDATA;
+
+            switch (data_component_id) {
+            case 0x0008:
+                // [0x30..0x37] are component tags utilized for
+                // non-mobile captioning service ("profile A").
+                if (actual_component_tag >= 0x30 &&
+                    actual_component_tag <= 0x37) {
+                    picked_profile = FF_PROFILE_ARIB_PROFILE_A;
+                }
+                break;
+            case 0x0012:
+                // component tag 0x87 signifies a mobile/partial reception
+                // (1seg) captioning service ("profile C").
+                if (actual_component_tag == 0x87) {
+                    picked_profile = FF_PROFILE_ARIB_PROFILE_C;
+                }
+                break;
+            default:
+                break;
+            }
+
+            if (picked_profile == FF_PROFILE_UNKNOWN) // no matching id/tag pair: leave the stream parameters untouched
+                break;
+
+            st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+            st->codecpar->codec_id   = AV_CODEC_ID_ARIB_CAPTION;
+            st->codecpar->profile    = picked_profile;
+            st->request_probe        = 0;
+        }
+        break;
     default:
         break;
     }
@@ -2463,8 +2584,6 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
     int64_t pos;
 
     pid = AV_RB16(packet + 1) & 0x1fff;
-    if (pid && discard_pid(ts, pid))
-        return 0;
     is_start = packet[1] & 0x40;
     tss = ts->pids[pid];
     if (ts->auto_guess && !tss && is_start) {
@@ -2473,6 +2592,10 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
     }
     if (!tss)
         return 0;
+    if (is_start)
+        tss->discard = discard_pid(ts, pid);
+    if (tss->discard)
+        return 0;
     ts->current_pid = pid;
 
     afc = (packet[3] >> 4) & 3;
@@ -2827,8 +2950,6 @@ static int mpegts_read_header(AVFormatContext *s)
 {
     MpegTSContext *ts = s->priv_data;
     AVIOContext *pb   = s->pb;
-    uint8_t buf[8 * 1024] = {0};
-    int len;
     int64_t pos, probesize = s->probesize;
 
     s->internal->prefer_codec_framerate = 1;
@@ -2836,10 +2957,8 @@ static int mpegts_read_header(AVFormatContext *s)
     if (ffio_ensure_seekback(pb, probesize) < 0)
         av_log(s, AV_LOG_WARNING, "Failed to allocate buffers for seekback\n");
 
-    /* read the first 8192 bytes to get packet size */
     pos = avio_tell(pb);
-    len = avio_read(pb, buf, sizeof(buf));
-    ts->raw_packet_size = get_packet_size(buf, len);
+    ts->raw_packet_size = get_packet_size(s);
     if (ts->raw_packet_size <= 0) {
         av_log(s, AV_LOG_WARNING, "Could not detect TS packet size, defaulting to non-FEC/DVHS\n");
         ts->raw_packet_size = TS_PACKET_SIZE;
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 3339e26d50714..fc0ea225c6cb1 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -54,8 +54,8 @@ typedef struct MpegTSSection {
 typedef struct MpegTSService {
     MpegTSSection pmt; /* MPEG-2 PMT table context */
     int sid;           /* service ID */
-    char *name;
-    char *provider_name;
+    uint8_t name[256];
+    uint8_t provider_name[256];
     int pcr_pid;
     int pcr_packet_count;
     int pcr_packet_period;
@@ -264,25 +264,21 @@ static void mpegts_write_pat(AVFormatContext *s)
                           data, q - data);
 }
 
-/* NOTE: !str is accepted for an empty string */
-static void putstr8(uint8_t **q_ptr, const char *str, int write_len)
+static void putbuf(uint8_t **q_ptr, const uint8_t *buf, size_t len)
 {
-    uint8_t *q;
-    int len;
+    memcpy(*q_ptr, buf, len); /* copy len raw bytes to the write cursor; no length prefix, no bounds check here */
+    *q_ptr += len;            /* advance the caller's cursor past the copied bytes */
+}
 
-    q = *q_ptr;
-    if (!str)
-        len = 0;
-    else
-        len = strlen(str);
-    if (write_len)
-        *q++ = len;
-    if (!str) {
-        *q_ptr = q;
-        return;
-    }
-    memcpy(q, str, len);
-    q     += len;
+static void put_registration_descriptor(uint8_t **q_ptr, uint32_t tag)
+{
+    uint8_t *q = *q_ptr;
+    *q++ = 0x05; /* MPEG-2 registration descriptor */
+    *q++ = 4;    /* descriptor payload length: the four tag bytes below */
+    *q++ = tag;       /* tag goes out least significant byte first, so a */
+    *q++ = tag >> 8;  /* MKTAG('a','b','c','d') value presumably lands   */
+    *q++ = tag >> 16; /* in the stream as 'a','b','c','d' (confirm MKTAG */
+    *q++ = tag >> 24; /* packs its first argument into the low byte)     */
     *q_ptr = q;
 }
 
@@ -412,14 +408,8 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
                 *q++=1; // 1 byte, all flags sets to 0
                 *q++=0; // omit all fields...
             }
-            if (st->codecpar->codec_id==AV_CODEC_ID_S302M) {
-                *q++ = 0x05; /* MPEG-2 registration descriptor*/
-                *q++ = 4;
-                *q++ = 'B';
-                *q++ = 'S';
-                *q++ = 'S';
-                *q++ = 'D';
-            }
+            if (st->codecpar->codec_id==AV_CODEC_ID_S302M)
+                put_registration_descriptor(&q, MKTAG('B', 'S', 'S', 'D'));
             if (st->codecpar->codec_id==AV_CODEC_ID_OPUS) {
                 /* 6 bytes registration descriptor, 4 bytes Opus audio descriptor */
                 if (q - data > SECTION_LENGTH - 6 - 4) {
@@ -427,12 +417,7 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
                     break;
                 }
 
-                *q++ = 0x05; /* MPEG-2 registration descriptor*/
-                *q++ = 4;
-                *q++ = 'O';
-                *q++ = 'p';
-                *q++ = 'u';
-                *q++ = 's';
+                put_registration_descriptor(&q, MKTAG('O', 'p', 'u', 's'));
 
                 *q++ = 0x7f; /* DVB extension descriptor */
                 *q++ = 2;
@@ -618,37 +603,24 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
         break;
         case AVMEDIA_TYPE_VIDEO:
             if (stream_type == STREAM_TYPE_VIDEO_DIRAC) {
-                *q++ = 0x05; /*MPEG-2 registration descriptor*/
-                *q++ = 4;
-                *q++ = 'd';
-                *q++ = 'r';
-                *q++ = 'a';
-                *q++ = 'c';
+                put_registration_descriptor(&q, MKTAG('d', 'r', 'a', 'c'));
             } else if (stream_type == STREAM_TYPE_VIDEO_VC1) {
-                *q++ = 0x05; /*MPEG-2 registration descriptor*/
-                *q++ = 4;
-                *q++ = 'V';
-                *q++ = 'C';
-                *q++ = '-';
-                *q++ = '1';
+                put_registration_descriptor(&q, MKTAG('V', 'C', '-', '1'));
+            } else if (stream_type == STREAM_TYPE_VIDEO_HEVC && s->strict_std_compliance <= FF_COMPLIANCE_NORMAL) {
+                put_registration_descriptor(&q, MKTAG('H', 'E', 'V', 'C'));
             }
             break;
         case AVMEDIA_TYPE_DATA:
             if (st->codecpar->codec_id == AV_CODEC_ID_SMPTE_KLV) {
-                *q++ = 0x05; /* MPEG-2 registration descriptor */
-                *q++ = 4;
-                *q++ = 'K';
-                *q++ = 'L';
-                *q++ = 'V';
-                *q++ = 'A';
+                put_registration_descriptor(&q, MKTAG('K', 'L', 'V', 'A'));
             } else if (st->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3) {
                 const char *tag = "ID3 ";
                 *q++ = 0x26; /* metadata descriptor */
                 *q++ = 13;
                 put16(&q, 0xffff);    /* metadata application format */
-                putstr8(&q, tag, 0);
+                putbuf(&q, tag, strlen(tag));
                 *q++ = 0xff;        /* metadata format */
-                putstr8(&q, tag, 0);
+                putbuf(&q, tag, strlen(tag));
                 *q++ = 0;            /* metadata service ID */
                 *q++ = 0xF;          /* metadata_locator_record_flag|MPEG_carriage_flags|reserved */
             }
@@ -695,8 +667,8 @@ static void mpegts_write_sdt(AVFormatContext *s)
         desc_len_ptr = q;
         q++;
         *q++         = ts->service_type;
-        putstr8(&q, service->provider_name, 1);
-        putstr8(&q, service->name, 1);
+        putbuf(&q, service->provider_name, service->provider_name[0] + 1);
+        putbuf(&q, service->name, service->name[0] + 1);
         desc_len_ptr[0] = q - desc_len_ptr - 1;
 
         /* fill descriptor length */
@@ -709,10 +681,47 @@ static void mpegts_write_sdt(AVFormatContext *s)
                           data, q - data);
 }
 
-static MpegTSService *mpegts_add_service(MpegTSWrite *ts, int sid,
+/* Store str in buf as a length-prefixed string: buf[0] holds the number of bytes
+ * that follow it. A NULL str is treated as "". Valid UTF-8 containing at least one
+ * multibyte sequence is prefixed with 0x15 to signal UTF-8; a string that is already
+ * encoded (first byte below 0x20), is not valid UTF-8 or is plain ASCII is kept as is.
+ * buf must have room for 256 bytes. Returns 0, or AVERROR(EINVAL) if str is too long. */
+static int encode_str8(uint8_t *buf, const char *str)
+{
+    size_t str_len;
+    if (!str)
+        str = "";
+    str_len = strlen(str);
+    if (str[0] && (unsigned)str[0] >= 0x20) {   /* Make sure the string is not already encoded. */
+        const uint8_t *q = (const uint8_t *)str; /* char * to uint8_t * needs an explicit cast */
+        int has_multibyte = 0;
+        while (*q) {
+            uint32_t code;
+            GET_UTF8(code, *q++, goto invalid;) /* Is it valid UTF-8? */
+            has_multibyte |= (code > 127);      /* Does it have multibyte UTF-8 chars in it? */
+        }
+        if (has_multibyte) {                    /* If we have multibyte chars and valid UTF-8, then encode as such! */
+            if (str_len > 254)                  /* 0x15 marker + text must fit the one-byte length */
+                return AVERROR(EINVAL);
+            buf[0] = str_len + 1;
+            buf[1] = 0x15;
+            memcpy(&buf[2], str, str_len);
+            return 0;
+        }
+    }
+invalid: /* Otherwise let's just encode the string as is! */
+    if (str_len > 255)
+        return AVERROR(EINVAL);
+    buf[0] = str_len;
+    memcpy(&buf[1], str, str_len);
+    return 0;
+}
+
+static MpegTSService *mpegts_add_service(AVFormatContext *s, int sid,
                                          const char *provider_name,
                                          const char *name)
 {
+    MpegTSWrite *ts = s->priv_data;
     MpegTSService *service;
 
     service = av_mallocz(sizeof(MpegTSService));
@@ -721,17 +730,16 @@ static MpegTSService *mpegts_add_service(MpegTSWrite *ts, int sid,
     service->pmt.pid       = ts->pmt_start_pid + ts->nb_services;
     service->sid           = sid;
     service->pcr_pid       = 0x1fff;
-    service->provider_name = av_strdup(provider_name);
-    service->name          = av_strdup(name);
-    if (!service->provider_name || !service->name)
+    if (encode_str8(service->provider_name, provider_name) < 0 ||
+        encode_str8(service->name, name) < 0) {
+        av_log(s, AV_LOG_ERROR, "Too long service or provider name\n");
         goto fail;
+    }
     if (av_dynarray_add_nofree(&ts->services, &ts->nb_services, service) < 0)
         goto fail;
 
     return service;
 fail:
-    av_freep(&service->provider_name);
-    av_freep(&service->name);
     av_free(service);
     return NULL;
 }
@@ -790,7 +798,7 @@ static int mpegts_init(AVFormatContext *s)
         service_name  = title ? title->value : DEFAULT_SERVICE_NAME;
         provider      = av_dict_get(s->metadata, "service_provider", NULL, 0);
         provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
-        service       = mpegts_add_service(ts, ts->service_id,
+        service       = mpegts_add_service(s, ts->service_id,
                                            provider_name, service_name);
 
         if (!service)
@@ -809,7 +817,7 @@ static int mpegts_init(AVFormatContext *s)
             service_name  = title ? title->value : DEFAULT_SERVICE_NAME;
             provider      = av_dict_get(program->metadata, "service_provider", NULL, 0);
             provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
-            service       = mpegts_add_service(ts, program->id,
+            service       = mpegts_add_service(s, program->id,
                                                provider_name, service_name);
 
             if (!service)
@@ -1523,7 +1531,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     int64_t dts = pkt->dts, pts = pkt->pts;
     int opus_samples = 0;
     int side_data_size;
-    char *side_data = NULL;
+    uint8_t *side_data = NULL;
     int stream_id = -1;
 
     side_data = av_packet_get_side_data(pkt,
@@ -1839,8 +1847,6 @@ static void mpegts_deinit(AVFormatContext *s)
 
     for (i = 0; i < ts->nb_services; i++) {
         service = ts->services[i];
-        av_freep(&service->provider_name);
-        av_freep(&service->name);
         av_freep(&service);
     }
     av_freep(&ts->services);
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index f49890e140fa8..3c3e4767e2283 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -1328,6 +1328,7 @@ static const MXFCodecUL mxf_picture_essence_container_uls[] = {
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x05,0x00,0x00 }, 14,   AV_CODEC_ID_RAWVIDEO, NULL, 15, RawVWrap }, /* uncompressed picture */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0a,0x0e,0x0f,0x03,0x01,0x02,0x20,0x01,0x01 }, 15,     AV_CODEC_ID_HQ_HQA },
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0a,0x0e,0x0f,0x03,0x01,0x02,0x20,0x02,0x01 }, 15,        AV_CODEC_ID_HQX },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0a,0x0e,0x15,0x00,0x04,0x02,0x10,0x00,0x01 }, 16,       AV_CODEC_ID_HEVC, NULL, 15 }, /* Canon XF-HEVC */
     { { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0xff,0x4b,0x46,0x41,0x41,0x00,0x0d,0x4d,0x4f }, 14,   AV_CODEC_ID_RAWVIDEO }, /* Legacy ?? Uncompressed Picture */
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,      AV_CODEC_ID_NONE },
 };
@@ -2070,15 +2071,15 @@ static int mxf_parse_physical_source_package(MXFContext *mxf, MXFTrack *source_t
                 continue;
             }
 
-        if (physical_track->edit_rate.num <= 0 ||
-            physical_track->edit_rate.den <= 0) {
-            av_log(mxf->fc, AV_LOG_WARNING,
-                   "Invalid edit rate (%d/%d) found on structural"
-                   " component #%d, defaulting to 25/1\n",
-                   physical_track->edit_rate.num,
-                   physical_track->edit_rate.den, i);
-            physical_track->edit_rate = (AVRational){25, 1};
-        }
+            if (physical_track->edit_rate.num <= 0 ||
+                physical_track->edit_rate.den <= 0) {
+                av_log(mxf->fc, AV_LOG_WARNING,
+                       "Invalid edit rate (%d/%d) found on structural"
+                       " component #%d, defaulting to 25/1\n",
+                       physical_track->edit_rate.num,
+                       physical_track->edit_rate.den, i);
+                physical_track->edit_rate = (AVRational){25, 1};
+            }
 
             for (k = 0; k < physical_track->sequence->structural_components_count; k++) {
                 if (!(mxf_tc = mxf_resolve_timecode_component(mxf, &physical_track->sequence->structural_components_refs[k])))
@@ -2432,6 +2433,18 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
                 default:
                     av_log(mxf->fc, AV_LOG_INFO, "Unknown frame layout type: %d\n", descriptor->frame_layout);
             }
+
+            if (st->codecpar->codec_id == AV_CODEC_ID_PRORES) {
+                switch (descriptor->essence_codec_ul[14]) {
+                case 1: st->codecpar->codec_tag = MKTAG('a','p','c','o'); break;
+                case 2: st->codecpar->codec_tag = MKTAG('a','p','c','s'); break;
+                case 3: st->codecpar->codec_tag = MKTAG('a','p','c','n'); break;
+                case 4: st->codecpar->codec_tag = MKTAG('a','p','c','h'); break;
+                case 5: st->codecpar->codec_tag = MKTAG('a','p','4','h'); break;
+                case 6: st->codecpar->codec_tag = MKTAG('a','p','4','x'); break;
+                }
+            }
+
             if (st->codecpar->codec_id == AV_CODEC_ID_RAWVIDEO) {
                 st->codecpar->format = descriptor->pix_fmt;
                 if (st->codecpar->format == AV_PIX_FMT_NONE) {
@@ -2544,23 +2557,24 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
 static int64_t mxf_timestamp_to_int64(uint64_t timestamp)
 {
     struct tm time = { 0 };
+    int msecs; /* sub-second part of the timestamp, scaled to milliseconds below */
     time.tm_year = (timestamp >> 48) - 1900;
     time.tm_mon  = (timestamp >> 40 & 0xFF) - 1;
     time.tm_mday = (timestamp >> 32 & 0xFF);
     time.tm_hour = (timestamp >> 24 & 0xFF);
     time.tm_min  = (timestamp >> 16 & 0xFF);
     time.tm_sec  = (timestamp >> 8  & 0xFF);
+    msecs        = (timestamp & 0xFF) * 4; /* lowest byte, presumably counted in units of 4 ms */
 
-    /* msvcrt versions of strftime calls the invalid parameter handler
-     * (aborting the process if one isn't set) if the parameters are out
-     * of range. */
+    /* Clip values for legacy reasons. Maybe we should return error instead? */
     time.tm_mon  = av_clip(time.tm_mon,  0, 11);
     time.tm_mday = av_clip(time.tm_mday, 1, 31);
     time.tm_hour = av_clip(time.tm_hour, 0, 23);
     time.tm_min  = av_clip(time.tm_min,  0, 59);
     time.tm_sec  = av_clip(time.tm_sec,  0, 59);
+    msecs        = av_clip(msecs, 0, 999); /* 255 * 4 can reach 1020; keep it below one second */
 
-    return (int64_t)av_timegm(&time) * 1000000;
+    return (int64_t)av_timegm(&time) * 1000000 + msecs * 1000; /* presumably microseconds: seconds * 1e6 + ms * 1e3 */
 }
 
 #define SET_STR_METADATA(pb, name, str) do { \
@@ -2578,7 +2592,7 @@ static int64_t mxf_timestamp_to_int64(uint64_t timestamp)
 
 #define SET_TS_METADATA(pb, name, var, str) do { \
     var = avio_rb64(pb); \
-    if ((ret = avpriv_dict_set_timestamp(&s->metadata, name, mxf_timestamp_to_int64(var)) < 0)) \
+    if (var && (ret = avpriv_dict_set_timestamp(&s->metadata, name, mxf_timestamp_to_int64(var))) < 0) /* a zero timestamp is skipped; ret holds the call's result, not the comparison */ \
         return ret; \
 } while (0)
 
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index a2f68dd4ed542..8c6db94865c99 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -142,23 +142,11 @@ enum ULIndex {
     INDEX_DV100_1080_50,
     INDEX_DV100_720_60,
     INDEX_DV100_720_50,
-    INDEX_DNXHD_1080p_10bit_HIGH,
-    INDEX_DNXHD_1080p_8bit_MEDIUM,
-    INDEX_DNXHD_1080p_8bit_HIGH,
-    INDEX_DNXHD_1080i_10bit_HIGH,
-    INDEX_DNXHD_1080i_8bit_MEDIUM,
-    INDEX_DNXHD_1080i_8bit_HIGH,
-    INDEX_DNXHD_720p_10bit,
-    INDEX_DNXHD_720p_8bit_HIGH,
-    INDEX_DNXHD_720p_8bit_MEDIUM,
-    INDEX_DNXHD_720p_8bit_LOW,
-    INDEX_DNXHR_LB,
-    INDEX_DNXHR_SQ,
-    INDEX_DNXHR_HQ,
-    INDEX_DNXHR_HQX,
-    INDEX_DNXHR_444,
+    INDEX_DNXHD,
     INDEX_JPEG2000,
     INDEX_H264,
+    INDEX_S436M,
+    INDEX_PRORES,
 };
 
 static const struct {
@@ -169,9 +157,10 @@ static const struct {
     { AV_CODEC_ID_PCM_S24LE,  INDEX_AES3 },
     { AV_CODEC_ID_PCM_S16LE,  INDEX_AES3 },
     { AV_CODEC_ID_DVVIDEO,    INDEX_DV },
-    { AV_CODEC_ID_DNXHD,      INDEX_DNXHD_1080p_10bit_HIGH },
+    { AV_CODEC_ID_DNXHD,      INDEX_DNXHD },
     { AV_CODEC_ID_JPEG2000,   INDEX_JPEG2000 },
     { AV_CODEC_ID_H264,       INDEX_H264 },
+    { AV_CODEC_ID_PRORES,     INDEX_PRORES },
     { AV_CODEC_ID_NONE }
 };
 
@@ -307,81 +296,11 @@ static const MXFContainerEssenceEntry mxf_essence_container_uls[] = {
       { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x18,0x01,0x01,0x00 },
       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x02,0x02,0x02,0x02,0x08,0x00 },
       mxf_write_cdci_desc },
-    // DNxHD 1080p 10bit high
+    // DNxHD
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
       { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x01,0x00,0x00 },
       mxf_write_cdci_desc },
-    // DNxHD 1080p 8bit medium
-    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x03,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 1080p 8bit high
-    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x04,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 1080i 10bit high
-    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x07,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 1080i 8bit medium
-    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x08,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 1080i 8bit high
-    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0D,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x09,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 720p 10bit
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x10,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 720p 8bit high
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x11,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 720p 8bit medium
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x12,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHD 720p 8bit low
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x71,0x13,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHR LB - CID 1274
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x28,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHR SQ - CID 1273
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x27,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHR HQ - CID 1272
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x26,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHR HQX - CID 1271
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x25,0x00,0x00 },
-      mxf_write_cdci_desc },
-    // DNxHR 444 - CID 1270
-    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x05,0x00 },
-      { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0D,0x04,0x01,0x02,0x02,0x71,0x24,0x00,0x00 },
-      mxf_write_cdci_desc },
     // JPEG2000
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 },
       { 0x06,0x0e,0x2b,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x08,0x00 },
@@ -397,6 +316,11 @@ static const MXFContainerEssenceEntry mxf_essence_container_uls[] = {
       { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0D,0x01,0x03,0x01,0x17,0x01,0x02,0x00 },
       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x01,0x01,0x5C,0x00 },
       mxf_write_s436m_anc_desc },
+    // ProRes
+    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x0d,0x01,0x03,0x01,0x02,0x1c,0x01,0x00 },
+      { 0x06,0x0E,0x2B,0x34,0x01,0x02,0x01,0x01,0x0d,0x01,0x03,0x01,0x15,0x01,0x17,0x00 },
+      { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x03,0x00 },
+      mxf_write_cdci_desc },
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
       { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
       { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
@@ -1363,7 +1287,7 @@ static int64_t mxf_write_cdci_common(AVFormatContext *s, AVStream *st, const UID
     default:   f1 =  0; f2 =   0; break;
     }
 
-    if (!sc->interlaced) {
+    if (!sc->interlaced && f2) {
         f2  = 0;
         f1 *= 2;
     }
@@ -2028,90 +1952,118 @@ static int mxf_write_partition(AVFormatContext *s, int bodysid,
     return 0;
 }
 
-static int mxf_parse_dnxhd_frame(AVFormatContext *s, AVStream *st,
-AVPacket *pkt)
+static const struct { // lookup table scanned linearly by mxf_parse_prores_frame()
+    int profile; // FF_PROFILE_PRORES_* value compared against st->codecpar->profile
+    UID codec_ul; // codec UL that sc->codec_ul is pointed at when the profile matches
+} mxf_prores_codec_uls[] = { // the six ULs below differ only in their second-to-last byte (0x01..0x06)
+    { FF_PROFILE_PRORES_PROXY,    { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x01,0x00 } },
+    { FF_PROFILE_PRORES_LT,       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x02,0x00 } },
+    { FF_PROFILE_PRORES_STANDARD, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x03,0x00 } },
+    { FF_PROFILE_PRORES_HQ,       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x04,0x00 } },
+    { FF_PROFILE_PRORES_4444,     { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x05,0x00 } },
+    { FF_PROFILE_PRORES_XQ,       { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0d,0x04,0x01,0x02,0x02,0x03,0x06,0x06,0x00 } },
+};
+
+static int mxf_parse_prores_frame(AVFormatContext *s, AVStream *st, AVPacket *pkt) // returns 1 on success, 0 when the stream profile has no entry in mxf_prores_codec_uls
 {
     MXFContext *mxf = s->priv_data;
     MXFStreamContext *sc = st->priv_data;
+    int i, profile;
+
+    if (mxf->header_written) // stream parameters are only derived before the header is written
+        return 1;
+
+    sc->codec_ul = NULL; // cleared first so a failed lookup is detectable after the loop
+    profile = st->codecpar->profile;
+    for (i = 0; i < FF_ARRAY_ELEMS(mxf_prores_codec_uls); i++) {
+        if (profile == mxf_prores_codec_uls[i].profile) {
+            sc->codec_ul = &mxf_prores_codec_uls[i].codec_ul; // points into the static table, nothing is copied
+            break;
+        }
+    }
+    if (!sc->codec_ul) // no table entry for this profile
+        return 0; // mxf_write_packet() treats 0 as an error and logs "could not get prores profile"
+
+    sc->frame_size = pkt->size; // frame size is taken from this packet's size
+
+    return 1;
+}
+
+static const struct { // lookup table scanned linearly by mxf_parse_dnxhd_frame()
     int cid;
-    uint8_t* header_cid;
-    int frame_size = 0;
+    UID codec_ul; // codec UL that sc->codec_ul is pointed at when the CID matches
+} mxf_dnxhd_codec_uls[] = { // keyed by the 32-bit big-endian CID read from packet offset 0x28
+    { 1235, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x01,0x00,0x00 } }, // 1080p 10bit HIGH
+    { 1237, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x03,0x00,0x00 } }, // 1080p 8bit MED
+    { 1238, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x04,0x00,0x00 } }, // 1080p 8bit HIGH
+    { 1241, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x07,0x00,0x00 } }, // 1080i 10bit HIGH
+    { 1242, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x08,0x00,0x00 } }, // 1080i 8bit MED
+    { 1243, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x09,0x00,0x00 } }, // 1080i 8bit HIGH
+    { 1244, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x0a,0x00,0x00 } }, // 1080i 8bit TR
+    { 1250, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x10,0x00,0x00 } }, // 720p 10bit
+    { 1251, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x11,0x00,0x00 } }, // 720p 8bit HIGH
+    { 1252, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x12,0x00,0x00 } }, // 720p 8bit MED
+    { 1253, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x13,0x00,0x00 } }, // 720p 8bit LOW
+    { 1256, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x16,0x00,0x00 } }, // 1080p 10bit 444
+    { 1258, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x18,0x00,0x00 } }, // 720p 8bit TR
+    { 1259, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x19,0x00,0x00 } }, // 1080p 8bit TR
+    { 1260, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x1a,0x00,0x00 } }, // 1080i 8bit TR MBAFF
+    { 1270, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x24,0x00,0x00 } }, // DNXHR 444
+    { 1271, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x25,0x00,0x00 } }, // DNXHR HQX
+    { 1272, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x26,0x00,0x00 } }, // DNXHR HQ
+    { 1273, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x27,0x00,0x00 } }, // DNXHR SQ
+    { 1274, { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0a,0x04,0x01,0x02,0x02,0x71,0x28,0x00,0x00 } }, // DNXHR LB
+};
+
+static int mxf_parse_dnxhd_frame(AVFormatContext *s, AVStream *st, AVPacket *pkt)
+{
+    MXFContext *mxf = s->priv_data;
+    MXFStreamContext *sc = st->priv_data;
+    int i, cid, frame_size = 0;
 
     if (mxf->header_written)
         return 1;
 
     if (pkt->size < 43)
-        return -1;
+        return 0;
 
-    header_cid = pkt->data + 0x28;
-    cid = header_cid[0] << 24 | header_cid[1] << 16 | header_cid[2] << 8 | header_cid[3];
+    sc->codec_ul = NULL;
+    cid = AV_RB32(pkt->data + 0x28);
+    for (i = 0; i < FF_ARRAY_ELEMS(mxf_dnxhd_codec_uls); i++) {
+        if (cid == mxf_dnxhd_codec_uls[i].cid) {
+            sc->codec_ul = &mxf_dnxhd_codec_uls[i].codec_ul;
+            break;
+        }
+    }
+    if (!sc->codec_ul)
+        return 0;
+
+    sc->component_depth = 0;
+    switch (pkt->data[0x21] >> 5) {
+    case 1: sc->component_depth = 8; break;
+    case 2: sc->component_depth = 10; break;
+    case 3: sc->component_depth = 12; break;
+    }
+    if (!sc->component_depth)
+        return 0;
 
     if ((frame_size = avpriv_dnxhd_get_frame_size(cid)) == DNXHD_VARIABLE) {
         frame_size = avpriv_dnxhd_get_hr_frame_size(cid, st->codecpar->width, st->codecpar->height);
     }
-
     if (frame_size < 0)
-        return -1;
+        return 0;
+
     if ((sc->interlaced = avpriv_dnxhd_get_interlaced(cid)) < 0)
-        return AVERROR_INVALIDDATA;
+        return 0;
 
-    switch (cid) {
-    case 1235:
-        sc->index = INDEX_DNXHD_1080p_10bit_HIGH;
-        sc->component_depth = 10;
-        break;
-    case 1237:
-        sc->index = INDEX_DNXHD_1080p_8bit_MEDIUM;
-        break;
-    case 1238:
-        sc->index = INDEX_DNXHD_1080p_8bit_HIGH;
-        break;
-    case 1241:
-        sc->index = INDEX_DNXHD_1080i_10bit_HIGH;
-        sc->component_depth = 10;
-        break;
-    case 1242:
-        sc->index = INDEX_DNXHD_1080i_8bit_MEDIUM;
-        break;
-    case 1243:
-        sc->index = INDEX_DNXHD_1080i_8bit_HIGH;
-        break;
-    case 1250:
-        sc->index = INDEX_DNXHD_720p_10bit;
-        sc->component_depth = 10;
-        break;
-    case 1251:
-        sc->index = INDEX_DNXHD_720p_8bit_HIGH;
-        break;
-    case 1252:
-        sc->index = INDEX_DNXHD_720p_8bit_MEDIUM;
-        break;
-    case 1253:
-        sc->index = INDEX_DNXHD_720p_8bit_LOW;
-        break;
-    case 1274:
-        sc->index = INDEX_DNXHR_LB;
-        break;
-    case 1273:
-        sc->index = INDEX_DNXHR_SQ;
-        break;
-    case 1272:
-        sc->index = INDEX_DNXHR_HQ;
-        break;
-    case 1271:
-        sc->index = INDEX_DNXHR_HQX;
-        sc->component_depth = st->codecpar->bits_per_raw_sample;
-        break;
-    case 1270:
-        sc->index = INDEX_DNXHR_444;
-        sc->component_depth = st->codecpar->bits_per_raw_sample;
-        break;
-    default:
-        return -1;
+    if (cid >= 1270) { // RI raster
+        av_reduce(&sc->aspect_ratio.num, &sc->aspect_ratio.den,
+                  st->codecpar->width, st->codecpar->height,
+                  INT_MAX);
+    } else {
+        sc->aspect_ratio = (AVRational){ 16, 9 };
     }
 
-    sc->codec_ul = &mxf_essence_container_uls[sc->index].codec_ul;
-    sc->aspect_ratio = (AVRational){ 16, 9 };
     sc->frame_size = pkt->size;
 
     return 1;
@@ -2380,8 +2332,9 @@ static int mxf_parse_mpeg2_frame(AVFormatContext *s, AVStream *st,
     return !!sc->codec_ul;
 }
 
-static uint64_t mxf_parse_timestamp(time_t timestamp)
+static uint64_t mxf_parse_timestamp(int64_t timestamp64)
 {
+    time_t timestamp = timestamp64 / 1000000;
     struct tm tmbuf;
     struct tm *time = gmtime_r(&timestamp, &tmbuf);
     if (!time)
@@ -2391,7 +2344,8 @@ static uint64_t mxf_parse_timestamp(time_t timestamp)
            (uint64_t) time->tm_mday       << 32 |
                       time->tm_hour       << 24 |
                       time->tm_min        << 16 |
-                      time->tm_sec        << 8;
+                      time->tm_sec        << 8  |
+                      (timestamp64 % 1000000) / 4000;
 }
 
 static void mxf_gen_umid(AVFormatContext *s)
@@ -2460,6 +2414,11 @@ static int mxf_write_header(AVFormatContext *s)
             sc->v_chroma_sub_sample = 2;
             sc->color_siting = 0xFF;
 
+            if (st->codecpar->sample_aspect_ratio.num && st->codecpar->sample_aspect_ratio.den) {
+                sc->aspect_ratio = av_mul_q(st->codecpar->sample_aspect_ratio,
+                                            av_make_q(st->codecpar->width, st->codecpar->height));
+            }
+
             if (pix_desc) {
                 sc->component_depth     = pix_desc->comp[0].depth;
                 sc->h_chroma_sub_sample = 1 << pix_desc->log2_chroma_w;
@@ -2575,7 +2534,7 @@ static int mxf_write_header(AVFormatContext *s)
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
             AVDictionaryEntry *e = av_dict_get(st->metadata, "data_type", NULL, 0);
             if (e && !strcmp(e->value, "vbi_vanc_smpte_436M")) {
-                sc->index = 38;
+                sc->index = INDEX_S436M;
             } else {
                 av_log(s, AV_LOG_ERROR, "track %d: unsupported data type\n", i);
                 return -1;
@@ -2623,7 +2582,7 @@ static int mxf_write_header(AVFormatContext *s)
             sc->order = AV_RB32(sc->track_essence_element_key+12);
     }
 
-    if (ff_parse_creation_time_metadata(s, &timestamp, 1) > 0)
+    if (ff_parse_creation_time_metadata(s, &timestamp, 0) > 0)
         mxf->timestamp = mxf_parse_timestamp(timestamp);
     mxf->duration = -1;
 
@@ -2828,6 +2787,11 @@ static int mxf_write_packet(AVFormatContext *s, AVPacket *pkt)
             av_log(s, AV_LOG_ERROR, "could not get dnxhd profile\n");
             return -1;
         }
+    } else if (st->codecpar->codec_id == AV_CODEC_ID_PRORES) {
+        if (!mxf_parse_prores_frame(s, st, pkt)) {
+            av_log(s, AV_LOG_ERROR, "could not get prores profile\n");
+            return -1;
+        }
     } else if (st->codecpar->codec_id == AV_CODEC_ID_DVVIDEO) {
         if (!mxf_parse_dv_frame(s, st, pkt)) {
             av_log(s, AV_LOG_ERROR, "could not get dv profile\n");
@@ -3078,7 +3042,7 @@ static int mxf_interleave(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int
 }
 
 #define MXF_COMMON_OPTIONS \
-    { "signal_standard", "Force/set Sigal Standard",\
+    { "signal_standard", "Force/set Signal Standard",\
       offsetof(MXFContext, signal_standard), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 7, AV_OPT_FLAG_ENCODING_PARAM, "signal_standard"},\
     { "bt601", "ITU-R BT.601 and BT.656, also SMPTE 125M (525 and 625 line interlaced)",\
       0, AV_OPT_TYPE_CONST, {.i64 = 1}, -1, 7, AV_OPT_FLAG_ENCODING_PARAM, "signal_standard"},\
@@ -3131,6 +3095,8 @@ static const AVOption opatom_options[] = {
     { "mxf_audio_edit_rate", "Audio edit rate for timecode",
         offsetof(MXFContext, audio_edit_rate), AV_OPT_TYPE_RATIONAL, {.dbl=25}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     MXF_COMMON_OPTIONS
+    { "store_user_comments", "",
+      offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/nut.c b/libavformat/nut.c
index e65f42438bfad..4fbbcb1d2663c 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c
@@ -143,6 +143,11 @@ const AVCodecTag ff_nut_video_tags[] = {
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '4',   0,  10) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(10,    0, '4', 'Y') },
 
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '4',   0,  12) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG(12,    0, '4', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '4',  10,  12) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG(12,   10, '4', 'Y') },
+
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '1',   0,  12) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(12,    0, '1', 'Y') },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '1',   0,  16) },
diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index 27440c88d46de..056ef59d009c3 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -582,7 +582,7 @@ static int decode_info_header(NUTContext *nut)
             if (stream_id_plus1 && !strcmp(name, "r_frame_rate")) {
                 sscanf(str_value, "%d/%d", &st->r_frame_rate.num, &st->r_frame_rate.den);
                 if (st->r_frame_rate.num >= 1000LL*st->r_frame_rate.den ||
-                    st->r_frame_rate.num < 0 || st->r_frame_rate.num < 0)
+                    st->r_frame_rate.num < 0 || st->r_frame_rate.den < 0)
                     st->r_frame_rate.num = st->r_frame_rate.den = 0;
                 continue;
             }
@@ -1016,9 +1016,9 @@ static int decode_frame_header(NUTContext *nut, int64_t *pts, int *stream_id,
     }
     stc = &nut->stream[*stream_id];
     if (flags & FLAG_CODED_PTS) {
-        int coded_pts = ffio_read_varlen(bc);
+        int64_t coded_pts = ffio_read_varlen(bc);
         // FIXME check last_pts validity?
-        if (coded_pts < (1 << stc->msb_pts_shift)) {
+        if (coded_pts < (1LL << stc->msb_pts_shift)) {
             *pts = ff_lsb2full(stc, coded_pts);
         } else
             *pts = coded_pts - (1LL << stc->msb_pts_shift);
diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c
index a92ff55c01373..e9a3bb49db01b 100644
--- a/libavformat/nutenc.c
+++ b/libavformat/nutenc.c
@@ -1172,7 +1172,7 @@ static int nut_write_trailer(AVFormatContext *s)
 
     ret = avio_open_dyn_buf(&dyn_bc);
     if (ret >= 0 && nut->sp_count) {
-        av_assert1(nut->write_index);
+        av_assert1(nut->write_index); // sp_count should be 0 if no index is going to be written
         write_index(nut, dyn_bc);
         put_packet(nut, bc, dyn_bc, 1, INDEX_STARTCODE);
     }
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 10c4eda06212c..06021c4f4ba83 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -291,7 +291,8 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st,
 }
 
 static uint8_t *ogg_write_vorbiscomment(int64_t offset, int bitexact,
-                                        int *header_len, AVDictionary **m, int framing_bit)
+                                        int *header_len, AVDictionary **m, int framing_bit,
+                                        AVChapter **chapters, unsigned int nb_chapters)
 {
     const char *vendor = bitexact ? "ffmpeg" : LIBAVFORMAT_IDENT;
     int64_t size;
@@ -299,7 +300,7 @@ static uint8_t *ogg_write_vorbiscomment(int64_t offset, int bitexact,
 
     ff_metadata_conv(m, ff_vorbiscomment_metadata_conv, NULL);
 
-    size = offset + ff_vorbiscomment_length(*m, vendor) + framing_bit;
+    size = offset + ff_vorbiscomment_length(*m, vendor, chapters, nb_chapters) + framing_bit;
     if (size > INT_MAX)
         return NULL;
     p = av_mallocz(size);
@@ -308,7 +309,7 @@ static uint8_t *ogg_write_vorbiscomment(int64_t offset, int bitexact,
     p0 = p;
 
     p += offset;
-    ff_vorbiscomment_write(&p, m, vendor);
+    ff_vorbiscomment_write(&p, m, vendor, chapters, nb_chapters);
     if (framing_bit)
         bytestream_put_byte(&p, 1);
 
@@ -342,7 +343,7 @@ static int ogg_build_flac_headers(AVCodecParameters *par,
     bytestream_put_buffer(&p, par->extradata, FLAC_STREAMINFO_SIZE);
 
     // second packet: VorbisComment
-    p = ogg_write_vorbiscomment(4, bitexact, &oggstream->header_len[1], m, 0);
+    p = ogg_write_vorbiscomment(4, bitexact, &oggstream->header_len[1], m, 0, NULL, 0);
     if (!p)
         return AVERROR(ENOMEM);
     oggstream->header[1] = p;
@@ -373,7 +374,7 @@ static int ogg_build_speex_headers(AVCodecParameters *par,
     AV_WL32(&oggstream->header[0][68], 0);  // set extra_headers to 0
 
     // second packet: VorbisComment
-    p = ogg_write_vorbiscomment(0, bitexact, &oggstream->header_len[1], m, 0);
+    p = ogg_write_vorbiscomment(0, bitexact, &oggstream->header_len[1], m, 0, NULL, 0);
     if (!p)
         return AVERROR(ENOMEM);
     oggstream->header[1] = p;
@@ -385,7 +386,8 @@ static int ogg_build_speex_headers(AVCodecParameters *par,
 
 static int ogg_build_opus_headers(AVCodecParameters *par,
                                   OGGStreamContext *oggstream, int bitexact,
-                                  AVDictionary **m)
+                                  AVDictionary **m, AVChapter **chapters,
+                                  unsigned int nb_chapters)
 {
     uint8_t *p;
 
@@ -401,7 +403,7 @@ static int ogg_build_opus_headers(AVCodecParameters *par,
     bytestream_put_buffer(&p, par->extradata, par->extradata_size);
 
     /* second packet: VorbisComment */
-    p = ogg_write_vorbiscomment(8, bitexact, &oggstream->header_len[1], m, 0);
+    p = ogg_write_vorbiscomment(8, bitexact, &oggstream->header_len[1], m, 0, chapters, nb_chapters);
     if (!p)
         return AVERROR(ENOMEM);
     oggstream->header[1] = p;
@@ -446,7 +448,7 @@ static int ogg_build_vp8_headers(AVFormatContext *s, AVStream *st,
 
     /* optional second packet: VorbisComment */
     if (av_dict_get(st->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
-        p = ogg_write_vorbiscomment(7, bitexact, &oggstream->header_len[1], &st->metadata, 0);
+        p = ogg_write_vorbiscomment(7, bitexact, &oggstream->header_len[1], &st->metadata, 0, NULL, 0);
         if (!p)
             return AVERROR(ENOMEM);
         oggstream->header[1] = p;
@@ -560,7 +562,7 @@ static int ogg_init(AVFormatContext *s)
         } else if (st->codecpar->codec_id == AV_CODEC_ID_OPUS) {
             int err = ogg_build_opus_headers(st->codecpar, oggstream,
                                              s->flags & AVFMT_FLAG_BITEXACT,
-                                             &st->metadata);
+                                             &st->metadata, s->chapters, s->nb_chapters);
             if (err) {
                 av_log(s, AV_LOG_ERROR, "Error writing Opus headers\n");
                 av_freep(&st->priv_data);
@@ -590,7 +592,7 @@ static int ogg_init(AVFormatContext *s)
 
             p = ogg_write_vorbiscomment(7, s->flags & AVFMT_FLAG_BITEXACT,
                                         &oggstream->header_len[1], &st->metadata,
-                                        framing_bit);
+                                        framing_bit, NULL, 0);
             oggstream->header[1] = p;
             if (!p)
                 return AVERROR(ENOMEM);
diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index 7a56dc9a7c1e2..5e6b32d2dc1fc 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -76,17 +76,7 @@ static inline int is_dos_path(const char *path)
     return 0;
 }
 
-#if defined(__OS2__)
-#define SHUT_RD 0
-#define SHUT_WR 1
-#define SHUT_RDWR 2
-#endif
-
 #if defined(_WIN32)
-#define SHUT_RD SD_RECEIVE
-#define SHUT_WR SD_SEND
-#define SHUT_RDWR SD_BOTH
-
 #ifndef S_IRUSR
 #define S_IRUSR S_IREAD
 #endif
@@ -96,6 +86,19 @@ static inline int is_dos_path(const char *path)
 #endif
 
 #if CONFIG_NETWORK
+#if defined(_WIN32) /* on _WIN32 the SHUT_* names are mapped onto the SD_* constants */
+#define SHUT_RD SD_RECEIVE
+#define SHUT_WR SD_SEND
+#define SHUT_RDWR SD_BOTH
+#else /* !defined(_WIN32) */
+#include <sys/socket.h>
+#if !defined(SHUT_RD) /* OS/2, DJGPP */
+#define SHUT_RD 0 /* fallback values, used only when <sys/socket.h> left SHUT_RD undefined */
+#define SHUT_WR 1
+#define SHUT_RDWR 2
+#endif /* !defined(SHUT_RD) */
+#endif /* defined(_WIN32) */
+
 #if !HAVE_SOCKLEN_T
 typedef int socklen_t;
 #endif
diff --git a/libavformat/pcmdec.c b/libavformat/pcmdec.c
index d0ceea6fa9f16..9895af03a43e4 100644
--- a/libavformat/pcmdec.c
+++ b/libavformat/pcmdec.c
@@ -50,9 +50,9 @@ static int pcm_read_header(AVFormatContext *s)
 
     av_opt_get(s->pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type);
     if (mime_type && s->iformat->mime_type) {
-        int rate = 0, channels = 0;
+        int rate = 0, channels = 0, little_endian = 0;
         size_t len = strlen(s->iformat->mime_type);
-        if (!strncmp(s->iformat->mime_type, mime_type, len)) {
+        if (!av_strncasecmp(s->iformat->mime_type, mime_type, len)) { /* audio/L16 */
             uint8_t *options = mime_type + len;
             len = strlen(mime_type);
             while (options < mime_type + len) {
@@ -63,6 +63,12 @@ static int pcm_read_header(AVFormatContext *s)
                     sscanf(options, " rate=%d",     &rate);
                 if (!channels)
                     sscanf(options, " channels=%d", &channels);
+                if (!little_endian) {
+                     char val[14]; /* sizeof("little-endian") == 14 */
+                     if (sscanf(options, " endianness=%13s", val) == 1) {
+                         little_endian = strcmp(val, "little-endian") == 0;
+                     }
+                }
             }
             if (rate <= 0) {
                 av_log(s, AV_LOG_ERROR,
@@ -74,6 +80,8 @@ static int pcm_read_header(AVFormatContext *s)
             st->codecpar->sample_rate = rate;
             if (channels > 0)
                 st->codecpar->channels = channels;
+            if (little_endian)
+                st->codecpar->codec_id = AV_CODEC_ID_PCM_S16LE;
         }
     }
     av_freep(&mime_type);
@@ -142,10 +150,10 @@ PCMDEF(s24le, "PCM signed 24-bit little-endian",
        NULL, AV_CODEC_ID_PCM_S24LE)
 
 PCMDEF(s16be, "PCM signed 16-bit big-endian",
-       AV_NE("sw", NULL), AV_CODEC_ID_PCM_S16BE)
+       AV_NE("sw", NULL), AV_CODEC_ID_PCM_S16BE, .mime_type = "audio/L16")
 
 PCMDEF(s16le, "PCM signed 16-bit little-endian",
-       AV_NE(NULL, "sw"), AV_CODEC_ID_PCM_S16LE, .mime_type = "audio/L16",)
+       AV_NE(NULL, "sw"), AV_CODEC_ID_PCM_S16LE)
 
 PCMDEF(s8, "PCM signed 8-bit",
        "sb", AV_CODEC_ID_PCM_S8)
@@ -177,6 +185,9 @@ PCMDEF(alaw, "PCM A-law",
 PCMDEF(mulaw, "PCM mu-law",
        "ul", AV_CODEC_ID_PCM_MULAW)
 
+PCMDEF(vidc, "PCM Archimedes VIDC",
+       NULL, AV_CODEC_ID_PCM_VIDC)
+
 static const AVOption sln_options[] = {
     { "sample_rate", "", offsetof(PCMAudioDemuxerContext, sample_rate), AV_OPT_TYPE_INT, {.i64 = 8000}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "channels",    "", offsetof(PCMAudioDemuxerContext, channels),    AV_OPT_TYPE_INT, {.i64 = 1}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
diff --git a/libavformat/pcmenc.c b/libavformat/pcmenc.c
index 3e4f30805722b..1760b3bef7ae0 100644
--- a/libavformat/pcmenc.c
+++ b/libavformat/pcmenc.c
@@ -92,3 +92,6 @@ PCMDEF(alaw, "PCM A-law",
 
 PCMDEF(mulaw, "PCM mu-law",
        "ul", AV_CODEC_ID_PCM_MULAW)
+
+PCMDEF(vidc, "PCM Archimedes VIDC",
+       NULL, AV_CODEC_ID_PCM_VIDC)
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index b38a4b5e5d4ba..6249352d28f78 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -91,6 +91,17 @@ int ff_raw_video_read_header(AVFormatContext *s)
     return ret;
 }
 
+int ff_raw_subtitle_read_header(AVFormatContext *s) // creates a single subtitle stream; returns 0, or AVERROR(ENOMEM) on allocation failure
+{
+    AVStream *st = avformat_new_stream(s, NULL);
+    if (!st) // avformat_new_stream() returned NULL
+        return AVERROR(ENOMEM);
+    st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    st->codecpar->codec_id = s->iformat->raw_codec_id; // codec id comes from the input format's raw_codec_id field
+    st->start_time = 0; // stream start time is set to 0
+    return 0;
+}
+
 int ff_raw_data_read_header(AVFormatContext *s)
 {
     AVStream *st = avformat_new_stream(s, NULL);
diff --git a/libavformat/rawdec.h b/libavformat/rawdec.h
index a464bbb4328b7..3eb416b8ee343 100644
--- a/libavformat/rawdec.h
+++ b/libavformat/rawdec.h
@@ -41,6 +41,8 @@ int ff_raw_audio_read_header(AVFormatContext *s);
 
 int ff_raw_video_read_header(AVFormatContext *s);
 
+int ff_raw_subtitle_read_header(AVFormatContext *s);
+
 int ff_raw_data_read_header(AVFormatContext *s);
 
 #define FF_RAWVIDEO_DEMUXER_CLASS(name)\
@@ -83,7 +85,7 @@ AVInputFormat ff_ ## shortname ## _demuxer = {\
     .name           = #shortname,\
     .long_name      = NULL_IF_CONFIG_SMALL(longname),\
     .read_probe     = probe,\
-    .read_header    = ff_raw_data_read_header,\
+    .read_header    = ff_raw_subtitle_read_header,\
     .read_packet    = ff_raw_read_partial_packet,\
     .extensions     = ext,\
     .flags          = flag,\
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 3907e1a9f383e..8f0fd99e22dbe 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -475,6 +475,8 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_MWSC,         MKTAG('M', 'W', 'S', 'C') },
     { AV_CODEC_ID_WCMV,         MKTAG('W', 'C', 'M', 'V') },
     { AV_CODEC_ID_RASC,         MKTAG('R', 'A', 'S', 'C') },
+    { AV_CODEC_ID_HYMT,         MKTAG('H', 'Y', 'M', 'T') },
+    { AV_CODEC_ID_ARBC,         MKTAG('A', 'R', 'B', 'C') },
     { AV_CODEC_ID_NONE,         0 }
 };
 
diff --git a/libavformat/rpl.c b/libavformat/rpl.c
index d373600478600..6b45b35c30872 100644
--- a/libavformat/rpl.c
+++ b/libavformat/rpl.c
@@ -119,6 +119,8 @@ static int rpl_read_header(AVFormatContext *s)
     AVStream *vst = NULL, *ast = NULL;
     int total_audio_size;
     int error = 0;
+    const char *endptr;
+    char audio_type[RPL_LINE_LENGTH];
 
     uint32_t i;
 
@@ -188,7 +190,9 @@ static int rpl_read_header(AVFormatContext *s)
         ast->codecpar->codec_tag       = audio_format;
         ast->codecpar->sample_rate     = read_line_and_int(pb, &error);  // audio bitrate
         ast->codecpar->channels        = read_line_and_int(pb, &error);  // number of audio channels
-        ast->codecpar->bits_per_coded_sample = read_line_and_int(pb, &error);  // audio bits per sample
+        error |= read_line(pb, line, sizeof(line));
+        ast->codecpar->bits_per_coded_sample = read_int(line, &endptr, &error);  // audio bits per sample
+        strcpy(audio_type, endptr);
         // At least one sample uses 0 for ADPCM, which is really 4 bits
         // per sample.
         if (ast->codecpar->bits_per_coded_sample == 0)
@@ -205,6 +209,17 @@ static int rpl_read_header(AVFormatContext *s)
                     // 16-bit audio is always signed
                     ast->codecpar->codec_id = AV_CODEC_ID_PCM_S16LE;
                     break;
+                } else if (ast->codecpar->bits_per_coded_sample == 8) {
+                    if(strstr(audio_type, "unsigned") != NULL) {
+                        ast->codecpar->codec_id = AV_CODEC_ID_PCM_U8;
+                        break;
+                    } else if(strstr(audio_type, "linear") != NULL) {
+                        ast->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
+                        break;
+                    } else {
+                        ast->codecpar->codec_id = AV_CODEC_ID_PCM_VIDC;
+                        break;
+                    }
                 }
                 // There are some other formats listed as legal per the spec;
                 // samples needed.
diff --git a/libavformat/rtpdec.h b/libavformat/rtpdec.h
index 5a47d6f79d5fb..9144edbe8b22a 100644
--- a/libavformat/rtpdec.h
+++ b/libavformat/rtpdec.h
@@ -154,7 +154,6 @@ struct RTPDemuxContext {
     uint16_t seq;
     uint32_t timestamp;
     uint32_t base_timestamp;
-    uint32_t cur_timestamp;
     int64_t  unwrapped_timestamp;
     int64_t  range_start_offset;
     int max_payload_size;
diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c
index d3c1bc96dce94..e7a4dffabac41 100644
--- a/libavformat/rtpenc_chain.c
+++ b/libavformat/rtpenc_chain.c
@@ -59,6 +59,7 @@ int ff_rtp_chain_mux_open(AVFormatContext **out, AVFormatContext *s,
     /* Copy other stream parameters. */
     rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio;
     rtpctx->flags |= s->flags & AVFMT_FLAG_BITEXACT;
+    rtpctx->strict_std_compliance = s->strict_std_compliance;
 
     /* Get the payload type from the codec */
     if (st->id < RTP_PT_PRIVATE)
diff --git a/libavformat/rtpproto.c b/libavformat/rtpproto.c
index e706300a77f14..1f0a82ac7e545 100644
--- a/libavformat/rtpproto.c
+++ b/libavformat/rtpproto.c
@@ -40,7 +40,7 @@
 #include "os_support.h"
 #include <fcntl.h>
 #if HAVE_POLL_H
-#include <sys/poll.h>
+#include <poll.h>
 #endif
 
 typedef struct RTPContext {
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index ceb770a3a4901..975637cf54c0c 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -454,7 +454,10 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
         } else if (!strcmp(st_type, "text")) {
             codec_type = AVMEDIA_TYPE_SUBTITLE;
         }
-        if (codec_type == AVMEDIA_TYPE_UNKNOWN || !(rt->media_type_mask & (1 << codec_type))) {
+        if (codec_type == AVMEDIA_TYPE_UNKNOWN ||
+            !(rt->media_type_mask & (1 << codec_type)) ||
+            rt->nb_rtsp_streams >= s->max_streams
+        ) {
             s1->skip_media = 1;
             return;
         }
@@ -1663,7 +1666,7 @@ int ff_rtsp_connect(AVFormatContext *s)
     char tcpname[1024], cmd[2048], auth[128];
     const char *lower_rtsp_proto = "tcp";
     int port, err, tcp_fd;
-    RTSPMessageHeader reply1 = {0}, *reply = &reply1;
+    RTSPMessageHeader reply1, *reply = &reply1;
     int lower_transport_mask = 0;
     int default_port = RTSP_DEFAULT_PORT;
     char real_challenge[64] = "";
@@ -1692,6 +1695,7 @@ int ff_rtsp_connect(AVFormatContext *s)
     rt->lower_transport_mask &= (1 << RTSP_LOWER_TRANSPORT_NB) - 1;
 
 redirect:
+    memset(&reply1, 0, sizeof(reply1));
     /* extract hostname and port */
     av_url_split(proto, sizeof(proto), auth, sizeof(auth),
                  host, sizeof(host), &port, path, sizeof(path), s->url);
diff --git a/libavformat/subviewerdec.c b/libavformat/subviewerdec.c
index af084f48560dd..61eb80af8787b 100644
--- a/libavformat/subviewerdec.c
+++ b/libavformat/subviewerdec.c
@@ -27,6 +27,7 @@
 #include "avformat.h"
 #include "internal.h"
 #include "subtitles.h"
+#include "avio_internal.h"
 #include "libavcodec/internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
@@ -78,6 +79,11 @@ static int subviewer_read_header(AVFormatContext *s)
 
     if (!st)
         return AVERROR(ENOMEM);
+    res = ffio_ensure_seekback(s->pb, 3);
+    if (res < 0)
+        return res;
+    if (avio_rb24(s->pb) != 0xefbbbf)
+        avio_seek(s->pb, -3, SEEK_CUR);
     avpriv_set_pts_info(st, 64, 1, 100);
     st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
     st->codecpar->codec_id   = AV_CODEC_ID_SUBVIEWER;
diff --git a/libavformat/supenc.c b/libavformat/supenc.c
index f5f6b58c87f4a..643dda5ffcd13 100644
--- a/libavformat/supenc.c
+++ b/libavformat/supenc.c
@@ -32,10 +32,10 @@ static int sup_write_packet(AVFormatContext *s, AVPacket *pkt)
     uint32_t pts = 0, dts = 0;
 
     if (pkt->pts != AV_NOPTS_VALUE) {
-        pts = (uint32_t)pkt->pts;
+        pts = pkt->pts;
     }
     if (pkt->dts != AV_NOPTS_VALUE) {
-        dts = (uint32_t)pkt->dts;
+        dts = pkt->dts;
     }
 
     /*
@@ -46,8 +46,8 @@ static int sup_write_packet(AVFormatContext *s, AVPacket *pkt)
         size_t len = AV_RB16(data + 1) + 3;
 
         if (len > size) {
-            av_log(s, AV_LOG_ERROR, "Not enough data, skipping %d bytes\n",
-                     (int)size);
+            av_log(s, AV_LOG_ERROR, "Not enough data, skipping %"SIZE_SPECIFIER" bytes\n",
+                   size);
             return AVERROR_INVALIDDATA;
         }
 
@@ -63,8 +63,8 @@ static int sup_write_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     if (size > 0) {
-        av_log(s, AV_LOG_ERROR, "Skipping %d bytes after last segment in frame\n",
-                 (int)size);
+        av_log(s, AV_LOG_ERROR, "Skipping %"SIZE_SPECIFIER" bytes after last segment in frame\n",
+               size);
         return AVERROR_INVALIDDATA;
     }
 
diff --git a/libavformat/tee.c b/libavformat/tee.c
index ef3b113a47180..89a4ceb2809fe 100644
--- a/libavformat/tee.c
+++ b/libavformat/tee.c
@@ -236,6 +236,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     avf2->io_close = avf->io_close;
     avf2->interrupt_callback = avf->interrupt_callback;
     avf2->flags = avf->flags;
+    avf2->strict_std_compliance = avf->strict_std_compliance;
 
     tee_slave->stream_map = av_calloc(avf->nb_streams, sizeof(*tee_slave->stream_map));
     if (!tee_slave->stream_map) {
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 93e588ee1eaf7..9dc57063d299e 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -646,7 +646,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     if (id3v2_extra_meta) {
         if (!strcmp(s->iformat->name, "mp3") || !strcmp(s->iformat->name, "aac") ||
-            !strcmp(s->iformat->name, "tta")) {
+            !strcmp(s->iformat->name, "tta") || !strcmp(s->iformat->name, "wav")) {
             if ((ret = ff_id3v2_parse_apic(s, &id3v2_extra_meta)) < 0)
                 goto fail;
             if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0)
@@ -5097,244 +5097,205 @@ AVRational av_guess_frame_rate(AVFormatContext *format, AVStream *st, AVFrame *f
     return fr;
 }
 
-int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
-                                    const char *spec)
-{
-    if (*spec <= '9' && *spec >= '0') /* opt:index */
-        return strtol(spec, NULL, 0) == st->index;
-    else if (*spec == 'v' || *spec == 'a' || *spec == 's' || *spec == 'd' ||
-             *spec == 't' || *spec == 'V') { /* opt:[vasdtV] */
-        enum AVMediaType type;
-        int nopic = 0;
-
-        switch (*spec++) {
-        case 'v': type = AVMEDIA_TYPE_VIDEO;      break;
-        case 'a': type = AVMEDIA_TYPE_AUDIO;      break;
-        case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
-        case 'd': type = AVMEDIA_TYPE_DATA;       break;
-        case 't': type = AVMEDIA_TYPE_ATTACHMENT; break;
-        case 'V': type = AVMEDIA_TYPE_VIDEO; nopic = 1; break;
-        default:  av_assert0(0);
-        }
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-        if (type != st->codecpar->codec_type
-           && (st->codecpar->codec_type != AVMEDIA_TYPE_UNKNOWN || st->codec->codec_type != type))
-            return 0;
-FF_ENABLE_DEPRECATION_WARNINGS
-#else
-        if (type != st->codecpar->codec_type)
-            return 0;
-#endif
-        if (nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
-            return 0;
-        if (*spec++ == ':') { /* possibly followed by :index */
-            int i, index = strtol(spec, NULL, 0);
-            for (i = 0; i < s->nb_streams; i++) {
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-                if ((s->streams[i]->codecpar->codec_type == type
-                      || s->streams[i]->codec->codec_type == type
-                    ) &&
-                    !(nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC)) &&
-                    index-- == 0)
-                    return i == st->index;
-FF_ENABLE_DEPRECATION_WARNINGS
-#else
-                if ((s->streams[i]->codecpar->codec_type == type) &&
-                    !(nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC)) &&
-                    index-- == 0)
-                    return i == st->index;
-#endif
+/**
+ * Matches a stream specifier (but ignores requested index).
+ *
+ * @param indexptr set to point to the requested stream index if there is one
+ *
+ * @return <0 on error
+ *         0  if st is NOT a matching stream
+ *         >0 if st is a matching stream
+ */
+static int match_stream_specifier(AVFormatContext *s, AVStream *st,
+                                  const char *spec, const char **indexptr)
+{
+    int match = 1;                      /* Stores if the specifier matches so far. */
+    while (*spec) {
+        if (*spec <= '9' && *spec >= '0') { /* opt:index */
+            if (indexptr)
+                *indexptr = spec;
+            return match;
+        } else if (*spec == 'v' || *spec == 'a' || *spec == 's' || *spec == 'd' ||
+                   *spec == 't' || *spec == 'V') { /* opt:[vasdtV] */
+            enum AVMediaType type;
+            int nopic = 0;
+
+            switch (*spec++) {
+            case 'v': type = AVMEDIA_TYPE_VIDEO;      break;
+            case 'a': type = AVMEDIA_TYPE_AUDIO;      break;
+            case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
+            case 'd': type = AVMEDIA_TYPE_DATA;       break;
+            case 't': type = AVMEDIA_TYPE_ATTACHMENT; break;
+            case 'V': type = AVMEDIA_TYPE_VIDEO; nopic = 1; break;
+            default:  av_assert0(0);
             }
-            return 0;
-        }
-        return 1;
-    } else if (*spec == 'p' && *(spec + 1) == ':') {
-        int prog_id, i, j;
-        char *endptr;
-        spec += 2;
-        prog_id = strtol(spec, &endptr, 0);
-        for (i = 0; i < s->nb_programs; i++) {
-            if (s->programs[i]->id != prog_id)
-                continue;
+            if (*spec && *spec++ != ':')         /* If we are not at the end, then another specifier must follow. */
+                return AVERROR(EINVAL);
 
-            if (*endptr++ == ':') {  // p:<id>:....
-                if ( *endptr == 'a' || *endptr == 'v' ||
-                     *endptr == 's' || *endptr == 'd') {  // p:<id>:<st_type>[:<index>]
-                    enum AVMediaType type;
-
-                    switch (*endptr++) {
-                    case 'v': type = AVMEDIA_TYPE_VIDEO;      break;
-                    case 'a': type = AVMEDIA_TYPE_AUDIO;      break;
-                    case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
-                    case 'd': type = AVMEDIA_TYPE_DATA;       break;
-                    default:  av_assert0(0);
-                    }
-                    if (*endptr++ == ':') {  // p:<id>:<st_type>:<index>
-                        int stream_idx = strtol(endptr, NULL, 0), type_counter = 0;
-                        for (j = 0; j < s->programs[i]->nb_stream_indexes; j++) {
-                            int stream_index = s->programs[i]->stream_index[j];
-                            if (st->index == s->programs[i]->stream_index[j]) {
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-                                return type_counter == stream_idx &&
-                                       (type == st->codecpar->codec_type ||
-                                        type == st->codec->codec_type);
-FF_ENABLE_DEPRECATION_WARNINGS
-#else
-                                return type_counter == stream_idx &&
-                                       type == st->codecpar->codec_type;
-#endif
-                             }
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
-                            if (type == s->streams[stream_index]->codecpar->codec_type ||
-                                type == s->streams[stream_index]->codec->codec_type)
-                                type_counter++;
-FF_ENABLE_DEPRECATION_WARNINGS
-#else
-                            if (type == s->streams[stream_index]->codecpar->codec_type)
-                                type_counter++;
-#endif
-                        }
-                        return 0;
-                    } else {  // p:<id>:<st_type>
-                        for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
-                            if (st->index == s->programs[i]->stream_index[j]) {
-#if FF_API_LAVF_AVCTX
-FF_DISABLE_DEPRECATION_WARNINGS
-                                 return type == st->codecpar->codec_type ||
-                                        type == st->codec->codec_type;
-FF_ENABLE_DEPRECATION_WARNINGS
+            if (type != st->codecpar->codec_type
+               && (st->codecpar->codec_type != AVMEDIA_TYPE_UNKNOWN || st->codec->codec_type != type))
+                match = 0;
+    FF_ENABLE_DEPRECATION_WARNINGS
 #else
-                                 return type == st->codecpar->codec_type;
+            if (type != st->codecpar->codec_type)
+                match = 0;
 #endif
-                            }
-                        return 0;
-                    }
-
-                } else if ( *endptr == 'm') { // p:<id>:m:<metadata_spec>
-                    AVDictionaryEntry *tag;
-                    char *key, *val;
-                    int ret = 0;
-
-                    if (*(++endptr) != ':') {
-                        av_log(s, AV_LOG_ERROR, "Invalid stream specifier syntax, missing ':' sign after :m.\n");
-                        return AVERROR(EINVAL);
-                    }
-
-                    val = strchr(++endptr, ':');
-                    key = val ? av_strndup(endptr, val - endptr) : av_strdup(endptr);
-                    if (!key)
-                        return AVERROR(ENOMEM);
+            if (nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+                match = 0;
+        } else if (*spec == 'p' && *(spec + 1) == ':') {
+            int prog_id, i, j;
+            int found = 0;
+            char *endptr;
+            spec += 2;
+            prog_id = strtol(spec, &endptr, 0);
+            /* Disallow empty id and make sure that if we are not at the end, then another specifier must follow. */
+            if (spec == endptr || (*endptr && *endptr++ != ':'))
+                return AVERROR(EINVAL);
+            spec = endptr;
+            if (match) {
+                for (i = 0; i < s->nb_programs; i++) {
+                    if (s->programs[i]->id != prog_id)
+                        continue;
 
-                    for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
+                    for (j = 0; j < s->programs[i]->nb_stream_indexes; j++) {
                         if (st->index == s->programs[i]->stream_index[j]) {
-                            tag = av_dict_get(st->metadata, key, NULL, 0);
-                            if (tag && (!val || !strcmp(tag->value, val + 1)))
-                                ret = 1;
-
+                            found = 1;
+                            i = s->nb_programs;
                             break;
                         }
-
-                    av_freep(&key);
-                    return ret;
-
-                } else {  // p:<id>:<index>
-                    int stream_idx = strtol(endptr, NULL, 0);
-                    return stream_idx >= 0 &&
-                           stream_idx < s->programs[i]->nb_stream_indexes &&
-                           st->index == s->programs[i]->stream_index[stream_idx];
+                    }
                 }
             }
-
-            for (j = 0; j < s->programs[i]->nb_stream_indexes; j++)
-                if (st->index == s->programs[i]->stream_index[j])
-                    return 1;
-        }
-        return 0;
-    } else if (*spec == '#' ||
-               (*spec == 'i' && *(spec + 1) == ':')) {
-        int stream_id;
-        char *endptr;
-        spec += 1 + (*spec == 'i');
-        stream_id = strtol(spec, &endptr, 0);
-        if (!*endptr)
-            return stream_id == st->id;
-    } else if (*spec == 'm' && *(spec + 1) == ':') {
-        AVDictionaryEntry *tag;
-        char *key, *val;
-        int ret;
-
-        spec += 2;
-        val = strchr(spec, ':');
-
-        key = val ? av_strndup(spec, val - spec) : av_strdup(spec);
-        if (!key)
-            return AVERROR(ENOMEM);
-
-        tag = av_dict_get(st->metadata, key, NULL, 0);
-        if (tag) {
-            if (!val || !strcmp(tag->value, val + 1))
-                ret = 1;
-            else
-                ret = 0;
-        } else
-            ret = 0;
-
-        av_freep(&key);
-        return ret;
-    } else if (*spec == 'u') {
-        AVCodecParameters *par = st->codecpar;
+            if (!found)
+                match = 0;
+        } else if (*spec == '#' ||
+                   (*spec == 'i' && *(spec + 1) == ':')) {
+            int stream_id;
+            char *endptr;
+            spec += 1 + (*spec == 'i');
+            stream_id = strtol(spec, &endptr, 0);
+            if (spec == endptr || *endptr)                /* Disallow empty id and make sure we are at the end. */
+                return AVERROR(EINVAL);
+            return match && (stream_id == st->id);
+        } else if (*spec == 'm' && *(spec + 1) == ':') {
+            AVDictionaryEntry *tag;
+            char *key, *val;
+            int ret;
+
+            if (match) {
+               spec += 2;
+               val = strchr(spec, ':');
+
+               key = val ? av_strndup(spec, val - spec) : av_strdup(spec);
+               if (!key)
+                   return AVERROR(ENOMEM);
+
+               tag = av_dict_get(st->metadata, key, NULL, 0);
+               if (tag) {
+                   if (!val || !strcmp(tag->value, val + 1))
+                       ret = 1;
+                   else
+                       ret = 0;
+               } else
+                   ret = 0;
+
+               av_freep(&key);
+            }
+            return match && ret;
+        } else if (*spec == 'u' && *(spec + 1) == '\0') {
+            AVCodecParameters *par = st->codecpar;
 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
-        AVCodecContext *codec = st->codec;
+            AVCodecContext *codec = st->codec;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
-        int val;
-        switch (par->codec_type) {
-        case AVMEDIA_TYPE_AUDIO:
-            val = par->sample_rate && par->channels;
+            int val;
+            switch (par->codec_type) {
+            case AVMEDIA_TYPE_AUDIO:
+                val = par->sample_rate && par->channels;
 #if FF_API_LAVF_AVCTX
-            val = val || (codec->sample_rate && codec->channels);
+                val = val || (codec->sample_rate && codec->channels);
 #endif
-            if (par->format == AV_SAMPLE_FMT_NONE
+                if (par->format == AV_SAMPLE_FMT_NONE
 #if FF_API_LAVF_AVCTX
-                && codec->sample_fmt == AV_SAMPLE_FMT_NONE
+                    && codec->sample_fmt == AV_SAMPLE_FMT_NONE
 #endif
-                )
-                return 0;
-            break;
-        case AVMEDIA_TYPE_VIDEO:
-            val = par->width && par->height;
+                    )
+                    return 0;
+                break;
+            case AVMEDIA_TYPE_VIDEO:
+                val = par->width && par->height;
 #if FF_API_LAVF_AVCTX
-            val = val || (codec->width && codec->height);
+                val = val || (codec->width && codec->height);
 #endif
-            if (par->format == AV_PIX_FMT_NONE
+                if (par->format == AV_PIX_FMT_NONE
 #if FF_API_LAVF_AVCTX
-                && codec->pix_fmt == AV_PIX_FMT_NONE
+                    && codec->pix_fmt == AV_PIX_FMT_NONE
 #endif
-                )
-                return 0;
-            break;
-        case AVMEDIA_TYPE_UNKNOWN:
-            val = 0;
-            break;
-        default:
-            val = 1;
-            break;
-        }
+                    )
+                    return 0;
+                break;
+            case AVMEDIA_TYPE_UNKNOWN:
+                val = 0;
+                break;
+            default:
+                val = 1;
+                break;
+            }
 #if FF_API_LAVF_AVCTX
-        return (par->codec_id != AV_CODEC_ID_NONE || codec->codec_id != AV_CODEC_ID_NONE) && val != 0;
+            return match && ((par->codec_id != AV_CODEC_ID_NONE || codec->codec_id != AV_CODEC_ID_NONE) && val != 0);
 #else
-        return par->codec_id != AV_CODEC_ID_NONE && val != 0;
+            return match && (par->codec_id != AV_CODEC_ID_NONE && val != 0);
 #endif
-    } else if (!*spec) /* empty specifier, matches everything */
-        return 1;
+        } else {
+            return AVERROR(EINVAL);
+        }
+    }
+
+    return match;
+}
+
 
-    av_log(s, AV_LOG_ERROR, "Invalid stream specifier: %s.\n", spec);
-    return AVERROR(EINVAL);
+int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
+                                    const char *spec)
+{
+    int ret, index;
+    char *endptr;
+    const char *indexptr = NULL;
+    /* First pass: evaluate every part of spec except a trailing stream index. */
+    ret = match_stream_specifier(s, st, spec, &indexptr);
+    if (ret < 0)
+        goto error;
+
+    if (!indexptr)                  /* No index was requested: the first pass is the answer. */
+        return ret;
+
+    index = strtol(indexptr, &endptr, 0);
+    if (*endptr) {                  /* We can't have anything after the requested index. */
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* This is not really needed but saves us a loop for simple stream index specifiers. */
+    if (spec == indexptr)
+        return (index == st->index);
+
+    /* If we requested a matching stream index, we have to ensure st is that. */
+    for (int i = 0; i < s->nb_streams && index >= 0; i++) {
+        ret = match_stream_specifier(s, s->streams[i], spec, NULL);
+        if (ret < 0)
+            goto error;
+        if (ret > 0 && index-- == 0 && st == s->streams[i]) /* index counts down once per matching stream */
+            return 1;
+    }
+    return 0;
+
+error:
+    if (ret == AVERROR(EINVAL))     /* Only EINVAL is logged here; other errors are returned silently. */
+        av_log(s, AV_LOG_ERROR, "Invalid stream specifier: %s.\n", spec);
+    return ret;
 }
 
 int ff_generate_avci_extradata(AVStream *st)
diff --git a/libavformat/vapoursynth.c b/libavformat/vapoursynth.c
index f3ad6910e5836..69fde1a806c9d 100644
--- a/libavformat/vapoursynth.c
+++ b/libavformat/vapoursynth.c
@@ -177,7 +177,7 @@ static av_cold int read_header_vs(AVFormatContext *s)
     char dummy;
     const VSVideoInfo *info;
     struct VSState *vss_state;
-    int err;
+    int err = 0;
 
     vss_state = av_mallocz(sizeof(*vss_state));
     if (!vss_state) {
diff --git a/libavformat/vc1test.c b/libavformat/vc1test.c
index a801f4bd22e41..d44570988b554 100644
--- a/libavformat/vc1test.c
+++ b/libavformat/vc1test.c
@@ -34,9 +34,14 @@
 
 static int vc1t_probe(AVProbeData *p)
 {
+    uint32_t size;
+
     if (p->buf_size < 24)
         return 0;
-    if (p->buf[3] != 0xC5 || AV_RL32(&p->buf[4]) != 4 || AV_RL32(&p->buf[20]) != 0xC)
+
+    size = AV_RL32(&p->buf[4]);
+    if (p->buf[3] != 0xC5 || size < 4 || size > p->buf_size - 20 ||
+        AV_RL32(&p->buf[size+16]) != 0xC)
         return 0;
 
     return AVPROBE_SCORE_EXTENSION;
@@ -48,9 +53,10 @@ static int vc1t_read_header(AVFormatContext *s)
     AVStream *st;
     int frames;
     uint32_t fps;
+    uint32_t size;
 
     frames = avio_rl24(pb);
-    if(avio_r8(pb) != 0xC5 || avio_rl32(pb) != 4)
+    if (avio_r8(pb) != 0xC5 || ((size = avio_rl32(pb)) < 4))
         return AVERROR_INVALIDDATA;
 
     /* init video codec */
@@ -63,6 +69,8 @@ static int vc1t_read_header(AVFormatContext *s)
 
     if (ff_get_extradata(s, st->codecpar, pb, VC1_EXTRADATA_SIZE) < 0)
         return AVERROR(ENOMEM);
+
+    avio_skip(pb, size - 4);
     st->codecpar->height = avio_rl32(pb);
     st->codecpar->width = avio_rl32(pb);
     if(avio_rl32(pb) != 0xC)
@@ -114,5 +122,6 @@ AVInputFormat ff_vc1t_demuxer = {
     .read_probe     = vc1t_probe,
     .read_header    = vc1t_read_header,
     .read_packet    = vc1t_read_packet,
+    .extensions     = "rcv",
     .flags          = AVFMT_GENERIC_INDEX,
 };
diff --git a/libavformat/version.h b/libavformat/version.h
index e4a69071d4950..2e83eb4f23951 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -32,8 +32,8 @@
 // Major bumping may affect Ticket5467, 5421, 5451(compatibility with Chromium)
 // Also please add any ticket numbers that you believe might be affected here
 #define LIBAVFORMAT_VERSION_MAJOR  58
-#define LIBAVFORMAT_VERSION_MINOR  19
-#define LIBAVFORMAT_VERSION_MICRO 100
+#define LIBAVFORMAT_VERSION_MINOR  26
+#define LIBAVFORMAT_VERSION_MICRO 101
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \
diff --git a/libavformat/vividas.c b/libavformat/vividas.c
new file mode 100644
index 0000000000000..598b8bb10b089
--- /dev/null
+++ b/libavformat/vividas.c
@@ -0,0 +1,727 @@
+/*
+ * Vividas VIV format Demuxer
+ * Copyright (c) 2012 Krzysztof Klinikowski
+ * Copyright (c) 2010 Andrzej Szombierski
+ * based on vivparse Copyright (c) 2007 Måns Rullgård
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief Vividas VIV (.viv) file demuxer
+ * @author Andrzej Szombierski [qq at kuku eu org] (2010-07)
+ * @sa http://wiki.multimedia.cx/index.php?title=Vividas_VIV
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avio_internal.h"
+#include "avformat.h"
+#include "internal.h"
+
+#define MAX_AUDIO_SUBPACKETS 100
+
+typedef struct VIV_SB_block {
+    int size, n_packets;    /* both read as varlen values by track_index() */
+    int64_t byte_offset;    /* sum of the sizes of all preceding blocks */
+    int64_t packet_offset;  /* sum of n_packets of all preceding blocks */
+} VIV_SB_block;
+
+typedef struct VIV_SB_entry {
+    int size, flag; /* one varlen size and one flag byte per packet, filled by load_sb_block() */
+} VIV_SB_entry;
+
+typedef struct VIV_AudioSubpacket {
+    int start, pcm_bytes; /* NOTE(review): presumably offset and decoded PCM size of one sub-packet -- confirm against the packet reader */
+} VIV_AudioSubpacket;
+
+typedef struct VividasDemuxContext {
+    int n_sb_blocks;            /* number of entries in sb_blocks; set by track_index() */
+    VIV_SB_block *sb_blocks;    /* block table allocated by track_index() */
+
+    uint32_t sb_key;            /* key handed to read_sb_block(), which may replace it */
+    int64_t sb_offset;
+
+    int current_sb, current_sb_entry; /* current_sb indexes sb_blocks; current_sb_entry is reset to 0 by load_sb_block() */
+    uint8_t *sb_buf;            /* decoded block returned by read_sb_block() */
+    AVIOContext *sb_pb;         /* reader over sb_buf, created by load_sb_block() */
+    int n_sb_entries;           /* n_packets of the block loaded last */
+    VIV_SB_entry *sb_entries;   /* entry table, sized by track_index() for the largest n_packets */
+
+    int n_audio_subpackets;
+    int current_audio_subpacket;
+
+    int64_t audio_sample;
+
+    VIV_AudioSubpacket audio_subpackets[MAX_AUDIO_SUBPACKETS];
+} VividasDemuxContext;
+
+/* Probe: claim the data only when it starts with the 9-byte "vividas03" tag. */
+static int viv_probe(AVProbeData *p)
+{
+    static const char magic[] = "vividas03";
+
+    return memcmp(p->buf, magic, 9) ? 0 : AVPROBE_SCORE_MAX;
+}
+
+static const unsigned short keybits[32] = { /* bit positions (byte = p>>3, bit = p&7) gathered by decode_key() */
+     163,  416,  893,   82,  223,  572, 1137,  430,
+     659, 1104,   13,  626,  695,  972, 1465,  686,
+     843, 1216,  317, 1122, 1383,   92,  513, 1158,
+    1243,   48,  573, 1306, 1495,  396, 1009,  350,
+};
+/* Build a 32-bit key: result bit i is bit keybits[i] of buf (highest position is 1495, so bytes 0..186 are read). */
+static uint32_t decode_key(uint8_t *buf)
+{
+    uint32_t key = 0;
+
+    for (int i = 0; i < 32; i++) {
+        unsigned p = keybits[i];
+        key |= (uint32_t)((buf[p >> 3] >> (p & 7)) & 1) << i; /* unsigned shift: "1 << 31" on an int is undefined */
+    }
+
+    return key;
+}
+
+/* Write the base-128 continuation bytes of v to p, most significant group
+ * first: every 7-bit group above the lowest one is stored with bit 7 set,
+ * leading zero groups are skipped and the lowest 7 bits are never written. */
+static void put_v(uint8_t *p, unsigned v)
+{
+    for (int shift = 28; shift >= 7; shift -= 7) {
+        if (!(v >> shift))
+            continue;
+        *p++ = ((v >> shift) & 0x7f) | 0x80;
+    }
+}
+
+static unsigned recover_key(unsigned char sample[4], unsigned expected_size)
+{
+    unsigned char plaintext[8] = { 'S', 'B' }; /* expected start of a decoded block: "SB" followed by put_v(expected_size) */
+    /* Key guess = first four encoded bytes XOR the expected plaintext, packed with byte 0 lowest. */
+    put_v(plaintext+2, expected_size);
+
+    return  (unsigned)(sample[0] ^ plaintext[0])        |
+           ((unsigned)(sample[1] ^ plaintext[1]) <<  8) |
+           ((unsigned)(sample[2] ^ plaintext[2]) << 16) |
+           ((unsigned)(sample[3] ^ plaintext[3]) << 24); /* cast first: a promoted int << 24 overflows for bytes >= 0x80 */
+}
+
+/* XOR size/4 32-bit words from p1 into p2 with a running key stream:
+ * the stream starts at *key_ptr and advances by key after every word;
+ * the final stream value is stored back through key_ptr. */
+static void xor_block(void *p1, void *p2, unsigned size, int key, unsigned *key_ptr)
+{
+    const unsigned *in  = p1;
+    unsigned       *out = p2;
+    unsigned stream     = *key_ptr;
+    unsigned words      = size >> 2; /* a trailing size % 4 bytes are not touched */
+
+    for (unsigned n = 0; n < words; n++) {
+        /* the stream word is byte-swapped on big-endian builds */
+        out[n]  = in[n] ^ (HAVE_BIGENDIAN ? av_bswap32(stream) : stream);
+        stream += key;
+    }
+
+    *key_ptr = stream;
+}
+
+static void decode_block(uint8_t *src, uint8_t *dest, unsigned size,
+                         uint32_t key, uint32_t *key_ptr,
+                         int align)
+{
+    unsigned s = size;
+    char tmp[4] = { 0 }; /* zeroed: xor_block() always processes all four bytes */
+    int a2;
+    /* XOR-decode size bytes from src to dest; align & 3 is the position of src inside a 4-byte key word. */
+    if (!size)
+        return;
+
+    align &= 3;
+    a2 = (4 - align) & 3; /* bytes left in the key word that src starts in */
+
+    if (align) {
+        uint32_t tmpkey = *key_ptr - key; /* stream value one step before *key_ptr */
+        if (a2 > s)
+            a2 = s; /* block ends inside this word: never copy more than size bytes */
+        memcpy(tmp + align, src, a2);
+        xor_block(tmp, tmp, 4, key, &tmpkey);
+        memcpy(dest, tmp + align, a2);
+        s -= a2;
+    }
+
+    if (s >= 4) {
+        xor_block(src + a2, dest + a2, s & ~3,
+                  key, key_ptr);
+        s &= 3;
+    }
+
+    if (s) { /* trailing partial word: decode through tmp and copy back only s bytes */
+        size -= s;
+        memcpy(tmp, src + size, s);
+        xor_block(&tmp, &tmp, 4, key, key_ptr);
+        memcpy(dest + size, tmp, s);
+    }
+}
+
+/* Read a big-endian base-128 value (bit 7 = more bytes follow); returns the value so far once the overflow guard trips. */
+static uint32_t get_v(uint8_t *p)
+{
+    uint32_t v = 0;
+    for (;; p++) {
+        if (v >= UINT_MAX / 128 - *p)
+            break;
+        v = (v << 7) + (*p & 0x7f);
+        if (!(*p & 0x80))
+            break;
+    }
+    return v;
+}
+
+static uint8_t *read_vblock(AVIOContext *src, uint32_t *size,
+                            uint32_t key, uint32_t *k2, int align)
+{
+    uint8_t tmp[4];
+    uint8_t *buf;
+    unsigned n;
+    /* Read and decode one length-prefixed block; returns an av_malloc()ed buffer of *size bytes, or NULL on failure. */
+    if (avio_read(src, tmp, 4) != 4)
+        return NULL;
+
+    decode_block(tmp, tmp, 4, key, k2, align);
+    /* The decoded block starts with its own total length, which counts these first four bytes too. */
+    n = get_v(tmp);
+    if (n < 4) /* must at least hold the four bytes copied below; also keeps n - 4 from wrapping */
+        return NULL;
+
+    buf = av_malloc(n);
+    if (!buf)
+        return NULL;
+
+    *size = n;
+    n -= 4;
+
+    memcpy(buf, tmp, 4);
+
+    if (avio_read(src, buf + 4, n) == n) {
+        decode_block(buf + 4, buf + 4, n, key, k2, align + 4);
+    } else {
+        av_free(buf);
+        buf = NULL;
+    }
+
+    return buf;
+}
+
+static uint8_t *read_sb_block(AVIOContext *src, unsigned *size,
+                              uint32_t *key, unsigned expected_size)
+{
+    uint8_t *buf;
+    uint8_t ibuf[8], sbuf[8];
+    uint32_t k2;
+    unsigned n;
+    /* Read and decode one "SB" block; returns an av_malloc()ed buffer of *size bytes, or NULL on failure. */
+    if (avio_read(src, ibuf, 8) < 8)
+        return NULL;
+
+    k2 = *key;
+    decode_block(ibuf, sbuf, 8, *key, &k2, 0);
+
+    n = get_v(sbuf+2);
+    /* Wrong tag or length: retry with a key derived from the expected header and keep it in *key if it fits. */
+    if (sbuf[0] != 'S' || sbuf[1] != 'B' || (expected_size>0 && n != expected_size)) {
+        uint32_t tmpkey = recover_key(ibuf, expected_size);
+        k2 = tmpkey;
+        decode_block(ibuf, sbuf, 8, tmpkey, &k2, 0);
+        n = get_v(sbuf+2);
+        if (sbuf[0] != 'S' || sbuf[1] != 'B' || expected_size != n)
+            return NULL;
+        *key = tmpkey;
+    }
+    if (n < 8) /* the 8 header bytes are copied unconditionally below; also keeps n - 8 from wrapping */
+        return NULL;
+    buf = av_malloc(n);
+    if (!buf)
+        return NULL;
+    memcpy(buf, sbuf, 8);
+
+    *size = n;
+    n -= 8;
+
+    if (avio_read(src, buf+8, n) != n) { /* "!=" also catches negative error codes, which "<" against an unsigned missed */
+        av_free(buf);
+        return NULL;
+    }
+
+    decode_block(buf + 8, buf + 8, n, *key, &k2, 0);
+
+    return buf;
+}
+
+static void track_header(VividasDemuxContext *viv, AVFormatContext *s,  uint8_t *buf, int size)
+{
+    int i,j;
+    int64_t off;
+    int val_1;
+    int num_video, num_audio;
+    AVIOContext *pb;
+    /* Parse the track header in buf and add one stream per video/audio track; failures end parsing silently (void). */
+    pb = avio_alloc_context(buf, size, 0, NULL, NULL, NULL, NULL);
+    if (!pb)
+        return;
+
+    ffio_read_varlen(pb); // track_header_len
+    avio_r8(pb); // '1'
+
+    val_1 = ffio_read_varlen(pb);
+
+    for (i=0;i<val_1;i++) {
+        int c = avio_r8(pb);
+        for (j=0;j<c;j++) {
+            avio_r8(pb); // val_3
+            avio_r8(pb); // val_4
+        }
+    }
+
+    avio_r8(pb); // num_streams
+
+    off = avio_tell(pb);
+    off += ffio_read_varlen(pb); // val_5
+
+    avio_r8(pb); // '2'
+    num_video = avio_r8(pb);
+
+    avio_seek(pb, off, SEEK_SET);
+    if (num_video != 1)
+        av_log(s, AV_LOG_WARNING, "number of video tracks %d is not 1\n", num_video);
+
+    for (i = 0; i < num_video; i++) {
+        AVStream *st = avformat_new_stream(s, NULL);
+        if (!st) /* out of memory: stop parsing, keep the streams created so far */
+            goto end;
+        st->id = i;
+        st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+        st->codecpar->codec_id = AV_CODEC_ID_VP6;
+
+        off = avio_tell(pb);
+        off += ffio_read_varlen(pb);
+        avio_r8(pb); // '3'
+        avio_r8(pb); // val_7
+        st->time_base.num = avio_rl32(pb); // frame_time
+        st->time_base.den = avio_rl32(pb); // time_base
+        st->nb_frames = avio_rl32(pb); // n frames
+        st->codecpar->width = avio_rl16(pb); // width
+        st->codecpar->height = avio_rl16(pb); // height
+        avio_r8(pb); // val_8
+        avio_rl32(pb); // val_9
+
+        avio_seek(pb, off, SEEK_SET); // jump to the end of this record, whatever was consumed
+    }
+
+    off = avio_tell(pb);
+    off += ffio_read_varlen(pb); // val_10
+    avio_r8(pb); // '4'
+    num_audio = avio_r8(pb);
+    avio_seek(pb, off, SEEK_SET);
+
+    if (num_audio != 1)
+        av_log(s, AV_LOG_WARNING, "number of audio tracks %d is not 1\n", num_audio);
+
+    for(i=0;i<num_audio;i++) {
+        int q;
+        AVStream *st = avformat_new_stream(s, NULL);
+        if (!st) /* out of memory: stop parsing, keep the streams created so far */
+            goto end;
+        st->id = num_video + i;
+        st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+        st->codecpar->codec_id = AV_CODEC_ID_VORBIS;
+
+        off = avio_tell(pb);
+        off += ffio_read_varlen(pb); // length
+        avio_r8(pb); // '5'
+        avio_r8(pb); //codec_id
+        avio_rl16(pb); //codec_subid
+        st->codecpar->channels = avio_rl16(pb); // channels
+        st->codecpar->sample_rate = avio_rl32(pb); // sample_rate
+        avio_seek(pb, 10, SEEK_CUR); // data_1
+        q = avio_r8(pb);
+        avio_seek(pb, q, SEEK_CUR); // data_2
+        avio_r8(pb); // zeropad
+
+        if (avio_tell(pb) < off) { // record holds more data: codec setup packets
+            int num_data;
+            int xd_size = 0;
+            int data_len[256];
+            int offset = 1;
+            uint8_t *p;
+            ffio_read_varlen(pb); // val_13
+            avio_r8(pb); // '19'
+            ffio_read_varlen(pb); // len_3
+            num_data = avio_r8(pb);
+            for (j = 0; j < num_data; j++) {
+                data_len[j] = ffio_read_varlen(pb);
+                xd_size += data_len[j];
+            }
+
+            st->codecpar->extradata_size = 64 + xd_size + xd_size / 255;
+            if (ff_alloc_extradata(st->codecpar, st->codecpar->extradata_size))
+                goto end; // release pb instead of leaking it
+
+            p = st->codecpar->extradata;
+            p[0] = 2; // NOTE(review): constant first byte, independent of num_data -- confirm
+
+            for (j = 0; j < num_data - 1; j++)
+                offset += av_xiphlacing(&p[offset], data_len[j]);
+
+            for (j = 0; j < num_data; j++) {
+                avio_read(pb, &p[offset], data_len[j]);
+                offset += data_len[j];
+            }
+
+            if (offset < st->codecpar->extradata_size)
+                st->codecpar->extradata_size = offset;
+        }
+    }
+end:
+    av_free(pb);
+}
+
+/* Parse the track index block in buf: fill viv->sb_blocks with the size and
+ * packet count of every SB block plus running byte/packet offsets, then
+ * allocate viv->sb_entries for the largest per-block packet count. */
+static void track_index(VividasDemuxContext *viv, AVFormatContext *s, uint8_t *buf, unsigned size)
+{
+    int64_t byte_pos = 0, packet_pos = 0;
+    int most_packets = 0;
+    AVIOContext *pb = avio_alloc_context(buf, size, 0, NULL, NULL, NULL, NULL);
+
+    if (!pb)
+        return;
+
+    ffio_read_varlen(pb); // track_index_len
+    avio_r8(pb); // 'c'
+    viv->n_sb_blocks = ffio_read_varlen(pb);
+    viv->sb_blocks = av_calloc(viv->n_sb_blocks, sizeof(VIV_SB_block));
+    if (!viv->sb_blocks) {
+        viv->n_sb_blocks = 0;
+        av_free(pb);
+        return;
+    }
+
+    for (int idx = 0; idx < viv->n_sb_blocks; idx++) {
+        VIV_SB_block *blk = &viv->sb_blocks[idx];
+
+        /* offsets are the totals of all preceding blocks */
+        blk->byte_offset   = byte_pos;
+        blk->packet_offset = packet_pos;
+
+        blk->size      = ffio_read_varlen(pb);
+        blk->n_packets = ffio_read_varlen(pb);
+
+        byte_pos   += blk->size;
+        packet_pos += blk->n_packets;
+
+        if (most_packets < blk->n_packets)
+            most_packets = blk->n_packets;
+    }
+
+    /* one entry slot per packet of the largest block */
+    viv->sb_entries = av_calloc(most_packets, sizeof(VIV_SB_entry));
+    av_free(pb);
+}
+
+/* Load the next SB block from s->pb: drop the previous block buffer and its
+ * reader, decode the new block and parse its per-packet entry table into
+ * viv->sb_entries. Returns silently, leaving viv->sb_pb NULL, on failure. */
+static void load_sb_block(AVFormatContext *s, VividasDemuxContext *viv, unsigned expected_size)
+{
+    uint32_t block_size = 0;
+    AVIOContext *reader;
+
+    if (viv->sb_pb) {
+        av_free(viv->sb_pb);
+        viv->sb_pb = NULL;
+    }
+    if (viv->sb_buf)
+        av_free(viv->sb_buf);
+
+    viv->sb_buf = read_sb_block(s->pb, &block_size, &viv->sb_key, expected_size);
+    if (!viv->sb_buf)
+        return;
+
+    reader = avio_alloc_context(viv->sb_buf, block_size, 0, NULL, NULL, NULL, NULL);
+    if (!reader)
+        return;
+    viv->sb_pb = reader;
+
+    avio_r8(reader); //  'S'
+    avio_r8(reader); //  'B'
+    ffio_read_varlen(reader); //  size
+    avio_r8(reader); //  junk
+    ffio_read_varlen(reader); // first packet
+
+    /* one (size, flag) pair per packet that the index announced for this block */
+    viv->n_sb_entries = viv->sb_blocks[viv->current_sb].n_packets;
+    for (int e = 0; e < viv->n_sb_entries; e++) {
+        VIV_SB_entry *entry = &viv->sb_entries[e];
+
+        entry->size = ffio_read_varlen(reader);
+        entry->flag = avio_r8(reader);
+    }
+
+    ffio_read_varlen(reader);
+    avio_r8(reader);
+    viv->current_sb_entry = 0;
+}
+
+/* Parse the container header: skip the 9-byte prefix, decode the stream key,
+ * walk the header blocks (remembering the key of a type-22 block), then
+ * decode the track header and track index and load the first SB block. */
+static int viv_read_header(AVFormatContext *s)
+{
+    VividasDemuxContext *viv = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int64_t header_end;
+    int num_tracks;
+    uint32_t key, k2, v;
+    uint8_t keybuffer[187];
+    uint32_t b22_size = 0, b22_key = 0;
+    uint8_t *buf = 0;
+
+    avio_skip(pb, 9);
+    header_end = avio_tell(pb);
+    header_end += ffio_read_varlen(pb);
+
+    num_tracks = avio_r8(pb);
+    if (num_tracks != 1) {
+        av_log(s, AV_LOG_ERROR, "number of tracks %d is not 1\n", num_tracks);
+        return AVERROR(EINVAL);
+    }
+
+    v = avio_r8(pb);
+    avio_seek(pb, v, SEEK_CUR);
+
+    /* a short read would leave keybuffer partly uninitialized */
+    if (avio_read(pb, keybuffer, 187) != 187)
+        return AVERROR_INVALIDDATA;
+    key = decode_key(keybuffer);
+    viv->sb_key = key;
+    avio_rl32(pb);
+
+    for (;;) {
+        int64_t here = avio_tell(pb);
+        int block_len, block_type;
+
+        if (here >= header_end)
+            break;
+
+        block_len = ffio_read_varlen(pb);
+        /* a non-positive length would seek back to (or before) 'here' and
+         * never terminate; EOF means the header is truncated */
+        if (avio_feof(pb) || block_len <= 0)
+            return AVERROR_INVALIDDATA;
+        block_type = avio_r8(pb);
+        if (block_type == 22) {
+            if (avio_read(pb, keybuffer, 187) != 187)
+                return AVERROR_INVALIDDATA;
+            b22_key = decode_key(keybuffer);
+            b22_size = avio_rl32(pb);
+        }
+        avio_seek(pb, here + block_len, SEEK_SET);
+    }
+
+    if (b22_size) {
+        k2 = b22_key;
+        buf = read_vblock(pb, &v, b22_key, &k2, 0);
+        if (!buf)
+            return AVERROR(EIO);
+        av_free(buf);
+    }
+
+    k2 = key;
+    buf = read_vblock(pb, &v, key, &k2, 0);
+    if (!buf)
+        return AVERROR(EIO);
+    track_header(viv, s, buf, v);
+    av_free(buf);
+
+    buf = read_vblock(pb, &v, key, &k2, v);
+    if (!buf)
+        return AVERROR(EIO);
+    track_index(viv, s, buf, v);
+    av_free(buf);
+
+    viv->sb_offset = avio_tell(pb);
+    if (viv->n_sb_blocks > 0) {
+        viv->current_sb = 0;
+        load_sb_block(s, viv, viv->sb_blocks[0].size);
+    } else {
+        viv->current_sb = -1;
+    }
+    return 0;
+}
+
+/* Return the next packet. Audio subpackets queued by the previous flag-0
+ * entry are emitted first on stream 1; otherwise the next SB entry becomes
+ * a packet on stream 0, loading the following SB block when the current one
+ * is exhausted. */
+static int viv_read_packet(AVFormatContext *s,
+                           AVPacket *pkt)
+{
+    VividasDemuxContext *viv = s->priv_data;
+    AVIOContext *pb;
+    uint64_t v_size;
+    int64_t off;
+    int has_audio;
+    int ret;
+
+    if (!viv->sb_pb)
+        return AVERROR(EIO);
+    if (avio_feof(viv->sb_pb))
+        return AVERROR_EOF;
+
+    if (viv->current_audio_subpacket < viv->n_audio_subpackets) {
+        AVStream *astream;
+        int size = viv->audio_subpackets[viv->current_audio_subpacket+1].start - viv->audio_subpackets[viv->current_audio_subpacket].start;
+
+        /* audio packets go to stream 1, which must exist */
+        if (s->nb_streams < 2)
+            return AVERROR_INVALIDDATA;
+        pb = viv->sb_pb;
+        ret = av_get_packet(pb, pkt, size);
+        if (ret < 0)
+            return ret;
+        pkt->pos += viv->sb_offset + viv->sb_blocks[viv->current_sb].byte_offset;
+
+        pkt->stream_index = 1;
+        astream = s->streams[pkt->stream_index];
+
+        pkt->pts = av_rescale(viv->audio_sample, astream->time_base.den, astream->time_base.num) / astream->codecpar->sample_rate;
+        viv->audio_sample += viv->audio_subpackets[viv->current_audio_subpacket].pcm_bytes / 2 / astream->codecpar->channels;
+        pkt->flags |= AV_PKT_FLAG_KEY;
+        viv->current_audio_subpacket++;
+        return 0;
+    }
+
+    if (viv->current_sb_entry >= viv->n_sb_entries) {
+        if (viv->current_sb+1 >= viv->n_sb_blocks)
+            return AVERROR(EIO);
+        viv->current_sb++;
+
+        load_sb_block(s, viv, 0);
+        viv->current_sb_entry = 0;
+    }
+
+    pb = viv->sb_pb;
+    if (!pb)
+        return AVERROR(EIO);
+    off = avio_tell(pb);
+    off += viv->sb_entries[viv->current_sb_entry].size;
+
+    /* flag-0 entries carry one extra varlen before the payload and an audio
+     * subpacket table after it */
+    has_audio = viv->sb_entries[viv->current_sb_entry].flag == 0;
+
+    v_size = ffio_read_varlen(pb);
+    if (has_audio)
+        ffio_read_varlen(pb);
+    /* an empty packet has no data[0] to take the keyframe bit from */
+    if (!v_size || v_size > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    ret = av_get_packet(pb, pkt, v_size);
+    if (ret < 0)
+        return ret;
+    pkt->pos += viv->sb_offset + viv->sb_blocks[viv->current_sb].byte_offset;
+    pkt->pts = viv->sb_blocks[viv->current_sb].packet_offset + viv->current_sb_entry;
+    pkt->flags |= (pkt->data[0] & 0x80) ? 0 : AV_PKT_FLAG_KEY;
+    pkt->stream_index = 0;
+
+    if (has_audio) {
+        /* (start, pcm_bytes) pairs; a zero start after the first ends the list */
+        for (int i = 0; i < MAX_AUDIO_SUBPACKETS - 1; i++) {
+            int start, pcm_bytes;
+            start = ffio_read_varlen(pb);
+            pcm_bytes = ffio_read_varlen(pb);
+            if (i > 0 && start == 0)
+                break;
+
+            viv->n_audio_subpackets = i + 1;
+            viv->audio_subpackets[i].start = start;
+            viv->audio_subpackets[i].pcm_bytes = pcm_bytes;
+        }
+        viv->audio_subpackets[viv->n_audio_subpackets].start = (int)(off - avio_tell(pb));
+        viv->current_audio_subpacket = 0;
+    }
+
+    viv->current_sb_entry++;
+    return 0;
+}
+
+static int viv_read_close(AVFormatContext *s)
+{
+    VividasDemuxContext *const ctx = s->priv_data;
+
+    /* release everything track_index() and load_sb_block() allocated */
+    av_freep(&ctx->sb_entries);
+    av_freep(&ctx->sb_blocks);
+    av_freep(&ctx->sb_buf);
+    av_freep(&ctx->sb_pb);
+    return 0;
+}
+
+/* Jump to the SB block holding the requested frame and guess the matching
+ * audio position. Returns 1 when a block was selected, 0 otherwise. */
+static int viv_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags)
+{
+    VividasDemuxContext *viv = s->priv_data;
+    int64_t frame = timestamp;
+
+    /* timestamp is in stream_index units; convert it to stream 0 units */
+    if (stream_index != 0)
+        frame = av_rescale_q(timestamp, s->streams[stream_index]->time_base, s->streams[0]->time_base);
+    for (int i = 0; i < viv->n_sb_blocks; i++) {
+        if (frame >= viv->sb_blocks[i].packet_offset && frame < viv->sb_blocks[i].packet_offset + viv->sb_blocks[i].n_packets) {
+            // flush audio packet queue
+            viv->current_audio_subpacket = 0;
+            viv->n_audio_subpackets = 0;
+            viv->current_sb = i;
+            // seek to ith sb block
+            avio_seek(s->pb, viv->sb_offset + viv->sb_blocks[i].byte_offset, SEEK_SET);
+            // load the block
+            load_sb_block(s, viv, 0);
+            // most problematic part: guess audio offset
+            viv->audio_sample = av_rescale_q(viv->sb_blocks[i].packet_offset, av_make_q(s->streams[1]->codecpar->sample_rate, 1), av_inv_q(s->streams[0]->time_base));
+            // hand-tuned 1.s a/v offset
+            viv->audio_sample += s->streams[1]->codecpar->sample_rate;
+            viv->current_sb_entry = 0;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+AVInputFormat ff_vividas_demuxer = {
+    .name           = "vividas",
+    .long_name      = NULL_IF_CONFIG_SMALL("Vividas VIV"),
+    .priv_data_size = sizeof(VividasDemuxContext), /* state the callbacks read back via s->priv_data */
+    .read_probe     = viv_probe,
+    .read_header    = viv_read_header, /* also loads the first SB block */
+    .read_packet    = viv_read_packet,
+    .read_close     = viv_read_close,
+    .read_seek      = viv_read_seek, /* lands on the start of an SB block */
+};
diff --git a/libavformat/vivo.c b/libavformat/vivo.c
index c9e9c37f37dfb..9b9189f307ea5 100644
--- a/libavformat/vivo.c
+++ b/libavformat/vivo.c
@@ -166,7 +166,7 @@ static int vivo_read_header(AVFormatContext *s)
             value = strchr(key, ':');
             if (!value) {
                 av_log(s, AV_LOG_WARNING, "missing colon in key:value pair '%s'\n",
-                       value);
+                       key);
                 continue;
             }
 
diff --git a/libavformat/vorbiscomment.c b/libavformat/vorbiscomment.c
index 575dd133287d9..fb5c655a23f50 100644
--- a/libavformat/vorbiscomment.c
+++ b/libavformat/vorbiscomment.c
@@ -38,10 +38,21 @@ const AVMetadataConv ff_vorbiscomment_metadata_conv[] = {
     { 0 }
 };
 
-int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string)
+int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string,
+                                AVChapter **chapters, unsigned int nb_chapters)
 {
     int64_t len = 8;
     len += strlen(vendor_string);
+    if (chapters && nb_chapters) {
+        for (int i = 0; i < nb_chapters; i++) {
+            AVDictionaryEntry *tag = NULL;
+            len += 4 + 12 + 1 + 10;
+            while ((tag = av_dict_get(chapters[i]->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
+                int64_t len1 = !strcmp(tag->key, "title") ? 4 : strlen(tag->key);
+                len += 4 + 10 + len1 + 1 + strlen(tag->value);
+            }
+        }
+    }
     if (m) {
         AVDictionaryEntry *tag = NULL;
         while ((tag = av_dict_get(m, "", tag, AV_DICT_IGNORE_SUFFIX))) {
@@ -52,12 +63,19 @@ int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string)
 }
 
 int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m,
-                           const char *vendor_string)
+                           const char *vendor_string,
+                           AVChapter **chapters, unsigned int nb_chapters)
 {
+    int cm_count = 0;
     bytestream_put_le32(p, strlen(vendor_string));
     bytestream_put_buffer(p, vendor_string, strlen(vendor_string));
+    if (chapters && nb_chapters) {
+        for (int i = 0; i < nb_chapters; i++) {
+            cm_count += av_dict_count(chapters[i]->metadata) + 1;
+        }
+    }
     if (*m) {
-        int count = av_dict_count(*m);
+        int count = av_dict_count(*m) + cm_count;
         AVDictionaryEntry *tag = NULL;
         bytestream_put_le32(p, count);
         while ((tag = av_dict_get(*m, "", tag, AV_DICT_IGNORE_SUFFIX))) {
@@ -70,6 +88,42 @@ int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m,
             bytestream_put_byte(p, '=');
             bytestream_put_buffer(p, tag->value, len2);
         }
+        for (int i = 0; i < nb_chapters; i++) {
+            AVChapter *chp = chapters[i];
+            char chapter_time[13];
+            char chapter_number[4];
+            int h, m, s, ms;
+
+            s  = av_rescale(chp->start, chp->time_base.num, chp->time_base.den);
+            h  = s / 3600;
+            m  = (s / 60) % 60;
+            ms = av_rescale_q(chp->start, chp->time_base, av_make_q(   1, 1000)) % 1000;
+            s  = s % 60;
+            snprintf(chapter_number, sizeof(chapter_number), "%03d", i);
+            snprintf(chapter_time, sizeof(chapter_time), "%02d:%02d:%02d.%03d", h, m, s, ms);
+            bytestream_put_le32(p, 10+1+12);
+            bytestream_put_buffer(p, "CHAPTER", 7);
+            bytestream_put_buffer(p, chapter_number, 3);
+            bytestream_put_byte(p, '=');
+            bytestream_put_buffer(p, chapter_time, 12);
+
+            tag = NULL;
+            while ((tag = av_dict_get(chapters[i]->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
+                int64_t len1 = !strcmp(tag->key, "title") ? 4 : strlen(tag->key);
+                int64_t len2 = strlen(tag->value);
+                if (len1+1+len2+10 > UINT32_MAX)
+                    return AVERROR(EINVAL);
+                bytestream_put_le32(p, 10+len1+1+len2);
+                bytestream_put_buffer(p, "CHAPTER", 7);
+                bytestream_put_buffer(p, chapter_number, 3);
+                if (!strcmp(tag->key, "title"))
+                    bytestream_put_buffer(p, "NAME", 4);
+                else
+                    bytestream_put_buffer(p, tag->key, len1);
+                bytestream_put_byte(p, '=');
+                bytestream_put_buffer(p, tag->value, len2);
+            }
+        }
     } else
         bytestream_put_le32(p, 0);
     return 0;
diff --git a/libavformat/vorbiscomment.h b/libavformat/vorbiscomment.h
index e0d30b14a7b95..4ff3dd6c278ab 100644
--- a/libavformat/vorbiscomment.h
+++ b/libavformat/vorbiscomment.h
@@ -34,7 +34,8 @@
  * For no string, set to an empty string.
  * @return The length in bytes.
  */
-int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string);
+int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string,
+                                AVChapter **chapters, unsigned int nb_chapters);
 
 /**
  * Write a VorbisComment into a buffer. The buffer, p, must have enough
@@ -45,9 +46,12 @@ int64_t ff_vorbiscomment_length(AVDictionary *m, const char *vendor_string);
  * @param p The buffer in which to write.
  * @param m The metadata struct to write.
  * @param vendor_string The vendor string to write.
+ * @param chapters The chapters to write.
+ * @param nb_chapters The number of chapters to write.
  */
 int ff_vorbiscomment_write(uint8_t **p, AVDictionary **m,
-                           const char *vendor_string);
+                           const char *vendor_string,
+                           AVChapter **chapters, unsigned int nb_chapters);
 
 extern const AVMetadataConv ff_vorbiscomment_metadata_conv[];
 
diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
index e280be4d44e65..6a024de683a90 100644
--- a/libavformat/wavdec.c
+++ b/libavformat/wavdec.c
@@ -34,6 +34,7 @@
 #include "avformat.h"
 #include "avio.h"
 #include "avio_internal.h"
+#include "id3v2.h"
 #include "internal.h"
 #include "metadata.h"
 #include "pcm.h"
@@ -500,6 +501,18 @@ static int wav_read_header(AVFormatContext *s)
                 ff_read_riff_info(s, size - 4);
             }
             break;
+        case MKTAG('I', 'D', '3', ' '):
+        case MKTAG('i', 'd', '3', ' '): {
+            ID3v2ExtraMeta *id3v2_extra_meta = NULL;
+            ff_id3v2_read_dict(pb, &s->internal->id3v2_meta, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);
+            if (id3v2_extra_meta) {
+                ff_id3v2_parse_apic(s, &id3v2_extra_meta);
+                ff_id3v2_parse_chapters(s, &id3v2_extra_meta);
+                ff_id3v2_parse_priv(s, &id3v2_extra_meta);
+            }
+            ff_id3v2_free_extra_meta(&id3v2_extra_meta);
+            }
+            break;
         }
 
         /* seek to next tag unless we know that we'll run into EOF */
diff --git a/libavformat/webmdashenc.c b/libavformat/webmdashenc.c
index 1280d8a763bee..26b87273048fe 100644
--- a/libavformat/webmdashenc.c
+++ b/libavformat/webmdashenc.c
@@ -466,6 +466,7 @@ static int parse_adaptation_sets(AVFormatContext *s)
             continue;
         else if (state == new_set && !strncmp(p, "id=", 3)) {
             void *mem = av_realloc(w->as, sizeof(*w->as) * (w->nb_as + 1));
+            const char *comma;
             if (mem == NULL)
                 return AVERROR(ENOMEM);
             w->as = mem;
@@ -474,6 +475,11 @@ static int parse_adaptation_sets(AVFormatContext *s)
             w->as[w->nb_as - 1].streams = NULL;
             p += 3; // consume "id="
             q = w->as[w->nb_as - 1].id;
+            comma = strchr(p, ',');
+            if (!comma || comma - p >= sizeof(w->as[w->nb_as - 1].id)) {
+                av_log(s, AV_LOG_ERROR, "'id' in 'adaptation_sets' is malformed.\n");
+                return AVERROR(EINVAL);
+            }
             while (*p != ',') *q++ = *p++;
             *q = 0;
             p++;
diff --git a/libavformat/wvdec.c b/libavformat/wvdec.c
index 82526563ec18a..ddcb9002cbb85 100644
--- a/libavformat/wvdec.c
+++ b/libavformat/wvdec.c
@@ -40,6 +40,7 @@ enum WV_FLAGS {
     WV_HBAL   = 0x0400,
     WV_MCINIT = 0x0800,
     WV_MCEND  = 0x1000,
+    WV_DSD    = 0x80000000,
 };
 
 static const int wv_rates[16] = {
@@ -97,6 +98,11 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb)
         return ret;
     }
 
+    if (wc->header.flags & WV_DSD) {
+        avpriv_report_missing_feature(ctx, "WV DSD");
+        return AVERROR_PATCHWELCOME;
+    }
+
     if (wc->header.version < 0x402 || wc->header.version > 0x410) {
         avpriv_report_missing_feature(ctx, "WV version 0x%03X",
                                       wc->header.version);
@@ -153,11 +159,18 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb)
                 case 3:
                     chmask = avio_rl32(pb);
                     break;
-                case 5:
+                case 4:
                     avio_skip(pb, 1);
                     chan  |= (avio_r8(pb) & 0xF) << 8;
+                    chan  += 1;
                     chmask = avio_rl24(pb);
                     break;
+                case 5:
+                    avio_skip(pb, 1);
+                    chan  |= (avio_r8(pb) & 0xF) << 8;
+                    chan  += 1;
+                    chmask = avio_rl32(pb);
+                    break;
                 default:
                     av_log(ctx, AV_LOG_ERROR,
                            "Invalid channel info size %d\n", size);
diff --git a/libavformat/yop.c b/libavformat/yop.c
index e6fd896668cf5..b4b5fb4e155e1 100644
--- a/libavformat/yop.c
+++ b/libavformat/yop.c
@@ -68,10 +68,6 @@ static int yop_read_header(AVFormatContext *s)
     if (!audio_stream || !video_stream)
         return AVERROR(ENOMEM);
 
-    // Extra data that will be passed to the decoder
-    if (ff_alloc_extradata(video_stream->codecpar, 8))
-        return AVERROR(ENOMEM);
-
     // Audio
     audio_par                 = audio_stream->codecpar;
     audio_par->codec_type     = AVMEDIA_TYPE_AUDIO;
@@ -94,9 +90,9 @@ static int yop_read_header(AVFormatContext *s)
 
     video_stream->sample_aspect_ratio = (AVRational){1, 2};
 
-    ret = avio_read(pb, video_par->extradata, 8);
-    if (ret < 8)
-        return ret < 0 ? ret : AVERROR_EOF;
+    ret = ff_get_extradata(s, video_par, pb, 8);
+    if (ret < 0)
+        return ret;
 
     yop->palette_size       = video_par->extradata[0] * 3 + 4;
     yop->audio_block_length = AV_RL16(video_par->extradata + 6);
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 9ed24cfc822d8..53208fc58774a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -31,6 +31,7 @@ HEADERS = adler32.h                                                     \
           file.h                                                        \
           frame.h                                                       \
           hash.h                                                        \
+          hdr_dynamic_metadata.h                                        \
           hmac.h                                                        \
           hwcontext.h                                                   \
           hwcontext_cuda.h                                              \
@@ -94,6 +95,7 @@ OBJS = adler32.o                                                        \
        aes_ctr.o                                                        \
        audio_fifo.o                                                     \
        avstring.o                                                       \
+       avsscanf.o                                                       \
        base64.o                                                         \
        blowfish.o                                                       \
        bprint.o                                                         \
@@ -118,6 +120,7 @@ OBJS = adler32.o                                                        \
        fixed_dsp.o                                                      \
        frame.o                                                          \
        hash.o                                                           \
+       hdr_dynamic_metadata.o                                           \
        hmac.o                                                           \
        hwcontext.o                                                      \
        imgutils.o                                                       \
@@ -175,7 +178,8 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
 SLIBOBJS-$(HAVE_GNU_WINDRES)            += avutilres.o
 
 SKIPHEADERS-$(HAVE_CUDA_H)             += hwcontext_cuda.h
-SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h
+SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h     \
+                                          cuda_check.h
 SKIPHEADERS-$(CONFIG_D3D11VA)          += hwcontext_d3d11va.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
 SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index fd32bf784e095..5c329430fdefc 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -63,6 +63,8 @@ ELF     .size   \name, . - \name
 .else
         .section        .rodata
 .endif
+#elif defined(_WIN32)
+        .section        .rdata
 #elif !defined(__MACH__)
         .section        .rodata
 #else
diff --git a/libavutil/arm/asm.S b/libavutil/arm/asm.S
index 6744f2a200490..e3a8c7f065831 100644
--- a/libavutil/arm/asm.S
+++ b/libavutil/arm/asm.S
@@ -46,6 +46,12 @@
 #   define FPU @
 #endif
 
+#if CONFIG_THUMB && defined(__APPLE__)
+#   define TFUNC
+#else
+#   define TFUNC @
+#endif
+
 #if HAVE_AS_ARCH_DIRECTIVE
 #if   HAVE_NEON
         .arch           armv7-a
@@ -98,10 +104,12 @@ FUNC    .endfunc
         .global EXTERN_ASM\name
 ELF     .type   EXTERN_ASM\name, %function
 FUNC    .func   EXTERN_ASM\name
+TFUNC   .thumb_func EXTERN_ASM\name
 EXTERN_ASM\name:
     .else
 ELF     .type   \name, %function
 FUNC    .func   \name
+TFUNC   .thumb_func \name
 \name:
     .endif
 .endm
@@ -117,6 +125,8 @@ ELF     .size   \name, . - \name
 .else
         .section        .rodata
 .endif
+#elif defined(_WIN32)
+        .section        .rdata
 #elif !defined(__MACH__)
         .section        .rodata
 #else
diff --git a/libavutil/avsscanf.c b/libavutil/avsscanf.c
new file mode 100644
index 0000000000000..1c85412fd4d59
--- /dev/null
+++ b/libavutil/avsscanf.c
@@ -0,0 +1,970 @@
+/*
+ * Copyright (c) 2005-2014 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <float.h>
+
+#include "config.h"
+#include "common.h"
+#include "mem.h"
+#include "avassert.h"
+#include "avstring.h"
+#include "bprint.h"
+
+typedef struct FFFILE {
+    size_t buf_size; /* added to buf by fftoread() to find the buffer end */
+    unsigned char *buf; /* buffer base; scan counts are measured from here */
+    unsigned char *rpos, *rend; /* next byte to read / end of readable data */
+    unsigned char *shend; /* where shgetc() leaves its fast path; 0 after a failed fetch */
+    ptrdiff_t shlim, shcnt; /* scan limit (0 = none) / count bias used by shcnt() */
+    void *cookie; /* ffstring_read() keeps its source pointer here */
+    size_t (*read)(struct FFFILE *, unsigned char *, size_t); /* refill callback invoked by ffuflow() */
+} FFFILE;
+
+#define SIZE_hh -2 /* NOTE(review): presumably scanf length modifiers; their users are outside this excerpt */
+#define SIZE_h  -1
+#define SIZE_def 0
+#define SIZE_l   1
+#define SIZE_L   2
+#define SIZE_ll  3
+
+#define shcnt(f) ((f)->shcnt + ((f)->rpos - (f)->buf)) /* bytes consumed since the last ffshlim() */
+
+static int fftoread(FFFILE *f)
+{
+    f->rpos = f->rend = f->buf + f->buf_size; /* empty read window: both pointers at the buffer end */
+    return 0; /* always reports success */
+}
+
+/* FFFILE read callback: copy up to len bytes from the string at f->cookie. */
+static size_t ffstring_read(FFFILE *f, unsigned char *buf, size_t len)
+{
+    char *str = f->cookie;
+    size_t avail = len + 256;        /* look a little past the request */
+    char *nul = memchr(str, 0, avail);
+    if (nul)
+        avail = nul - str;           /* stop the window at the terminator */
+    len = avail < len ? avail : len;
+    memcpy(buf, str, len);
+    f->rpos   = (void *)(str + len);
+    f->rend   = (void *)(str + avail);
+    f->cookie = str + avail;
+    return len;
+}
+
+static int ffuflow(FFFILE *f)
+{
+    unsigned char ch;
+    /* reset the read window, then pull exactly one byte through the callback */
+    return (!fftoread(f) && f->read(f, &ch, 1) == 1) ? ch : EOF;
+}
+
+/* Start a scan of at most lim bytes (0 = no limit) at the current position. */
+static void ffshlim(FFFILE *f, ptrdiff_t lim)
+{
+    const int capped = lim && f->rend - f->rpos > lim;
+
+    f->shlim = lim;
+    f->shcnt = f->buf - f->rpos;
+    /* rend must be a valid pointer whenever lim is nonzero */
+    f->shend = capped ? f->rpos + lim : f->rend;
+}
+
+static int ffshgetc(FFFILE *f)
+{
+    int c;
+    ptrdiff_t cnt = shcnt(f); /* sampled before ffuflow() moves rpos */
+    if (f->shlim && cnt >= f->shlim || (c=ffuflow(f)) < 0) { /* limit reached, or no more input */
+        f->shcnt = f->buf - f->rpos + cnt; /* rebase so shcnt(f) still equals cnt */
+        f->shend = 0; /* turns shunget() into a no-op */
+        return EOF;
+    }
+    cnt++; /* account for the byte just fetched */
+    if (f->shlim && f->rend - f->rpos > f->shlim - cnt)
+        f->shend = f->rpos + (f->shlim - cnt); /* fast path ends where the limit does */
+    else
+        f->shend = f->rend;
+    f->shcnt = f->buf - f->rpos + cnt; /* rebase so shcnt(f) equals cnt */
+    if (f->rpos[-1] != c) f->rpos[-1] = c; /* byte before rpos is written only when it differs from c */
+    return c;
+}
+
+#define shlim(f, lim) ffshlim((f), (lim)) /* begin a scan limited to lim bytes (0 = no limit) */
+#define shgetc(f) (((f)->rpos != (f)->shend) ? *(f)->rpos++ : ffshgetc(f)) /* next byte straight from the buffer, else via ffshgetc() */
+#define shunget(f) ((f)->shend ? (void)(f)->rpos-- : (void)0) /* step back one byte unless shend was cleared */
+
+static const unsigned char table[] = { -1, /* digit values; ffintscan() indexes table+1, so this slot serves c == -1; -1 is stored as 255 */
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, /* '0'..'9' */
+    -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, /* 'A'.. */
+    25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, /* ..'Z' */
+    -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, /* 'a'.. */
+    25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, /* ..'z' */
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+};
+
+static unsigned long long ffintscan(FFFILE *f, unsigned base, int pok, unsigned long long lim) /* scan an integer in the given base, clamped against lim */
+{
+    const unsigned char *val = table+1; /* so val[c] is also valid for c == -1 */
+    int c, neg=0;
+    unsigned x;
+    unsigned long long y;
+    if (base > 36 || base == 1) { /* only 0 (auto-detect) and 2..36 are accepted */
+        errno = EINVAL;
+        return 0;
+    }
+    while (av_isspace((c=shgetc(f)))); /* skip leading whitespace */
+    if (c=='+' || c=='-') {
+        neg = -(c=='-'); /* 0 or -1; applied as (y^neg)-neg at the end */
+        c = shgetc(f);
+    }
+    if ((base == 0 || base == 16) && c=='0') {
+        c = shgetc(f);
+        if ((c|32)=='x') { /* 'x' or 'X' */
+            c = shgetc(f);
+            if (val[c]>=16) { /* "0x" not followed by a hex digit */
+                shunget(f);
+                if (pok) shunget(f); /* pok: also give back the 'x' */
+                else shlim(f, 0);
+                return 0;
+            }
+            base = 16;
+        } else if (base == 0) {
+            base = 8; /* leading 0 without x selects octal */
+        }
+    } else {
+        if (base == 0) base = 10;
+        if (val[c] >= base) { /* first character is not a digit of this base */
+            shunget(f);
+            shlim(f, 0);
+            errno = EINVAL;
+            return 0;
+        }
+    }
+    if (base == 10) {
+        for (x=0; c-'0'<10U && x<=UINT_MAX/10-1; c=shgetc(f)) /* accumulate in unsigned while it cannot overflow */
+            x = x*10 + (c-'0');
+        for (y=x; c-'0'<10U && y<=ULLONG_MAX/10 && 10*y<=ULLONG_MAX-(c-'0'); c=shgetc(f)) /* continue in unsigned long long */
+            y = y*10 + (c-'0');
+        if (c-'0'>=10U) goto done; /* stopped on a non-digit: no overflow */
+    } else if (!(base & base-1)) { /* power-of-two base: shift instead of multiply */
+        int bs = "\0\1\2\4\7\3\6\5"[(0x17*base)>>5&7]; /* lookup yielding log2(base) for 2, 4, 8, 16, 32 */
+        for (x=0; val[c]<base && x<=UINT_MAX/32; c=shgetc(f))
+            x = x<<bs | val[c];
+        for (y=x; val[c]<base && y<=ULLONG_MAX>>bs; c=shgetc(f))
+            y = y<<bs | val[c];
+    } else {
+        for (x=0; val[c]<base && x<=UINT_MAX/36-1; c=shgetc(f))
+            x = x*base + val[c];
+        for (y=x; val[c]<base && y<=ULLONG_MAX/base && base*y<=ULLONG_MAX-val[c]; c=shgetc(f))
+            y = y*base + val[c];
+    }
+    if (val[c]<base) { /* digits remain, so the loops above stopped on overflow */
+        for (; val[c]<base; c=shgetc(f)); /* consume the rest of the number */
+        errno = ERANGE;
+        y = lim;
+        if (lim&1) neg = 0; /* NOTE(review): odd lim looks like an unsigned maximum; confirm with callers */
+    }
+done:
+    shunget(f); /* give back the first character that is not part of the number */
+    if (y>=lim) {
+        if (!(lim&1) && !neg) { /* even lim, non-negative input: clamp to lim-1 */
+            errno = ERANGE;
+            return lim-1;
+        } else if (y>lim) {
+            errno = ERANGE;
+            return lim;
+        }
+    }
+    return (y^neg)-neg; /* negates y when neg is -1 */
+}
+
+static long long scanexp(FFFILE *f, int pok) /* scan an optionally signed decimal number; LLONG_MIN when no digit is found */
+{
+    int c;
+    int x;
+    long long y;
+    int neg = 0;
+
+    c = shgetc(f);
+    if (c=='+' || c=='-') {
+        neg = (c=='-');
+        c = shgetc(f);
+        if (c-'0'>=10U && pok) shunget(f); /* sign without digits: with pok, push back one extra character */
+    }
+    if (c-'0'>=10U) { /* no digit at all */
+        shunget(f);
+        return LLONG_MIN;
+    }
+    for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f)) /* accumulate in int first */
+        x = 10*x + c-'0';
+    for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f)) /* then in long long, stopping short of overflow */
+        y = 10*y + c-'0';
+    for (; c-'0'<10U; c = shgetc(f)); /* remaining digits are consumed but not accumulated */
+    shunget(f);
+    return neg ? -y : y;
+}
+
+#define LD_B1B_DIG 2 /* decfloat() scales until 9*LD_B1B_DIG decimal digits sit left of the radix point */
+#define LD_B1B_MAX 9007199, 254740991 /* 9007199254740991 (2^53 - 1) split into 9-decimal-digit words */
+#define KMAX 128 /* number of uint32_t words in decfloat()'s digit buffer */
+#define MASK (KMAX-1) /* wraps indices into that buffer */
+
+static double decfloat(FFFILE *f, int c, int bits, int emin, int sign, int pok)
+{
+    uint32_t x[KMAX];
+    static const uint32_t th[] = { LD_B1B_MAX };
+    int i, j, k, a, z;
+    long long lrp=0, dc=0;
+    long long e10=0;
+    int lnz = 0;
+    int gotdig = 0, gotrad = 0;
+    int rp;
+    int e2;
+    int emax = -emin-bits+3;
+    int denormal = 0;
+    double y;
+    double frac=0;
+    double bias=0;
+    static const int p10s[] = { 10, 100, 1000, 10000,
+        100000, 1000000, 10000000, 100000000 };
+
+    j=0;
+    k=0;
+
+    /* Don't let leading zeros consume buffer space */
+    for (; c=='0'; c = shgetc(f)) gotdig=1;
+    if (c=='.') {
+        gotrad = 1;
+        for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--;
+    }
+
+    x[0] = 0;
+    for (; c-'0'<10U || c=='.'; c = shgetc(f)) {
+        if (c == '.') {
+            if (gotrad) break;
+            gotrad = 1;
+            lrp = dc;
+        } else if (k < KMAX-3) {
+            dc++;
+            if (c!='0') lnz = dc;
+            if (j) x[k] = x[k]*10 + c-'0';
+            else x[k] = c-'0';
+            if (++j==9) {
+                k++;
+                j=0;
+            }
+            gotdig=1;
+        } else {
+            dc++;
+            if (c!='0') {
+                lnz = (KMAX-4)*9;
+                x[KMAX-4] |= 1;
+            }
+        }
+    }
+    if (!gotrad) lrp=dc;
+
+    if (gotdig && (c|32)=='e') {
+        e10 = scanexp(f, pok);
+        if (e10 == LLONG_MIN) {
+            if (pok) {
+                shunget(f);
+            } else {
+                shlim(f, 0);
+                return 0;
+            }
+            e10 = 0;
+        }
+        lrp += e10;
+    } else if (c>=0) {
+        shunget(f);
+    }
+    if (!gotdig) {
+        errno = EINVAL;
+        shlim(f, 0);
+        return 0;
+    }
+
+    /* Handle zero specially to avoid nasty special cases later */
+    if (!x[0]) return sign * 0.0;
+
+    /* Optimize small integers (w/no exponent) and over/under-flow */
+    if (lrp==dc && dc<10 && (bits>30 || x[0]>>bits==0))
+        return sign * (double)x[0];
+    if (lrp > -emin/2) {
+        errno = ERANGE;
+        return sign * DBL_MAX * DBL_MAX;
+    }
+    if (lrp < emin-2*DBL_MANT_DIG) {
+        errno = ERANGE;
+        return sign * DBL_MIN * DBL_MIN;
+    }
+
+    /* Align incomplete final B1B digit */
+    if (j) {
+        for (; j<9; j++) x[k]*=10;
+        k++;
+        j=0;
+    }
+
+    a = 0;
+    z = k;
+    e2 = 0;
+    rp = lrp;
+
+    /* Optimize small to mid-size integers (even in exp. notation) */
+    if (lnz<9 && lnz<=rp && rp < 18) {
+        int bitlim;
+        if (rp == 9) return sign * (double)x[0];
+        if (rp < 9) return sign * (double)x[0] / p10s[8-rp];
+        bitlim = bits-3*(int)(rp-9);
+        if (bitlim>30 || x[0]>>bitlim==0)
+            return sign * (double)x[0] * p10s[rp-10];
+    }
+
+    /* Drop trailing zeros */
+    for (; !x[z-1]; z--);
+
+    /* Align radix point to B1B digit boundary */
+    if (rp % 9) {
+        int rpm9 = rp>=0 ? rp%9 : rp%9+9;
+        int p10 = p10s[8-rpm9];
+        uint32_t carry = 0;
+        for (k=a; k!=z; k++) {
+            uint32_t tmp = x[k] % p10;
+            x[k] = x[k]/p10 + carry;
+            carry = 1000000000/p10 * tmp;
+            if (k==a && !x[k]) {
+                a = (a+1 & MASK);
+                rp -= 9;
+            }
+        }
+        if (carry) x[z++] = carry;
+        rp += 9-rpm9;
+    }
+
+    /* Upscale until desired number of bits are left of radix point */
+    while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[a]<th[0])) {
+        uint32_t carry = 0;
+        e2 -= 29;
+        for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+            uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+            if (tmp > 1000000000) {
+                carry = tmp / 1000000000;
+                x[k] = tmp % 1000000000;
+            } else {
+                carry = 0;
+                x[k] = tmp;
+            }
+            if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+            if (k==a) break;
+        }
+        if (carry) {
+            rp += 9;
+            a = (a-1 & MASK);
+            if (a == z) {
+                z = (z-1 & MASK);
+                x[z-1 & MASK] |= x[z];
+            }
+            x[a] = carry;
+        }
+    }
+
+    /* Downscale until exactly the desired number of bits are left of the radix point */
+    for (;;) {
+        uint32_t carry = 0;
+        int sh = 1;
+        for (i=0; i<LD_B1B_DIG; i++) {
+            k = (a+i & MASK);
+            if (k == z || x[k] < th[i]) {
+                i=LD_B1B_DIG;
+                break;
+            }
+            if (x[a+i & MASK] > th[i]) break;
+        }
+        if (i==LD_B1B_DIG && rp==9*LD_B1B_DIG) break;
+        /* FIXME: find a way to compute optimal sh */
+        if (rp > 9+9*LD_B1B_DIG) sh = 9;
+        e2 += sh;
+        for (k=a; k!=z; k=(k+1 & MASK)) {
+            uint32_t tmp = x[k] & (1<<sh)-1;
+            x[k] = (x[k]>>sh) + carry;
+            carry = (1000000000>>sh) * tmp;
+            if (k==a && !x[k]) {
+                a = (a+1 & MASK);
+                i--;
+                rp -= 9;
+            }
+        }
+        if (carry) {
+            if ((z+1 & MASK) != a) {
+                x[z] = carry;
+                z = (z+1 & MASK);
+            } else x[z-1 & MASK] |= 1;
+        }
+    }
+
+    /* Assemble desired bits into floating point variable */
+    for (y=i=0; i<LD_B1B_DIG; i++) {
+        if ((a+i & MASK)==z) x[(z=(z+1 & MASK))-1] = 0;
+        y = 1000000000.0L * y + x[a+i & MASK];
+    }
+
+    y *= sign;
+
+    /* Limit precision for denormal results */
+    if (bits > DBL_MANT_DIG+e2-emin) {
+        bits = DBL_MANT_DIG+e2-emin;
+        if (bits<0) bits=0;
+        denormal = 1;
+    }
+
+    /* Calculate bias term to force rounding, move out lower bits */
+    if (bits < DBL_MANT_DIG) {
+        bias = copysign(scalbn(1, 2*DBL_MANT_DIG-bits-1), y);
+        frac = fmod(y, scalbn(1, DBL_MANT_DIG-bits));
+        y -= frac;
+        y += bias;
+    }
+
+    /* Process tail of decimal input so it can affect rounding */
+    if ((a+i & MASK) != z) {
+        uint32_t t = x[a+i & MASK];
+        if (t < 500000000 && (t || (a+i+1 & MASK) != z))
+            frac += 0.25*sign;
+        else if (t > 500000000)
+            frac += 0.75*sign;
+        else if (t == 500000000) {
+            if ((a+i+1 & MASK) == z)
+                frac += 0.5*sign;
+            else
+                frac += 0.75*sign;
+        }
+        if (DBL_MANT_DIG-bits >= 2 && !fmod(frac, 1))
+            frac++;
+    }
+
+    y += frac;
+    y -= bias;
+
+    if ((e2+DBL_MANT_DIG & INT_MAX) > emax-5) {
+        if (fabs(y) >= pow(2, DBL_MANT_DIG)) {
+            if (denormal && bits==DBL_MANT_DIG+e2-emin)
+                denormal = 0;
+            y *= 0.5;
+            e2++;
+        }
+        if (e2+DBL_MANT_DIG>emax || (denormal && frac))
+            errno = ERANGE;
+    }
+
+    return scalbn(y, e2);
+}
+
+static double hexfloat(FFFILE *f, int bits, int emin, int sign, int pok) /* scan a hex significand and optional 'p' exponent from f; fffloatscan() calls this after consuming "0x" */
+{ /* bits: mantissa bits to keep; emin: lower exponent bound; sign: multiplier applied to the result; pok: push back on a bad exponent or missing digits instead of calling shlim(f, 0) */
+    uint32_t x = 0; /* value of the first 8 significant hex digits */
+    double y = 0; /* fractional contribution of the digits after the 8th */
+    double scale = 1; /* weight of the most recent digit added to y */
+    double bias = 0; /* rounding bias; stays 0 unless bits < DBL_MANT_DIG */
+    int gottail = 0, gotrad = 0, gotdig = 0; /* saw dropped nonzero digit / radix point / any digit */
+    long long rp = 0; /* radix point position, in digits, relative to the first significant digit */
+    long long dc = 0; /* number of significant digits read (leading zeros excluded) */
+    long long e2 = 0; /* binary exponent of the result */
+    int d; /* value of the current hex digit */
+    int c; /* current input character */
+
+    c = shgetc(f);
+
+    /* Skip leading zeros */
+    for (; c=='0'; c = shgetc(f))
+        gotdig = 1;
+
+    if (c=='.') {
+        gotrad = 1;
+        c = shgetc(f);
+        /* Count zeros after the radix point before significand */
+        for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1;
+    }
+
+    for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) { /* accept 0-9, a-f/A-F and '.' */
+        if (c=='.') {
+            if (gotrad) break; /* a second radix point ends the significand */
+            rp = dc;
+            gotrad = 1;
+        } else {
+            gotdig = 1;
+            if (c > '9') d = (c|32)+10-'a'; /* letter digit, case-folded with |32 */
+            else d = c-'0';
+            if (dc<8) {
+                x = x*16 + d; /* first 8 digits go into the 32-bit integer part */
+            } else if (dc < DBL_MANT_DIG/4+1) {
+                y += d*(scale/=16); /* further digits accumulate as a fraction below x */
+            } else if (d && !gottail) {
+                y += 0.5*scale; /* remember once that a nonzero digit was dropped, so it can still affect rounding */
+                gottail = 1;
+            }
+            dc++;
+        }
+    }
+    if (!gotdig) { /* no digit at all after the prefix */
+        shunget(f);
+        if (pok) {
+            shunget(f);
+            if (gotrad) shunget(f);
+        } else {
+            shlim(f, 0);
+        }
+        return sign * 0.0;
+    }
+    if (!gotrad) rp = dc; /* no radix point: it sits after the last digit */
+    while (dc<8) x *= 16, dc++; /* pad x to a full 8 hex digits (32 bits) */
+    if ((c|32)=='p') {
+        e2 = scanexp(f, pok);
+        if (e2 == LLONG_MIN) { /* scanexp() reported no usable exponent */
+            if (pok) {
+                shunget(f);
+            } else {
+                shlim(f, 0);
+                return 0;
+            }
+            e2 = 0;
+        }
+    } else {
+        shunget(f); /* c is not part of the number */
+    }
+    e2 += 4*rp - 32; /* 4 bits per digit before the radix point, minus the 32 bits held in x */
+
+    if (!x) return sign * 0.0;
+    if (e2 > -emin) {
+        errno = ERANGE;
+        return sign * DBL_MAX * DBL_MAX; /* product overflows on purpose */
+    }
+    if (e2 < emin-2*DBL_MANT_DIG) {
+        errno = ERANGE;
+        return sign * DBL_MIN * DBL_MIN; /* product underflows on purpose */
+    }
+
+    while (x < 0x80000000) { /* normalize: shift left until bit 31 of x is set, carrying the top bit of y into x */
+        if (y>=0.5) {
+            x += x + 1;
+            y += y - 1;
+        } else {
+            x += x;
+            y += y;
+        }
+        e2--;
+    }
+
+    if (bits > 32+e2-emin) { /* fewer bits are representable near emin: clamp bits to 32+e2-emin, but not below 0 */
+        bits = 32+e2-emin;
+        if (bits<0) bits=0;
+    }
+
+    if (bits < DBL_MANT_DIG)
+        bias = copysign(scalbn(1, 32+DBL_MANT_DIG-bits-1), sign); /* added and then subtracted below to force rounding to 'bits' bits */
+
+    if (bits<32 && y && !(x&1)) x++, y=0; /* fold a nonzero tail into the low bit of x when fewer than 32 bits are kept */
+
+    y = bias + sign*(double)x + sign*y;
+    y -= bias;
+
+    if (!y) errno = ERANGE; /* x was nonzero here, so a zero result means it was rounded away */
+
+    return scalbn(y, e2);
+}
+
+static double fffloatscan(FFFILE *f, int prec, int pok) /* scan one floating-point value from f: whitespace, sign, inf/nan spellings, then hexfloat() or decfloat() */
+{ /* prec selects the target limits (see switch); pok is forwarded to the helpers and relaxes handling of partial inf/nan matches */
+    int sign = 1;
+    size_t i;
+    int bits;
+    int emin;
+    int c;
+
+    switch (prec) { /* NOTE(review): ff_vfscanf passes its size code here; presumably 0/1/2 are SIZE_def/SIZE_l/SIZE_L - confirm */
+    case 0:
+        bits = FLT_MANT_DIG;
+        emin = FLT_MIN_EXP-bits;
+        break;
+    case 1:
+        bits = DBL_MANT_DIG;
+        emin = DBL_MIN_EXP-bits;
+        break;
+    case 2: /* same limits as case 1: no wider-than-double precision is provided */
+        bits = DBL_MANT_DIG;
+        emin = DBL_MIN_EXP-bits;
+        break;
+    default:
+        return 0; /* unknown precision code: nothing is read from f */
+    }
+
+    while (av_isspace((c = shgetc(f)))); /* skip leading whitespace */
+
+    if (c=='+' || c=='-') {
+        sign -= 2*(c=='-'); /* 1 stays 1 for '+', becomes -1 for '-' */
+        c = shgetc(f);
+    }
+
+    for (i=0; i<8 && (c|32)=="infinity"[i]; i++) /* case-insensitive match; i = number of matched characters */
+        if (i<7) c = shgetc(f);
+    if (i==3 || i==8 || (i>3 && pok)) { /* "inf", "infinity", or (with pok) a longer partial match treated as "inf" */
+        if (i!=8) {
+            shunget(f);
+            if (pok) for (; i>3; i--) shunget(f); /* give back the characters matched beyond "inf" */
+        }
+        return sign * INFINITY;
+    }
+    if (!i) for (i=0; i<3 && (c|32)=="nan"[i]; i++) /* only tried when nothing of "infinity" matched */
+        if (i<2) c = shgetc(f);
+    if (i==3) {
+        if (shgetc(f) != '(') {
+            shunget(f);
+            return NAN;
+        }
+        for (i=1; ; i++) { /* optional "(n-char-sequence)": i counts characters consumed since '(' */
+            c = shgetc(f);
+            if (c-'0'<10U || c-'A'<26U || c-'a'<26U || c=='_')
+                continue;
+            if (c==')') return NAN;
+            shunget(f);
+            if (!pok) {
+                errno = EINVAL;
+                shlim(f, 0);
+                return 0;
+            }
+            while (i--) shunget(f); /* pok: push the unterminated "(..." back and accept plain "nan" */
+            return NAN;
+        }
+        return NAN; /* not reached: the loop above only exits via return */
+    }
+
+    if (i) { /* partial match of "inf"/"nan" that was not accepted above */
+        shunget(f);
+        errno = EINVAL;
+        shlim(f, 0);
+        return 0;
+    }
+
+    if (c=='0') {
+        c = shgetc(f);
+        if ((c|32) == 'x') /* "0x"/"0X" prefix selects hexadecimal floating-point input */
+            return hexfloat(f, bits, emin, sign, pok);
+        shunget(f);
+        c = '0'; /* hand the already-read '0' to decfloat() as its first character */
+    }
+
+    return decfloat(f, c, bits, emin, sign, pok);
+}
+
+static void *arg_n(va_list ap, unsigned int n) /* fetch the n-th (1-based) pointer argument for "%n$" conversions without advancing ap */
+{
+    void *p;
+    unsigned int i;
+    va_list ap2;
+    va_copy(ap2, ap); /* work on a copy so the caller's ap is left untouched */
+    for (i=n; i>1; i--) va_arg(ap2, void *); /* discard the first n-1 arguments; n == 0 discards none, same as n == 1 */
+    p = va_arg(ap2, void *);
+    va_end(ap2);
+    return p;
+}
+
+static void store_int(void *dest, int size, unsigned long long i) /* store i through dest, converted to the integer type selected by size */
+{
+    if (!dest) return; /* NULL dest (assignment suppressed): nothing to store */
+    switch (size) { /* a size code not listed below stores nothing */
+    case SIZE_hh:
+        *(char *)dest = i;
+        break;
+    case SIZE_h:
+        *(short *)dest = i;
+        break;
+    case SIZE_def:
+        *(int *)dest = i;
+        break;
+    case SIZE_l:
+        *(long *)dest = i;
+        break;
+    case SIZE_ll:
+        *(long long *)dest = i;
+        break;
+    }
+}
+
+static int ff_vfscanf(FFFILE *f, const char *fmt, va_list ap) /* scanf engine: read from f as directed by fmt, storing results through the pointers in ap */
+{ /* returns the number of stored conversions; a format or input failure before any stored conversion returns -1 */
+    int width;
+    int size;
+    int base;
+    const unsigned char *p;
+    int c, t;
+    char *s;
+    void *dest=NULL;
+    int invert;
+    int matches=0;
+    unsigned long long x;
+    double y;
+    ptrdiff_t pos = 0; /* running sum of shcnt(f) over the directives handled so far; reported by %n */
+    unsigned char scanset[257]; /* indexed by c+1, so slot 0 serves c == -1 and is always 0 (stop) */
+    size_t i;
+
+    for (p=(const unsigned char *)fmt; *p; p++) {
+
+        if (av_isspace(*p)) { /* whitespace in fmt matches any run of input whitespace, including none */
+            while (av_isspace(p[1])) p++;
+            shlim(f, 0);
+            while (av_isspace(shgetc(f)));
+            shunget(f);
+            pos += shcnt(f);
+            continue;
+        }
+        if (*p != '%' || p[1] == '%') { /* literal character, or "%%" which matches '%' after optional whitespace */
+            shlim(f, 0);
+            if (*p == '%') {
+                p++;
+                while (av_isspace((c=shgetc(f))));
+            } else {
+                c = shgetc(f);
+            }
+            if (c!=*p) {
+                shunget(f);
+                if (c<0) goto input_fail;
+                goto match_fail;
+            }
+            pos += shcnt(f);
+            continue;
+        }
+
+        p++;
+        if (*p=='*') { /* "%*...": parse but do not store */
+            dest = 0; p++;
+        } else if (av_isdigit(*p) && p[1]=='$') { /* "%N$..." with a single-digit N: positional argument */
+            dest = arg_n(ap, *p-'0'); p+=2;
+        } else {
+            dest = va_arg(ap, void *);
+        }
+
+        for (width=0; av_isdigit(*p); p++) { /* optional decimal field width; 0 means none was given */
+            width = 10*width + *p - '0';
+        }
+
+        if (*p=='m') {
+            if (dest) goto fmt_fail; /* 'm' (allocate) is not implemented: storing would overwrite the caller's char* variable with scanned bytes */
+            p++;
+        }
+
+        size = SIZE_def;
+        switch (*p++) { /* optional length modifier; a conversion letter is left in place via p-- */
+        case 'h':
+            if (*p == 'h') p++, size = SIZE_hh;
+            else size = SIZE_h;
+            break;
+        case 'l':
+            if (*p == 'l') p++, size = SIZE_ll;
+            else size = SIZE_l;
+            break;
+        case 'j':
+            size = SIZE_ll;
+            break;
+        case 'z':
+        case 't':
+            size = SIZE_l;
+            break;
+        case 'L':
+            size = SIZE_L;
+            break;
+        case 'd': case 'i': case 'o': case 'u': case 'x':
+        case 'a': case 'e': case 'f': case 'g':
+        case 'A': case 'E': case 'F': case 'G': case 'X':
+        case 's': case 'c': case '[':
+        case 'S': case 'C':
+        case 'p': case 'n':
+            p--;
+            break;
+        default:
+            goto fmt_fail;
+        }
+
+        t = *p;
+
+        /* C or S */
+        if ((t&0x2f) == 3) {
+            t |= 32; /* fold to 'c'/'s'; the string path below never consults size, so bytes are stored as plain chars */
+            size = SIZE_l;
+        }
+
+        switch (t) {
+            case 'c':
+                if (width < 1) width = 1; /* %c reads exactly one character unless a width is given; falls through */
+            case '[':
+                break; /* %c and %[ do not skip leading whitespace */
+            case 'n':
+                store_int(dest, size, pos);
+                /* do not increment match count, etc! */
+                continue;
+            default:
+                shlim(f, 0);
+                while (av_isspace(shgetc(f)));
+                shunget(f);
+                pos += shcnt(f);
+        }
+
+        shlim(f, width);
+        if (shgetc(f) < 0) goto input_fail; /* probe one character: nothing left to read for this conversion */
+        shunget(f);
+
+        switch (t) {
+            case 's':
+            case 'c':
+            case '[':
+                if (t == 'c' || t == 's') {
+                    memset(scanset, -1, sizeof scanset); /* accept every byte ... */
+                    scanset[0] = 0;
+                    if (t == 's') { /* ... except, for %s, the six whitespace characters */
+                        scanset[1 + '\t'] = 0;
+                        scanset[1 + '\n'] = 0;
+                        scanset[1 + '\v'] = 0;
+                        scanset[1 + '\f'] = 0;
+                        scanset[1 + '\r'] = 0;
+                        scanset[1 + ' ' ] = 0;
+                    }
+                } else {
+                    if (*++p == '^') p++, invert = 1;
+                    else invert = 0;
+                    memset(scanset, invert, sizeof scanset);
+                    scanset[0] = 0;
+                    if (*p == '-') p++, scanset[1+'-'] = 1-invert; /* a leading '-' or ']' is a literal set member */
+                    else if (*p == ']') p++, scanset[1+']'] = 1-invert;
+                    for (; *p != ']'; p++) {
+                        if (!*p) goto fmt_fail; /* unterminated "%[" */
+                        if (*p=='-' && p[1] && p[1] != ']')
+                            for (c=p++[-1]; c<*p; c++) /* range "a-z": mark from the previous char up to the char after '-' */
+                                scanset[1+c] = 1-invert;
+                        scanset[1+*p] = 1-invert;
+                    }
+                }
+                s = 0;
+                i = 0;
+                if ((s = dest)) { /* no bound other than width: the caller's buffer must be large enough */
+                    while (scanset[(c=shgetc(f))+1])
+                        s[i++] = c;
+                } else {
+                    while (scanset[(c=shgetc(f))+1]);
+                }
+                shunget(f);
+                if (!shcnt(f)) goto match_fail;
+                if (t == 'c' && shcnt(f) != width) goto match_fail; /* %c must consume exactly width characters */
+                if (t != 'c') {
+                    if (s) s[i] = 0; /* %s and %[ are NUL-terminated, %c is not */
+                }
+                break;
+            case 'p':
+            case 'X':
+            case 'x':
+                base = 16;
+                goto int_common;
+            case 'o':
+                base = 8;
+                goto int_common;
+            case 'd':
+            case 'u':
+                base = 10;
+                goto int_common;
+            case 'i':
+                base = 0; /* base 0 is passed through to ffintscan() */
+int_common:
+                x = ffintscan(f, base, 0, ULLONG_MAX);
+                if (!shcnt(f))
+                    goto match_fail;
+                if (t=='p' && dest)
+                    *(void **)dest = (void *)(uintptr_t)x;
+                else
+                    store_int(dest, size, x);
+                break;
+            case 'a': case 'A':
+            case 'e': case 'E':
+            case 'f': case 'F':
+            case 'g': case 'G':
+                y = fffloatscan(f, size, 0);
+                if (!shcnt(f))
+                    goto match_fail;
+                if (dest) {
+                    switch (size) { /* other size codes (h, hh, ll) store nothing but still count as a match below */
+                    case SIZE_def:
+                        *(float *)dest = y;
+                        break;
+                    case SIZE_l:
+                        *(double *)dest = y;
+                        break;
+                    case SIZE_L:
+                        *(double *)dest = y; /* NOTE: "%Lf" stores a double, not a long double */
+                        break;
+                    }
+                }
+                break;
+        }
+
+        pos += shcnt(f);
+        if (dest) matches++;
+    }
+    if (0) { /* entered only through the labels below */
+fmt_fail:
+input_fail:
+        if (!matches) matches--; /* failure before any stored conversion is reported as -1 */
+    }
+match_fail:
+    return matches;
+}
+
+static int ff_vsscanf(const char *s, const char *fmt, va_list ap) /* run ff_vfscanf() over the string s */
+{
+    FFFILE f = { /* stack-local stream; members not named here are zero-initialized */
+        .buf = (void *)s, .cookie = (void *)s, /* both point at the input string; the casts drop const */
+        .read = ffstring_read,
+    };
+
+    return ff_vfscanf(&f, fmt, ap);
+}
+
+int av_sscanf(const char *string, const char *format, ...) /* public variadic entry point: forwards to ff_vsscanf() */
+{
+    int ret; /* value returned by ff_vsscanf(), passed back unchanged */
+    va_list ap;
+    va_start(ap, format);
+    ret = ff_vsscanf(string, format, ap);
+    va_end(ap);
+    return ret;
+}
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 04d2695640a11..37dd4e2da0fc8 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -400,6 +400,12 @@ int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
  */
 int av_match_list(const char *name, const char *list, char separator);
 
+/**
+ * See libc sscanf manual for more information.
+ * Locale-independent sscanf implementation.
+ */
+int av_sscanf(const char *string, const char *format, ...);
+
 /**
  * @}
  */
diff --git a/libavutil/cuda_check.h b/libavutil/cuda_check.h
new file mode 100644
index 0000000000000..d02ea7eec4a3e
--- /dev/null
+++ b/libavutil/cuda_check.h
@@ -0,0 +1,64 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_CUDA_CHECK_H
+#define AVUTIL_CUDA_CHECK_H
+
+typedef CUresult CUDAAPI cuda_check_GetErrorName(CUresult error, const char** pstr);
+typedef CUresult CUDAAPI cuda_check_GetErrorString(CUresult error, const char** pstr);
+
+/**
+ * Wrap a CUDA function call and print error information if it fails. Returns 0 if err is CUDA_SUCCESS, AVERROR_EXTERNAL otherwise.
+ */
+static inline int ff_cuda_check(void *avctx,
+                                void *cuGetErrorName_fn, void *cuGetErrorString_fn, /* untyped pointers, cast to the cuda_check_GetError* typedefs below */
+                                CUresult err, const char *func) /* err: result of the call being checked; func: text used in the log messages */
+{
+    const char *err_name; /* not initialized here: only the lookup call below writes it */
+    const char *err_string; /* not initialized here: only the lookup call below writes it */
+
+    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); /* err was already computed by the caller, so this trace is emitted after the call */
+
+    if (err == CUDA_SUCCESS)
+        return 0;
+
+    ((cuda_check_GetErrorName *)cuGetErrorName_fn)(err, &err_name); /* return value is ignored */
+    ((cuda_check_GetErrorString *)cuGetErrorString_fn)(err, &err_string); /* return value is ignored */
+
+    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
+    if (err_name && err_string) /* name and description are printed only when both are non-NULL */
+        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
+    av_log(avctx, AV_LOG_ERROR, "\n");
+
+    return AVERROR_EXTERNAL;
+}
+
+/**
+ * Convenience wrapper for ff_cuda_check when directly linking libcuda.
+ */
+
+#define FF_CUDA_CHECK(avclass, x) ff_cuda_check(avclass, cuGetErrorName, cuGetErrorString, (x), #x)
+
+/**
+ * Convenience wrapper for ff_cuda_check when dynamically loading cuda symbols.
+ */
+
+#define FF_CUDA_CHECK_DL(avclass, cudl, x) ff_cuda_check(avclass, cudl->cuGetErrorName, cudl->cuGetErrorString, (x), #x)
+
+#endif /* AVUTIL_CUDA_CHECK_H */
diff --git a/libavutil/file_open.c b/libavutil/file_open.c
index a8da283583b61..cc302f2f768d0 100644
--- a/libavutil/file_open.c
+++ b/libavutil/file_open.c
@@ -138,7 +138,7 @@ int avpriv_tempfile(const char *prefix, char **filename, int log_offset, void *l
 #else
     snprintf(*filename, len, "/tmp/%sXXXXXX", prefix);
     fd = mkstemp(*filename);
-#if defined(_WIN32) || defined (__ANDROID__)
+#if defined(_WIN32) || defined (__ANDROID__) || defined(__DJGPP__)
     if (fd < 0) {
         snprintf(*filename, len, "./%sXXXXXX", prefix);
         fd = mkstemp(*filename);
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 4460325a9bba8..dcf1fc3d17b53 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -243,11 +243,13 @@ static int get_video_buffer(AVFrame *frame, int align)
         return ret;
 
     frame->buf[0] = av_buffer_alloc(ret + 4*plane_padding);
-    if (!frame->buf[0])
+    if (!frame->buf[0]) {
+        ret = AVERROR(ENOMEM);
         goto fail;
+    }
 
-    if (av_image_fill_pointers(frame->data, frame->format, padded_height,
-                               frame->buf[0]->data, frame->linesize) < 0)
+    if ((ret = av_image_fill_pointers(frame->data, frame->format, padded_height,
+                                      frame->buf[0]->data, frame->linesize)) < 0)
         goto fail;
 
     for (i = 1; i < 4; i++) {
@@ -260,7 +262,7 @@ static int get_video_buffer(AVFrame *frame, int align)
     return 0;
 fail:
     av_frame_unref(frame);
-    return AVERROR(ENOMEM);
+    return ret;
 }
 
 static int get_audio_buffer(AVFrame *frame, int align)
@@ -831,12 +833,15 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA:  return "Mastering display metadata";
     case AV_FRAME_DATA_CONTENT_LIGHT_LEVEL:         return "Content light level metadata";
     case AV_FRAME_DATA_GOP_TIMECODE:                return "GOP timecode";
+    case AV_FRAME_DATA_S12M_TIMECODE:               return "SMPTE 12-1 timecode";
     case AV_FRAME_DATA_SPHERICAL:                   return "Spherical Mapping";
     case AV_FRAME_DATA_ICC_PROFILE:                 return "ICC profile";
 #if FF_API_FRAME_QP
     case AV_FRAME_DATA_QP_TABLE_PROPERTIES:         return "QP table properties";
     case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
 #endif
+    case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
+    case AV_FRAME_DATA_REGIONS_OF_INTEREST: return "Regions Of Interest";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 9d57d6ce66ff9..8aa3e88367ad3 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -158,6 +158,27 @@ enum AVFrameSideDataType {
      */
     AV_FRAME_DATA_QP_TABLE_DATA,
 #endif
+
+    /**
+     * Timecode which conforms to SMPTE ST 12-1. The data is an array of 4 uint32_t
+     * where the first uint32_t describes how many (1-3) of the other timecodes are used.
+     * The timecode format is described in the av_timecode_get_smpte_from_framenum()
+     * function in libavutil/timecode.c.
+     */
+    AV_FRAME_DATA_S12M_TIMECODE,
+
+    /**
+     * HDR dynamic metadata associated with a video frame. The payload is
+     * an AVDynamicHDRPlus type and contains information for color
+     * volume transform - application 4 of SMPTE 2094-40:2016 standard.
+     */
+    AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
+
+    /**
+     * Regions Of Interest, the data is an array of AVRegionOfInterest type, the number of
+     * array element is implied by AVFrameSideData.size / AVRegionOfInterest.self_size.
+     */
+    AV_FRAME_DATA_REGIONS_OF_INTEREST,
 };
 
 enum AVActiveFormatDescription {
@@ -185,6 +206,35 @@ typedef struct AVFrameSideData {
     AVBufferRef *buf;
 } AVFrameSideData;
 
+/**
+ * Structure to hold Region Of Interest.
+ *
+ * self_size specifies the size of this data structure. This value
+ * should be set to sizeof(AVRegionOfInterest). EINVAL is returned if self_size is zero.
+ *
+ * Number of pixels to discard from the top/bottom/left/right border of
+ * the frame to obtain the region of interest of the frame.
+ * They are encoder dependent and will be extended internally
+ * if the codec requires an alignment.
+ * If the regions overlap, the last value in the list will be used.
+ *
+ * qoffset is the quantisation offset; the basic rules are:
+ * EINVAL is returned if AVRational.den is zero.
+ * The value (num/den) range is [-1.0, 1.0]; it is clamped to +-1.0 if out of range.
+ * 0 means no picture quality change,
+ * negative offset asks for better quality (and the best with value -1.0),
+ * positive offset asks for worse quality (and the worst with value 1.0).
+ * How the different quality requirements are interpreted/implemented is encoder dependent.
+ */
+typedef struct AVRegionOfInterest {
+    uint32_t self_size; /* must be set to sizeof(AVRegionOfInterest) */
+    int top; /* pixels to discard from the top border */
+    int bottom; /* pixels to discard from the bottom border */
+    int left; /* pixels to discard from the left border */
+    int right; /* pixels to discard from the right border */
+    AVRational qoffset; /* quality offset in [-1.0, 1.0]; den must not be zero */
+} AVRegionOfInterest;
+
 /**
  * This structure describes decoded (raw) audio or video data.
  *
@@ -381,7 +431,6 @@ typedef struct AVFrame {
      * that time,
      * the decoder reorders values as needed and sets AVFrame.reordered_opaque
      * to exactly one of the values provided by the user through AVCodecContext.reordered_opaque
-     * @deprecated in favor of pkt_pts
      */
     int64_t reordered_opaque;
 
diff --git a/libavutil/hdr_dynamic_metadata.c b/libavutil/hdr_dynamic_metadata.c
new file mode 100644
index 0000000000000..0fa1ee82de2e5
--- /dev/null
+++ b/libavutil/hdr_dynamic_metadata.c
@@ -0,0 +1,47 @@
+/**
+ * Copyright (c) 2018 Mohammad Izadi <moh.izadi at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hdr_dynamic_metadata.h"
+#include "mem.h"
+
+AVDynamicHDRPlus *av_dynamic_hdr_plus_alloc(size_t *size) /* allocate a zeroed AVDynamicHDRPlus; size may be NULL */
+{
+    AVDynamicHDRPlus *hdr_plus = av_mallocz(sizeof(AVDynamicHDRPlus));
+    if (!hdr_plus)
+        return NULL; /* allocation failed; *size is left untouched */
+
+    if (size)
+        *size = sizeof(*hdr_plus); /* report the size of the allocated struct to the caller */
+
+    return hdr_plus;
+}
+
+AVDynamicHDRPlus *av_dynamic_hdr_plus_create_side_data(AVFrame *frame) /* add zeroed AV_FRAME_DATA_DYNAMIC_HDR_PLUS side data to frame and return it */
+{
+    AVFrameSideData *side_data = av_frame_new_side_data(frame,
+                                                        AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
+                                                        sizeof(AVDynamicHDRPlus));
+    if (!side_data)
+        return NULL; /* av_frame_new_side_data() failed */
+
+    memset(side_data->data, 0, sizeof(AVDynamicHDRPlus)); /* zero the new payload */
+
+    return (AVDynamicHDRPlus *)side_data->data; /* the returned pointer refers to the side data's own buffer */
+}
diff --git a/libavutil/hdr_dynamic_metadata.h b/libavutil/hdr_dynamic_metadata.h
new file mode 100644
index 0000000000000..2d72de56aecd8
--- /dev/null
+++ b/libavutil/hdr_dynamic_metadata.h
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2018 Mohammad Izadi <moh.izadi at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HDR_DYNAMIC_METADATA_H
+#define AVUTIL_HDR_DYNAMIC_METADATA_H
+
+#include "frame.h"
+#include "rational.h"
+
+/**
+ * How rendered pixels are combined where elliptical pixel selectors overlap.
+ */
+enum AVHDRPlusOverlapProcessOption {
+    AV_HDR_PLUS_OVERLAP_PROCESS_WEIGHTED_AVERAGING = 0,
+    AV_HDR_PLUS_OVERLAP_PROCESS_LAYERING = 1,
+};
+
+/**
+ * Represents the percentile at a specific percentage in
+ * a distribution.
+ */
+typedef struct AVHDRPlusPercentile {
+    /**
+     * The percentage value corresponding to a specific percentile linearized
+     * RGB value in the processing window in the scene. The value shall be in
+     * the range of 0 to 100, inclusive.
+     */
+    uint8_t percentage;
+
+    /**
+     * The linearized maxRGB value at a specific percentile in the processing
+     * window in the scene. The value shall be in the range of 0 to 1, inclusive
+     * and in multiples of 0.00001.
+     */
+    AVRational percentile;
+} AVHDRPlusPercentile;
+
+/**
+ * Color transform parameters at a processing window in a dynamic metadata for
+ * SMPTE 2094-40.
+ */
+typedef struct AVHDRPlusColorTransformParams {
+    /**
+     * The relative x coordinate of the top left pixel of the processing
+     * window. The value shall be in the range of 0 to 1, inclusive and
+     * in multiples of 1/(width of Picture - 1). The value 1 corresponds
+     * to the absolute coordinate of width of Picture - 1. The value for
+     * first processing window shall be 0.
+     */
+    AVRational window_upper_left_corner_x;
+
+    /**
+     * The relative y coordinate of the top left pixel of the processing
+     * window. The value shall be in the range of 0 to 1, inclusive and
+     * in multiples of 1/(height of Picture - 1). The value 1 corresponds
+     * to the absolute coordinate of height of Picture - 1. The value for
+     * first processing window shall be 0.
+     */
+    AVRational window_upper_left_corner_y;
+
+    /**
+     * The relative x coordinate of the bottom right pixel of the processing
+     * window. The value shall be in the range of 0 to 1, inclusive and
+     * in multiples of 1/(width of Picture - 1). The value 1 corresponds
+     * to the absolute coordinate of width of Picture - 1. The value for
+     * first processing window shall be 1.
+     */
+    AVRational window_lower_right_corner_x;
+
+    /**
+     * The relative y coordinate of the bottom right pixel of the processing
+     * window. The value shall be in the range of 0 to 1, inclusive and
+     * in multiples of 1/(height of Picture - 1). The value 1 corresponds
+     * to the absolute coordinate of height of Picture - 1. The value for
+     * first processing window shall be 1.
+     */
+    AVRational window_lower_right_corner_y;
+
+    /**
+     * The x coordinate of the center position of the concentric internal and
+     * external ellipses of the elliptical pixel selector in the processing
+     * window. The value shall be in the range of 0 to (width of Picture - 1),
+     * inclusive and in multiples of 1 pixel.
+     */
+    uint16_t center_of_ellipse_x;
+
+    /**
+     * The y coordinate of the center position of the concentric internal and
+     * external ellipses of the elliptical pixel selector in the processing
+     * window. The value shall be in the range of 0 to (height of Picture - 1),
+     * inclusive and in multiples of 1 pixel.
+     */
+    uint16_t center_of_ellipse_y;
+
+    /**
+     * The clockwise rotation angle in degrees of arc with respect to the
+     * positive direction of the x-axis of the concentric internal and external
+     * ellipses of the elliptical pixel selector in the processing window. The
+     * value shall be in the range of 0 to 180, inclusive and in multiples of 1.
+     */
+    uint8_t rotation_angle;
+
+    /**
+     * The semi-major axis value of the internal ellipse of the elliptical pixel
+     * selector in amount of pixels in the processing window. The value shall be
+     * in the range of 1 to 65535, inclusive and in multiples of 1 pixel.
+     */
+    uint16_t semimajor_axis_internal_ellipse;
+
+    /**
+     * The semi-major axis value of the external ellipse of the elliptical pixel
+     * selector in amount of pixels in the processing window. The value
+     * shall not be less than semimajor_axis_internal_ellipse of the current
+     * processing window. The value shall be in the range of 1 to 65535,
+     * inclusive and in multiples of 1 pixel.
+     */
+    uint16_t semimajor_axis_external_ellipse;
+
+    /**
+     * The semi-minor axis value of the external ellipse of the elliptical pixel
+     * selector in amount of pixels in the processing window. The value shall be
+     * in the range of 1 to 65535, inclusive and in multiples of 1 pixel.
+     */
+    uint16_t semiminor_axis_external_ellipse;
+
+    /**
+     * Overlap process option indicates one of the two methods of combining
+     * rendered pixels in the processing window in an image with at least one
+     * elliptical pixel selector. For overlapping elliptical pixel selectors
+     * in an image, overlap_process_option shall have the same value.
+     */
+    enum AVHDRPlusOverlapProcessOption overlap_process_option;
+
+    /**
+     * The maximum of the color components of linearized RGB values in the
+     * processing window in the scene. The values should be in the range of 0 to
+     * 1, inclusive and in multiples of 0.00001. maxscl[ 0 ], maxscl[ 1 ], and
+     * maxscl[ 2 ] are corresponding to R, G, B color components respectively.
+     */
+    AVRational maxscl[3];
+
+    /**
+     * The average of linearized maxRGB values in the processing window in the
+     * scene. The value should be in the range of 0 to 1, inclusive and in
+     * multiples of 0.00001.
+     */
+    AVRational average_maxrgb;
+
+    /**
+     * The number of linearized maxRGB values at given percentiles in the
+     * processing window in the scene. The maximum value shall be 15.
+     */
+    uint8_t num_distribution_maxrgb_percentiles;
+
+    /**
+     * The linearized maxRGB values at given percentiles in the
+     * processing window in the scene.
+     */
+    AVHDRPlusPercentile distribution_maxrgb[15];
+
+    /**
+     * The fraction of selected pixels in the image that contains the brightest
+     * pixel in the scene. The value shall be in the range of 0 to 1, inclusive
+     * and in multiples of 0.001.
+     */
+    AVRational fraction_bright_pixels;
+
+    /**
+     * This flag indicates that the metadata for the tone mapping function in
+     * the processing window is present (for value of 1).
+     */
+    uint8_t tone_mapping_flag;
+
+    /**
+     * The x coordinate of the separation point between the linear part and the
+     * curved part of the tone mapping function. The value shall be in the range
+     * of 0 to 1, excluding 0 and in multiples of 1/4095.
+     */
+    AVRational knee_point_x;
+
+    /**
+     * The y coordinate of the separation point between the linear part and the
+     * curved part of the tone mapping function. The value shall be in the range
+     * of 0 to 1, excluding 0 and in multiples of 1/4095.
+     */
+    AVRational knee_point_y;
+
+    /**
+     * The number of the intermediate anchor parameters of the tone mapping
+     * function in the processing window. The maximum value shall be 15.
+     */
+    uint8_t num_bezier_curve_anchors;
+
+    /**
+     * The intermediate anchor parameters of the tone mapping function in the
+     * processing window in the scene. The values should be in the range of 0
+     * to 1, inclusive and in multiples of 1/1023.
+     */
+    AVRational bezier_curve_anchors[15];
+
+    /**
+     * This flag shall be equal to 0 in bitstreams conforming to this version of
+     * this Specification. Other values are reserved for future use.
+     */
+    uint8_t color_saturation_mapping_flag;
+
+    /**
+     * The color saturation gain in the processing window in the scene. The
+     * value shall be in the range of 0 to 63/8, inclusive and in multiples of
+     * 1/8. The default value shall be 1.
+     */
+    AVRational color_saturation_weight;
+} AVHDRPlusColorTransformParams;
+
+/**
+ * This struct represents dynamic metadata for color volume transform -
+ * application 4 of SMPTE 2094-40:2016 standard.
+ *
+ * To be used as payload of an AVFrameSideData or AVPacketSideData with the
+ * appropriate type.
+ *
+ * @note The struct should be allocated with
+ * av_dynamic_hdr_plus_alloc() and its size is not a part of
+ * the public ABI.
+ */
+typedef struct AVDynamicHDRPlus {
+    /**
+     * Country code by Rec. ITU-T T.35 Annex A. The value shall be 0xB5.
+     */
+    uint8_t itu_t_t35_country_code;
+
+    /**
+     * Application version in the application defining document in ST-2094
+     * suite. The value shall be set to 0.
+     */
+    uint8_t application_version;
+
+    /**
+     * The number of processing windows. The value shall be in the range
+     * of 1 to 3, inclusive.
+     */
+    uint8_t num_windows;
+
+    /**
+     * The color transform parameters for every processing window.
+     */
+    AVHDRPlusColorTransformParams params[3];
+
+    /**
+     * The nominal maximum display luminance of the targeted system display,
+     * in units of 0.0001 candelas per square metre. The value shall be in
+     * the range of 0 to 10000, inclusive.
+     */
+    AVRational targeted_system_display_maximum_luminance;
+
+    /**
+     * This flag shall be equal to 0 in bit streams conforming to this version
+     * of this Specification. The value 1 is reserved for future use.
+     */
+    uint8_t targeted_system_display_actual_peak_luminance_flag;
+
+    /**
+     * The number of rows in the targeted_system_display_actual_peak_luminance
+     * array. The value shall be in the range of 2 to 25, inclusive.
+     */
+    uint8_t num_rows_targeted_system_display_actual_peak_luminance;
+
+    /**
+     * The number of columns in the
+     * targeted_system_display_actual_peak_luminance array. The value shall be
+     * in the range of 2 to 25, inclusive.
+     */
+    uint8_t num_cols_targeted_system_display_actual_peak_luminance;
+
+    /**
+     * The normalized actual peak luminance of the targeted system display. The
+     * values should be in the range of 0 to 1, inclusive and in multiples of
+     * 1/15.
+     */
+    AVRational targeted_system_display_actual_peak_luminance[25][25];
+
+    /**
+     * This flag shall be equal to 0 in bitstreams conforming to this version of
+     * this Specification. The value 1 is reserved for future use.
+     */
+    uint8_t mastering_display_actual_peak_luminance_flag;
+
+    /**
+     * The number of rows in the mastering_display_actual_peak_luminance array.
+     * The value shall be in the range of 2 to 25, inclusive.
+     */
+    uint8_t num_rows_mastering_display_actual_peak_luminance;
+
+    /**
+     * The number of columns in the mastering_display_actual_peak_luminance
+     * array. The value shall be in the range of 2 to 25, inclusive.
+     */
+    uint8_t num_cols_mastering_display_actual_peak_luminance;
+
+    /**
+     * The normalized actual peak luminance of the mastering display used for
+     * mastering the image essence. The values should be in the range of 0 to 1,
+     * inclusive and in multiples of 1/15.
+     */
+    AVRational mastering_display_actual_peak_luminance[25][25];
+} AVDynamicHDRPlus;
+
+/**
+ * Allocate an AVDynamicHDRPlus structure and set its fields to
+ * default values. The resulting struct can be freed using av_freep().
+ * @param size If not NULL, set to sizeof(AVDynamicHDRPlus) on success.
+ * @return An AVDynamicHDRPlus filled with default values or NULL
+ *         on failure.
+ */
+AVDynamicHDRPlus *av_dynamic_hdr_plus_alloc(size_t *size);
+
+/**
+ * Allocate a zeroed AVDynamicHDRPlus and attach it to the frame as
+ * AV_FRAME_DATA_DYNAMIC_HDR_PLUS side data.
+ * @param frame The frame which side data is added to.
+ * @return The AVDynamicHDRPlus structure to be filled by caller or NULL
+ *         on failure.
+ */
+AVDynamicHDRPlus *av_dynamic_hdr_plus_create_side_data(AVFrame *frame);
+
+#endif /* AVUTIL_HDR_DYNAMIC_METADATA_H */
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 3b1d53e799546..540a7610ef954 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -21,6 +21,7 @@
 #include "hwcontext.h"
 #include "hwcontext_internal.h"
 #include "hwcontext_cuda_internal.h"
+#include "cuda_check.h"
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
@@ -43,6 +44,8 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_0BGR32,
 };
 
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) /* needs locals named device_ctx and cu */
+
 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                        const void *hwconfig,
                                        AVHWFramesConstraints *constraints)
@@ -70,48 +73,48 @@ static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
 
 static void cuda_buffer_free(void *opaque, uint8_t *data)
 {
-    AVHWFramesContext *ctx = opaque;
-    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext        *ctx = opaque;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx; /* referenced by CHECK_CU */
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
 
-    cu->cuCtxPushCurrent(hwctx->cuda_ctx);
+    CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
 
-    cu->cuMemFree((CUdeviceptr)data);
+    CHECK_CU(cu->cuMemFree((CUdeviceptr)data)); /* result unused: void callback */
 
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 }
 
 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
 {
-    AVHWFramesContext     *ctx = opaque;
-    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions          *cu = hwctx->internal->cuda_dl;
+    AVHWFramesContext        *ctx = opaque;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx; /* referenced by CHECK_CU */
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     AVBufferRef *ret = NULL;
     CUcontext dummy = NULL;
     CUdeviceptr data;
-    CUresult err;
+    int err; /* CHECK_CU result; negative means failure */
 
-    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+    err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (err < 0)
         return NULL;
-    }
 
-    err = cu->cuMemAlloc(&data, size);
-    if (err != CUDA_SUCCESS)
+    err = CHECK_CU(cu->cuMemAlloc(&data, size));
+    if (err < 0)
         goto fail;
 
     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
     if (!ret) {
-        cu->cuMemFree(data);
+        CHECK_CU(cu->cuMemFree(data)); /* no buffer owns data yet; free it here */
         goto fail;
     }
 
 fail:
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy)); /* also reached on success */
     return ret;
 }
 
@@ -194,17 +197,17 @@ static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                    const AVFrame *src)
 {
-    CUDAFramesContext           *priv = ctx->internal->priv;
-    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
+    CUDAFramesContext       *priv = ctx->internal->priv;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx; /* referenced by CHECK_CU */
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
-    CUresult err;
-    int i;
+    int i, ret; /* ret: last CHECK_CU result, negative on failure */
 
-    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
         CUDA_MEMCPY2D cpy = {
@@ -218,20 +221,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
-        if (err != CUDA_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
-            return AVERROR_UNKNOWN;
-        }
+        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
+        if (ret < 0)
+            goto exit;
     }
 
-    err = cu->cuStreamSynchronize(device_hwctx->stream);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
+    if (ret < 0)
+        goto exit;
 
-    cu->cuCtxPopCurrent(&dummy);
+exit:
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy)); /* pop on every path after the push */
 
-    return 0;
+    return ret; /* was "return 0": copy/sync failures were reported as success */
 }
@@ -239,17 +239,17 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                  const AVFrame *src)
 {
-    CUDAFramesContext           *priv = ctx->internal->priv;
-    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
-    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
+    CUDAFramesContext       *priv = ctx->internal->priv;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx; /* referenced by CHECK_CU */
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
-    CUresult err;
-    int i;
+    int i, ret; /* ret: last CHECK_CU result, negative on failure */
 
-    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
-    if (err != CUDA_SUCCESS)
-        return AVERROR_UNKNOWN;
+    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
+    if (ret < 0)
+        return ret;
 
     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
         CUDA_MEMCPY2D cpy = {
@@ -263,31 +263,29 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
-        if (err != CUDA_SUCCESS) {
-            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
-            return AVERROR_UNKNOWN;
-        }
+        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
+        if (ret < 0)
+            goto exit;
     }
 
-    err = cu->cuStreamSynchronize(device_hwctx->stream);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
-        return AVERROR_UNKNOWN;
-    }
+    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
+    if (ret < 0)
+        goto exit;
 
-    cu->cuCtxPopCurrent(&dummy);
+exit:
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy)); /* pop on every path after the push */
 
-    return 0;
+    return ret; /* was "return 0": copy/sync failures were reported as success */
 }
 
-static void cuda_device_uninit(AVHWDeviceContext *ctx)
+static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
 {
-    AVCUDADeviceContext *hwctx = ctx->hwctx;
+    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
 
     if (hwctx->internal) {
+        CudaFunctions *cu = hwctx->internal->cuda_dl;
         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
-            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
+            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
             hwctx->cuda_ctx = NULL;
         }
         cuda_free_functions(&hwctx->internal->cuda_dl);
@@ -322,53 +320,47 @@ static int cuda_device_init(AVHWDeviceContext *ctx)
     return ret;
 }
 
-static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
+static int cuda_device_create(AVHWDeviceContext *device_ctx, /* name required by CHECK_CU */
+                              const char *device,
                               AVDictionary *opts, int flags)
 {
-    AVCUDADeviceContext *hwctx = ctx->hwctx;
+    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
     CudaFunctions *cu;
     CUdevice cu_device;
     CUcontext dummy;
-    CUresult err;
-    int device_idx = 0;
+    int ret, device_idx = 0; /* index 0 is used when device is NULL */
 
     if (device)
         device_idx = strtol(device, NULL, 0);
 
-    if (cuda_device_init(ctx) < 0)
+    if (cuda_device_init(device_ctx) < 0)
         goto error;
 
     cu = hwctx->internal->cuda_dl;
 
-    err = cu->cuInit(0);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
+    ret = CHECK_CU(cu->cuInit(0));
+    if (ret < 0)
         goto error;
-    }
 
-    err = cu->cuDeviceGet(&cu_device, device_idx);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
+    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
+    if (ret < 0)
         goto error;
-    }
 
-    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
-    if (err != CUDA_SUCCESS) {
-        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
+    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
+    if (ret < 0)
         goto error;
-    }
 
     // Setting stream to NULL will make functions automatically use the default CUstream
     hwctx->stream = NULL;
 
-    cu->cuCtxPopCurrent(&dummy);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy)); /* a failure here is ignored */
 
     hwctx->internal->is_allocated = 1;
 
     return 0;
 
 error:
-    cuda_device_uninit(ctx);
+    cuda_device_uninit(device_ctx); /* ret is discarded; AVERROR_UNKNOWN is returned */
     return AVERROR_UNKNOWN;
 }
 
diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index 728877553f820..d3df6221c4976 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -500,6 +500,9 @@ static int opencl_device_create_internal(AVHWDeviceContext *hwdev,
          *device_name_src   = NULL;
     int err, found, p, d;
 
+    av_assert0(selector->enumerate_platforms &&
+               selector->enumerate_devices);
+
     err = selector->enumerate_platforms(hwdev, &nb_platforms, &platforms,
                                         selector->context);
     if (err)
@@ -531,9 +534,9 @@ static int opencl_device_create_internal(AVHWDeviceContext *hwdev,
                 continue;
         }
 
-        err = opencl_enumerate_devices(hwdev, platforms[p], platform_name,
-                                       &nb_devices, &devices,
-                                       selector->context);
+        err = selector->enumerate_devices(hwdev, platforms[p], platform_name,
+                                          &nb_devices, &devices,
+                                          selector->context);
         if (err < 0)
             continue;
 
@@ -1726,10 +1729,13 @@ static void opencl_frames_uninit(AVHWFramesContext *hwfc)
     av_freep(&priv->mapped_frames);
 #endif
 
-    cle = clReleaseCommandQueue(priv->command_queue);
-    if (cle != CL_SUCCESS) {
-        av_log(hwfc, AV_LOG_ERROR, "Failed to release frame "
-               "command queue: %d.\n", cle);
+    if (priv->command_queue) {
+        cle = clReleaseCommandQueue(priv->command_queue);
+        if (cle != CL_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to release frame "
+                   "command queue: %d.\n", cle);
+        }
+        priv->command_queue = NULL;
     }
 }
 
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 4938a7ef6767f..c733cb5cf54a7 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -311,8 +311,8 @@ static void image_copy_plane(uint8_t       *dst, ptrdiff_t dst_linesize,
 {
     if (!dst || !src)
         return;
-    av_assert0(abs(src_linesize) >= bytewidth);
-    av_assert0(abs(dst_linesize) >= bytewidth);
+    av_assert0(FFABS(src_linesize) >= bytewidth);
+    av_assert0(FFABS(dst_linesize) >= bytewidth);
     for (;height > 0; height--) {
         memcpy(dst, src, bytewidth);
         dst += dst_linesize;
@@ -501,7 +501,6 @@ int av_image_copy_to_buffer(uint8_t *dst, int dst_size,
 static void memset_bytes(uint8_t *dst, size_t dst_size, uint8_t *clear,
                          size_t clear_size)
 {
-    size_t pos = 0;
     int same = 1;
     int i;
 
@@ -521,28 +520,12 @@ static void memset_bytes(uint8_t *dst, size_t dst_size, uint8_t *clear,
     if (clear_size == 1) {
         memset(dst, clear[0], dst_size);
         dst_size = 0;
-    } else if (clear_size == 2) {
-        uint16_t val = AV_RN16(clear);
-        for (; dst_size >= 2; dst_size -= 2) {
-            AV_WN16(dst, val);
-            dst += 2;
-        }
-    } else if (clear_size == 4) {
-        uint32_t val = AV_RN32(clear);
-        for (; dst_size >= 4; dst_size -= 4) {
-            AV_WN32(dst, val);
-            dst += 4;
-        }
-    } else if (clear_size == 8) {
-        uint32_t val = AV_RN64(clear);
-        for (; dst_size >= 8; dst_size -= 8) {
-            AV_WN64(dst, val);
-            dst += 8;
-        }
+    } else {
+        if (clear_size > dst_size)
+            clear_size = dst_size;
+        memcpy(dst, clear, clear_size);
+        av_memcpy_backptr(dst + clear_size, clear_size, dst_size - clear_size);
     }
-
-    for (; dst_size; dst_size--)
-        *dst++ = clear[pos++ % clear_size];
 }
 
 // Maximum size in bytes of a plane element (usually a pixel, or multiple pixels
diff --git a/libavutil/integer.c b/libavutil/integer.c
index 890e314dced6d..78e252fbde24f 100644
--- a/libavutil/integer.c
+++ b/libavutil/integer.c
@@ -74,7 +74,7 @@ AVInteger av_mul_i(AVInteger a, AVInteger b){
 
         if(a.v[i])
             for(j=i; j<AV_INTEGER_SIZE && j-i<=nb; j++){
-                carry= (carry>>16) + out.v[j] + a.v[i]*b.v[j-i];
+                carry= (carry>>16) + out.v[j] + a.v[i]*(unsigned)b.v[j-i];
                 out.v[j]= carry;
             }
     }
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 06bd561e82c0c..4acbcf56cb598 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -52,7 +52,7 @@
 #endif
 
 #ifndef emms_c
-#   define emms_c() while(0)
+#   define emms_c() do {} while(0) /* no-op fallback; the caller supplies the ';' */
 #endif
 
 
 #ifndef attribute_align_arg
diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
index 67c763b135cce..4c8413a536868 100644
--- a/libavutil/intreadwrite.h
+++ b/libavutil/intreadwrite.h
@@ -542,6 +542,21 @@ union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
 #   define AV_WN64A(p, v) AV_WNA(64, p, v)
 #endif
 
+#if AV_HAVE_BIGENDIAN /* big-endian host: byte-swap around the native aligned access */
+#   define AV_RLA(s, p)    av_bswap##s(AV_RN##s##A(p))
+#   define AV_WLA(s, p, v) AV_WN##s##A(p, av_bswap##s(v))
+#else /* otherwise the native aligned access is used unchanged */
+#   define AV_RLA(s, p)    AV_RN##s##A(p)
+#   define AV_WLA(s, p, v) AV_WN##s##A(p, v)
+#endif
+
+#ifndef AV_RL64A /* keep any earlier definition */
+#   define AV_RL64A(p) AV_RLA(64, p)
+#endif
+#ifndef AV_WL64A /* keep any earlier definition */
+#   define AV_WL64A(p, v) AV_WLA(64, p, v)
+#endif
+
 /*
  * The AV_COPYxxU macros are suitable for copying data to/from unaligned
  * memory locations.
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 6149755a6b88e..88fe09b1794aa 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -399,6 +399,18 @@ static void fill32(uint8_t *dst, int len)
 {
     uint32_t v = AV_RN32(dst - 4);
 
+#if HAVE_FAST_64BIT
+    uint64_t v2= v + ((uint64_t)v<<32);
+    while (len >= 32) {
+        AV_WN64(dst   , v2);
+        AV_WN64(dst+ 8, v2);
+        AV_WN64(dst+16, v2);
+        AV_WN64(dst+24, v2);
+        dst += 32;
+        len -= 32;
+    }
+#endif
+
     while (len >= 4) {
         AV_WN32(dst, v);
         dst += 4;
diff --git a/libavutil/mem.h b/libavutil/mem.h
index 7e0b12a8a782c..5fb1a02dd9cf1 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -339,7 +339,7 @@ av_alloc_size(2, 3) void *av_realloc_array(void *ptr, size_t nmemb, size_t size)
  * @warning Unlike av_malloc(), the allocated memory is not guaranteed to be
  *          correctly aligned.
  */
-av_alloc_size(2, 3) int av_reallocp_array(void *ptr, size_t nmemb, size_t size);
+int av_reallocp_array(void *ptr, size_t nmemb, size_t size);
 
 /**
  * Reallocate the given buffer if it is not large enough, otherwise do nothing.
@@ -363,10 +363,10 @@ av_alloc_size(2, 3) int av_reallocp_array(void *ptr, size_t nmemb, size_t size);
  * @endcode
  *
  * @param[in,out] ptr      Already allocated buffer, or `NULL`
- * @param[in,out] size     Pointer to current size of buffer `ptr`. `*size` is
- *                         changed to `min_size` in case of success or 0 in
- *                         case of failure
- * @param[in]     min_size New size of buffer `ptr`
+ * @param[in,out] size     Pointer to the size of buffer `ptr`. `*size` is
+ *                         updated to the new allocated size, in particular 0
+ *                         in case of failure.
+ * @param[in]     min_size Desired minimal size of buffer `ptr`
  * @return `ptr` if the buffer is large enough, a pointer to newly reallocated
  *         buffer if the buffer was not large enough, or `NULL` in case of
  *         error
@@ -397,10 +397,10 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size);
  * @param[in,out] ptr      Pointer to pointer to an already allocated buffer.
  *                         `*ptr` will be overwritten with pointer to new
  *                         buffer on success or `NULL` on failure
- * @param[in,out] size     Pointer to current size of buffer `*ptr`. `*size` is
- *                         changed to `min_size` in case of success or 0 in
- *                         case of failure
- * @param[in]     min_size New size of buffer `*ptr`
+ * @param[in,out] size     Pointer to the size of buffer `*ptr`. `*size` is
+ *                         updated to the new allocated size, in particular 0
+ *                         in case of failure.
+ * @param[in]     min_size Desired minimal size of buffer `*ptr`
  * @see av_realloc()
  * @see av_fast_mallocz()
  */
@@ -418,10 +418,10 @@ void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size);
  * @param[in,out] ptr      Pointer to pointer to an already allocated buffer.
  *                         `*ptr` will be overwritten with pointer to new
  *                         buffer on success or `NULL` on failure
- * @param[in,out] size     Pointer to current size of buffer `*ptr`. `*size` is
- *                         changed to `min_size` in case of success or 0 in
- *                         case of failure
- * @param[in]     min_size New size of buffer `*ptr`
+ * @param[in,out] size     Pointer to the size of buffer `*ptr`. `*size` is
+ *                         updated to the new allocated size, in particular 0
+ *                         in case of failure.
+ * @param[in]     min_size Desired minimal size of buffer `*ptr`
  * @see av_fast_malloc()
  */
 void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size);
diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
index 76b1199ab9cdd..05f6b31155b79 100644
--- a/libavutil/mips/mmiutils.h
+++ b/libavutil/mips/mmiutils.h
@@ -250,6 +250,15 @@
       : "memory"                                                \
     );
 
+/**
+ * brief: Transpose 2X2 word packed data (two rows of two words each).
+ * fr_i0, fr_i1: src rows; both are read again after fr_o0 has been written
+ * fr_o0, fr_o1: dst rows; fr_o0 must therefore not alias fr_i0 or fr_i1
+ */
+#define TRANSPOSE_2W(fr_i0, fr_i1, fr_o0, fr_o1)                          \
+        "punpcklwd  "#fr_o0",   "#fr_i0",   "#fr_i1"                \n\t" \
+        "punpckhwd  "#fr_o1",   "#fr_i0",   "#fr_i1"                \n\t"
+
 /**
  * brief: Transpose 4X4 half word packaged data.
  * fr_i0, fr_i1, fr_i2, fr_i3: src & dst
@@ -336,5 +345,20 @@
         PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift)                            \
         PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)
 
+/**
+ * brief: Rounding right shift: (((value) + (1 << ((n) - 1))) >> (n))
+ * fr_i0: src & dst (value; updated in place with packed-word add and shift)
+ * fr_i1: shift amount n (only read; the rounding term uses n - 1)
+ * fr_t0, fr_t1: temporary FPR, overwritten before fr_i0/fr_i1 are last read
+ * gr_t0: temporary GPR, overwritten with the constant 1
+ */
+#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)         \
+        "li         "#gr_t0",     0x01                              \n\t" \
+        "dmtc1      "#gr_t0",     "#fr_t0"                          \n\t" \
+        "punpcklwd  "#fr_t0",     "#fr_t0",    "#fr_t0"             \n\t" \
+        "psubw      "#fr_t1",     "#fr_i1",    "#fr_t0"             \n\t" \
+        "psllw      "#fr_t1",     "#fr_t0",    "#fr_t1"             \n\t" \
+        "paddw      "#fr_i0",     "#fr_i0",    "#fr_t1"             \n\t" \
+        "psraw      "#fr_i0",     "#fr_i0",    "#fr_i1"             \n\t"
 
 #endif /* AVUTILS_MIPS_MMIUTILS_H */
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index 59bec6cc9d287..167e8226482f7 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -504,7 +504,7 @@ char *av_small_strptime(const char *p, const char *fmt, struct tm *dt)
         switch(c) {
         case 'H':
         case 'J':
-            val = date_get_num(&p, 0, c == 'H' ? 23 : INT_MAX, 2);
+            val = date_get_num(&p, 0, c == 'H' ? 23 : INT_MAX, c == 'H' ? 2 : 4);
 
             if (val == -1)
                 return NULL;
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 970a83214c506..fe38344d732ef 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -31,19 +31,22 @@
 #include "intreadwrite.h"
 #include "version.h"
 
-void av_read_image_line(uint16_t *dst,
+void av_read_image_line2(void *dst,
                         const uint8_t *data[4], const int linesize[4],
                         const AVPixFmtDescriptor *desc,
                         int x, int y, int c, int w,
-                        int read_pal_component)
+                        int read_pal_component,
+                        int dst_element_size)
 {
     AVComponentDescriptor comp = desc->comp[c];
     int plane = comp.plane;
     int depth = comp.depth;
-    int mask  = (1 << depth) - 1;
+    unsigned mask  = (1ULL << depth) - 1;
     int shift = comp.shift;
     int step  = comp.step;
     int flags = desc->flags;
+    uint16_t *dst16 = dst;
+    uint32_t *dst32 = dst;
 
     if (flags & AV_PIX_FMT_FLAG_BITSTREAM) {
         int skip = x * step + comp.offset;
@@ -57,38 +60,56 @@ void av_read_image_line(uint16_t *dst,
             shift -= step;
             p -= shift >> 3;
             shift &= 7;
-            *dst++ = val;
+            if (dst_element_size == 4) *dst32++ = val;
+            else                       *dst16++ = val;
         }
     } else {
         const uint8_t *p = data[plane] + y * linesize[plane] +
                            x * step + comp.offset;
         int is_8bit = shift + depth <= 8;
+        int is_16bit= shift + depth <=16;
 
         if (is_8bit)
             p += !!(flags & AV_PIX_FMT_FLAG_BE);
 
         while (w--) {
-            int val = is_8bit ? *p :
-                flags & AV_PIX_FMT_FLAG_BE ? AV_RB16(p) : AV_RL16(p);
+            unsigned val;
+            if     (is_8bit)  val = *p;
+            else if(is_16bit) val = flags & AV_PIX_FMT_FLAG_BE ? AV_RB16(p) : AV_RL16(p);
+            else              val = flags & AV_PIX_FMT_FLAG_BE ? AV_RB32(p) : AV_RL32(p);
             val = (val >> shift) & mask;
             if (read_pal_component)
                 val = data[1][4 * val + c];
             p += step;
-            *dst++ = val;
+            if (dst_element_size == 4) *dst32++ = val;
+            else                       *dst16++ = val;
         }
     }
 }
 
-void av_write_image_line(const uint16_t *src,
+void av_read_image_line(uint16_t *dst,
+                        const uint8_t *data[4], const int linesize[4],
+                        const AVPixFmtDescriptor *desc,
+                        int x, int y, int c, int w,
+                        int read_pal_component)
+{
+    /* 16-bit wrapper around av_read_image_line2(): dst elements are 2 bytes. */
+    av_read_image_line2(dst, data, linesize, desc, x, y, c, w,
+                        read_pal_component, sizeof(*dst));
+}
+
+void av_write_image_line2(const void *src,
                          uint8_t *data[4], const int linesize[4],
                          const AVPixFmtDescriptor *desc,
-                         int x, int y, int c, int w)
+                         int x, int y, int c, int w, int src_element_size)
 {
     AVComponentDescriptor comp = desc->comp[c];
     int plane = comp.plane;
     int depth = comp.depth;
     int step  = comp.step;
     int flags = desc->flags;
+    const uint32_t *src32 = src;
+    const uint16_t *src16 = src;
 
     if (flags & AV_PIX_FMT_FLAG_BITSTREAM) {
         int skip = x * step + comp.offset;
@@ -96,7 +117,7 @@ void av_write_image_line(const uint16_t *src,
         int shift = 8 - depth - (skip & 7);
 
         while (w--) {
-            *p |= *src++ << shift;
+            *p |= (src_element_size == 4 ? *src32++ : *src16++) << shift;
             shift -= step;
             p -= shift >> 3;
             shift &= 7;
@@ -109,17 +130,28 @@ void av_write_image_line(const uint16_t *src,
         if (shift + depth <= 8) {
             p += !!(flags & AV_PIX_FMT_FLAG_BE);
             while (w--) {
-                *p |= (*src++ << shift);
+                *p |= ((src_element_size == 4 ? *src32++ : *src16++) << shift);
                 p += step;
             }
         } else {
             while (w--) {
-                if (flags & AV_PIX_FMT_FLAG_BE) {
-                    uint16_t val = AV_RB16(p) | (*src++ << shift);
-                    AV_WB16(p, val);
+                unsigned s = (src_element_size == 4 ? *src32++ : *src16++);
+                if (shift + depth <= 16) {
+                    if (flags & AV_PIX_FMT_FLAG_BE) {
+                        uint16_t val = AV_RB16(p) | (s << shift);
+                        AV_WB16(p, val);
+                    } else {
+                        uint16_t val = AV_RL16(p) | (s << shift);
+                        AV_WL16(p, val);
+                    }
                 } else {
-                    uint16_t val = AV_RL16(p) | (*src++ << shift);
-                    AV_WL16(p, val);
+                    if (flags & AV_PIX_FMT_FLAG_BE) {
+                        uint32_t val = AV_RB32(p) | (s << shift);
+                        AV_WB32(p, val);
+                    } else {
+                        uint32_t val = AV_RL32(p) | (s << shift);
+                        AV_WL32(p, val);
+                    }
                 }
                 p += step;
             }
@@ -127,6 +159,14 @@ void av_write_image_line(const uint16_t *src,
     }
 }
 
+void av_write_image_line(const uint16_t *src,
+                         uint8_t *data[4], const int linesize[4],
+                         const AVPixFmtDescriptor *desc, int x, int y, int c, int w)
+{
+    /* 16-bit wrapper around av_write_image_line2(): src elements are 2 bytes. */
+    av_write_image_line2(src, data, linesize, desc, x, y, c, w, sizeof(*src));
+}
+
 #if FF_API_PLUS1_MINUS1
 FF_DISABLE_DEPRECATION_WARNINGS
 #endif
@@ -2228,6 +2268,58 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .flags = AV_PIX_FMT_FLAG_FLOAT,
         .alias = "yf32le",
     },
+    [AV_PIX_FMT_YUVA422P12BE] = {
+        .name = "yuva422p12be",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
+            { 3, 2, 0, 0, 12, 1, 11, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
+    },
+    [AV_PIX_FMT_YUVA422P12LE] = {
+        .name = "yuva422p12le",
+        .nb_components = 4,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
+            { 3, 2, 0, 0, 12, 1, 11, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
+    },
+    [AV_PIX_FMT_YUVA444P12BE] = {
+        .name = "yuva444p12be",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
+            { 3, 2, 0, 0, 12, 1, 11, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
+    },
+    [AV_PIX_FMT_YUVA444P12LE] = {
+        .name = "yuva444p12le",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
+            { 3, 2, 0, 0, 12, 1, 11, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
+    },
 };
 #if FF_API_PLUS1_MINUS1
 FF_ENABLE_DEPRECATION_WARNINGS
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index 4f9c5a271fdea..c055810ae8772 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -343,7 +343,13 @@ char *av_get_pix_fmt_string(char *buf, int buf_size,
  * format writes the values corresponding to the palette
  * component c in data[1] to dst, rather than the palette indexes in
  * data[0]. The behavior is undefined if the format is not paletted.
+ * @param dst_element_size size in bytes of each element of the dst array (2 or 4)
  */
+void av_read_image_line2(void *dst, const uint8_t *data[4],
+                        const int linesize[4], const AVPixFmtDescriptor *desc,
+                        int x, int y, int c, int w, int read_pal_component,
+                        int dst_element_size);
+
 void av_read_image_line(uint16_t *dst, const uint8_t *data[4],
                         const int linesize[4], const AVPixFmtDescriptor *desc,
                         int x, int y, int c, int w, int read_pal_component);
@@ -361,7 +367,12 @@ void av_read_image_line(uint16_t *dst, const uint8_t *data[4],
  * @param y the vertical coordinate of the first pixel to write
  * @param w the width of the line to write, that is the number of
  * values to write to the image line
+ * @param src_element_size size in bytes of each element of the src array (2 or 4)
  */
+void av_write_image_line2(const void *src, uint8_t *data[4],
+                         const int linesize[4], const AVPixFmtDescriptor *desc,
+                         int x, int y, int c, int w, int src_element_size);
+
 void av_write_image_line(const uint16_t *src, uint8_t *data[4],
                          const int linesize[4], const AVPixFmtDescriptor *desc,
                          int x, int y, int c, int w);
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 6815f8dc7b2f2..24d1b7e415388 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -340,6 +340,11 @@ enum AVPixelFormat {
     AV_PIX_FMT_GRAYF32BE,  ///< IEEE-754 single precision Y, 32bpp, big-endian
     AV_PIX_FMT_GRAYF32LE,  ///< IEEE-754 single precision Y, 32bpp, little-endian
 
+    AV_PIX_FMT_YUVA422P12BE, ///< planar YUV 4:2:2, 36bpp including 12b alpha, (1 Cr & Cb sample per 2x1 Y & A samples), big-endian
+    AV_PIX_FMT_YUVA422P12LE, ///< planar YUV 4:2:2, 36bpp including 12b alpha, (1 Cr & Cb sample per 2x1 Y & A samples), little-endian
+    AV_PIX_FMT_YUVA444P12BE, ///< planar YUV 4:4:4, 48bpp including 12b alpha, (1 Cr & Cb sample per 1x1 Y & A samples), big-endian
+    AV_PIX_FMT_YUVA444P12LE, ///< planar YUV 4:4:4, 48bpp including 12b alpha, (1 Cr & Cb sample per 1x1 Y & A samples), little-endian
+
     AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
 
@@ -416,6 +421,8 @@ enum AVPixelFormat {
 #define AV_PIX_FMT_YUVA420P10 AV_PIX_FMT_NE(YUVA420P10BE, YUVA420P10LE)
 #define AV_PIX_FMT_YUVA422P10 AV_PIX_FMT_NE(YUVA422P10BE, YUVA422P10LE)
 #define AV_PIX_FMT_YUVA444P10 AV_PIX_FMT_NE(YUVA444P10BE, YUVA444P10LE)
+#define AV_PIX_FMT_YUVA422P12 AV_PIX_FMT_NE(YUVA422P12BE, YUVA422P12LE)
+#define AV_PIX_FMT_YUVA444P12 AV_PIX_FMT_NE(YUVA444P12BE, YUVA444P12LE)
 #define AV_PIX_FMT_YUVA420P16 AV_PIX_FMT_NE(YUVA420P16BE, YUVA420P16LE)
 #define AV_PIX_FMT_YUVA422P16 AV_PIX_FMT_NE(YUVA422P16BE, YUVA422P16LE)
 #define AV_PIX_FMT_YUVA444P16 AV_PIX_FMT_NE(YUVA444P16BE, YUVA444P16LE)
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
index 7bb7cd813c978..b022149fa0878 100644
--- a/libavutil/ppc/cpu.c
+++ b/libavutil/ppc/cpu.c
@@ -92,14 +92,14 @@ int ff_get_cpu_flags_ppc(void)
 #ifdef PPC_FEATURE_HAS_VSX
                 if (buf[i + 1] & PPC_FEATURE_HAS_VSX)
                     ret |= AV_CPU_FLAG_VSX;
-#endif
-#ifdef PPC_FEATURE_ARCH_2_07
-                if (buf[i + 1] & PPC_FEATURE_HAS_POWER8)
-                    ret |= AV_CPU_FLAG_POWER8;
 #endif
                 if (ret & AV_CPU_FLAG_VSX)
                     av_assert0(ret & AV_CPU_FLAG_ALTIVEC);
-                goto out;
+            } else if (buf[i] == AT_HWCAP2) {
+#ifdef PPC_FEATURE2_ARCH_2_07
+                if (buf[i + 1] & PPC_FEATURE2_ARCH_2_07)
+                    ret |= AV_CPU_FLAG_POWER8;
+#endif
             }
         }
     }
diff --git a/libavutil/tests/random_seed.c b/libavutil/tests/random_seed.c
index 78067dbe41a38..bf0c6c79869b4 100644
--- a/libavutil/tests/random_seed.c
+++ b/libavutil/tests/random_seed.c
@@ -47,7 +47,7 @@ int main(void)
             retry:;
         }
         if (retry >= 3) {
-            printf("rsf %d: FAIL at %d with %X\n", rsf, j, seeds[j]);
+            printf("rsf %d: FAIL at %d with %"PRIX32"\n", rsf, j, seeds[j]);
             return 1;
         }
     }
diff --git a/libavutil/version.h b/libavutil/version.h
index f84ec891544ce..1fcdea95bf409 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,8 +79,8 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  19
-#define LIBAVUTIL_VERSION_MICRO 101
+#define LIBAVUTIL_VERSION_MINOR  26
+#define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 0a4398926681b..b0adfd168ccfa 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -1184,10 +1184,10 @@ FIND_MIN_MAX((%0, %1, 8))
 #endif
         "movq %%mm6, %%mm0                      \n\t" // max
         "psubb %%mm7, %%mm6                     \n\t" // max - min
-        "push %4                              \n\t"
-        "movd %%mm6, %k4                        \n\t"
-        "cmpb "MANGLE(deringThreshold)", %b4    \n\t"
-        "pop %4                               \n\t"
+        "push %%"FF_REG_a"                      \n\t"
+        "movd %%mm6, %%eax                      \n\t"
+        "cmpb "MANGLE(deringThreshold)", %%al   \n\t"
+        "pop %%"FF_REG_a"                       \n\t"
         " jb 1f                                 \n\t"
         PAVGB(%%mm0, %%mm7)                           // a=(max + min)/2
         "punpcklbw %%mm7, %%mm7                 \n\t"
@@ -1317,7 +1317,7 @@ DERING_CORE((%0, %1, 8)       ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,
         "1:                        \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
           NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
-        : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_sp
+        : "%"FF_REG_a, "%"FF_REG_d
     );
 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
     int y;
diff --git a/libpostproc/version.h b/libpostproc/version.h
index f3725483ce94e..373705679b504 100644
--- a/libpostproc/version.h
+++ b/libpostproc/version.h
@@ -29,7 +29,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBPOSTPROC_VERSION_MAJOR  55
-#define LIBPOSTPROC_VERSION_MINOR   2
+#define LIBPOSTPROC_VERSION_MINOR   4
 #define LIBPOSTPROC_VERSION_MICRO 100
 
 #define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \
diff --git a/libswresample/version.h b/libswresample/version.h
index b8b5bee9b3c2b..c70cf812f80e3 100644
--- a/libswresample/version.h
+++ b/libswresample/version.h
@@ -29,7 +29,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBSWRESAMPLE_VERSION_MAJOR   3
-#define LIBSWRESAMPLE_VERSION_MINOR   2
+#define LIBSWRESAMPLE_VERSION_MINOR   4
 #define LIBSWRESAMPLE_VERSION_MICRO 100
 
 #define LIBSWRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \
diff --git a/libswscale/input.c b/libswscale/input.c
index 4099c19c2bc2d..c2dc356b5de82 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1097,6 +1097,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P10LE:
     case AV_PIX_FMT_YUVA422P10LE:
     case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUVA422P12LE:
+    case AV_PIX_FMT_YUVA444P12LE:
     case AV_PIX_FMT_YUVA420P16LE:
     case AV_PIX_FMT_YUVA422P16LE:
     case AV_PIX_FMT_YUVA444P16LE:
@@ -1127,6 +1129,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P10BE:
     case AV_PIX_FMT_YUVA422P10BE:
     case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA422P12BE:
+    case AV_PIX_FMT_YUVA444P12BE:
     case AV_PIX_FMT_YUVA420P16BE:
     case AV_PIX_FMT_YUVA422P16BE:
     case AV_PIX_FMT_YUVA444P16BE:
@@ -1401,6 +1405,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P10LE:
     case AV_PIX_FMT_YUVA422P10LE:
     case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUVA422P12LE:
+    case AV_PIX_FMT_YUVA444P12LE:
     case AV_PIX_FMT_YUVA420P16LE:
     case AV_PIX_FMT_YUVA422P16LE:
     case AV_PIX_FMT_YUVA444P16LE:
@@ -1441,6 +1447,8 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YUVA420P10BE:
     case AV_PIX_FMT_YUVA422P10BE:
     case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA422P12BE:
+    case AV_PIX_FMT_YUVA444P12BE:
     case AV_PIX_FMT_YUVA420P16BE:
     case AV_PIX_FMT_YUVA422P16BE:
     case AV_PIX_FMT_YUVA444P16BE:
diff --git a/libswscale/output.c b/libswscale/output.c
index de8637aa3b43a..d7c53e60d9fb0 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -900,6 +900,99 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
         AV_WL16(pos, val); \
     }
 
+static av_always_inline void
+yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                        const int32_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int32_t **unused_chrUSrc,
+                        const int32_t **unused_chrVSrc, int unused_chrFilterSize,
+                        const int32_t **alpSrc, uint16_t *dest, int dstW,
+                        int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    /* Vertical multi-tap filter producing interleaved 16-bit Y,A output. */
+    int hasAlpha = !!alpSrc;
+    int i;
+    for (i = 0; i < dstW; i++) {
+        int j;
+        /* Accumulate in 64 bits: the sum of int32 * int16 products plus the
+         * bias can exceed INT_MAX, and signed int overflow is undefined. */
+        int64_t Y = 1 << 18;
+        int64_t A = 0xffff<<14;
+        for (j = 0; j < lumFilterSize; j++)
+            Y += (int64_t)lumSrc[j][i] * lumFilter[j];
+        Y >>= 15;
+        Y = av_clip_uint16(Y);
+
+        if (hasAlpha) {
+            for (j = 0; j < lumFilterSize; j++)
+                A += (int64_t)alpSrc[j][i] * lumFilter[j];
+            A >>= 15;
+            A = av_clip_uint16(A);
+        }
+
+        /* Alpha defaults to fully opaque when no alpha plane is supplied. */
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
+    }
+}
+
+static av_always_inline void
+yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
+                        const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
+                        const int32_t *abuf[2], uint16_t *dest, int dstW,
+                        int yalpha, int unused_uvalpha, int y,
+                        enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    /* Blend two source lines with weights (4096 - yalpha, yalpha) and write
+     * the result as interleaved 16-bit Y,A pairs. */
+    const int hasAlpha = abuf && abuf[0] && abuf[1];
+    const int32_t *line0  = buf[0], *line1 = buf[1];
+    const int32_t *alpha0 = hasAlpha ? abuf[0] : NULL;
+    const int32_t *alpha1 = hasAlpha ? abuf[1] : NULL;
+    const int w0 = 4096 - yalpha, w1 = yalpha;
+    int i;
+
+    av_assert2(yalpha  <= 4096U);
+
+    for (i = 0; i < dstW; i++) {
+        /* Without an alpha plane the pixel is written as fully opaque. */
+        int A = 65535;
+        int Y = av_clip_uint16((line0[i] * w0 + line1[i] * w1) >> 15);
+
+        if (hasAlpha) {
+            A = av_clip_uint16((alpha0[i] * w0 + alpha1[i] * w1) >> 15);
+        }
+
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], A);
+    }
+}
+
+static av_always_inline void
+yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
+                        const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
+                        const int32_t *abuf0, uint16_t *dest, int dstW,
+                        int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    /* Single-line case: reduce 19-bit intermediates to 16-bit Y,A pairs. */
+    int hasAlpha = !!abuf0;
+    int i;
+    for (i = 0; i < dstW; i++) {
+        int Y = buf0[i] >> 3;/* 19 - 16 */
+        int A;
+
+        Y = av_clip_uint16(Y);
+
+        if (hasAlpha) {
+            /* Clip unconditionally: testing bit 8 only detects overflow of
+             * 8-bit values and lets out-of-range 16-bit alpha wrap around. */
+            A = av_clip_uint16(abuf0[i] >> 3);
+        }
+
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
+    }
+}
+
 static av_always_inline void
 yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
@@ -1405,6 +1498,8 @@ YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
 
 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
@@ -2835,6 +2930,16 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         *yuv2packed2 = yuv2ya8_2_c;
         *yuv2packedX = yuv2ya8_X_c;
         break;
+    case AV_PIX_FMT_YA16LE:
+        *yuv2packed1 = yuv2ya16le_1_c;
+        *yuv2packed2 = yuv2ya16le_2_c;
+        *yuv2packedX = yuv2ya16le_X_c;
+        break;
+    case AV_PIX_FMT_YA16BE:
+        *yuv2packed1 = yuv2ya16be_1_c;
+        *yuv2packed2 = yuv2ya16be_2_c;
+        *yuv2packedX = yuv2ya16be_X_c;
+        break;
     case AV_PIX_FMT_AYUV64LE:
         *yuv2packedX = yuv2ayuv64le_X_c;
         break;
diff --git a/libswscale/ppc/Makefile b/libswscale/ppc/Makefile
index d1b596eb3f835..0a31a3025b878 100644
--- a/libswscale/ppc/Makefile
+++ b/libswscale/ppc/Makefile
@@ -1,3 +1,4 @@
 OBJS += ppc/swscale_altivec.o                                           \
         ppc/yuv2rgb_altivec.o                                           \
         ppc/yuv2yuv_altivec.o                                           \
+        ppc/swscale_vsx.o
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 2fb2337769939..d72ed1e0f9801 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -32,20 +32,14 @@
 #include "libavutil/ppc/util_altivec.h"
 
 #if HAVE_ALTIVEC
+#if HAVE_BIGENDIAN
 #define vzero vec_splat_s32(0)
 
-#if HAVE_BIGENDIAN
 #define  GET_LS(a,b,c,s) {\
         vector signed short l2  = vec_ld(((b) << 1) + 16, s);\
         ls  = vec_perm(a, l2, c);\
         a = l2;\
     }
-#else
-#define  GET_LS(a,b,c,s) {\
-        ls  = a;\
-        a = vec_vsx_ld(((b) << 1)  + 16, s);\
-    }
-#endif
 
 #define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do {\
         vector signed short ls;\
@@ -59,7 +53,6 @@
         d2 = vec_add(d2, vf2);\
     } while (0)
 
-#if HAVE_BIGENDIAN
 #define LOAD_FILTER(vf,f) {\
         vector unsigned char perm0 = vec_lvsl(joffset, f);\
         vf = vec_ld(joffset, f);\
@@ -69,89 +62,7 @@
         p = vec_lvsl(xoffset, s);\
         ll1   = vec_ld(xoffset, s);\
 }
-#else
-#define LOAD_FILTER(vf,f) {\
-        vf = vec_vsx_ld(joffset, f);\
-}
-#define LOAD_L1(ll1,s,p){\
-        ll1  = vec_vsx_ld(xoffset, s);\
-}
-#endif
-
-static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
-                                  const int16_t **src, uint8_t *dest,
-                                  const uint8_t *dither, int offset, int x)
-{
-    register int i, j;
-    LOCAL_ALIGNED(16, int, val, [16]);
-    vector signed int vo1, vo2, vo3, vo4;
-    vector unsigned short vs1, vs2;
-    vector unsigned char vf;
-    vector unsigned int altivec_vectorShiftInt19 =
-        vec_add(vec_splat_u32(10), vec_splat_u32(9));
-
-    for (i = 0; i < 16; i++)
-        val[i] = dither[(x + i + offset) & 7] << 12;
-
-    vo1 = vec_ld(0,  val);
-    vo2 = vec_ld(16, val);
-    vo3 = vec_ld(32, val);
-    vo4 = vec_ld(48, val);
-
-    for (j = 0; j < filterSize; j++) {
-        unsigned int joffset=j<<1;
-        unsigned int xoffset=x<<1;
-        vector unsigned char perm;
-        vector signed short l1,vLumFilter;
-        LOAD_FILTER(vLumFilter,filter);
-        vLumFilter = vec_splat(vLumFilter, 0);
-        LOAD_L1(l1,src[j],perm);
-        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter);
-        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter);
-    }
-
-    vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
-    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
-    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
-    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
-    vs1 = vec_packsu(vo1, vo2);
-    vs2 = vec_packsu(vo3, vo4);
-    vf  = vec_packsu(vs1, vs2);
-    VEC_ST(vf, 0, dest);
-}
 
-
-static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
-                                const int16_t **src, uint8_t *dest, int dstW,
-                                const uint8_t *dither, int offset, int x)
-{
-    int i, j;
-
-    for (i = x; i < dstW; i++) {
-        int t = dither[(i + offset) & 7] << 12;
-        for (j = 0; j < filterSize; j++)
-            t += src[j][i] * filter[j];
-        dest[i] = av_clip_uint8(t >> 19);
-    }
-}
-
-static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
-                               const int16_t **src, uint8_t *dest, int dstW,
-                               const uint8_t *dither, int offset)
-{
-    int dst_u = -(uintptr_t)dest & 15;
-    int i;
-
-    yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);
-
-    for (i = dst_u; i < dstW - 15; i += 16)
-        yuv2planeX_16_altivec(filter, filterSize, src, dest + i, dither,
-                              offset, i);
-
-    yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);
-}
-
-#if HAVE_BIGENDIAN
 // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
 
 // The neat trick: We only care for half the elements,
@@ -187,143 +98,141 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
     vf1 = vec_ld((a * 2 * filterSize) + (b * 2) + 16 + off, f);\
     vf  = vec_perm(vf0, vf1, per);\
 }
-#else /* else of #if HAVE_BIGENDIAN */
-#define GET_VF4(a, vf, f) {\
-    vf = (vector signed short)vec_vsx_ld(a << 3, f);\
-    vf = vec_mergeh(vf, (vector signed short)vzero);\
+
+#define FUNC(name) name ## _altivec
+#include "swscale_ppc_template.c"
+#undef FUNC
+
+#undef vzero
+
+#endif /* HAVE_BIGENDIAN */
+
+#define output_pixel(pos, val, bias, signedness) /* relies on `big_endian` and `shift` being visible where it is expanded */ \
+    if (big_endian) { /* store through AV_WB16 when big_endian is non-zero */ \
+        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
+    } else { /* otherwise store through AV_WL16 */ \
+        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
+    }
+
+static void
+yuv2plane1_float_u(const int32_t *src, float *dest, int dstW, int start)
+{ /* Scalar path: converts src[start..dstW) to floats in dest, one element at a time. */
+    static const int big_endian = HAVE_BIGENDIAN; /* makes output_pixel store in host byte order */
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f; /* maps the 16-bit range onto 0..1 */
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = start; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1)); /* add half of the divisor so the shift rounds */
+        output_pixel(&val_uint, val, 0, uint); /* val_uint = clip_uint16(val >> shift) */
+        dest[i] = float_mult * (float)val_uint;
+    }
 }
-#define FIRST_LOAD(sv, pos, s, per) {}
-#define UPDATE_PTR(s0, d0, s1, d1) {}
-#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) {\
-    vf = vec_vsx_ld(pos + a, s);\
+
+static void
+yuv2plane1_float_bswap_u(const int32_t *src, uint32_t *dest, int dstW, int start)
+{ /* Scalar path: like yuv2plane1_float_u, but stores each float's bit pattern byte-swapped. */
+    static const int big_endian = HAVE_BIGENDIAN; /* makes output_pixel store in host byte order */
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f; /* maps the 16-bit range onto 0..1 */
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = start; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1)); /* add half of the divisor so the shift rounds */
+        output_pixel(&val_uint, val, 0, uint); /* val_uint = clip_uint16(val >> shift) */
+        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint)); /* float bits -> uint32 -> swapped */
+    }
 }
-#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) LOAD_SRCV(pos, a, s, per, v0, v1, vf)
-#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) {\
-    vf  = vec_vsx_ld((a * 2 * filterSize) + (b * 2) + off, f);\
+
+static void yuv2plane1_float_altivec(const int32_t *src, float *dest, int dstW)
+{ /* Converts dstW int32 samples to floats: scalar head, 4-wide vector body, scalar tail. */
+    const int dst_u = FFMIN((int)((-(uintptr_t)dest & 15) >> 2), dstW); /* floats until dest is 16-byte aligned, never more than dstW */
+    const int shift = 3;
+    const int add = (1 << (shift - 1)); /* rounding term for the shift */
+    const int clip = (1 << 16) - 1; /* largest 16-bit value */
+    const float fmult = 1.0f / 65535.0f; /* maps the 16-bit range onto 0..1 */
+    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
+    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
+    const vector uint32_t vlargest = (vector uint32_t) {clip, clip, clip, clip};
+    const vector float vmul = (vector float) {fmult, fmult, fmult, fmult};
+    const vector float vzero = (vector float) {0, 0, 0, 0};
+    vector uint32_t v;
+    vector float vd;
+    int i;
+
+    yuv2plane1_float_u(src, dest, dst_u, 0); /* scalar head: elements in front of the aligned region */
+
+    for (i = dst_u; i < dstW - 3; i += 4) {
+        v = unaligned_load(0, (const uint32_t *) &src[i]); /* &src[i] is not guaranteed 16-byte aligned once dst_u != 0 */
+        v = vec_add(v, vadd);
+        v = vec_sr(v, vshift);
+        v = vec_min(v, vlargest); /* clamp to the 16-bit maximum */
+
+        vd = vec_ctf(v, 0); /* unsigned int -> float, no scaling */
+        vd = vec_madd(vd, vmul, vzero); /* vd * fmult + 0 */
+
+        vec_st(vd, 0, &dest[i]); /* aligned store: &dest[i] is on a 16-byte boundary here */
+    }
+
+    yuv2plane1_float_u(src, dest, dstW, i); /* scalar tail: whatever the vector loop left over */
 }
-#endif /* end of #if HAVE_BIGENDIAN */
 
-static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
-                                const uint8_t *src, const int16_t *filter,
-                                const int32_t *filterPos, int filterSize)
+static void yuv2plane1_float_bswap_altivec(const int32_t *src, uint32_t *dest, int dstW)
 {
-    register int i;
-    LOCAL_ALIGNED(16, int, tempo, [4]);
-
-    if (filterSize % 4) {
-        for (i = 0; i < dstW; i++) {
-            register int j;
-            register int srcPos = filterPos[i];
-            register int val    = 0;
-            for (j = 0; j < filterSize; j++)
-                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
-            dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
-        }
-    } else
-        switch (filterSize) {
-        case 4:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-
-                vector unsigned char src_vF = unaligned_load(srcPos, src);
-                vector signed short src_v, filter_v;
-                vector signed int val_vEven, val_s;
-                src_v = // vec_unpackh sign-extends...
-                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
-                // now put our elements in the even slots
-                src_v = vec_mergeh(src_v, (vector signed short)vzero);
-                GET_VF4(i, filter_v, filter);
-                val_vEven = vec_mule(src_v, filter_v);
-                val_s     = vec_sums(val_vEven, vzero);
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        break;
-        case 8:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-                vector unsigned char src_vF, src_v0, src_v1;
-                vector unsigned char permS;
-                vector signed short src_v, filter_v;
-                vector signed int val_v, val_s;
-                FIRST_LOAD(src_v0, srcPos, src, permS);
-                LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
-                src_v = // vec_unpackh sign-extends...
-                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
-                filter_v = vec_ld(i << 4, filter);
-                val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
-                val_s = vec_sums(val_v, vzero);
-                vec_st(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        break;
-
-        case 16:
-            for (i = 0; i < dstW; i++) {
-                register int srcPos = filterPos[i];
-
-                vector unsigned char src_vF = unaligned_load(srcPos, src);
-                vector signed short src_vA = // vec_unpackh sign-extends...
-                                             (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
-                vector signed short src_vB = // vec_unpackh sign-extends...
-                                             (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
-                vector signed short filter_v0 = vec_ld(i << 5, filter);
-                vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
-
-                vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
-                vector signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);
-
-                vector signed int val_s = vec_sums(val_v, vzero);
-
-                VEC_ST(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        break;
-
-        default:
-            for (i = 0; i < dstW; i++) {
-                register int j, offset = i * 2 * filterSize;
-                register int srcPos = filterPos[i];
-
-                vector signed int val_s, val_v = (vector signed int)vzero;
-                vector signed short filter_v0R;
-                vector unsigned char permF, src_v0, permS;
-                FIRST_LOAD(filter_v0R, offset, filter, permF);
-                FIRST_LOAD(src_v0, srcPos, src, permS);
-
-                for (j = 0; j < filterSize - 15; j += 16) {
-                    vector unsigned char src_v1, src_vF;
-                    vector signed short filter_v1R, filter_v2R, filter_v0, filter_v1;
-                    LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF);
-                    vector signed short src_vA = // vec_unpackh sign-extends...
-                                                 (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
-                    vector signed short src_vB = // vec_unpackh sign-extends...
-                                                 (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
-                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
-                    GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16);
-
-                    vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
-                    val_v = vec_msums(src_vB, filter_v1, val_acc);
-                    UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
-                }
-
-                if (j < filterSize - 7) {
-                    // loading src_v0 is useless, it's already done above
-                    vector unsigned char src_v1, src_vF;
-                    vector signed short src_v, filter_v1R, filter_v;
-                    LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF);
-                    src_v = // vec_unpackh sign-extends...
-                            (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
-                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0);
-                    val_v = vec_msums(src_v, filter_v, val_v);
-                }
-                val_s = vec_sums(val_v, vzero);
-
-                VEC_ST(val_s, 0, tempo);
-                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
-            }
-        }
+    const int dst_u = FFMIN((int)((-(uintptr_t)dest & 15) >> 2), dstW); /* 32-bit elements until dest is 16-byte aligned, never more than dstW */
+    const int shift = 3;
+    const int add = (1 << (shift - 1)); /* rounding term for the shift */
+    const int clip = (1 << 16) - 1; /* largest 16-bit value */
+    const float fmult = 1.0f / 65535.0f; /* maps the 16-bit range onto 0..1 */
+    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
+    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
+    const vector uint32_t vlargest = (vector uint32_t) {clip, clip, clip, clip};
+    const vector float vmul = (vector float) {fmult, fmult, fmult, fmult};
+    const vector float vzero = (vector float) {0, 0, 0, 0};
+    const vector uint32_t vswapbig = (vector uint32_t) {16, 16, 16, 16}; /* rotate count: swaps the 16-bit halves of each word */
+    const vector uint16_t vswapsmall = vec_splat_u16(8); /* rotate count: swaps the bytes of each halfword */
+    vector uint32_t v;
+    vector float vd;
+    int i;
+
+    yuv2plane1_float_bswap_u(src, dest, dst_u, 0); /* scalar head: elements in front of the aligned region */
+
+    for (i = dst_u; i < dstW - 3; i += 4) {
+        v = unaligned_load(0, (const uint32_t *) &src[i]); /* &src[i] is not guaranteed 16-byte aligned once dst_u != 0 */
+        v = vec_add(v, vadd);
+        v = vec_sr(v, vshift);
+        v = vec_min(v, vlargest); /* clamp to the 16-bit maximum */
+
+        vd = vec_ctf(v, 0); /* unsigned int -> float, no scaling */
+        vd = vec_madd(vd, vmul, vzero); /* vd * fmult + 0 */
+
+        vd = (vector float) vec_rl((vector uint32_t) vd, vswapbig); /* the two rotates together byte-swap each 32-bit word */
+        vd = (vector float) vec_rl((vector uint16_t) vd, vswapsmall);
+
+        vec_st(vd, 0, (float *) &dest[i]); /* aligned store: &dest[i] is on a 16-byte boundary here */
+    }
+
+    yuv2plane1_float_bswap_u(src, dest, dstW, i); /* scalar tail: whatever the vector loop left over */
 }
+
+#define yuv2plane1_float(template, dest_type, BE_LE) /* emits yuv2plane1_float<BE_LE>_altivec, forwarding to `template` with casted pointers */ \
+static void yuv2plane1_float ## BE_LE ## _altivec(const int16_t *src, uint8_t *dest, \
+                                                  int dstW, \
+                                                  const uint8_t *dither, int offset) \
+{ /* dither and offset are accepted but not used */ \
+    template((const int32_t *)src, (dest_type *)dest, dstW); \
+}
+
+#if HAVE_BIGENDIAN /* the variant matching the build's endianness gets the plain routine, the other the byte-swapping one */
+yuv2plane1_float(yuv2plane1_float_altivec,       float,    BE)
+yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, LE)
+#else
+yuv2plane1_float(yuv2plane1_float_altivec,       float,    LE)
+yuv2plane1_float(yuv2plane1_float_bswap_altivec, uint32_t, BE)
+#endif
+
 #endif /* HAVE_ALTIVEC */
 
 av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
@@ -334,8 +243,9 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
         return;
 
+#if HAVE_BIGENDIAN
     if (c->srcBpc == 8 && c->dstBpc <= 14) {
-        c->hyScale = c->hcScale = hScale_altivec_real;
+        c->hyScale = c->hcScale = hScale_real_altivec;
     }
     if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
         dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
@@ -343,6 +253,13 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
         !c->needAlpha) {
         c->yuv2planeX = yuv2planeX_altivec;
     }
+#endif
+
+    if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
+        c->yuv2plane1 = yuv2plane1_floatBE_altivec;
+    } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
+        c->yuv2plane1 = yuv2plane1_floatLE_altivec;
+    }
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
@@ -369,4 +286,6 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
         }
     }
 #endif /* HAVE_ALTIVEC */
+
+    ff_sws_init_swscale_vsx(c);
 }
diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c
new file mode 100644
index 0000000000000..11decab0b9c64
--- /dev/null
+++ b/libswscale/ppc/swscale_ppc_template.c
@@ -0,0 +1,217 @@
+/*
+ * AltiVec-enhanced yuv2yuvX
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize,
+                                  const int16_t **src, uint8_t *dest,
+                                  const uint8_t *dither, int offset, int x)
+{ /* Filters 16 output pixels starting at column x and stores them at dest with one vector store. */
+    register int i, j;
+    LOCAL_ALIGNED(16, int, val, [16]); /* dither seed for the 16 accumulators */
+    vector signed int vo1, vo2, vo3, vo4; /* four accumulators of four 32-bit sums each */
+    vector unsigned short vs1, vs2;
+    vector unsigned char vf;
+    vector unsigned int altivec_vectorShiftInt19 =
+        vec_add(vec_splat_u32(10), vec_splat_u32(9)); /* 19, built as 10 + 9 */
+
+    for (i = 0; i < 16; i++)
+        val[i] = dither[(x + i + offset) & 7] << 12; /* same seed the scalar yuv2planeX_u uses */
+
+    vo1 = vec_ld(0,  val);
+    vo2 = vec_ld(16, val);
+    vo3 = vec_ld(32, val);
+    vo4 = vec_ld(48, val);
+
+    for (j = 0; j < filterSize; j++) {
+        unsigned int joffset=j<<1; /* byte offset of filter[j]; read by LOAD_FILTER */
+        unsigned int xoffset=x<<1; /* byte offset of src[j][x]; read by LOAD_L1 */
+        vector unsigned char perm;
+        vector signed short l1,vLumFilter;
+        LOAD_FILTER(vLumFilter,filter);
+        vLumFilter = vec_splat(vLumFilter, 0); /* broadcast element 0 of the loaded vector */
+        LOAD_L1(l1,src[j],perm);
+        yuv2planeX_8(vo1, vo2, l1, src[j], x,     perm, vLumFilter); /* pixels x .. x+7 */
+        yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter); /* pixels x+8 .. x+15 */
+    }
+
+    vo1 = vec_sra(vo1, altivec_vectorShiftInt19); /* arithmetic >> 19, as in the scalar path */
+    vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
+    vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
+    vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
+    vs1 = vec_packsu(vo1, vo2); /* saturating pack 32 -> 16 bit */
+    vs2 = vec_packsu(vo3, vo4);
+    vf  = vec_packsu(vs1, vs2); /* saturating pack 16 -> 8 bit */
+    VEC_ST(vf, 0, dest);
+}
+
+
+static inline void yuv2planeX_u(const int16_t *filter, int filterSize,
+                                const int16_t **src, uint8_t *dest, int dstW,
+                                const uint8_t *dither, int offset, int x)
+{ /* Scalar vertical filter for output columns x .. dstW-1. */
+    int i, j;
+
+    for (i = x; i < dstW; i++) {
+        int t = dither[(i + offset) & 7] << 12; /* dither table is indexed modulo 8 */
+        for (j = 0; j < filterSize; j++)
+            t += src[j][i] * filter[j]; /* accumulate one tap per source line */
+        dest[i] = av_clip_uint8(t >> 19);
+    }
+}
+
+static void FUNC(yuv2planeX)(const int16_t *filter, int filterSize,
+                               const int16_t **src, uint8_t *dest, int dstW,
+                               const uint8_t *dither, int offset)
+{ /* Vertical filter to 8-bit output: scalar head, 16-pixel vector body, scalar tail. */
+    int dst_u = -(uintptr_t)dest & 15; /* bytes (= pixels, dest is uint8_t) until dest is 16-byte aligned */
+    int i;
+
+    yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0); /* head: columns 0 .. dst_u-1 */
+
+    for (i = dst_u; i < dstW - 15; i += 16)
+        FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither,
+                              offset, i);
+
+    yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i); /* tail: columns i .. dstW-1 */
+}
+
+static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
+                                const uint8_t *src, const int16_t *filter,
+                                const int32_t *filterPos, int filterSize)
+{ /* Horizontal scaler: dst[i] = sum over j of src[filterPos[i] + j] * filter[filterSize * i + j], shifted right by 7. */
+    register int i;
+    LOCAL_ALIGNED(16, int, tempo, [4]); /* scratch the summed vector is stored to before reading element 3 */
+
+    if (filterSize % 4) { /* sizes that are not a multiple of 4 take the scalar loop */
+        for (i = 0; i < dstW; i++) {
+            register int j;
+            register int srcPos = filterPos[i];
+            register int val    = 0;
+            for (j = 0; j < filterSize; j++)
+                val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
+            dst[i] = FFMIN(val >> 7, (1 << 15) - 1); /* only the upper bound is clamped */
+        }
+    } else
+        switch (filterSize) {
+        case 4:
+            for (i = 0; i < dstW; i++) {
+                register int srcPos = filterPos[i];
+
+                vector unsigned char src_vF = unaligned_load(srcPos, src);
+                vector signed short src_v, filter_v;
+                vector signed int val_vEven, val_s;
+                src_v = // vec_unpackh sign-extends...
+                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
+                // now put our elements in the even slots
+                src_v = vec_mergeh(src_v, (vector signed short)vzero);
+                GET_VF4(i, filter_v, filter);
+                val_vEven = vec_mule(src_v, filter_v); /* multiply the even elements only */
+                val_s     = vec_sums(val_vEven, vzero);
+                vec_st(val_s, 0, tempo);
+                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); /* the sum is read back from element 3 */
+            }
+        break;
+        case 8:
+            for (i = 0; i < dstW; i++) {
+                register int srcPos = filterPos[i];
+                vector unsigned char src_vF, src_v0, src_v1;
+                vector unsigned char permS;
+                vector signed short src_v, filter_v;
+                vector signed int val_v, val_s;
+                FIRST_LOAD(src_v0, srcPos, src, permS);
+                LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
+                src_v = // vec_unpackh sign-extends...
+                        (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
+                filter_v = vec_ld(i << 4, filter); /* byte offset i * 16: 8 coefficients of 2 bytes per output */
+                val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
+                val_s = vec_sums(val_v, vzero);
+                vec_st(val_s, 0, tempo);
+                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
+            }
+        break;
+
+        case 16:
+            for (i = 0; i < dstW; i++) {
+                register int srcPos = filterPos[i];
+
+                vector unsigned char src_vF = unaligned_load(srcPos, src);
+                vector signed short src_vA = // vec_unpackh sign-extends...
+                                             (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
+                vector signed short src_vB = // vec_unpackh sign-extends...
+                                             (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
+                vector signed short filter_v0 = vec_ld(i << 5, filter); /* byte offset i * 32: 16 coefficients per output */
+                vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
+
+                vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
+                vector signed int val_v   = vec_msums(src_vB, filter_v1, val_acc);
+
+                vector signed int val_s = vec_sums(val_v, vzero);
+
+                VEC_ST(val_s, 0, tempo);
+                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
+            }
+        break;
+
+        default: /* any other multiple of 4 */
+            for (i = 0; i < dstW; i++) {
+                register int j, offset = i * 2 * filterSize; /* byte offset of this output's coefficients */
+                register int srcPos = filterPos[i];
+
+                vector signed int val_s, val_v = (vector signed int)vzero;
+                vector signed short filter_v0R;
+                vector unsigned char permF, src_v0, permS;
+                FIRST_LOAD(filter_v0R, offset, filter, permF);
+                FIRST_LOAD(src_v0, srcPos, src, permS);
+
+                for (j = 0; j < filterSize - 15; j += 16) { /* 16 taps per iteration */
+                    vector unsigned char src_v1, src_vF;
+                    vector signed short filter_v1R, filter_v2R, filter_v0, filter_v1;
+                    LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF);
+                    vector signed short src_vA = // vec_unpackh sign-extends...
+                                                 (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
+                    vector signed short src_vB = // vec_unpackh sign-extends...
+                                                 (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
+                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0);
+                    GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16);
+
+                    vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
+                    val_v = vec_msums(src_vB, filter_v1, val_acc);
+                    UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0);
+                }
+
+                if (j < filterSize - 7) { /* one further group of 8 taps */
+                    // loading src_v0 is useless, it's already done above
+                    vector unsigned char src_v1, src_vF;
+                    vector signed short src_v, filter_v1R, filter_v;
+                    LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF);
+                    src_v = // vec_unpackh sign-extends...
+                            (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
+                    GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0);
+                    val_v = vec_msums(src_v, filter_v, val_v);
+                }
+                val_s = vec_sums(val_v, vzero);
+
+                VEC_ST(val_s, 0, tempo);
+                dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
+            }
+        }
+}
diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c
new file mode 100644
index 0000000000000..f6c7f1d4da5ee
--- /dev/null
+++ b/libswscale/ppc/swscale_vsx.c
@@ -0,0 +1,484 @@
+/*
+ * AltiVec-enhanced yuv2yuvX
+ *
+ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
+ * based on the equivalent C code in swscale.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "yuv2rgb_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+
+#if HAVE_VSX
+#define vzero vec_splat_s32(0)
+
+#if !HAVE_BIGENDIAN
+#define  GET_LS(a,b,c,s) { /* ls = current vector a; a is reloaded from byte offset 2*b + 16 of s; c is unused */\
+        ls  = a;\
+        a = vec_vsx_ld(((b) << 1)  + 16, s);\
+    }
+
+#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do { /* d1/d2 += filter * 8 source samples */\
+        vector signed short ls;\
+        GET_LS(l1, x, perm, src);\
+        vector signed int   i1  = vec_mule(filter, ls); /* products of the even elements */\
+        vector signed int   i2  = vec_mulo(filter, ls); /* products of the odd elements */\
+        vector signed int   vf1, vf2;\
+        vf1 = vec_mergeh(i1, i2); /* interleave the even and odd products again */\
+        vf2 = vec_mergel(i1, i2);\
+        d1 = vec_add(d1, vf1);\
+        d2 = vec_add(d2, vf2);\
+    } while (0)
+
+#define LOAD_FILTER(vf,f) { /* reads `joffset` from the expansion site */\
+        vf = vec_vsx_ld(joffset, f);\
+}
+#define LOAD_L1(ll1,s,p){ /* reads `xoffset` from the expansion site; p is unused */\
+        ll1  = vec_vsx_ld(xoffset, s);\
+}
+
+// The 3 in GET_VF4 below (a << 3) is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
+
+// The neat trick: We only care for half the elements,
+// high or low depending on (i<<3)%16 (it's 0 or 8 here),
+// and we're going to use vec_mule, so we choose
+// carefully how to "unpack" the elements into the even slots.
+#define GET_VF4(a, vf, f) { /* load from byte offset a * 8 of f, then interleave the high half with zeros */\
+    vf = (vector signed short)vec_vsx_ld(a << 3, f);\
+    vf = vec_mergeh(vf, (vector signed short)vzero);\
+}
+#define FIRST_LOAD(sv, pos, s, per) {} /* expands to nothing in this file */
+#define UPDATE_PTR(s0, d0, s1, d1) {} /* expands to nothing in this file */
+#define LOAD_SRCV(pos, a, s, per, v0, v1, vf) { /* only pos, a, s and vf are used */\
+    vf = vec_vsx_ld(pos + a, s);\
+}
+#define LOAD_SRCV8(pos, a, s, per, v0, v1, vf) LOAD_SRCV(pos, a, s, per, v0, v1, vf)
+#define GET_VFD(a, b, f, vf0, vf1, per, vf, off) { /* reads `filterSize` from the expansion site; vf0, vf1 and per are unused */\
+    vf  = vec_vsx_ld((a * 2 * filterSize) + (b * 2) + off, f);\
+}
+
+#define FUNC(name) name ## _vsx
+#include "swscale_ppc_template.c"
+#undef FUNC
+
+#undef vzero
+
+#endif /* !HAVE_BIGENDIAN */
+
+static void yuv2plane1_8_u(const int16_t *src, uint8_t *dest, int dstW,
+                           const uint8_t *dither, int offset, int start)
+{ /* Scalar path: dithers, shifts and clips src[start..dstW) to 8 bits. */
+    int i;
+    for (i = start; i < dstW; i++) {
+        int val = (src[i] + dither[(i + offset) & 7]) >> 7; /* dither table is indexed modulo 8 */
+        dest[i] = av_clip_uint8(val);
+    }
+}
+
+static void yuv2plane1_8_vsx(const int16_t *src, uint8_t *dest, int dstW,
+                           const uint8_t *dither, int offset)
+{ /* 8-bit single-line output: scalar head, 16-pixel vector body, scalar tail. */
+    const int dst_u = -(uintptr_t)dest & 15; /* bytes (= pixels, dest is uint8_t) until dest is 16-byte aligned */
+    int i, j;
+    LOCAL_ALIGNED(16, int16_t, val, [16]); /* dither values widened to 16 bits */
+    const vector uint16_t shifts = (vector uint16_t) {7, 7, 7, 7, 7, 7, 7, 7};
+    vector int16_t vi, vileft, ditherleft, ditherright;
+    vector uint8_t vd;
+
+    for (j = 0; j < 16; j++) {
+        val[j] = dither[(dst_u + offset + j) & 7]; /* i advances by 16, so this pattern holds for every iteration */
+    }
+
+    ditherleft = vec_ld(0, val);
+    ditherright = vec_ld(0, &val[8]);
+
+    yuv2plane1_8_u(src, dest, dst_u, dither, offset, 0); /* head: pixels 0 .. dst_u-1 */
+
+    for (i = dst_u; i < dstW - 15; i += 16) {
+
+        vi = vec_vsx_ld(0, &src[i]);
+        vi = vec_adds(ditherleft, vi); /* saturating add */
+        vileft = vec_sra(vi, shifts);
+
+        vi = vec_vsx_ld(0, &src[i + 8]);
+        vi = vec_adds(ditherright, vi);
+        vi = vec_sra(vi, shifts);
+
+        vd = vec_packsu(vileft, vi); /* saturating pack to unsigned 8 bit */
+        vec_st(vd, 0, &dest[i]);
+    }
+
+    yuv2plane1_8_u(src, dest, dstW, dither, offset, i); /* tail: pixels i .. dstW-1 */
+}
+
+#if !HAVE_BIGENDIAN
+
+#define output_pixel(pos, val) /* relies on `big_endian`, `shift` and `output_bits` being visible where it is expanded */ \
+    if (big_endian) { /* store through AV_WB16 when big_endian is non-zero */ \
+        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    } else { /* otherwise store through AV_WL16 */ \
+        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+    }
+
+static void yuv2plane1_nbps_u(const int16_t *src, uint16_t *dest, int dstW,
+                              int big_endian, int output_bits, int start)
+{ /* Scalar path: rounds, shifts and clips src[start..dstW) to output_bits bits. */
+    int i;
+    int shift = 15 - output_bits;
+
+    for (i = start; i < dstW; i++) {
+        int val = src[i] + (1 << (shift - 1)); /* add half of the divisor so the shift rounds */
+        output_pixel(&dest[i], val);
+    }
+}
+
+static void yuv2plane1_nbps_vsx(const int16_t *src, uint16_t *dest, int dstW,
+                           int big_endian, int output_bits)
+{ /* Single-line output at output_bits bits: scalar head, 8-pixel vector body, scalar tail. */
+    const int dst_u = FFMIN((int)((-(uintptr_t)dest & 15) >> 1), dstW); /* uint16 elements until dest is 16-byte aligned, never more than dstW */
+    const int shift = 15 - output_bits;
+    const int add = (1 << (shift - 1)); /* rounding term for the shift */
+    const int clip = (1 << output_bits) - 1; /* largest representable output value */
+    const vector uint16_t vadd = (vector uint16_t) {add, add, add, add, add, add, add, add};
+    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0); /* rotating by 8 swaps the two bytes of each element */
+    const vector uint16_t vshift = (vector uint16_t) vec_splat_u16(shift);
+    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};
+    vector uint16_t v;
+    int i;
+
+    yuv2plane1_nbps_u(src, dest, dst_u, big_endian, output_bits, 0); /* head: elements in front of the aligned region */
+
+    for (i = dst_u; i < dstW - 7; i += 8) {
+        v = vec_vsx_ld(0, (const uint16_t *) &src[i]); /* unaligned load, samples reinterpreted as unsigned */
+        v = vec_add(v, vadd);
+        v = vec_sr(v, vshift); /* logical shift; the scalar path shifts a signed int - NOTE(review): confirm src is never negative here */
+        v = vec_min(v, vlargest);
+        v = vec_rl(v, vswap);
+        vec_st(v, 0, &dest[i]); /* aligned store: &dest[i] is on a 16-byte boundary here */
+    }
+
+    yuv2plane1_nbps_u(src, dest, dstW, big_endian, output_bits, i); /* tail: whatever the vector loop left over */
+}
+
+static void yuv2planeX_nbps_u(const int16_t *filter, int filterSize,
+                              const int16_t **src, uint16_t *dest, int dstW,
+                              int big_endian, int output_bits, int start)
+{ /* Scalar vertical filter for columns start .. dstW-1 at output_bits bits. */
+    int i;
+    int shift = 11 + 16 - output_bits;
+
+    for (i = start; i < dstW; i++) {
+        int val = 1 << (shift - 1); /* start from the rounding term */
+        int j;
+
+        for (j = 0; j < filterSize; j++)
+            val += src[j][i] * filter[j]; /* accumulate one tap per source line */
+
+        output_pixel(&dest[i], val);
+    }
+}
+
+static void yuv2planeX_nbps_vsx(const int16_t *filter, int filterSize,
+                                const int16_t **src, uint16_t *dest, int dstW,
+                                int big_endian, int output_bits)
+{ /* Vertical filter at output_bits bits: scalar head, 8-pixel vector body, scalar tail. */
+    const int dst_u = FFMIN((int)((-(uintptr_t)dest & 15) >> 1), dstW); /* uint16 elements until dest is 16-byte aligned, never more than dstW */
+    const int shift = 11 + 16 - output_bits;
+    const int add = (1 << (shift - 1)); /* rounding term the accumulators start from */
+    const int clip = (1 << output_bits) - 1; /* largest representable output value */
+    const uint16_t swap = big_endian ? 8 : 0; /* rotating by 8 swaps the two bytes of each element */
+    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
+    const vector uint32_t vshift = (vector uint32_t) {shift, shift, shift, shift};
+    const vector uint16_t vswap = (vector uint16_t) {swap, swap, swap, swap, swap, swap, swap, swap};
+    const vector uint16_t vlargest = (vector uint16_t) {clip, clip, clip, clip, clip, clip, clip, clip};
+    const vector int16_t vzero = vec_splat_s16(0);
+    const vector uint8_t vperm = (vector uint8_t) {0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; /* interleaves the two packed halves */
+    vector int16_t vfilter[MAX_FILTER_SIZE], vin;
+    vector uint16_t v;
+    vector uint32_t vleft, vright, vtmp;
+    int i, j;
+
+    for (i = 0; i < filterSize; i++) { /* broadcast each coefficient across a vector once, up front */
+        vfilter[i] = (vector int16_t) {filter[i], filter[i], filter[i], filter[i],
+                                       filter[i], filter[i], filter[i], filter[i]};
+    }
+
+    yuv2planeX_nbps_u(filter, filterSize, src, dest, dst_u, big_endian, output_bits, 0); /* head: elements in front of the aligned region */
+
+    for (i = dst_u; i < dstW - 7; i += 8) {
+        vleft = vright = vadd;
+
+        for (j = 0; j < filterSize; j++) {
+            vin = vec_vsx_ld(0, &src[j][i]);
+            vtmp = (vector uint32_t) vec_mule(vin, vfilter[j]); /* products of the even elements */
+            vleft = vec_add(vleft, vtmp);
+            vtmp = (vector uint32_t) vec_mulo(vin, vfilter[j]); /* products of the odd elements */
+            vright = vec_add(vright, vtmp);
+        }
+
+        vleft = vec_sra(vleft, vshift);
+        vright = vec_sra(vright, vshift);
+        v = vec_packsu(vleft, vright);
+        v = (vector uint16_t) vec_max((vector int16_t) v, vzero);
+        v = vec_min(v, vlargest);
+        v = vec_rl(v, vswap);
+        v = vec_perm(v, v, vperm); /* put even and odd results back into pixel order */
+        vec_st(v, 0, &dest[i]); /* aligned store: &dest[i] is on a 16-byte boundary here */
+    }
+
+    yuv2planeX_nbps_u(filter, filterSize, src, dest, dstW, big_endian, output_bits, i); /* tail: whatever the vector loop left over */
+}
+
+
+#undef output_pixel
+
+#define output_pixel(pos, val, bias, signedness) /* relies on `big_endian` and `shift` being visible where it is expanded */ \
+    if (big_endian) { /* store through AV_WB16 when big_endian is non-zero */ \
+        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
+    } else { /* otherwise store through AV_WL16 */ \
+        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
+    }
+
+static void yuv2plane1_16_u(const int32_t *src, uint16_t *dest, int dstW,
+                              int big_endian, int output_bits, int start)
+{ /* Scalar path: rounds, shifts by 3 and clips src[start..dstW) to 16 bits; output_bits is not read. */
+    int i;
+    const int shift = 3;
+
+    for (i = start; i < dstW; i++) {
+        int val = src[i] + (1 << (shift - 1)); /* add half of the divisor so the shift rounds */
+        output_pixel(&dest[i], val, 0, uint);
+    }
+}
+
+static void yuv2plane1_16_vsx(const int32_t *src, uint16_t *dest, int dstW,
+                           int big_endian, int output_bits)
+{ /* 16-bit single-line output: scalar head, 8-pixel vector body, scalar tail. */
+    const int dst_u = FFMIN((int)((-(uintptr_t)dest & 15) >> 1), dstW); /* uint16 elements until dest is 16-byte aligned, never more than dstW */
+    const int shift = 3;
+    const int add = (1 << (shift - 1)); /* rounding term for the shift */
+    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
+    const vector uint16_t vswap = (vector uint16_t) vec_splat_u16(big_endian ? 8 : 0); /* rotating by 8 swaps the two bytes of each element */
+    const vector uint32_t vshift = (vector uint32_t) vec_splat_u32(shift);
+    vector uint32_t v, v2;
+    vector uint16_t vd;
+    int i;
+
+    yuv2plane1_16_u(src, dest, dst_u, big_endian, output_bits, 0); /* head: elements in front of the aligned region */
+
+    for (i = dst_u; i < dstW - 7; i += 8) {
+        v = vec_vsx_ld(0, (const uint32_t *) &src[i]); /* first four samples, unaligned load */
+        v = vec_add(v, vadd);
+        v = vec_sr(v, vshift);
+
+        v2 = vec_vsx_ld(0, (const uint32_t *) &src[i + 4]); /* next four samples */
+        v2 = vec_add(v2, vadd);
+        v2 = vec_sr(v2, vshift);
+
+        vd = vec_packsu(v, v2); /* saturating pack 32 -> 16 bit */
+        vd = vec_rl(vd, vswap);
+
+        vec_st(vd, 0, &dest[i]); /* aligned store: &dest[i] is on a 16-byte boundary here */
+    }
+
+    yuv2plane1_16_u(src, dest, dstW, big_endian, output_bits, i); /* tail: whatever the vector loop left over */
+}
+
+#if HAVE_POWER8
+
+static void yuv2planeX_16_u(const int16_t *filter, int filterSize,
+                            const int32_t **src, uint16_t *dest, int dstW,
+                            int big_endian, int output_bits, int start)
+{
+    const int shift = 15; /* also read by output_pixel() */
+    int x;
+    int tap;
+
+    for (x = start; x < dstW; x++) {
+        /* The filtered sum nominally spans [0,0x7FFFFFFF] (31 bits), but
+         * filters with negative coefficients (lanczos, spline, ...) can
+         * push it slightly past either end. Seed the accumulator with the
+         * rounding term minus 0x40000000 so it stays inside the signed
+         * range for any reasonable filterSize; the 0x8000 bias handed to
+         * output_pixel() puts that offset back after the shift. */
+        int val = (1 << (shift - 1)) - 0x40000000;
+
+        for (tap = 0; tap < filterSize; tap++)
+            val += src[tap][x] * (unsigned)filter[tap];
+
+        output_pixel(&dest[x], val, 0x8000, int);
+    }
+}
+
+static void yuv2planeX_16_vsx(const int16_t *filter, int filterSize,
+                              const int32_t **src, uint16_t *dest, int dstW,
+                              int big_endian, int output_bits)
+{ /* VSX counterpart of yuv2planeX_16_u(): filterSize-tap weighted sum per pixel, written as 16-bit samples; output_bits is only forwarded */
+    const int dst_u = -(uintptr_t)dest & 7; /* 0..7 leading pixels handed to the scalar helper */
+    const int shift = 15;
+    const int bias = 0x8000; /* same bias the scalar helper passes to output_pixel() */
+    const int add = (1 << (shift - 1)) - 0x40000000; /* rounding term minus the scalar helper's overflow offset */
+    const uint16_t swap = big_endian ? 8 : 0; /* rotate count: 8 swaps the two bytes of a 16-bit lane */
+    const vector uint32_t vadd = (vector uint32_t) {add, add, add, add};
+    const vector uint32_t vshift = (vector uint32_t) {shift, shift, shift, shift};
+    const vector uint16_t vswap = (vector uint16_t) {swap, swap, swap, swap, swap, swap, swap, swap};
+    const vector uint16_t vbias = (vector uint16_t) {bias, bias, bias, bias, bias, bias, bias, bias};
+    vector int32_t vfilter[MAX_FILTER_SIZE]; /* one broadcast coefficient per tap */
+    vector uint16_t v;
+    vector uint32_t vleft, vright, vtmp;
+    vector int32_t vin32l, vin32r;
+    int i, j;
+    /* Broadcast each 16-bit coefficient into all four 32-bit lanes. */
+    for (i = 0; i < filterSize; i++) {
+        vfilter[i] = (vector int32_t) {filter[i], filter[i], filter[i], filter[i]};
+    }
+    /* Scalar head: pixels [0, dst_u). */
+    yuv2planeX_16_u(filter, filterSize, src, dest, dst_u, big_endian, output_bits, 0);
+    /* Vector body: 8 pixels per iteration, kept as two 4-lane accumulators. */
+    for (i = dst_u; i < dstW - 7; i += 8) {
+        vleft = vright = vadd;
+        /* Accumulate src[j] * filter[j] over every tap. */
+        for (j = 0; j < filterSize; j++) {
+            vin32l = vec_vsx_ld(0, &src[j][i]);
+            vin32r = vec_vsx_ld(0, &src[j][i + 4]);
+            /* Lane-wise 32-bit multiply (this function is compiled only under HAVE_POWER8). */
+            vtmp = (vector uint32_t) vec_mul(vin32l, vfilter[j]);
+            vleft = vec_add(vleft, vtmp);
+            vtmp = (vector uint32_t) vec_mul(vin32r, vfilter[j]);
+            vright = vec_add(vright, vtmp);
+        }
+        /* Shift, saturate to int16, re-add the bias, fix byte order, store. */
+        vleft = vec_sra(vleft, vshift);
+        vright = vec_sra(vright, vshift);
+        v = (vector uint16_t) vec_packs((vector int32_t) vleft, (vector int32_t) vright); /* signed saturating pack, like av_clip_int16 in the scalar helper */
+        v = vec_add(v, vbias);
+        v = vec_rl(v, vswap);
+        vec_st(v, 0, &dest[i]); /* NOTE(review): aligned store; presumably dest is 16-byte aligned by callers -- confirm */
+    }
+    /* Scalar tail: whatever the 8-wide loop left over, starting at i. */
+    yuv2planeX_16_u(filter, filterSize, src, dest, dstW, big_endian, output_bits, i);
+}
+
+#endif /* HAVE_POWER8 */
+
+#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) /* emit both the plane1 and the planeX wrapper */ \
+    yuv2NBPS1(bits, BE_LE, is_be, template_size, typeX_t) \
+    yuv2NBPSX(bits, BE_LE, is_be, template_size, typeX_t)
+/* Defines yuv2plane1_<bits><BE_LE>_vsx(), forwarding to yuv2plane1_<template_size>_vsx(). */
+#define yuv2NBPS1(bits, BE_LE, is_be, template_size, typeX_t) \
+static void yuv2plane1_ ## bits ## BE_LE ## _vsx(const int16_t *src, \
+                             uint8_t *dest, int dstW, \
+                             const uint8_t *dither, int offset) /* dither and offset are not used */ \
+{ \
+    yuv2plane1_ ## template_size ## _vsx((const typeX_t *) src, \
+                         (uint16_t *) dest, dstW, is_be, bits); \
+}
+/* Defines yuv2planeX_<bits><BE_LE>_vsx(), forwarding to yuv2planeX_<template_size>_vsx(). */
+#define yuv2NBPSX(bits, BE_LE, is_be, template_size, typeX_t) \
+static void yuv2planeX_ ## bits ## BE_LE ## _vsx(const int16_t *filter, int filterSize, \
+                              const int16_t **src, uint8_t *dest, int dstW, \
+                              const uint8_t *dither, int offset)\
+{ \
+    yuv2planeX_## template_size ## _vsx(filter, \
+                         filterSize, (const typeX_t **) src, \
+                         (uint16_t *) dest, dstW, is_be, bits); \
+}
+/* 9/10/12/14-bit outputs use the "nbps" templates and int16_t intermediates. */
+yuv2NBPS( 9, BE, 1, nbps, int16_t)
+yuv2NBPS( 9, LE, 0, nbps, int16_t)
+yuv2NBPS(10, BE, 1, nbps, int16_t)
+yuv2NBPS(10, LE, 0, nbps, int16_t)
+yuv2NBPS(12, BE, 1, nbps, int16_t)
+yuv2NBPS(12, LE, 0, nbps, int16_t)
+yuv2NBPS(14, BE, 1, nbps, int16_t)
+yuv2NBPS(14, LE, 0, nbps, int16_t)
+/* 16-bit outputs use the "16" templates and int32_t intermediates. */
+yuv2NBPS1(16, BE, 1, 16, int32_t)
+yuv2NBPS1(16, LE, 0, 16, int32_t)
+#if HAVE_POWER8 /* yuv2planeX_16_vsx() only exists under HAVE_POWER8 */
+yuv2NBPSX(16, BE, 1, 16, int32_t)
+yuv2NBPSX(16, LE, 0, 16, int32_t)
+#endif
+
+#endif /* !HAVE_BIGENDIAN */
+
+#endif /* HAVE_VSX */
+
+av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
+{ /* Install the VSX scaler/output callbacks into c when the build and the running CPU support them; otherwise leave c untouched. */
+#if HAVE_VSX
+    enum AVPixelFormat dstFormat = c->dstFormat;
+    const int cpu_flags = av_get_cpu_flags();
+    /* Runtime check: do nothing unless the CPU reports VSX. */
+    if (!(cpu_flags & AV_CPU_FLAG_VSX))
+        return;
+    /* Horizontal scaler and 8-bit vertical filter: little-endian builds only. */
+#if !HAVE_BIGENDIAN
+    if (c->srcBpc == 8 && c->dstBpc <= 14) {
+        c->hyScale = c->hcScale = hScale_real_vsx; /* same routine for luma and chroma */
+    }
+    if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
+        dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+        dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE &&
+        !c->needAlpha) {
+        c->yuv2planeX = yuv2planeX_vsx;
+    }
+#endif
+    /* Per-depth output functions; skipped for bit-exact or full-chroma-interpolation requests and when alpha is needed. */
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->needAlpha) {
+        switch (c->dstBpc) {
+        case 8:
+            c->yuv2plane1 = yuv2plane1_8_vsx;
+            break;
+#if !HAVE_BIGENDIAN /* the >8-bit wrappers are only compiled for little-endian builds */
+        case 9:
+            c->yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_vsx  : yuv2plane1_9LE_vsx;
+            c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_vsx  : yuv2planeX_9LE_vsx;
+            break;
+        case 10:
+            c->yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_vsx  : yuv2plane1_10LE_vsx;
+            c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_vsx  : yuv2planeX_10LE_vsx;
+            break;
+        case 12:
+            c->yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_vsx  : yuv2plane1_12LE_vsx;
+            c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_vsx  : yuv2planeX_12LE_vsx;
+            break;
+        case 14:
+            c->yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_vsx  : yuv2plane1_14LE_vsx;
+            c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_vsx  : yuv2planeX_14LE_vsx;
+            break;
+        case 16:
+            c->yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_vsx  : yuv2plane1_16LE_vsx;
+#if HAVE_POWER8
+            if (cpu_flags & AV_CPU_FLAG_POWER8) { /* 16-bit planeX needs POWER8 at build time and at run time */
+                c->yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_vsx  : yuv2planeX_16LE_vsx;
+            }
+#endif /* HAVE_POWER8 */
+            break;
+#endif /* !HAVE_BIGENDIAN */
+        } /* any other dstBpc keeps the callbacks already set */
+    }
+#endif /* HAVE_VSX */
+}
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 4fa59386a67c7..a59d12745ae6c 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -868,6 +868,7 @@ void ff_sws_init_output_funcs(SwsContext *c,
                               yuv2packedX_fn *yuv2packedX,
                               yuv2anyX_fn *yuv2anyX);
 void ff_sws_init_swscale_ppc(SwsContext *c);
+void ff_sws_init_swscale_vsx(SwsContext *c);
 void ff_sws_init_swscale_x86(SwsContext *c);
 void ff_sws_init_swscale_aarch64(SwsContext *c);
 void ff_sws_init_swscale_arm(SwsContext *c);
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 4b3cd71e900f1..058f2b94db1ec 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -423,7 +423,7 @@ static void gray8aToPacked24(const uint8_t *src, uint8_t *dst, int num_pixels,
     }
 }
 
-static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[],
+static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
                               int srcStride[], int srcSliceY, int srcSliceH,
                               uint8_t *dst[], int dstStride[])
 {
@@ -1821,6 +1821,14 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
                     srcPtr += srcStride[plane];
                     dstPtr += dstStride[plane];
                 }
+            } else if (isFloat(c->srcFormat) && isFloat(c->dstFormat) &&
+                       isBE(c->srcFormat) != isBE(c->dstFormat)) { /* swap float plane */
+                for (i = 0; i < height; i++) {
+                    for (j = 0; j < length; j++)
+                        ((uint32_t *) dstPtr)[j] = av_bswap32(((const uint32_t *) srcPtr)[j]);
+                    srcPtr += srcStride[plane];
+                    dstPtr += dstStride[plane];
+                }
             } else if (dstStride[plane] == srcStride[plane] &&
                        srcStride[plane] > 0 && srcStride[plane] == length) {
                 memcpy(dst[plane] + dstStride[plane] * y, src[plane],
@@ -2015,7 +2023,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16))
-        c->swscale = packed_16bpc_bswap;
+        c->swscale = bswap_16bpc;
 
     if (usePal(srcFormat) && isByteRGB(dstFormat))
         c->swscale = palToRgbWrapper;
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 5e56371180f6c..df68bcc0d9b30 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -191,8 +191,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_BGR444LE]    = { 1, 1 },
     [AV_PIX_FMT_BGR444BE]    = { 1, 1 },
     [AV_PIX_FMT_YA8]         = { 1, 1 },
-    [AV_PIX_FMT_YA16BE]      = { 1, 0 },
-    [AV_PIX_FMT_YA16LE]      = { 1, 0 },
+    [AV_PIX_FMT_YA16BE]      = { 1, 1 },
+    [AV_PIX_FMT_YA16LE]      = { 1, 1 },
     [AV_PIX_FMT_BGR48BE]     = { 1, 1 },
     [AV_PIX_FMT_BGR48LE]     = { 1, 1 },
     [AV_PIX_FMT_BGRA64BE]    = { 1, 1, 1 },
@@ -260,6 +260,10 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_P016BE]      = { 1, 1 },
     [AV_PIX_FMT_GRAYF32LE]   = { 1, 1 },
     [AV_PIX_FMT_GRAYF32BE]   = { 1, 1 },
+    [AV_PIX_FMT_YUVA422P12BE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA422P12LE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA444P12BE] = { 1, 1 },
+    [AV_PIX_FMT_YUVA444P12LE] = { 1, 1 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1810,8 +1814,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     /* unscaled special cases */
     if (unscaled && !usesHFilter && !usesVFilter &&
         (c->srcRange == c->dstRange || isAnyRGB(dstFormat) ||
-         srcFormat == AV_PIX_FMT_GRAYF32 && dstFormat == AV_PIX_FMT_GRAY8 ||
-         srcFormat == AV_PIX_FMT_GRAY8 && dstFormat == AV_PIX_FMT_GRAYF32)) {
+         isFloat(srcFormat) || isFloat(dstFormat))){
         ff_get_unscaled_swscale(c);
 
         if (c->swscale) {
diff --git a/libswscale/version.h b/libswscale/version.h
index a07bd717c8e60..0e28a76e64d27 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -27,7 +27,7 @@
 #include "libavutil/version.h"
 
 #define LIBSWSCALE_VERSION_MAJOR   5
-#define LIBSWSCALE_VERSION_MINOR   2
+#define LIBSWSCALE_VERSION_MINOR   4
 #define LIBSWSCALE_VERSION_MICRO 100
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 5fb5d2ee619e2..29b856e281424 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -2,6 +2,7 @@
 ;* Copyright Nick Kurshev
 ;* Copyright Michael (michaelni@gmx.at)
 ;* Copyright 2018 Jokyo Images
+;* Copyright Ivo van Poorten
 ;*
 ;* This file is part of FFmpeg.
 ;*
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 737cbb06c806e..d0df061e4d98b 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -993,7 +993,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
     default:
         if(!isPlanar(c->dstFormat) || bpp <= 24)
             av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
-        return -1;
+        return AVERROR(EINVAL);
     }
     return 0;
 }
diff --git a/tests/Makefile b/tests/Makefile
index 24680b815014e..ae658693db8a4 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -10,7 +10,8 @@ FFMPEG=ffmpeg$(PROGSSUF)$(EXESUF)
 $(AREF): CMP=
 
 APITESTSDIR := tests/api
-OBJDIRS += tests/data tests/vsynth1 tests/data/filtergraphs $(APITESTSDIR)/
+FATE_OUTDIRS = tests/data tests/data/fate tests/data/filtergraphs tests/data/lavf tests/data/lavf-fate tests/data/pixfmt tests/vsynth1 $(APITESTSDIR)
+OUTDIRS += $(FATE_OUTDIRS)
 
 $(VREF): tests/videogen$(HOSTEXESUF) | tests/vsynth1
 	$(M)./$< 'tests/vsynth1/'
@@ -131,6 +132,7 @@ include $(SRC_PATH)/tests/fate/gif.mak
 include $(SRC_PATH)/tests/fate/h264.mak
 include $(SRC_PATH)/tests/fate/hap.mak
 include $(SRC_PATH)/tests/fate/hevc.mak
+include $(SRC_PATH)/tests/fate/hlsenc.mak
 include $(SRC_PATH)/tests/fate/hw.mak
 include $(SRC_PATH)/tests/fate/id3v2.mak
 include $(SRC_PATH)/tests/fate/image.mak
@@ -223,7 +225,7 @@ fate-hw: $(FATE_HW-yes)
 FATE += $(FATE_HW-yes)
 
 $(FATE) $(FATE_TESTS-no): export PROGSUF = $(PROGSSUF)
-$(FATE) $(FATE_TESTS-no): $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
+$(FATE) $(FATE_TESTS-no): $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) | $(FATE_OUTDIRS)
 	@echo "TEST    $(@:fate-%=%)"
 	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(TARGET_SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)' '$(REPORT)' '$(KEEP)'
 
diff --git a/tests/api/Makefile b/tests/api/Makefile
index 759dd9d243a98..b5c4ccae23527 100644
--- a/tests/api/Makefile
+++ b/tests/api/Makefile
@@ -1,5 +1,6 @@
 APITESTPROGS-$(call ENCDEC, FLAC, FLAC) += api-flac
 APITESTPROGS-$(call DEMDEC, H264, H264) += api-h264
+APITESTPROGS-$(call DEMDEC, H264, H264) += api-h264-slice
 APITESTPROGS-yes += api-seek
 APITESTPROGS-yes += api-codec-param
 APITESTPROGS-$(call DEMDEC, H263, H263) += api-band
diff --git a/tests/api/api-flac-test.c b/tests/api/api-flac-test.c
index 2e9081266f829..e8e8cbf1e7c8a 100644
--- a/tests/api/api-flac-test.c
+++ b/tests/api/api-flac-test.c
@@ -31,7 +31,7 @@
 #include "libavutil/common.h"
 #include "libavutil/samplefmt.h"
 
-#define NUMBER_OF_FRAMES 200
+#define NUMBER_OF_AUDIO_FRAMES 200
 #define NAME_BUFF_SIZE 100
 
 /* generate i-th frame of test audio */
@@ -137,19 +137,19 @@ static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
         return AVERROR(ENOMEM);
     }
 
-    raw_in = av_malloc(in_frame->linesize[0] * NUMBER_OF_FRAMES);
+    raw_in = av_malloc(in_frame->linesize[0] * NUMBER_OF_AUDIO_FRAMES);
     if (!raw_in) {
         av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for raw_in\n");
         return AVERROR(ENOMEM);
     }
 
-    raw_out = av_malloc(in_frame->linesize[0] * NUMBER_OF_FRAMES);
+    raw_out = av_malloc(in_frame->linesize[0] * NUMBER_OF_AUDIO_FRAMES);
     if (!raw_out) {
         av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for raw_out\n");
         return AVERROR(ENOMEM);
     }
 
-    for (i = 0; i < NUMBER_OF_FRAMES; i++) {
+    for (i = 0; i < NUMBER_OF_AUDIO_FRAMES; i++) {
         av_init_packet(&enc_pkt);
         enc_pkt.data = NULL;
         enc_pkt.size = 0;
@@ -209,7 +209,7 @@ static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
         av_packet_unref(&enc_pkt);
     }
 
-    if (memcmp(raw_in, raw_out, out_frame_bytes * NUMBER_OF_FRAMES) != 0) {
+    if (memcmp(raw_in, raw_out, out_frame_bytes * NUMBER_OF_AUDIO_FRAMES) != 0) {
         av_log(NULL, AV_LOG_ERROR, "Output differs\n");
         return 1;
     }
diff --git a/tests/api/api-h264-slice-test.c b/tests/api/api-h264-slice-test.c
new file mode 100644
index 0000000000000..dee93b8349e6d
--- /dev/null
+++ b/tests/api/api-h264-slice-test.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2001 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define MAX_SLICES 8
+
+#include "config.h"
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if HAVE_IO_H
+#include <io.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "libavcodec/avcodec.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/hash.h"
+#include "libavutil/bswap.h"
+
+static int header = 0;
+
+static int decode(AVCodecContext *dec_ctx, AVFrame *frame,
+           AVPacket *pkt)
+{ /* Send one packet to the decoder and print one MD5 line per frame it returns; returns 0 or a negative error code. */
+    static uint64_t frame_cnt = 0; /* running frame index, printed as both dts and pts */
+    int ret;
+    /* Submit the packet; main() passes NULL here for its final call. */
+    ret = avcodec_send_packet(dec_ctx, pkt);
+    if (ret < 0) {
+        fprintf(stderr, "Error sending a packet for decoding: %s\n", av_err2str(ret));
+        return ret;
+    }
+    /* Pull frames until the decoder asks for more input or reports EOF. */
+    while (ret >= 0) {
+        const AVPixFmtDescriptor *desc;
+        char sum[AV_HASH_MAX_SIZE * 2 + 1]; /* hex digest plus terminating NUL */
+        struct AVHashContext *hash;
+        /* EAGAIN and EOF are the normal ways out of this loop. */
+        ret = avcodec_receive_frame(dec_ctx, frame);
+        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+            return 0;
+        } else if (ret < 0) {
+            fprintf(stderr, "Error during decoding: %s\n", av_err2str(ret));
+            return ret;
+        }
+        /* Print the header once, before the first frame line; its values are hard-coded. */
+        if (!header) {
+            printf(
+            "#format: frame checksums\n"
+            "#version: 2\n"
+            "#hash: MD5\n"
+            "#tb 0: 1/30\n"
+            "#media_type 0: video\n"
+            "#codec_id 0: rawvideo\n"
+            "#dimensions 0: 352x288\n"
+            "#sar 0: 128/117\n"
+            "#stream#, dts,        pts, duration,     size, hash\n");
+            header = 1;
+        }
+        desc = av_pix_fmt_desc_get(dec_ctx->pix_fmt); /* NOTE(review): result is dereferenced below without a NULL check */
+        if ((ret = av_hash_alloc(&hash, "md5")) < 0) {
+            return ret;
+        }
+        av_hash_init(hash);
+        /* Hash planes 0, 1, 2 row by row over the visible width; assumes one byte per sample -- TODO confirm for the test stream. */
+        for (int i = 0; i < frame->height; i++)
+            av_hash_update(hash, &frame->data[0][i * frame->linesize[0]], frame->width);
+        for (int i = 0; i < frame->height >> desc->log2_chroma_h; i++)
+            av_hash_update(hash, &frame->data[1][i * frame->linesize[1]], frame->width >> desc->log2_chroma_w);
+        for (int i = 0; i < frame->height >> desc->log2_chroma_h; i++)
+            av_hash_update(hash, &frame->data[2][i * frame->linesize[2]], frame->width >> desc->log2_chroma_w);
+        /* One output line per frame: index, byte count of the three planes, hex digest. */
+        av_hash_final_hex(hash, sum, av_hash_get_size(hash) * 2 + 1);
+        printf("0, %10"PRId64", %10"PRId64",        1, %8d, %s\n", /* NOTE(review): frame_cnt is uint64_t but printed with PRId64 */
+            frame_cnt, frame_cnt,
+            (frame->width * frame->height + 2 * (frame->height >> desc->log2_chroma_h) * (frame->width >> desc->log2_chroma_w)), sum);
+        frame_cnt += 1;
+        av_hash_freep(&hash); /* the hash context is allocated and freed once per frame */
+    }
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    /* Feed length-prefixed H.264 chunks to a slice-threaded decoder. */
+    const AVCodec *codec = NULL;
+    AVCodecContext *c = NULL;
+    AVFrame *frame = NULL;
+    unsigned int threads;
+    AVPacket *pkt;
+    FILE *file = NULL;
+    uint8_t *nal = NULL, *p; /* packet buffer and its write cursor */
+    int nals = 0, ret = 0;   /* records buffered so far; exit status */
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <threads> <input file>\n", argv[0]);
+        return -1;
+    }
+
+    if (!(threads = strtoul(argv[1], NULL, 0))) /* clamp to [1, MAX_SLICES] */
+        threads = 1;
+    else if (threads > MAX_SLICES)
+        threads = MAX_SLICES;
+
+#ifdef _WIN32
+    setmode(fileno(stdout), O_BINARY);
+#endif
+
+    if (!(pkt = av_packet_alloc()))
+        return -1;
+
+    /* Zeroed so the padding behind the first packet is initialized too. */
+    nal = av_mallocz(MAX_SLICES * UINT16_MAX + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!nal) {
+        ret = AVERROR(ENOMEM); /* report the allocation failure to the caller */
+        goto err;
+    }
+    p = nal;
+
+    if (!(codec = avcodec_find_decoder(AV_CODEC_ID_H264))) {
+        fprintf(stderr, "Codec not found\n");
+        ret = -1;
+        goto err;
+    }
+
+    if (!(c = avcodec_alloc_context3(codec))) {
+        fprintf(stderr, "Could not allocate video codec context\n");
+        ret = -1;
+        goto err;
+    }
+
+    c->width  = 352;
+    c->height = 288;
+
+    c->flags2 |= AV_CODEC_FLAG2_CHUNKS;
+    c->thread_type = FF_THREAD_SLICE;
+    c->thread_count = threads;
+
+    if ((ret = avcodec_open2(c, codec, NULL)) < 0) {
+        fprintf(stderr, "Could not open codec\n");
+        goto err;
+    }
+
+#if HAVE_THREADS
+    if (c->active_thread_type != FF_THREAD_SLICE) {
+        fprintf(stderr, "Couldn't activate slice threading: %d\n", c->active_thread_type);
+        ret = -1;
+        goto err;
+    }
+#else
+    fprintf(stderr, "WARN: not using threads, only checking decoding slice NALUs\n");
+#endif
+
+    if (!(frame = av_frame_alloc())) {
+        fprintf(stderr, "Could not allocate video frame\n");
+        ret = -1;
+        goto err;
+    }
+
+    if (!(file = fopen(argv[2], "rb"))) {
+        fprintf(stderr, "Couldn't open NALU file: %s\n", argv[2]);
+        ret = -1;
+        goto err;
+    }
+
+    /* Each record is a 16-bit big-endian length followed by that many bytes. */
+    while (1) {
+        uint16_t size = 0;
+        size_t nread = fread(&size, 1, sizeof(uint16_t), file);
+        if (nread != sizeof(uint16_t)) /* no further length prefix: end of input */
+            break;
+
+        size = av_be2ne16(size);
+        nread = fread(p, 1, size, file);
+        if (nread != size) {
+            perror("Couldn't read data");
+            ret = -1; /* a truncated record must fail the test */
+            goto err;
+        }
+        p += nread;
+
+        if (++nals >= threads) { /* one packet per <threads> records */
+            pkt->data = nal;
+            pkt->size = p - nal;
+            if ((ret = decode(c, frame, pkt)) < 0) /* keep the error code for the exit status */
+                goto err;
+            memset(nal, 0, MAX_SLICES * UINT16_MAX + AV_INPUT_BUFFER_PADDING_SIZE);
+            nals = 0;
+            p = nal;
+        }
+    }
+
+    if (nals) { /* flush records left over at end of file */
+        pkt->data = nal;
+        pkt->size = p - nal;
+        if ((ret = decode(c, frame, pkt)) < 0)
+            goto err;
+    }
+
+    ret = decode(c, frame, NULL); /* a NULL packet drains the decoder */
+
+err:
+    av_free(nal); /* av_free(NULL) is a no-op */
+    if (file)
+        fclose(file);
+    av_frame_free(&frame);
+    avcodec_free_context(&c);
+    av_packet_free(&pkt);
+
+    return ret;
+}
diff --git a/tests/api/api-h264-test.c b/tests/api/api-h264-test.c
index 66669fa0c353c..60a3ae5ef4470 100644
--- a/tests/api/api-h264-test.c
+++ b/tests/api/api-h264-test.c
@@ -28,6 +28,7 @@
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/timestamp.h"
 
 static int video_decode_example(const char *input_filename)
 {
@@ -131,9 +132,9 @@ static int video_decode_example(const char *input_filename)
                     av_log(NULL, AV_LOG_ERROR, "Can't copy image to buffer\n");
                     return number_of_written_bytes;
                 }
-                printf("%d, %10"PRId64", %10"PRId64", %8"PRId64", %8d, 0x%08lx\n", video_stream,
-                        fr->pts, fr->pkt_dts, fr->pkt_duration,
-                        number_of_written_bytes, av_adler32_update(0, (const uint8_t*)byte_buffer, number_of_written_bytes));
+                printf("%d, %s, %s, %8"PRId64", %8d, 0x%08lx\n", video_stream,
+                       av_ts2str(fr->pts), av_ts2str(fr->pkt_dts), fr->pkt_duration,
+                       number_of_written_bytes, av_adler32_update(0, (const uint8_t*)byte_buffer, number_of_written_bytes));
             }
             av_packet_unref(&pkt);
             av_init_packet(&pkt);
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 9484acbbd794f..8cc0bff2d11e8 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -31,6 +31,7 @@ AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o
 CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
 
 # libavfilter tests
+AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
 AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
 AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
 AVFILTEROBJS-$(CONFIG_HFLIP_FILTER)      += vf_hflip.o
@@ -61,7 +62,7 @@ CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
 
 CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
 $(CHECKASMOBJS): | $(CHECKASMDIRS)
-OBJDIRS += $(CHECKASMDIRS)
+OUTDIRS += $(CHECKASMDIRS)
 
 tests/checkasm/checkasm.o: CFLAGS += -Umain
 
diff --git a/tests/checkasm/af_afir.c b/tests/checkasm/af_afir.c
new file mode 100644
index 0000000000000..e3fb76e8e058e
--- /dev/null
+++ b/tests/checkasm/af_afir.c
@@ -0,0 +1,83 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <float.h>
+#include <stdint.h>
+
+#include "libavfilter/af_afir.h"
+#include "libavutil/internal.h"
+#include "checkasm.h"
+
+#define LEN 256
+
+#define randomize_buffer(buf) /* fill buf[0 .. LEN*2+8) with Gaussian noise */ \
+do {                                          \
+    int i;                                    \
+    double bmg[2], stddev = 10.0, mean = 0.0; \
+    /* av_bmg_get() yields two samples per call */ \
+    for (i = 0; i < LEN*2+8; i += 2) {        \
+        av_bmg_get(&checkasm_lfg, bmg);       \
+        (buf)[i]     = bmg[0] * stddev + mean; \
+        (buf)[i + 1] = bmg[1] * stddev + mean; \
+    }                                         \
+} while (0) /* no trailing semicolon: the call site supplies it */
+
+static void test_fcmul_add(const float *src0, const float *src1, const float *src2)
+{ /* Run the reference and the optimized fcmul_add on identical inputs and fail on the first element that differs beyond the tolerance. */
+    LOCAL_ALIGNED_32(float, cdst, [LEN*2+8]); /* accumulator for the reference call */
+    LOCAL_ALIGNED_32(float, odst, [LEN*2+8]); /* accumulator for the tested call */
+    int i;
+    /* Signature of the function under test. */
+    declare_func(void, float *sum, const float *t, const float *c,
+                 ptrdiff_t len);
+    /* Both accumulators start from the same src0 contents. */
+    memcpy(cdst, src0, (LEN*2+8) * sizeof(float));
+    memcpy(odst, src0, (LEN*2+8) * sizeof(float));
+    call_ref(cdst, src1, src2, LEN);
+    call_new(odst, src1, src2, LEN);
+    for (i = 0; i <= LEN*2; i++) { /* inclusive bound: presumably the kernel also updates element 2*len -- confirm against the C reference */
+        if (!float_near_abs_eps(cdst[i], odst[i], 6.2e-05)) {
+            fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
+                    i, cdst[i], odst[i], cdst[i] - odst[i]);
+            fail();
+            break; /* report only the first mismatch */
+        }
+    }
+    memcpy(odst, src0, (LEN*2+8) * sizeof(float)); /* reset the accumulator before benchmarking */
+    bench_new(odst, src1, src2, LEN);
+}
+
+void checkasm_check_afir(void)
+{ /* checkasm entry point for the afir DSP context: currently tests fcmul_add only. */
+    LOCAL_ALIGNED_32(float, src0, [LEN*2+8]); /* initial accumulator contents */
+    LOCAL_ALIGNED_32(float, src1, [LEN*2+8]); /* first multiplicand */
+    LOCAL_ALIGNED_32(float, src2, [LEN*2+8]); /* second multiplicand */
+    AudioFIRDSPContext fir = { 0 };
+    /* Populate the function pointers for the CPU flags currently under test. */
+    ff_afir_init(&fir);
+    /* Fresh random inputs for this run. */
+    randomize_buffer(src0);
+    randomize_buffer(src1);
+    randomize_buffer(src2);
+    /* check_func() decides whether this implementation of fcmul_add needs testing. */
+    if (check_func(fir.fcmul_add, "fcmul_add"))
+        test_fcmul_add(src0, src1, src2);
+    report("fcmul_add");
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 721a0912fba0b..9eec41e3c4f82 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -150,6 +150,9 @@ static const struct {
     #endif
 #endif
 #if CONFIG_AVFILTER
+    #if CONFIG_AFIR_FILTER
+        { "af_afir", checkasm_check_afir },
+    #endif
     #if CONFIG_BLEND_FILTER
         { "vf_blend", checkasm_check_blend },
     #endif
@@ -609,6 +612,7 @@ static int bench_init_linux(void)
 }
 #endif
 
+#if !CONFIG_LINUX_PERF
 static int bench_init_ffmpeg(void)
 {
 #ifdef AV_READ_TIME
@@ -619,6 +623,7 @@ static int bench_init_ffmpeg(void)
     return -1;
 #endif
 }
+#endif
 
 static int bench_init(void)
 {
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index c45cfb46f88e0..9e8e879fd3afc 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -40,6 +40,7 @@
 #include "libavutil/timer.h"
 
 void checkasm_check_aacpsdsp(void);
+void checkasm_check_afir(void);
 void checkasm_check_alacdsp(void);
 void checkasm_check_audiodsp(void);
 void checkasm_check_blend(void);
diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 5f73bc9f08254..67c742548d912 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -28,6 +28,7 @@
 #include "libavutil/intreadwrite.h"
 
 static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };
 
 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
 #define SIZEOF_COEF  (2 * ((bit_depth + 7) / 8))
@@ -312,9 +313,132 @@ static void check_idct_multiple(void)
     }
 }
 
+/* Check the tc0-based loop filters against the reference, comparing the full buffers. */
+static void check_loop_filter(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+    H264DSPContext h;
+    int bit_depth;
+    int alphas[36], betas[36];
+    int8_t tc0[36][4];
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
+                      int alpha, int beta, int8_t *tc0);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        int i, j, a, c;
+        uint32_t mask = pixel_mask_lf[bit_depth - 8];
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (i = 35, a = 255, c = 250; i >= 0; i--) { /* a and c shrink by 10% per step */
+            alphas[i] = a << (bit_depth - 8);
+            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            tc0[i][0] = tc0[i][3] = (c + 6) / 10;
+            tc0[i][1] = (c + 7) / 15;
+            tc0[i][2] = (c + 9) / 20;
+            a = a*9/10;
+            c = c*9/10;
+        }
+
+#define CHECK_LOOP_FILTER(name, align)                                  \
+        do {                                                            \
+            if (check_func(h.name, #name "_%dbpp", bit_depth)) {        \
+                for (j = 0; j < 36; j++) {                              \
+                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
+                    for (i = 0; i < 1024; i+=4) {                       \
+                        AV_WN32A(dst + i, rnd() & mask);                \
+                    }                                                   \
+                    memcpy(dst0, dst, 32 * 16 * 2);                     \
+                    memcpy(dst1, dst, 32 * 16 * 2);                     \
+                    /* Filter two identical copies of the input. */     \
+                    call_ref(dst0 + off, 32, alphas[j], betas[j], tc0[j]); \
+                    call_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
+                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
+                        fprintf(stderr, #name ": j:%d, alpha:%d beta:%d " \
+                                "tc0:{%d,%d,%d,%d}\n", j, alphas[j], betas[j], \
+                                tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
+                        fail();                                         \
+                    }                                                   \
+                    bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]); \
+                }                                                       \
+            }                                                           \
+        } while (0)
+
+        CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0);
+        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0);
+#undef CHECK_LOOP_FILTER
+    }
+}
+
+static void check_loop_filter_intra(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]);
+    H264DSPContext h;
+    int bit_depth;
+    int alphas[36], betas[36];
+    /* Intra filters take no tc0; results are compared over the full buffers. */
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride,
+                      int alpha, int beta);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        int i, j, a;
+        uint32_t mask = pixel_mask_lf[bit_depth - 8];
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (i = 35, a = 255; i >= 0; i--) { /* a shrinks by 10% per step */
+            alphas[i] = a << (bit_depth - 8);
+            betas[i]  = (i + 1) / 2 << (bit_depth - 8);
+            a = a*9/10;
+        }
+
+#define CHECK_LOOP_FILTER(name, align)                                  \
+        do {                                                            \
+            if (check_func(h.name, #name "_%dbpp", bit_depth)) {        \
+                for (j = 0; j < 36; j++) {                              \
+                    intptr_t off = 8 * 32 + (j & 15) * 4 * !align;      \
+                    for (i = 0; i < 1024; i+=4) {                       \
+                        AV_WN32A(dst + i, rnd() & mask);                \
+                    }                                                   \
+                    memcpy(dst0, dst, 32 * 16 * 2);                     \
+                    memcpy(dst1, dst, 32 * 16 * 2);                     \
+                    /* Filter two identical copies of the input. */     \
+                    call_ref(dst0 + off, 32, alphas[j], betas[j]);      \
+                    call_new(dst1 + off, 32, alphas[j], betas[j]);      \
+                    if (memcmp(dst0, dst1, 32 * 16 * 2)) {              \
+                        fprintf(stderr, #name ": j:%d, alpha:%d beta:%d\n", \
+                                j, alphas[j], betas[j]);                \
+                        fail();                                         \
+                    }                                                   \
+                    bench_new(dst1 + off, 32, alphas[j], betas[j]);     \
+                }                                                       \
+            }                                                           \
+        } while (0)
+
+        CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0);
+        CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0);
+        CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0);
+#undef CHECK_LOOP_FILTER
+    }
+}
+
 void checkasm_check_h264dsp(void)
 {
     check_idct();
     check_idct_multiple();
     report("idct");
+
+    check_loop_filter();
+    report("loop_filter");
+
+    check_loop_filter_intra();
+    report("loop_filter_intra");
 }
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index aece90a01d30b..f1a1e34c367cf 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -377,8 +377,6 @@ null(){
     :
 }
 
-mkdir -p "$outdir"
-
 # Disable globbing: command arguments may contain globbing characters and
 # must be kept verbatim
 set -f
diff --git a/tests/fate/api.mak b/tests/fate/api.mak
index eb656e68a3867..3714f900bdc78 100644
--- a/tests/fate/api.mak
+++ b/tests/fate/api.mak
@@ -12,6 +12,10 @@ FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, H264, H264) += fate-api-h264
 fate-api-h264: $(APITESTSDIR)/api-h264-test$(EXESUF)
 fate-api-h264: CMD = run $(APITESTSDIR)/api-h264-test $(TARGET_SAMPLES)/h264-conformance/SVA_NL2_E.264
 
+FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, H264, H264) += fate-api-h264-slice
+fate-api-h264-slice: $(APITESTSDIR)/api-h264-slice-test$(EXESUF)
+fate-api-h264-slice: CMD = run $(APITESTSDIR)/api-h264-slice-test 2 $(TARGET_SAMPLES)/h264/crew_cif.nal
+
 FATE_API_LIBAVFORMAT-$(call DEMDEC, FLV, FLV) += fate-api-seek
 fate-api-seek: $(APITESTSDIR)/api-seek-test$(EXESUF) fate-lavf-flv_fmt
 fate-api-seek: CMD = run $(APITESTSDIR)/api-seek-test $(TARGET_PATH)/tests/data/lavf/lavf.flv 0 720
diff --git a/tests/fate/cbs.mak b/tests/fate/cbs.mak
index 911e7704aa99a..f2ef04ceda1bb 100644
--- a/tests/fate/cbs.mak
+++ b/tests/fate/cbs.mak
@@ -61,7 +61,8 @@ FATE_CBS_HEVC_SAMPLES =       \
     LTRPSPS_A_Qualcomm_1.bit  \
     RPLM_A_qualcomm_4.bit     \
     CONFWIN_A_Sony_1.bit      \
-    HRD_A_Fujitsu_2.bit
+    HRD_A_Fujitsu_2.bit       \
+    SLPPLP_A_VIDYO_2.bit
 
 $(foreach N,$(FATE_CBS_HEVC_SAMPLES),$(eval $(call FATE_CBS_TEST,hevc,$(basename $(N)),hevc-conformance/$(N),hevc)))
 
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index a722b4a9172f7..d59e9d293abf6 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -1,4 +1,5 @@
 FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
+                fate-checkasm-af_afir                                   \
                 fate-checkasm-alacdsp                                   \
                 fate-checkasm-audiodsp                                  \
                 fate-checkasm-blockdsp                                  \
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 8bbdc0489690a..1042e96e54b0b 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -782,7 +782,7 @@ fate-filter-meta-4560-rotate0: CMD = framecrc -flags +bitexact -c:a aac_fixed -i
 REFCMP_DEPS = FFMPEG LAVFI_INDEV TESTSRC2_FILTER AVGBLUR_FILTER METADATA_FILTER
 
 FATE_FILTER_SAMPLES-$(call ALLYES, $(REFCMP_DEPS) PSNR_FILTER) += fate-filter-refcmp-psnr-rgb
-fate-filter-refcmp-psnr-rgb: CMD = refcmp_metadata psnr rgb24 0.001
+fate-filter-refcmp-psnr-rgb: CMD = refcmp_metadata psnr rgb24 0.002
 
 FATE_FILTER_SAMPLES-$(call ALLYES, $(REFCMP_DEPS) PSNR_FILTER) += fate-filter-refcmp-psnr-yuv
 fate-filter-refcmp-psnr-yuv: CMD = refcmp_metadata psnr yuv422p 0.0015
diff --git a/tests/fate/h264.mak b/tests/fate/h264.mak
index 1839b9b44eb0b..f14b46c6e05a4 100644
--- a/tests/fate/h264.mak
+++ b/tests/fate/h264.mak
@@ -196,6 +196,7 @@ FATE_H264  := $(FATE_H264:%=fate-h264-conformance-%)                    \
               fate-h264-3386                                            \
               fate-h264-missing-frame                                   \
               fate-h264-ref-pic-mod-overflow                            \
+              fate-h264-timecode
 
 FATE_H264-$(call DEMDEC, H264, H264) += $(FATE_H264)
 FATE_H264-$(call DEMDEC,  MOV, H264) += fate-h264-crop-to-container
@@ -440,6 +441,7 @@ fate-h264-twofields-packet:                       CMD = framecrc -i $(TARGET_SAM
 fate-h264-unescaped-extradata:                    CMD = framecrc -i $(TARGET_SAMPLES)/h264/unescaped_extradata.mp4 -an -frames 10
 fate-h264-3386:                                   CMD = framecrc -i $(TARGET_SAMPLES)/h264/bbc2.sample.h264
 fate-h264-missing-frame:                          CMD = framecrc -i $(TARGET_SAMPLES)/h264/nondeterministic_cut.h264
+fate-h264-timecode:                               CMD = framecrc -i $(TARGET_SAMPLES)/h264/crew_cif_timecode-2.h264
 
 fate-h264-reinit-%:                               CMD = framecrc -i $(TARGET_SAMPLES)/h264/$(@:fate-h264-%=%).h264 -vf format=yuv444p10le,scale=w=352:h=288
 
diff --git a/tests/fate/hlsenc.mak b/tests/fate/hlsenc.mak
new file mode 100644
index 0000000000000..80536239fc18d
--- /dev/null
+++ b/tests/fate/hlsenc.mak
@@ -0,0 +1,43 @@
+tests/data/live_no_endlist.m3u8: TAG = GEN
+tests/data/live_no_endlist.m3u8: ffmpeg$(PROGSSUF)$(EXESUF) | tests/data
+	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
+        -f lavfi -v verbose -i "aevalsrc=cos(2*PI*t)*sin(2*PI*(440+4*t)*t):d=20" -f hls -hls_time 3 -map 0 \
+        -hls_flags omit_endlist -codec:a mp2fixed -hls_segment_filename $(TARGET_PATH)/tests/data/live_no_endlist_%03d.ts \
+        $(TARGET_PATH)/tests/data/live_no_endlist.m3u8 2>/dev/null
+# Needs the hls muxer to generate the playlist and the hdcd filter for CMD.
+FATE_AFILTER-$(call ALLYES, HLS_DEMUXER HLS_MUXER MPEGTS_MUXER MPEGTS_DEMUXER AEVALSRC_FILTER HDCD_FILTER LAVFI_INDEV MP2FIXED_ENCODER) += fate-hls-live-no-endlist
+fate-hls-live-no-endlist: tests/data/live_no_endlist.m3u8
+fate-hls-live-no-endlist: SRC = $(TARGET_PATH)/tests/data/live_no_endlist.m3u8
+fate-hls-live-no-endlist: CMD = md5 -i $(SRC) -af hdcd=process_stereo=false -t 6 -f s24le
+fate-hls-live-no-endlist: CMP = oneline
+fate-hls-live-no-endlist: REF = e038bb8e65d4c1745b9b3ed643e607a3
+
+tests/data/live_last_endlist.m3u8: TAG = GEN
+tests/data/live_last_endlist.m3u8: ffmpeg$(PROGSSUF)$(EXESUF) | tests/data
+	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
+        -f lavfi -v verbose -i "aevalsrc=cos(2*PI*t)*sin(2*PI*(440+4*t)*t):d=20" -f hls -hls_time 3 -map 0 \
+        -codec:a mp2fixed -hls_segment_filename $(TARGET_PATH)/tests/data/live_last_endlist_%03d.ts \
+        $(TARGET_PATH)/tests/data/live_last_endlist.m3u8 2>/dev/null
+# Needs the hls muxer to generate the playlist and the hdcd filter for CMD.
+FATE_AFILTER-$(call ALLYES, HLS_DEMUXER HLS_MUXER MPEGTS_MUXER MPEGTS_DEMUXER AEVALSRC_FILTER HDCD_FILTER LAVFI_INDEV MP2FIXED_ENCODER) += fate-hls-live-last-endlist
+fate-hls-live-last-endlist: tests/data/live_last_endlist.m3u8
+fate-hls-live-last-endlist: SRC = $(TARGET_PATH)/tests/data/live_last_endlist.m3u8
+fate-hls-live-last-endlist: CMD = md5 -i $(SRC) -af hdcd=process_stereo=false -t 6 -f s24le
+fate-hls-live-last-endlist: CMP = oneline
+fate-hls-live-last-endlist: REF = 2ca8567092dcf01e37bedd50454d1ab7
+
+
+tests/data/live_endlist.m3u8: TAG = GEN
+tests/data/live_endlist.m3u8: ffmpeg$(PROGSSUF)$(EXESUF) | tests/data
+	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
+        -f lavfi -i "aevalsrc=cos(2*PI*t)*sin(2*PI*(440+4*t)*t):d=20" -f hls -hls_time 3 -map 0 \
+        -hls_list_size 0 -codec:a mp2fixed -hls_segment_filename $(TARGET_PATH)/tests/data/live_endlist_%d.ts \
+        $(TARGET_PATH)/tests/data/live_endlist.m3u8 2>/dev/null
+# Needs the hls muxer to generate the playlist and the hdcd filter for CMD.
+FATE_AFILTER-$(call ALLYES, HLS_DEMUXER HLS_MUXER MPEGTS_MUXER MPEGTS_DEMUXER AEVALSRC_FILTER HDCD_FILTER LAVFI_INDEV MP2FIXED_ENCODER) += fate-hls-live-endlist
+fate-hls-live-endlist: tests/data/live_endlist.m3u8
+fate-hls-live-endlist: SRC = $(TARGET_PATH)/tests/data/live_endlist.m3u8
+fate-hls-live-endlist: CMD = md5 -i $(SRC) -af hdcd=process_stereo=false -t 20 -f s24le
+fate-hls-live-endlist: CMP = oneline
+fate-hls-live-endlist: REF = e189ce781d9c87882f58e3929455167b
+
diff --git a/tests/fate/libavcodec.mak b/tests/fate/libavcodec.mak
index aa4c36b1128cf..5dde1243faa25 100644
--- a/tests/fate/libavcodec.mak
+++ b/tests/fate/libavcodec.mak
@@ -46,7 +46,7 @@ fate-dct8x8: libavcodec/tests/dct$(EXESUF)
 fate-dct8x8: CMD = run libavcodec/tests/dct
 fate-dct8x8: CMP = null
 
-FATE_LIBAVCODEC-$(CONFIG_H264_VAAPI_ENCODER) += fate-h264-levels
+FATE_LIBAVCODEC-$(CONFIG_H264_METADATA_BSF) += fate-h264-levels
 fate-h264-levels: libavcodec/tests/h264_levels$(EXESUF)
 fate-h264-levels: CMD = run libavcodec/tests/h264_levels
 fate-h264-levels: REF = /dev/null
diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak
index c450c0d8ea90a..a2c3b626c4d17 100644
--- a/tests/fate/microsoft.mak
+++ b/tests/fate/microsoft.mak
@@ -71,6 +71,12 @@ fate-vc1_sa20021: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/SA20021.vc1
 FATE_VC1-$(CONFIG_VC1_DEMUXER) += fate-vc1_ilaced_twomv
 fate-vc1_ilaced_twomv: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/vc1/ilaced_twomv.vc1
 
+FATE_VC1-$(CONFIG_VC1T_DEMUXER) += fate-vc1test_smm0005
+fate-vc1test_smm0005: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/SMM0005.rcv
+
+FATE_VC1-$(CONFIG_VC1T_DEMUXER) += fate-vc1test_smm0015
+fate-vc1test_smm0015: CMD = framecrc -i $(TARGET_SAMPLES)/vc1/SMM0015.rcv
+
 FATE_VC1-$(CONFIG_MOV_DEMUXER) += fate-vc1-ism
 fate-vc1-ism: CMD = framecrc -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an
 
diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak
index 4df0fe64906dd..8ed66cf1355d0 100644
--- a/tests/fate/mov.mak
+++ b/tests/fate/mov.mak
@@ -123,4 +123,4 @@ fate-mov-faststart-4gb-overflow: CMD = run tools/qt-faststart$(EXESUF) $(TARGET_
 fate-mov-faststart-4gb-overflow: CMP = oneline
 fate-mov-faststart-4gb-overflow: REF = bc875921f151871e787c4b4023269b29
 
-fate-mov-mp4-with-mov-in24-ver: CMD = run ffprobe -show_entries stream=codec_name -select_streams 1 $(TARGET_SAMPLES)/mov/mp4-with-mov-in24-ver.mp4
+fate-mov-mp4-with-mov-in24-ver: CMD = run ffprobe$(PROGSSUF)$(EXESUF) -show_entries stream=codec_name -select_streams 1 $(TARGET_SAMPLES)/mov/mp4-with-mov-in24-ver.mp4
diff --git a/tests/fate/mxf.mak b/tests/fate/mxf.mak
index dce23d522ea65..62e4ec01cb6ac 100644
--- a/tests/fate/mxf.mak
+++ b/tests/fate/mxf.mak
@@ -37,9 +37,22 @@ FATE_MXF_REEL_NAME-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-reel_
 fate-mxf-reel_name: $(TARGET_SAMPLES)/mxf/Sony-00001.mxf
 fate-mxf-reel_name: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-00001.mxf  -c copy -timecode 00:00:00:00 -metadata "reel_name=test_reel" -fflags +bitexact -f mxf
 
+FATE_MXF_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-user-comments
+fate-mxf-user-comments: $(TARGET_SAMPLES)/mxf/Sony-00001.mxf
+fate-mxf-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-00001.mxf -c copy -metadata "comment_test=value" -fflags +bitexact -f mxf
+
+FATE_MXF_D10_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-d10-user-comments
+fate-mxf-d10-user-comments: $(TARGET_SAMPLES)/mxf/Sony-00001.mxf
+fate-mxf-d10-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-00001.mxf -c copy -metadata "comment_test=value" -store_user_comments 1 -fflags +bitexact -f mxf_d10
+
+FATE_MXF_OPATOM_USER_COMMENTS-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += fate-mxf-opatom-user-comments
+fate-mxf-opatom-user-comments: $(TARGET_SAMPLES)/mxf/Sony-00001.mxf
+fate-mxf-opatom-user-comments: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-00001.mxf -an -vcodec copy -metadata "comment_test=value" -fflags +bitexact -f mxf_opatom
+
 FATE_MXF-$(CONFIG_MXF_DEMUXER) += $(FATE_MXF)
 
 FATE_SAMPLES_AVCONV += $(FATE_MXF-yes) $(FATE_MXF_REEL_NAME-yes)
+FATE_SAMPLES_AVCONV += $(FATE_MXF_USER_COMMENTS-yes) $(FATE_MXF_D10_USER_COMMENTS-yes) $(FATE_MXF_OPATOM_USER_COMMENTS-yes)
 FATE_SAMPLES_FFPROBE += $(FATE_MXF_PROBE-yes)
 
-fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes)
+fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes) $(FATE_MXF_USER_COMMENTS-yes) $(FATE_MXF_D10_USER_COMMENTS-yes) $(FATE_MXF_OPATOM_USER_COMMENTS-yes)
diff --git a/tests/fate/opus.mak b/tests/fate/opus.mak
index 7f289455cf5d8..c50d88f5e9ae3 100644
--- a/tests/fate/opus.mak
+++ b/tests/fate/opus.mak
@@ -36,6 +36,7 @@ fate-opus-testvector09:      CMP_TARGET = 0
 fate-opus-testvector10:      CMP_TARGET = 38
 fate-opus-testvector11:      CMP_TARGET = 0
 fate-opus-testvector12:      CMP_TARGET = 160
+fate-opus-tron.6ch.tinypkts: CMP_SHIFT = 1440
 fate-opus-tron.6ch.tinypkts: CMP_TARGET = 0
 
 $(FATE_OPUS_CELT): CMP = oneoff
diff --git a/tests/fate/prores.mak b/tests/fate/prores.mak
index f7f52ca7fce91..b7fcc7449a85e 100644
--- a/tests/fate/prores.mak
+++ b/tests/fate/prores.mak
@@ -15,8 +15,14 @@ fate-prores-422:       CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/pror
 fate-prores-422_hq:    CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_422_HQ.mov -pix_fmt yuv422p10le
 fate-prores-422_lt:    CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_422_LT.mov -pix_fmt yuv422p10le
 fate-prores-422_proxy: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_422_Proxy.mov -pix_fmt yuv422p10le
-fate-prores-alpha:     CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_with_Alpha.mov -pix_fmt yuva444p10le
-fate-prores-alpha_skip: CMD = framecrc -flags +bitexact -skip_alpha 1 -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_with_Alpha.mov -pix_fmt yuv444p10le
-fate-prores-transparency: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/prores4444_with_transparency.mov -pix_fmt yuva444p10le
-fate-prores-transparency_skip: CMD = framecrc -flags +bitexact -skip_alpha 1 -i $(TARGET_SAMPLES)/prores/prores4444_with_transparency.mov -pix_fmt yuv444p10le
+fate-prores-alpha:     CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_with_Alpha.mov -pix_fmt yuva444p12le
+fate-prores-alpha_skip: CMD = framecrc -flags +bitexact -skip_alpha 1 -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_with_Alpha.mov -pix_fmt yuv444p12le
+fate-prores-transparency: CMD = framecrc -flags +bitexact -i $(TARGET_SAMPLES)/prores/prores4444_with_transparency.mov -pix_fmt yuva444p12le
+fate-prores-transparency_skip: CMD = framecrc -flags +bitexact -skip_alpha 1 -i $(TARGET_SAMPLES)/prores/prores4444_with_transparency.mov -pix_fmt yuv444p12le
 fate-prores-gray:      CMD = framecrc -flags +bitexact -c:a aac_fixed -i $(TARGET_SAMPLES)/prores/gray.mov -pix_fmt yuv422p10le
+
+# Test the prores_metadata bitstream filter
+FATE_PRORES_METADATA_BSF += fate-prores-metadata
+fate-prores-metadata: CMD = md5 -i $(TARGET_SAMPLES)/prores/Sequence_1-Apple_ProRes_422_Proxy.mov -c:v copy -bsf:v prores_metadata=color_primaries=bt470bg:color_trc=bt709:colorspace=smpte170m -bitexact -f mov
+
+FATE_SAMPLES_FFMPEG-$(call ALLYES, MOV_DEMUXER PRORES_METADATA_BSF) += $(FATE_PRORES_METADATA_BSF)
diff --git a/tests/fate/qtrle.mak b/tests/fate/qtrle.mak
index 774a81652b4bc..1f26ffea7be5a 100644
--- a/tests/fate/qtrle.mak
+++ b/tests/fate/qtrle.mak
@@ -17,7 +17,7 @@ FATE_QTRLE += fate-qtrle-24bit
 fate-qtrle-24bit: CMD = framecrc -i $(TARGET_SAMPLES)/qtrle/aletrek-rle.mov
 
 FATE_QTRLE += fate-qtrle-32bit
-fate-qtrle-32bit: CMD = framecrc -i $(TARGET_SAMPLES)/qtrle/ultra_demo_720_480_32bpp_rle.mov -pix_fmt rgb24
+fate-qtrle-32bit: CMD = framecrc -i $(TARGET_SAMPLES)/qtrle/ultra_demo_720_480_32bpp_rle.mov -pix_fmt bgra
 
 FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, QTRLE) += $(FATE_QTRLE)
 fate-qtrle: $(FATE_QTRLE)
diff --git a/tests/fate/vcodec.mak b/tests/fate/vcodec.mak
index da6da9e09bdb8..2705d2227ac34 100644
--- a/tests/fate/vcodec.mak
+++ b/tests/fate/vcodec.mak
@@ -336,9 +336,21 @@ fate-vsynth%-mpng:               CODEC   = png
 
 FATE_VCODEC-$(call ENCDEC, MSVIDEO1, AVI) += msvideo1
 
-FATE_VCODEC-$(call ENCDEC, PRORES, MOV) += prores prores_ks
+FATE_VCODEC-$(call ENCDEC, PRORES, MOV) += prores prores_int prores_444 prores_444_int prores_ks
 fate-vsynth%-prores:             FMT     = mov
 
+fate-vsynth%-prores_int:         CODEC   = prores
+fate-vsynth%-prores_int:         ENCOPTS = -flags +ildct
+fate-vsynth%-prores_int:         FMT     = mov
+
+fate-vsynth%-prores_444:         CODEC   = prores
+fate-vsynth%-prores_444:         ENCOPTS = -pix_fmt yuv444p10
+fate-vsynth%-prores_444:         FMT     = mov
+
+fate-vsynth%-prores_444_int:     CODEC   = prores
+fate-vsynth%-prores_444_int:     ENCOPTS = -pix_fmt yuv444p10 -flags +ildct
+fate-vsynth%-prores_444_int:     FMT     = mov
+
 fate-vsynth%-prores_ks:          ENCOPTS = -profile hq
 fate-vsynth%-prores_ks:          FMT     = mov
 
diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh
index 45c877e4ac14a..c5b6734815b21 100755
--- a/tests/lavf-regression.sh
+++ b/tests/lavf-regression.sh
@@ -24,8 +24,9 @@ do_lavf_fate()
 do_lavf()
 {
     file=${outfile}lavf.$1
-    do_avconv $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le $2 -i $pcm_src $ENC_OPTS -b:a 64k -t 1 -qscale:v 10 $3
-    do_avconv_crc $file $DEC_OPTS -i $target_path/$file $4
+    do_avconv $file $DEC_OPTS -f image2 -c:v pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le $2 -i $pcm_src $ENC_OPTS -b:a 64k -t 1 -qscale:v 10 $3
+    test "$5" = "disable_crc" ||  # quoted: callers may omit $5
+        do_avconv_crc $file $DEC_OPTS -i $target_path/$file $4
 }
 
 do_lavf_timecode_nodrop() { do_lavf $1 "" "$2 -timecode 02:56:14:13"; }
@@ -73,9 +74,8 @@ fi
 
 if [ -n "$do_rm" ] ; then
 file=${outfile}lavf.rm
-do_avconv $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $DEC_OPTS -ar 44100 -f s16le -i $pcm_src $ENC_OPTS -t 1 -qscale 10 -acodec ac3_fixed -ab 64k
-# broken
-#do_avconv_crc $file -i $target_path/$file
+# The RealMedia muxer is broken.
+do_lavf rm "" "-c:a ac3_fixed" "" disable_crc
 fi
 
 if [ -n "$do_mpg" ] ; then
@@ -235,7 +235,7 @@ fi
 if [ -n "$do_yuv4mpeg" ] ; then
 file=${outfile}lavf.y4m
 do_avconv $file $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $ENC_OPTS -t 1 -qscale 10
-#do_avconv_crc $file -i $target_path/$file
+do_avconv_crc $file -i $target_path/$file
 fi
 
 if [ -n "$do_fits" ] ; then
@@ -411,7 +411,6 @@ fi
 
 if [ -n "$do_pixfmt" ] ; then
 outfile="$datadir/pixfmt/"
-mkdir -p "$outfile"
 conversions="yuv420p yuv422p yuv444p yuyv422 yuv410p yuv411p yuvj420p \
              yuvj422p yuvj444p rgb24 bgr24 rgb32 rgb565 rgb555 gray monow \
              monob yuv440p yuvj440p"
diff --git a/tests/ref/fate/api-h264-slice b/tests/ref/fate/api-h264-slice
new file mode 100644
index 0000000000000..1d463d30ac021
--- /dev/null
+++ b/tests/ref/fate/api-h264-slice
@@ -0,0 +1,309 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/30
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 352x288
+#sar 0: 128/117
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   152064, 28a2f99d62b553403fcffc1f680d5403
+0,          1,          1,        1,   152064, cd95f40841e08160ace0d64506f8adbf
+0,          2,          2,        1,   152064, 32f37a1b3ddc2b8b0f6283f0c403a976
+0,          3,          3,        1,   152064, 643c0b0702072038578ef5ae2000c1a0
+0,          4,          4,        1,   152064, 8d9c9660705f7533e7f49f11693aedf9
+0,          5,          5,        1,   152064, 66a794f8a116c055451091e0e4cd911e
+0,          6,          6,        1,   152064, 8ad529648796ae6da279de0b7ca34f72
+0,          7,          7,        1,   152064, 898ad4170eb740d713de254eb4bfe255
+0,          8,          8,        1,   152064, f96cfc1f00df10003e144529a5fae6c6
+0,          9,          9,        1,   152064, 0351a3b68dc87ba5963624e1461788da
+0,         10,         10,        1,   152064, 6718e4086a0039584bcc37dcd4be6a67
+0,         11,         11,        1,   152064, fb4fec78d9434b9579b31f8ad0472762
+0,         12,         12,        1,   152064, ec2dcc547d84e15383dcee8462bb9d0c
+0,         13,         13,        1,   152064, ea62711bf59b4d1d56cb9dbcb68a8eda
+0,         14,         14,        1,   152064, 75b1cb899a9d9e695106f187c20b91f8
+0,         15,         15,        1,   152064, 44a13e4235c2ed3692af5ef698efe4d3
+0,         16,         16,        1,   152064, 6d5f1249d96573782fa95e9228d1ad0a
+0,         17,         17,        1,   152064, fce8503dd9472fc7932ffbe21425d45a
+0,         18,         18,        1,   152064, e93489a6b4c38d611493a3721aa994d7
+0,         19,         19,        1,   152064, 04580677a663ddba1b747c2ab0d598e6
+0,         20,         20,        1,   152064, a28cceb666c92eaecc3da3e092b83715
+0,         21,         21,        1,   152064, ba9ce0f84fb16c27453666265ad54e35
+0,         22,         22,        1,   152064, 946e014822ab2b45c52d3e08e7db97c2
+0,         23,         23,        1,   152064, b7a40ebb6ac72b322ccfca2568fa521a
+0,         24,         24,        1,   152064, cb5a0564af00a00496950ad705a160ce
+0,         25,         25,        1,   152064, dbad9e8e79c04b1df497884ec28e3a59
+0,         26,         26,        1,   152064, 3c748cdc0e6ec79ca72482e22f0c3ef8
+0,         27,         27,        1,   152064, 1740911da2ebbdc729cbbea0df466c44
+0,         28,         28,        1,   152064, 3b322e03fcc16d6a0dea651634ce0b40
+0,         29,         29,        1,   152064, a7fca405425015b85cb58ec1aece575b
+0,         30,         30,        1,   152064, 2004eec0c923f1855b4b918e6dcb5e02
+0,         31,         31,        1,   152064, 20542f58f1622f58f3cbf4b2bf0b772f
+0,         32,         32,        1,   152064, 8872f5cb900ed8d317f3abe50a147934
+0,         33,         33,        1,   152064, 774ba43dc0cd7932099e3e0633e25721
+0,         34,         34,        1,   152064, 4f2d9b7e6d115bd103ccd9945f85582f
+0,         35,         35,        1,   152064, f53542ca7f6d5ec462770ffff3f4bfd0
+0,         36,         36,        1,   152064, 43b5f8c4e6dc3dc1acc903687bc90293
+0,         37,         37,        1,   152064, aa7d265ab285ded777a970debe6a08d5
+0,         38,         38,        1,   152064, 818ae082b3dd9557e04710d7cfd700be
+0,         39,         39,        1,   152064, 44cfe472ccedf8a44d0b90c97932caae
+0,         40,         40,        1,   152064, 5d2756c81c90bb10484e2e892fce0e0c
+0,         41,         41,        1,   152064, c1b254a4b66dc9d769e10316976f5538
+0,         42,         42,        1,   152064, 03808a3f7b01293dbe6089b33e3dc103
+0,         43,         43,        1,   152064, 8c689a6143a8a89415d2e645bb0fe925
+0,         44,         44,        1,   152064, 24268cac7d78eefd8e247ec599e60b4f
+0,         45,         45,        1,   152064, ac3195c57a3ebe3871992acfc3182e2f
+0,         46,         46,        1,   152064, 8730a99fb5a2573475f61d4e7998ba44
+0,         47,         47,        1,   152064, 651042c34273096db82879fcdd91310d
+0,         48,         48,        1,   152064, 972b47241098a9b6471ba6c8ccc2b83b
+0,         49,         49,        1,   152064, e8e53022355e6bf7ac50d53bcb1bdc92
+0,         50,         50,        1,   152064, e5b003b04a88e0d60d446eb5b600cc66
+0,         51,         51,        1,   152064, 1c2317c071a33b0b465bbcea04411ddf
+0,         52,         52,        1,   152064, 7bd53f4e852370aaeb6b0d042d24b94a
+0,         53,         53,        1,   152064, 6ef30966b0d9c0d92b2350e2f45197a8
+0,         54,         54,        1,   152064, 354e2afff0d056193d6c4c2667c638d3
+0,         55,         55,        1,   152064, e67767e97f44c3ef80ea4acee41af0ff
+0,         56,         56,        1,   152064, 32589395bf7d07c1c6df644d43b5bbba
+0,         57,         57,        1,   152064, 2f2b56210b87142fa3620cce5c56af02
+0,         58,         58,        1,   152064, 599781773ace555c82ac591cc2acf8ec
+0,         59,         59,        1,   152064, 2465cf6313dab6bda9171993ff6168de
+0,         60,         60,        1,   152064, 54cc6c8a9b3fd95b9700d319c4a69297
+0,         61,         61,        1,   152064, 9e813429ebf7ee4e11fcd4976974fea0
+0,         62,         62,        1,   152064, fac0303897b4d1bd1a202fe0a7d1c6f7
+0,         63,         63,        1,   152064, accb382b99f2d27cefbc9f7ea315f80c
+0,         64,         64,        1,   152064, b88711feaee9f7f84da34028b7e2cc81
+0,         65,         65,        1,   152064, 80549aebdcc5629dfc3bee8112536bac
+0,         66,         66,        1,   152064, 9c63aa480b5d9937d839b809ba67eee2
+0,         67,         67,        1,   152064, 5e5a729c45a995ba2a97083fca69c9e9
+0,         68,         68,        1,   152064, 6b59c5d4460d78fa337b94b080e72215
+0,         69,         69,        1,   152064, 166d675f774f4f74dbce7e2728afd16c
+0,         70,         70,        1,   152064, 3051629ff9281ea8879bce0ed62c1e71
+0,         71,         71,        1,   152064, c2ff4493434ca4fea45c724c10bcbe55
+0,         72,         72,        1,   152064, 907274f16ebeb7c09de8bc124c1fe586
+0,         73,         73,        1,   152064, 88efefc9fc00d88fbce20f4d74a55d25
+0,         74,         74,        1,   152064, 357bdbcb828c088b748022df9e47b9c7
+0,         75,         75,        1,   152064, 4550087923c1d4195ab536899a99d429
+0,         76,         76,        1,   152064, 2b07777d9109577eaedc87321dd3ff69
+0,         77,         77,        1,   152064, 4a377c552c62cba06d5285aa8478a540
+0,         78,         78,        1,   152064, a0e893c028e106c2394f0578f00ae88a
+0,         79,         79,        1,   152064, c0db0e2ee3768d2e4e71581a2be30707
+0,         80,         80,        1,   152064, 954e0cd38b00ea2181b8c322511536f0
+0,         81,         81,        1,   152064, 53ff687670a2490ac3f94405014251b8
+0,         82,         82,        1,   152064, 64a588b7adbc560ec4ad6823c37e41c6
+0,         83,         83,        1,   152064, 60d65e899976214cd3b2b5abb10f2b83
+0,         84,         84,        1,   152064, 1bf0efef4b204a72d05b21c18a569585
+0,         85,         85,        1,   152064, 65d814be6b698ab1185f082c9c9d7de3
+0,         86,         86,        1,   152064, de26e7e663aaeb642e0a94776c5bf22c
+0,         87,         87,        1,   152064, f7b0b259ccf21e59fcacd95945a013c3
+0,         88,         88,        1,   152064, 7f3185bb4dc3368733bd29c9aa9e08eb
+0,         89,         89,        1,   152064, 3cedc14798d145fcdc8b8a3082de3b88
+0,         90,         90,        1,   152064, c5792622ca4a04d21e1f2c2b2ff692fe
+0,         91,         91,        1,   152064, d13199fa94e53643902a8a26e33c9862
+0,         92,         92,        1,   152064, b380359e836896d7698a8dadfe6d6fdc
+0,         93,         93,        1,   152064, 4c7c5f1f093f7bcaddf06068b9b1d2e5
+0,         94,         94,        1,   152064, 73f33e1eedea9aa5e6c3f2b636dd2c23
+0,         95,         95,        1,   152064, f0d2aad6477ace945f87b25cb44f3ff0
+0,         96,         96,        1,   152064, d90bddd7c2279bbd0266a26915202712
+0,         97,         97,        1,   152064, 0b377a48dea8fc2702395796808af63f
+0,         98,         98,        1,   152064, ea0099179e806a9680f019446e39d125
+0,         99,         99,        1,   152064, a77dd3069c54b255e45b261f31be80d2
+0,        100,        100,        1,   152064, d362dcbe415329e713ff6ef9e6447d87
+0,        101,        101,        1,   152064, 15441bcb307ac24766ceba8db42f9413
+0,        102,        102,        1,   152064, 79b953e72d11d3fa6d6974e4b8b13392
+0,        103,        103,        1,   152064, ec8c35c829fac56ca8ae2f0160ae5d7f
+0,        104,        104,        1,   152064, c104f8f1d17629b0449f4a2af2e40f73
+0,        105,        105,        1,   152064, 4661c4b3c2b1a03a8e23e7e88e974f22
+0,        106,        106,        1,   152064, 7cb48bae9841f67294b2e25a73d46a8e
+0,        107,        107,        1,   152064, bddcb2c64a4257760f50714ec8c49243
+0,        108,        108,        1,   152064, c2123750802357c25c352f09bd1b1de2
+0,        109,        109,        1,   152064, 6eb5af4f3ad69cc88e0c08f6aa9bb034
+0,        110,        110,        1,   152064, 063991a633a051d6889f0fff41059e5f
+0,        111,        111,        1,   152064, fa736839a01ad04fe08d437c7fa60a2d
+0,        112,        112,        1,   152064, 85a43397c5a1defe15b61464c8d1457a
+0,        113,        113,        1,   152064, da50c437613be59486012b69c7953f63
+0,        114,        114,        1,   152064, eb32e24757a98192928324d3a389a3dc
+0,        115,        115,        1,   152064, 1bf511fb8245e3be71ebefdcf506575d
+0,        116,        116,        1,   152064, 4479c195c4cd4111afe561a07c0f626d
+0,        117,        117,        1,   152064, 0b1815f0c28bb55aae515a5dc3a34f3b
+0,        118,        118,        1,   152064, 300d3c32442bd554384b3c804dd519ad
+0,        119,        119,        1,   152064, 197df868e0488b8b12c0b42d8c4b2aec
+0,        120,        120,        1,   152064, 03bce34c3214e0144a0928b9b9acc8e8
+0,        121,        121,        1,   152064, ba73a879b8fca5db4a354075b26ccb6a
+0,        122,        122,        1,   152064, b1c34c6d2535bf1e7af3a6936d1627df
+0,        123,        123,        1,   152064, 77d162995974428c5c7766ee5627eac1
+0,        124,        124,        1,   152064, fa4c70aa68850bcae2579046557c0b5f
+0,        125,        125,        1,   152064, 63ce618e67f380000030c97db78ac4ae
+0,        126,        126,        1,   152064, 7e32538d501127faf058792e83fbbe43
+0,        127,        127,        1,   152064, 61bc1d685553a97a7c3b0cbb3790faad
+0,        128,        128,        1,   152064, 57f3b97e4a80ded30b9e8f12cfc8ff44
+0,        129,        129,        1,   152064, 31db51a64307ca6f1db866a01befa301
+0,        130,        130,        1,   152064, 59924d342068caf1ad7329b947279e2d
+0,        131,        131,        1,   152064, 2f0f9dd3056cac40c17684bcccdf267d
+0,        132,        132,        1,   152064, b00df17142f99bdc077cb2e4c5c8b747
+0,        133,        133,        1,   152064, e7c40734dea5433038b975666be7b21e
+0,        134,        134,        1,   152064, 51d77965d3a9d431a2c409244c9bc465
+0,        135,        135,        1,   152064, 15b54bdc5e2098fe7c01ce689babe08b
+0,        136,        136,        1,   152064, 3fa3284ae3f714ea197ad614bff7c5c5
+0,        137,        137,        1,   152064, c6512a19b7b1b29c05c7b825b41ab930
+0,        138,        138,        1,   152064, b13c8bc436186d47595dc69186f1f927
+0,        139,        139,        1,   152064, d5eff490784883a93dd3aaea08c46d5b
+0,        140,        140,        1,   152064, a005ac77851ea3a44e261d9067ee835f
+0,        141,        141,        1,   152064, 6706b74dc10c72f27e9f6341710e56ac
+0,        142,        142,        1,   152064, 46479f86f53f55d2094354eb9bed78df
+0,        143,        143,        1,   152064, 17f5cd040eb766ece29d1c1e283e9c20
+0,        144,        144,        1,   152064, 4f34c43eeeac2c751aac486ba42d9b9a
+0,        145,        145,        1,   152064, 24c16b9d01c316305686af1a12f7be49
+0,        146,        146,        1,   152064, 9ae9b1f109fa3d02f226fefdaf395be6
+0,        147,        147,        1,   152064, eb98c1c6e473d8b53069206ffc69a9cb
+0,        148,        148,        1,   152064, f0768d9cb981d277b70d1b3b928f4915
+0,        149,        149,        1,   152064, c1a5cef2bdb3f3b932a051c29d31f889
+0,        150,        150,        1,   152064, 8f75fb3a6f994b90999f8b0c664ad7c4
+0,        151,        151,        1,   152064, 3a778c9c86afaf03f2e60668d849e25b
+0,        152,        152,        1,   152064, 4c3dd11965a2cf55790088a99289981a
+0,        153,        153,        1,   152064, 763f810845e6f4e798a6edb6633f5506
+0,        154,        154,        1,   152064, 6b305b9d79151c1644c924d522091eea
+0,        155,        155,        1,   152064, e981ce0e01f24eca2e89c7c81480fb07
+0,        156,        156,        1,   152064, 91349f36d44383dc1cd72f0a3f9c76ed
+0,        157,        157,        1,   152064, 9a67f029ed2370983ff3e24d8c2c65d2
+0,        158,        158,        1,   152064, cf5717cb593fbafad6abf8bdb7ca2737
+0,        159,        159,        1,   152064, 7ece8c2497ca72e4f8e9eb69048391f8
+0,        160,        160,        1,   152064, 9dccce22ca32a7ec8890f77e4de1fa42
+0,        161,        161,        1,   152064, f418dc75e266c47ba84275741f0635cb
+0,        162,        162,        1,   152064, aeddab213baab78ed0c44abb7409e291
+0,        163,        163,        1,   152064, a0b5e3c0616105580a310529ed71d072
+0,        164,        164,        1,   152064, e0e96da8724b472868634b6b145ebb2e
+0,        165,        165,        1,   152064, bdaaf9623f5d329c8706e4850db0beea
+0,        166,        166,        1,   152064, 6566ddd82da9096458e039caa7d56674
+0,        167,        167,        1,   152064, b882cb5f1c6059d338273e8fdb18e41e
+0,        168,        168,        1,   152064, f9723e59ce02828e64c16d32216441b2
+0,        169,        169,        1,   152064, 98b5a843bf125eeae0240bde40016d6a
+0,        170,        170,        1,   152064, 8958b81f8a028928c4b9a7024a4eebff
+0,        171,        171,        1,   152064, 25a8acfdd14a472a8090d41626472070
+0,        172,        172,        1,   152064, 6faf859c0b264b6d76e0823c6045cebd
+0,        173,        173,        1,   152064, 0774a3470360c37ede375d19aebe1844
+0,        174,        174,        1,   152064, 5dd921d4f05976fb6bbf5cc6996254e0
+0,        175,        175,        1,   152064, d03d789e3c439420a07e3e919ddd1cf0
+0,        176,        176,        1,   152064, 1fad139023f7d7022f8f65a6e31f68a9
+0,        177,        177,        1,   152064, 0c706070d649da054eeaf686d2e14a1d
+0,        178,        178,        1,   152064, 51e4156b19bdc55e993d1956473827e3
+0,        179,        179,        1,   152064, e447458fd86c022852cedf56dc58f34f
+0,        180,        180,        1,   152064, 59732caeb824f052044b4434ef564227
+0,        181,        181,        1,   152064, cf5ccf671ddc89e1f430878afb86fced
+0,        182,        182,        1,   152064, e3e98f92e4cf8f0ccce27482407ebbf1
+0,        183,        183,        1,   152064, 089d236d04d1918b319524e3002d21c8
+0,        184,        184,        1,   152064, 7063afc35aa2c24b1e3dc781bb612af1
+0,        185,        185,        1,   152064, 902e5153028215ac60bf0f998673e3ca
+0,        186,        186,        1,   152064, 2360fb2ed2b0e7c37a318fb7f9df7550
+0,        187,        187,        1,   152064, be0788a6a06906f57f7ad1e0e4c0aba7
+0,        188,        188,        1,   152064, db90ee89bbeefcd54b79f022ed9d62d9
+0,        189,        189,        1,   152064, 7237b5c1e6f182805d4e324e636f2a45
+0,        190,        190,        1,   152064, e5da5c0643e457087f54935cfa50f7c0
+0,        191,        191,        1,   152064, 89b5d462accdc4cfaed1e57de4589f39
+0,        192,        192,        1,   152064, b670710e2f897f20d83c42bcd0ee7d85
+0,        193,        193,        1,   152064, 9c7ceba12895f2a670e4a1498d28951c
+0,        194,        194,        1,   152064, 4b426b0719a67bc228e1928e83b47b53
+0,        195,        195,        1,   152064, b2c646cd4d3b100619fd6e626ea8b3cb
+0,        196,        196,        1,   152064, ad9abc825e1b87ec0defb1df032364e6
+0,        197,        197,        1,   152064, 21423e23c708f43a9d615bc2bc700d97
+0,        198,        198,        1,   152064, 14a42211968cd4b8416ebc0285eb02b3
+0,        199,        199,        1,   152064, a45eb0c4f6a9c5beeb90a292be71461e
+0,        200,        200,        1,   152064, f9bfba991f0a0ea6bbfdde5d23bd8785
+0,        201,        201,        1,   152064, 49d33752288ddef86dc702652f688c75
+0,        202,        202,        1,   152064, 97b50290b4a1e2f31c704cc302fe89d8
+0,        203,        203,        1,   152064, c3006dcc89d2f45379c840c7dd5f7559
+0,        204,        204,        1,   152064, 4a861c22e63478ffe73571909da9a15f
+0,        205,        205,        1,   152064, e7a8bff496059d3cd40470920fb26c75
+0,        206,        206,        1,   152064, 989d818e0d7d8eea14da209c37ad3e0b
+0,        207,        207,        1,   152064, 1732c746805ca221c85fb5570911378d
+0,        208,        208,        1,   152064, 60ece5f795f5756bef34ba84fb6fec2a
+0,        209,        209,        1,   152064, 9fd355648ef40dd0e15c81554b111637
+0,        210,        210,        1,   152064, 2a3b9220b98ea648e395ab9ea12023d2
+0,        211,        211,        1,   152064, eea2a06e68196917ba2a754563953cd5
+0,        212,        212,        1,   152064, 3c2ec831a9802a968654df1bee81ca40
+0,        213,        213,        1,   152064, 590abeedce1cfa9df8a00d7ab9cf2c8e
+0,        214,        214,        1,   152064, bc07f89391568a78854f99ad9fd62c49
+0,        215,        215,        1,   152064, 0bd866450376be96a85690595d96d909
+0,        216,        216,        1,   152064, 33483531a4d760bdc30a77d5de49aff7
+0,        217,        217,        1,   152064, b0294c6e784fa3f15532331063c5036f
+0,        218,        218,        1,   152064, f4f3ba2781b2a9be3c2dd5b4c445e0d9
+0,        219,        219,        1,   152064, 8550626512e0602a1c53bfb8c51427d8
+0,        220,        220,        1,   152064, 0c2d0229196825910e5f487c33b45ef3
+0,        221,        221,        1,   152064, 93dbbed468f0012b921aa0b2b6751a70
+0,        222,        222,        1,   152064, 2f0d99dc6d4b5c65bc18946b1e6cdc4c
+0,        223,        223,        1,   152064, fb25cbe655fc050bbcbfe9cc3fa06ffe
+0,        224,        224,        1,   152064, 376d3f894957b3bac2308f2662ad5c82
+0,        225,        225,        1,   152064, 46b5c54ea38987b9e3d371a64d06300d
+0,        226,        226,        1,   152064, 9bd24bc1a94aed633ff63aac5b720269
+0,        227,        227,        1,   152064, df0bb3f7724048f67c4a60a1dbb3d5e6
+0,        228,        228,        1,   152064, a9d1c8b8007ea61c0ab2f97b3cfc2aea
+0,        229,        229,        1,   152064, fd5a4ccab51773b09edca30e665999e8
+0,        230,        230,        1,   152064, 0eaf8218244c9b2e78660cf923755557
+0,        231,        231,        1,   152064, 40f4fc64016fd148b14aea2da7652125
+0,        232,        232,        1,   152064, 6f075b312e9f7e1b4c3343441a9e1f7f
+0,        233,        233,        1,   152064, 93f7523632abfe91fa701208aafdc29a
+0,        234,        234,        1,   152064, 3c3ea7aa12a89df2309b76c22053b0ff
+0,        235,        235,        1,   152064, 2181a1aec4278efa70dec025878d88c0
+0,        236,        236,        1,   152064, 35dffda6543fdf43ad182484564abda8
+0,        237,        237,        1,   152064, bf2b65551a8fcf3b1b4185e0ebfca2a7
+0,        238,        238,        1,   152064, 49fd2dd18ddbb7f005c3705910bff99f
+0,        239,        239,        1,   152064, 9f6826599ebd45a1159e46d293fc8f7b
+0,        240,        240,        1,   152064, 5b88b8ec1da51a165e2741f8a6b710ad
+0,        241,        241,        1,   152064, a81229c0d464cc8d376f8b0153b50fc2
+0,        242,        242,        1,   152064, 07ef482c1c9967700a6cef5cdd010384
+0,        243,        243,        1,   152064, d4ebe4de6e096f7cccd5ae2be856e983
+0,        244,        244,        1,   152064, 6daf25ffb2c2baf02e483e84733fc37b
+0,        245,        245,        1,   152064, d52f485c747e945bfe34aeeaaec4fe78
+0,        246,        246,        1,   152064, 408e5b502af7a10454af6f388e2722be
+0,        247,        247,        1,   152064, 684d285dc9c08791ce16e02a1f65e22b
+0,        248,        248,        1,   152064, 5de9b8f8678c6b7a1ff04f217ef8c0c3
+0,        249,        249,        1,   152064, b60f9e37dcfc3924adcfc96d08fb2656
+0,        250,        250,        1,   152064, 8975d551bb7c01cb520b5694e73d1809
+0,        251,        251,        1,   152064, af55f9897a3fa51eacdcebf3a21f5fe5
+0,        252,        252,        1,   152064, 10c21c5167cba09ce54f361e88e6e3c9
+0,        253,        253,        1,   152064, 8cb92c4a8d32fe00a92c5bd4a163cc45
+0,        254,        254,        1,   152064, 3d39fd1222c8421f0eed3c8249c3d772
+0,        255,        255,        1,   152064, 43c5629af47dc4fd659bffe481e84999
+0,        256,        256,        1,   152064, ad6d5a0f4d2d2738809b7f610f6da823
+0,        257,        257,        1,   152064, d2f0dbca68098d58468e94b84ef0fb8b
+0,        258,        258,        1,   152064, 247487ae60500313df92dd0175ac4e0f
+0,        259,        259,        1,   152064, cfbbabb4b8c93c87c221f76a155bb0fc
+0,        260,        260,        1,   152064, c708254a644abc41788d717dd59b8baf
+0,        261,        261,        1,   152064, fa710d87bddd1a65970c5618a8a0158f
+0,        262,        262,        1,   152064, 31210937c8a67c6aafda2e03226b9770
+0,        263,        263,        1,   152064, ac518a56fc537de251f3d28d380e25cb
+0,        264,        264,        1,   152064, afcb7642c336bcef9b605a40e518d305
+0,        265,        265,        1,   152064, 15fd29e16aaebae6f74e49455631c1f8
+0,        266,        266,        1,   152064, 938b90999b05595e9875c6d4f9836407
+0,        267,        267,        1,   152064, 2fe744b939902a5f4bb69e9243c55d08
+0,        268,        268,        1,   152064, a902057edac1638a1cd218fe5b88bfc2
+0,        269,        269,        1,   152064, 78087115b9600b5499866c127d175c0f
+0,        270,        270,        1,   152064, 877c729e2d2b599dd6cac1f59f12e068
+0,        271,        271,        1,   152064, 77e6b4b761902fbe27fb0ff9eb6d02ac
+0,        272,        272,        1,   152064, dd3ee373cb4935eca46947aedda3b991
+0,        273,        273,        1,   152064, b3ee6b4a18f6d20f9b9fd8dc9e8af90e
+0,        274,        274,        1,   152064, 492afb7421667468fa95017c693ec47b
+0,        275,        275,        1,   152064, 9abb912d8101de895b8f482c199934c2
+0,        276,        276,        1,   152064, 08ca372dfb5e90382f1b58345a0e51b1
+0,        277,        277,        1,   152064, 805559cb3f3385e7865df692336dba29
+0,        278,        278,        1,   152064, c5cc85e4d44010e048fd2013535d7180
+0,        279,        279,        1,   152064, ef9a05a7a4e0b5beff9a8119af44ebc7
+0,        280,        280,        1,   152064, e6983be0a0c1705cfede1e7476aad381
+0,        281,        281,        1,   152064, a4bb0c3d4deb17784b07d3713db05302
+0,        282,        282,        1,   152064, 0fd5bb9259e8c27aba7670b08cd9a26b
+0,        283,        283,        1,   152064, 43d6df9fd672b13e2c59db924e9fe30b
+0,        284,        284,        1,   152064, 3aaf3b87705c46495c9d1b9f4ea706bf
+0,        285,        285,        1,   152064, 0d2ba631f5c716d9c5e5b2a75d3b6433
+0,        286,        286,        1,   152064, bf29cc016dce85e621aaa7647fae1544
+0,        287,        287,        1,   152064, 3374284a808d79e9be32bf3610b0fd17
+0,        288,        288,        1,   152064, ea3f305e76009f3bf2cd5014d339eafa
+0,        289,        289,        1,   152064, 95ce7320a841a71b5a8871cef385ce41
+0,        290,        290,        1,   152064, 88613d96dbda681edab4ed41c3f08536
+0,        291,        291,        1,   152064, b9e9e9045b91c4f7917274088de64a5e
+0,        292,        292,        1,   152064, e0b90055449e7403289a8dda9c02add0
+0,        293,        293,        1,   152064, 367ee1603fa7778dad3e99be8db779ee
+0,        294,        294,        1,   152064, 6bb0eaa6140d673b452eee6ac6c262c2
+0,        295,        295,        1,   152064, 9af4ef919ae61e1597db1b9acd6af95a
+0,        296,        296,        1,   152064, e8f29872e86e54ac26b5fb0a20f10d3e
+0,        297,        297,        1,   152064, 09aaad95cd7d173bfe609b79440cbfc8
+0,        298,        298,        1,   152064, c03abe502be10f76e33d93e1c40cc674
+0,        299,        299,        1,   152064, 3e7e315be8aef281714a63f4cf086085
diff --git a/tests/ref/fate/api-mjpeg-codec-param b/tests/ref/fate/api-mjpeg-codec-param
index 290f941ff3c0d..0815919d7d96d 100644
--- a/tests/ref/fate/api-mjpeg-codec-param
+++ b/tests/ref/fate/api-mjpeg-codec-param
@@ -3,6 +3,7 @@ stream=0, decode=0
     ab=0
     bt=4000000
     flags=0x00000000
+    flags2=0x00000000
     time_base=0/1
     g=12
     ar=0
@@ -56,14 +57,9 @@ stream=0, decode=0
     aspect=180/180
     sar=180/180
     debug=0x00000000
-    cmp=0
-    subcmp=0
-    mbcmp=0
-    ildctcmp=8
     dia_size=0
     last_pred=0
     preme=0
-    precmp=0
     pre_dia_size=0
     subq=8
     me_range=0
@@ -75,7 +71,6 @@ stream=0, decode=0
     sc_threshold=0
     nr=0
     rc_init_occupancy=0
-    flags2=0x00000000
     threads=1
     dc=0
     nssew=8
@@ -88,6 +83,11 @@ stream=0, decode=0
     skip_factor=0
     skip_exp=0
     skipcmp=13
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    precmp=0
     mblmin=236
     mblmax=3658
     mepc=256
@@ -138,11 +138,13 @@ stream=0, decode=0
     max_pixels=2147483647
     hwaccel_flags=0x00000001
     extra_hw_frames=-1
+    discard_damaged_percentage=95
 stream=0, decode=1
     b=0
     ab=0
     bt=4000000
     flags=0x00000000
+    flags2=0x00000000
     time_base=0/1
     g=12
     ar=0
@@ -196,14 +198,9 @@ stream=0, decode=1
     aspect=180/180
     sar=180/180
     debug=0x00000000
-    cmp=0
-    subcmp=0
-    mbcmp=0
-    ildctcmp=8
     dia_size=0
     last_pred=0
     preme=0
-    precmp=0
     pre_dia_size=0
     subq=8
     me_range=0
@@ -215,7 +212,6 @@ stream=0, decode=1
     sc_threshold=0
     nr=0
     rc_init_occupancy=0
-    flags2=0x00000000
     threads=1
     dc=0
     nssew=8
@@ -228,6 +224,11 @@ stream=0, decode=1
     skip_factor=0
     skip_exp=0
     skipcmp=13
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    precmp=0
     mblmin=236
     mblmax=3658
     mepc=256
@@ -278,3 +279,4 @@ stream=0, decode=1
     max_pixels=2147483647
     hwaccel_flags=0x00000001
     extra_hw_frames=-1
+    discard_damaged_percentage=95
diff --git a/tests/ref/fate/api-png-codec-param b/tests/ref/fate/api-png-codec-param
index f04ffa757dc2d..a47d0963da026 100644
--- a/tests/ref/fate/api-png-codec-param
+++ b/tests/ref/fate/api-png-codec-param
@@ -3,6 +3,7 @@ stream=0, decode=0
     ab=0
     bt=4000000
     flags=0x00000000
+    flags2=0x00000000
     time_base=0/1
     g=12
     ar=0
@@ -56,14 +57,9 @@ stream=0, decode=0
     aspect=2835/2835
     sar=2835/2835
     debug=0x00000000
-    cmp=0
-    subcmp=0
-    mbcmp=0
-    ildctcmp=8
     dia_size=0
     last_pred=0
     preme=0
-    precmp=0
     pre_dia_size=0
     subq=8
     me_range=0
@@ -75,7 +71,6 @@ stream=0, decode=0
     sc_threshold=0
     nr=0
     rc_init_occupancy=0
-    flags2=0x00000000
     threads=1
     dc=0
     nssew=8
@@ -88,6 +83,11 @@ stream=0, decode=0
     skip_factor=0
     skip_exp=0
     skipcmp=13
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    precmp=0
     mblmin=236
     mblmax=3658
     mepc=256
@@ -138,11 +138,13 @@ stream=0, decode=0
     max_pixels=2147483647
     hwaccel_flags=0x00000001
     extra_hw_frames=-1
+    discard_damaged_percentage=95
 stream=0, decode=1
     b=0
     ab=0
     bt=4000000
     flags=0x00000000
+    flags2=0x00000000
     time_base=0/1
     g=12
     ar=0
@@ -196,14 +198,9 @@ stream=0, decode=1
     aspect=2835/2835
     sar=2835/2835
     debug=0x00000000
-    cmp=0
-    subcmp=0
-    mbcmp=0
-    ildctcmp=8
     dia_size=0
     last_pred=0
     preme=0
-    precmp=0
     pre_dia_size=0
     subq=8
     me_range=0
@@ -215,7 +212,6 @@ stream=0, decode=1
     sc_threshold=0
     nr=0
     rc_init_occupancy=0
-    flags2=0x00000000
     threads=1
     dc=0
     nssew=8
@@ -228,6 +224,11 @@ stream=0, decode=1
     skip_factor=0
     skip_exp=0
     skipcmp=13
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    precmp=0
     mblmin=236
     mblmax=3658
     mepc=256
@@ -278,3 +279,4 @@ stream=0, decode=1
     max_pixels=2147483647
     hwaccel_flags=0x00000001
     extra_hw_frames=-1
+    discard_damaged_percentage=95
diff --git a/tests/ref/fate/cbs-hevc-SLPPLP_A_VIDYO_2 b/tests/ref/fate/cbs-hevc-SLPPLP_A_VIDYO_2
new file mode 100644
index 0000000000000..5945221a9126c
--- /dev/null
+++ b/tests/ref/fate/cbs-hevc-SLPPLP_A_VIDYO_2
@@ -0,0 +1 @@
+e5309cf061c06496f8fe2b8b313e6c39
diff --git a/tests/ref/fate/copy-trac3074 b/tests/ref/fate/copy-trac3074
index 5ce56942604a1..ff66900253f88 100644
--- a/tests/ref/fate/copy-trac3074
+++ b/tests/ref/fate/copy-trac3074
@@ -1,5 +1,5 @@
-39aef1afff761d673fd1be07182941d1 *tests/data/fate/copy-trac3074.mp4
-333991 tests/data/fate/copy-trac3074.mp4
+f92a201033712bda262f1e071e25544a *tests/data/fate/copy-trac3074.mp4
+333992 tests/data/fate/copy-trac3074.mp4
 #tb 0: 1/48000
 #media_type 0: audio
 #codec_id 0: eac3
diff --git a/tests/ref/fate/filter-palettegen-2 b/tests/ref/fate/filter-palettegen-2
index aa07b6cb939de..9abec0fe8ef4b 100644
--- a/tests/ref/fate/filter-palettegen-2
+++ b/tests/ref/fate/filter-palettegen-2
@@ -3,4 +3,4 @@
 #codec_id 0: rawvideo
 #dimensions 0: 16x16
 #sar 0: 1/1
-0,          0,          0,        1,     1024, 0x906ff5aa
+0,          0,          0,        1,     1024, 0x23e072c8
diff --git a/tests/ref/fate/filter-pixdesc-grayf32be b/tests/ref/fate/filter-pixdesc-grayf32be
index 423bbfbebc76d..171475483adac 100644
--- a/tests/ref/fate/filter-pixdesc-grayf32be
+++ b/tests/ref/fate/filter-pixdesc-grayf32be
@@ -1 +1 @@
-pixdesc-grayf32be   381c8d0f19d286809b91cd6e6c0048ab
+pixdesc-grayf32be   9b23c74e8e8ffae5d7c7e82bbf5929da
diff --git a/tests/ref/fate/filter-pixdesc-grayf32le b/tests/ref/fate/filter-pixdesc-grayf32le
index a76e0a995e231..d598d123b4edf 100644
--- a/tests/ref/fate/filter-pixdesc-grayf32le
+++ b/tests/ref/fate/filter-pixdesc-grayf32le
@@ -1 +1 @@
-pixdesc-grayf32le   381c8d0f19d286809b91cd6e6c0048ab
+pixdesc-grayf32le   291f074a24c44799a1f437d1c55556f1
diff --git a/tests/ref/fate/filter-pixdesc-ya16be b/tests/ref/fate/filter-pixdesc-ya16be
new file mode 100644
index 0000000000000..3fadfa355da48
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-ya16be
@@ -0,0 +1 @@
+pixdesc-ya16be      c5bf539478020302a30f36c5059b7695
diff --git a/tests/ref/fate/filter-pixdesc-ya16le b/tests/ref/fate/filter-pixdesc-ya16le
new file mode 100644
index 0000000000000..ae5764c1fe713
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-ya16le
@@ -0,0 +1 @@
+pixdesc-ya16le      d238b5905b3ab79f7f00d5ea03ee4b87
diff --git a/tests/ref/fate/filter-pixdesc-yuva422p12be b/tests/ref/fate/filter-pixdesc-yuva422p12be
new file mode 100644
index 0000000000000..16df4e45c41f6
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-yuva422p12be
@@ -0,0 +1 @@
+pixdesc-yuva422p12be0420bebaa8a56fea28a06fd565f8e6b3
diff --git a/tests/ref/fate/filter-pixdesc-yuva422p12le b/tests/ref/fate/filter-pixdesc-yuva422p12le
new file mode 100644
index 0000000000000..b7452ccf78bf7
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-yuva422p12le
@@ -0,0 +1 @@
+pixdesc-yuva422p12le5dff3afb6301abbc0e2a85761b8c5c64
diff --git a/tests/ref/fate/filter-pixdesc-yuva444p12be b/tests/ref/fate/filter-pixdesc-yuva444p12be
new file mode 100644
index 0000000000000..cfe454f499818
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-yuva444p12be
@@ -0,0 +1 @@
+pixdesc-yuva444p12be952d5e6b4a3494c5228527237fdfc413
diff --git a/tests/ref/fate/filter-pixdesc-yuva444p12le b/tests/ref/fate/filter-pixdesc-yuva444p12le
new file mode 100644
index 0000000000000..95aa2a984c605
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-yuva444p12le
@@ -0,0 +1 @@
+pixdesc-yuva444p12le235218d42dbbe40fbc044755825c6a95
diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index 5385036a82ef6..0609344c2af9e 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy
@@ -76,6 +76,8 @@ rgba64le            b91e1d77f799eb92241a2d2d28437b15
 uyvy422             3bcf3c80047592f2211fae3260b1b65d
 xyz12be             a1ef56bf746d71f59669c28e48fc8450
 xyz12le             831ff03c1ba4ef19374686f16a064d8c
+ya16be              2f2c27f1854ac00c73d13861dcab2705
+ya16le              2c1fbd127c9f0435adc0e9b2ea3f486b
 ya8                 dbb99fbcdc204aaa1a7397ff561f1a67
 yuv410p             5d4d992a7728431aa4e0700f87fb7fd8
 yuv411p             7e1300e89f5bc07939e2c4a6acbdf267
@@ -127,6 +129,8 @@ yuva420p9le         0e9c9803aaaddc9f38e419de587793c2
 yuva422p            8f6bb778647e5dee62f544d646321171
 yuva422p10be        2f7204c93a1e5bfb04538852f99e4074
 yuva422p10le        c8082548aca999edde77ef2749b1ff4c
+yuva422p12be        19f8205cca3d19bfd4ad9cd2bfb07a0e
+yuva422p12le        38e6c7d87332852d660df5594529fa6e
 yuva422p16be        427ad55f7464121bb3ce164641772bc6
 yuva422p16le        af6f8df651275de58129e010bb45ffcd
 yuva422p9be         47579cc2cea861ca1461589b80c4720f
@@ -134,6 +138,8 @@ yuva422p9le         aaeab2bfe80a29390e8666103ed8bb40
 yuva444p            459fad5abfd16db9bb6a52761dc74cc1
 yuva444p10be        fa16bae4fc25429deb944ffa9f5b28a0
 yuva444p10le        92f820d3481b7ebcb48b98a73e7b4c90
+yuva444p12be        d8193387128a1b1efc51f36a12c85385
+yuva444p12le        9c17bf72b083f93040d1e19516b54de7
 yuva444p16be        c80c1899789a6411d0025730efc8f01c
 yuva444p16le        2ed56ea50fafda4d226c9b133755dad8
 yuva444p9be         4903fde22b15d28da90761ac1cfcb1c5
diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index ae48c2bf42e64..4e4b6e4a63aac 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop
@@ -73,6 +73,8 @@ rgba64be            89910046972ab3c68e2a348302cc8ca9
 rgba64le            fea8ebfc869b52adf353778f29eac7a7
 xyz12be             cb4571f9aaa7b59f999ef327276104b7
 xyz12le             cd6aae8d26b18bdb4b9d068586276d91
+ya16be              029a3b7c523de988e3161484d41ea15c
+ya16le              32929a08d11982aec66ea1e665cfba3a
 ya8                 51a8dd297e35d40b06d3ebe8f4717895
 yuv410p             3bb6c7b64f2c46bc5e8b77198ce4ea58
 yuv411p             693e4afe96998e6dd91734037d75d887
@@ -124,6 +126,8 @@ yuva420p9le         abff60ecbe6e867949399710953bd1fc
 yuva422p            ad564e513a8c08ff0ec99324e204dfbd
 yuva422p10be        61c806e5e02ea4c90ad3156c90957a18
 yuva422p10le        41507ce136674ad458e562d44c67ddca
+yuva422p12be        e686b37d6738aae3c1d25bd36f3e518c
+yuva422p12le        95bb91f2922bdd2eb9aa6fabe86d4994
 yuva422p16be        3437cce47184e4b9a7a681831816e1ea
 yuva422p16le        0d534fcd61fc54fa9d4bbae5bde537ec
 yuva422p9be         588f72cd85285ed71a519525a947dedc
@@ -131,6 +135,8 @@ yuva422p9le         d5a5d50f414caa1563700207931f0e0a
 yuva444p            64bd3debe7c2b8cca91bc1d6e2a8d80e
 yuva444p10be        1291045203be7d60b9015fa7e34b5716
 yuva444p10le        75865370fb0c018fb8663958bafcdc51
+yuva444p12be        ff58aed06f42e1258ae04509bccaad0a
+yuva444p12le        00ab9d34f0fd4ba23b48e8ae4bf93c17
 yuva444p16be        f817caf234aaf5848b2bc9679582ed56
 yuva444p16le        b32ad623fc423f897ff31c4073ea2a6f
 yuva444p9be         48498d994c3c9070f31773e39da306dd
diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index 857ded1c410ae..d59c982880d4a 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field
@@ -76,6 +76,8 @@ rgba64le            dfdba4de4a7cac9abf08852666c341d3
 uyvy422             1c49e44ab3f060e85fc4a3a9464f045e
 xyz12be             d2fa69ec91d3ed862f2dac3f8e7a3437
 xyz12le             02bccd5e0b6824779a1f848b0ea3e3b5
+ya16be              c0ce74d2a3da641ea634a3898dda7455
+ya16le              9b098d425e5bc27fa8a8ac8b176d592d
 ya8                 28cea4f98ed452bd3da9c752e5e3399c
 yuv410p             a85920d6bd26f51306e2ecbe71d1c554
 yuv411p             9106e283d5dbcfba01c611886d58871a
@@ -127,6 +129,8 @@ yuva420p9le         00d48d3b1b0383b92ccdb26543cfb7f8
 yuva422p            801bfb0d4c9e7a524d97bfa11f7995fd
 yuva422p10be        a0d743770698deea2be35ceb2bb0eaad
 yuva422p10le        6a0861e5c887f3213dc34d8674611950
+yuva422p12be        87dc9f7efc97d5e1bbea7564eca62a2a
+yuva422p12le        137f75ade26b6d631d3998127e411a00
 yuva422p16be        c870b697714221ef9168b6984294e501
 yuva422p16le        fef7d5f29dfec9ee36216b7a610b4b39
 yuva422p9be         83ff1ae035bb587dc59acf4121602dac
@@ -134,6 +138,8 @@ yuva422p9le         75baca2a93a8e76b27ea208858213819
 yuva444p            cfbd995b538c34dee9c107ecf875b283
 yuva444p10be        de8b80b4c3a12624412530f09de6dd39
 yuva444p10le        04c93877f724a29b47bc8c0a10a3036b
+yuva444p12be        a9efd9944314bb7fc9ba359bc3446446
+yuva444p12le        acc831041bbf13e75636c0426f82aa19
 yuva444p16be        b10fd7c1b61ac22bdb285f0d91a390f1
 yuva444p16le        cac82ffc36b7052747407663fc5ed510
 yuva444p9be         a6f66d08b3370fdd90987a6143b7b91c
diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index fc003457fcce9..1996649e10af5 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder
@@ -67,6 +67,8 @@ rgba64le            b34e6e30621ae579519a2d91a96a0acf
 uyvy422             75de70e31c435dde878002d3f22b238a
 xyz12be             15f5cda71de5fef9cec5e75e3833b6bc
 xyz12le             7be6c8781f38c21a6b8f602f62ca31e6
+ya16be              205d6a21890c1f057c9c20fbbba590e2
+ya16le              f35616fdb5d3fbf767a4f11118cf8ad1
 ya8                 055ac5ab5ff8533dd319edc17a398af1
 yuv411p             e4a040e0e786c4dae07d9d3f90a54905
 yuv422p             16ce67249c6ce7ef57a433646ad6dfc1
@@ -94,6 +96,8 @@ yuv444p9le          9bb3dde9fadb3c03a0ef8305b5f67a7e
 yuva422p            c470da57cde22b452deb8874df710dce
 yuva422p10be        a2ffa080ae661c1033aa38be28002922
 yuva422p10le        a4f5e8006f8ea3f964206605045e0fe0
+yuva422p12be        ec44f9fc083d5f39051f2a65daea6890
+yuva422p12le        5fd785e19f68538ba76e6e505447239c
 yuva422p16be        929ec5d4bcfac13ba8a02f12e3f5fc7f
 yuva422p16le        7155a6036e25719f2e4d2d47212f077d
 yuva422p9be         b1af62d553d790e041e80cf89608efe3
@@ -101,6 +105,8 @@ yuva422p9le         65c80faeb0021deb232ee451f77c89e3
 yuva444p            9ac54882677f1fc5553a97ea558e942d
 yuva444p10be        3326267d176a8dfed2c7511b926962e6
 yuva444p10le        bfe957d1b5fea3585b3942cbfdd529ad
+yuva444p12be        ecaa57b7f0b04c3e8779a5f5dcd23032
+yuva444p12le        6b645eeaff8363d82ff686b3b020cae3
 yuva444p16be        2f80d411847856e1364659dee8b23485
 yuva444p16le        5796be8d66371b60037fc8053c27e900
 yuva444p9be         a83599c0e9fca08f6b7c6e02c2413fcf
diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index e97c185f6e647..f171a95fa3ef6 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip
@@ -73,6 +73,8 @@ rgba64be            c910444019f4cfbf4d995227af55da8d
 rgba64le            0c810d8b3a6bca10321788e1cb145340
 xyz12be             25f90259ff8a226befdaec3dfe82996e
 xyz12le             926c0791d59aaff61b2778e8ada3316d
+ya16be              632b2e6e8e20c3edcfe99356fa7fca9e
+ya16le              e2ff5a2fb969c70dcc862937f9224873
 ya8                 4ad5920716de3d2fbbc49f95adb60345
 yuv410p             c49fd0c55c41185b1580aac77211992b
 yuv411p             c416371077dce13d31bf1dc706111ae7
@@ -124,6 +126,8 @@ yuva420p9le         15e6654f50eddf97f852070d8dd8f009
 yuva422p            902dc911ee175d9b1f2addcc03aab9a3
 yuva422p10be        fb927978446e975f7424600495a7acde
 yuva422p10le        1d0b5a5946b824810b1b83a7ce429274
+yuva422p12be        5407e82d1c4356472ad549e5ae38ea1e
+yuva422p12le        5dc0e1910b248e12989ae937ddeec342
 yuva422p16be        88977e5ddcee9377525c3f251fdeb25e
 yuva422p16le        d6a50a91be5fc720ca97fc2f6fbd3bb2
 yuva422p9be         996d8d73648c602c73c51e2d95b6cf9b
@@ -131,6 +135,8 @@ yuva422p9le         2d64684a8844967e9ffe827c04e3c917
 yuva444p            53247be24822d158c0866cd58d2ceeba
 yuva444p10be        9f3c25c3b9d26787d0bf8da2b15c75c6
 yuva444p10le        1721b843b721629da7a0cd76ac665708
+yuva444p12be        9feb6e1d8bfe9b7fd5e352465f278704
+yuva444p12le        12a949ebcf0f94e4a2f9915ef778680a
 yuva444p16be        635fb2720470e0042a7c9b70bf908a2c
 yuva444p16le        6d5bd13f8bb804bd1158c1af732a24e1
 yuva444p9be         3d3e7491192aa4e396015bf8e3755a24
diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index a006fc19a3a8b..0839a77ed207d 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il
@@ -75,6 +75,8 @@ rgba64le            a8a2daae04374a27219bc1c890204007
 uyvy422             d6ee3ca43356d08c392382b24b22cda5
 xyz12be             7c7d54c55f136cbbc50b18029f3be0b3
 xyz12le             090ba6b1170baf2b1358b43b971d33b0
+ya16be              bf2cf1e89c9fdb5bc10425db567ba2da
+ya16le              4e9c9097fae615b8a5f4c3b237f752f0
 ya8                 a38d6e288f582f1a04310232ed764afc
 yuv410p             dea1ab8843465adf5b8240b2d98fd85b
 yuv411p             8bf73777a5ff43c126be274245aceff1
@@ -126,6 +128,8 @@ yuva420p9le         fbc3300867510900fec84caf718e8601
 yuva422p            6c347a539965cd63cddfeec598858c11
 yuva422p10be        d34bf5f06ac5c67e12409aa111b4e21b
 yuva422p10le        ecb3cc8d77cee05e54d4de60831be159
+yuva422p12be        de287adba746037e68ff81cf7dc59e57
+yuva422p12le        761e18a8a73b55113e57ec39de33a4e6
 yuva422p16be        47a8d3c98492d31bce0ed0d6f74f5131
 yuva422p16le        a950acae1f7ffc47a0951a40e3309b09
 yuva422p9be         0217ba7015245e017ceb0a3eeb39fa56
@@ -133,6 +137,8 @@ yuva422p9le         1bbb5ba2b649bbe47c928012deb4e3ae
 yuva444p            c8153b52159fb0f392459b8be406c294
 yuva444p10be        b0456e5ecdd87c983573e1016db85178
 yuva444p10le        f9b10500666ba220ce42b478a6785dae
+yuva444p12be        eb83a157ff3d750d3ee71c9c598c01b9
+yuva444p12le        6cc82bf7cc18194db81397debfc01951
 yuva444p16be        97f8cb6ed835c7c5cd2fb112b1e135c7
 yuva444p16le        47170401a8c348d3f05f6530607d066b
 yuva444p9be         d5c0170b41221a9607e6ae586880a383
diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index 5385036a82ef6..0609344c2af9e 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null
@@ -76,6 +76,8 @@ rgba64le            b91e1d77f799eb92241a2d2d28437b15
 uyvy422             3bcf3c80047592f2211fae3260b1b65d
 xyz12be             a1ef56bf746d71f59669c28e48fc8450
 xyz12le             831ff03c1ba4ef19374686f16a064d8c
+ya16be              2f2c27f1854ac00c73d13861dcab2705
+ya16le              2c1fbd127c9f0435adc0e9b2ea3f486b
 ya8                 dbb99fbcdc204aaa1a7397ff561f1a67
 yuv410p             5d4d992a7728431aa4e0700f87fb7fd8
 yuv411p             7e1300e89f5bc07939e2c4a6acbdf267
@@ -127,6 +129,8 @@ yuva420p9le         0e9c9803aaaddc9f38e419de587793c2
 yuva422p            8f6bb778647e5dee62f544d646321171
 yuva422p10be        2f7204c93a1e5bfb04538852f99e4074
 yuva422p10le        c8082548aca999edde77ef2749b1ff4c
+yuva422p12be        19f8205cca3d19bfd4ad9cd2bfb07a0e
+yuva422p12le        38e6c7d87332852d660df5594529fa6e
 yuva422p16be        427ad55f7464121bb3ce164641772bc6
 yuva422p16le        af6f8df651275de58129e010bb45ffcd
 yuva422p9be         47579cc2cea861ca1461589b80c4720f
@@ -134,6 +138,8 @@ yuva422p9le         aaeab2bfe80a29390e8666103ed8bb40
 yuva444p            459fad5abfd16db9bb6a52761dc74cc1
 yuva444p10be        fa16bae4fc25429deb944ffa9f5b28a0
 yuva444p10le        92f820d3481b7ebcb48b98a73e7b4c90
+yuva444p12be        d8193387128a1b1efc51f36a12c85385
+yuva444p12le        9c17bf72b083f93040d1e19516b54de7
 yuva444p16be        c80c1899789a6411d0025730efc8f01c
 yuva444p16le        2ed56ea50fafda4d226c9b133755dad8
 yuva444p9be         4903fde22b15d28da90761ac1cfcb1c5
diff --git a/tests/ref/fate/filter-pixfmts-pad b/tests/ref/fate/filter-pixfmts-pad
index 71f5ddf10093d..c863d541f60a1 100644
--- a/tests/ref/fate/filter-pixfmts-pad
+++ b/tests/ref/fate/filter-pixfmts-pad
@@ -27,6 +27,7 @@ rgb0                78d500c8361ab6423a4826a00268c908
 rgb24               17f9e2e0c609009acaf2175c42d4a2a5
 rgba                b157c90191463d34fb3ce77b36c96386
 xyz12le             85abf80b77a9236a76ba0b00fcbdea2d
+ya16le              17cbe58356d56ff0f0f00280a31e6ca6
 ya8                 5fc0f471207ddf7aa01b07027d56b672
 yuv410p             cb871dcc1e84a7ef1d21f9237b88cf6e
 yuv411p             aec2c1740de9a62db0d41f4dda9121b0
@@ -57,10 +58,12 @@ yuva420p16le        ff45de790e7bdd3c25d8aad51289aba9
 yuva420p9le         8ef1f3b3e01b5ce222e4caeec3dec396
 yuva422p            91dcecc4bfdff1f0db9ef8b9b5b9ac2a
 yuva422p10le        1ba292c74c8646fd077a6116142b1bc8
+yuva422p12le        50e59879a9b64ac0fc7f76a110537baa
 yuva422p16le        383226550fe9c93d6e8bf0d45d1423d1
 yuva422p9le         0fb76788c905c6d448143aa3c5eae116
 yuva444p            fb60941a57596b277417a3c7c00aa194
 yuva444p10le        251ea4ead8300d752eb355a08cbb0352
+yuva444p12le        f38b7c5747b43bcc6d647f143cb069cf
 yuva444p16le        5b65287e1862d2d9f1ad2cfdcde94661
 yuva444p9le         e6946c10b94c271e7ea24b3bcff314e1
 yuvj411p            87dbac57b211ab4823c1abbd702f1516
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index 05879ee3c7b17..3226e8b53c23e 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -76,6 +76,8 @@ rgba64le            783d2779adfafe3548bdb671ec0de69e
 uyvy422             aeb4ba4f9f003ae21f6d18089198244f
 xyz12be             c7ba8345998c0141ddc079cdd29b1a40
 xyz12le             95f5d3a0de834cc495c9032a14987cde
+ya16be              372195dc947eee1bcb6f733a3544272e
+ya16le              3923551514cfa588cf528e6f48e8cb9a
 ya8                 0a9db5bb4b009de9197eede5e9d19e16
 yuv410p             e8f49b5fb9335b62c074f7f8bb0234fc
 yuv411p             5af32557c93beb482e26e7af693104c6
@@ -127,6 +129,8 @@ yuva420p9le         1fdfcde9cf7ef1d41002175b5793435f
 yuva422p            3a80cb3e08782033aabfeff1e8969403
 yuva422p10be        94b13db95ceb970ded9773c095ade7d4
 yuva422p10le        544965627ee94964b9cc57750c385b31
+yuva422p12be        4285def72fb83a9c918d78453220a4b7
+yuva422p12le        accda73617ccfa3c45ced5e2a48a8323
 yuva422p16be        95be33f9599958669f3c1cb24e54a5e5
 yuva422p16le        ed83cc6e8fb70306d0622962504d1fcf
 yuva422p9be         5ff6aeca90b8392133d7b1addfbd639b
@@ -134,6 +138,8 @@ yuva422p9le         451965b076c628b0eabaa4ef33f98ded
 yuva444p            f120326d9d940c9ac5cf5fd160969b82
 yuva444p10be        1838cd61a24fda56a7379c9cd9cb1629
 yuva444p10le        c5c2f602caab63c58954f5a80691436a
+yuva444p12be        befc6a3602bc58df4f4e8490ae6782cf
+yuva444p12le        8d13b714cdbadd2aa4e16c1ec673c0e2
 yuva444p16be        39ca2e32aa61b210b6c528855d24a16b
 yuva444p16le        cd2e0a001d8175f2204b2eb411c6a801
 yuva444p9be         58add24afbf43ff0ff7079cc1948fb56
diff --git a/tests/ref/fate/filter-pixfmts-swapuv b/tests/ref/fate/filter-pixfmts-swapuv
index e02380988fa07..676a4406990fb 100644
--- a/tests/ref/fate/filter-pixfmts-swapuv
+++ b/tests/ref/fate/filter-pixfmts-swapuv
@@ -48,6 +48,8 @@ yuva420p9le         7f445bfa33b6875cc65b47cee24c3154
 yuva422p            a1bd5c90b7bde1d3657025612a6f231e
 yuva422p10be        05bcc2394cfb0406d37b942423f6dbe7
 yuva422p10le        c166994709af82b9cdcebe4a7057348c
+yuva422p12be        30f4f9d6a2a623c45c4474e4aac8f7b9
+yuva422p12le        79dfb02173b19beb76b9f0b76a96d1d5
 yuva422p16be        9599ed5721f52028c3acfeb7b33ddc87
 yuva422p16le        dbb74e0bb2cf732e39e669f60ec6e1cf
 yuva422p9be         39093fec98b02e83b7e20dab3e5e4cf6
@@ -55,6 +57,8 @@ yuva422p9le         a0a6dd2e62ea01e8a2268c7d88385252
 yuva444p            ff2441de373fbfaed7bc199a7abe5a3e
 yuva444p10be        857043b712213eee329d2fb584d74c3e
 yuva444p10le        c3c451b3605af959a5d80146f4170e9d
+yuva444p12be        d60fd21b48fa6a3677a9070d5b2eafdb
+yuva444p12le        8f43565353cf0ce0a06f6b1261e42a53
 yuva444p16be        356d72791dfd91861b21630e315d40cb
 yuva444p16le        176591ce074ba8befc5fb279446ca1be
 yuva444p9be         675f0ed3e6572b05f06d9e44611bdff5
diff --git a/tests/ref/fate/filter-pixfmts-transpose b/tests/ref/fate/filter-pixfmts-transpose
index 44644099c6c51..7bcb88c38b690 100644
--- a/tests/ref/fate/filter-pixfmts-transpose
+++ b/tests/ref/fate/filter-pixfmts-transpose
@@ -72,6 +72,8 @@ rgba64be            a60041217f4c0cd796d19d3940a12a41
 rgba64le            ad47197774858858ae7b0c177dffa459
 xyz12be             68e5cba640f6e4ef72dff950e88b5342
 xyz12le             8b6b6a6db4d7561e80db88ccaecce7a9
+ya16be              41b7ad48693e3ce8b4d3220016ef6b15
+ya16le              8ea70315667011a6ed50b6750f42b142
 ya8                 d4b7a62f80681fa44c977ff3a64f4ce4
 yuv410p             4c0143429edd30aa01493447c90132ea
 yuv420p             2fa5b2201c75034206cc20e2c6134aed
@@ -106,6 +108,8 @@ yuva420p9le         6e5cb3e761a9c45e26370307c49f8831
 yuva444p            4f9e649fbc2c0c91178d1576e462bb31
 yuva444p10be        9450fbac30b5f9da7414c895695591a9
 yuva444p10le        84a93637bf2c7e498380beff9b1fc503
+yuva444p12be        3eae4234e38ce068600dd7fdb39e04d2
+yuva444p12le        c4a4a3601a7fb9ef02770384e155f3d7
 yuva444p16be        9fd2f00ea9bef8e488228bc0b47b28cb
 yuva444p16le        ae9fd8d1baea0f8626b963816d667d2d
 yuva444p9be         4ce11ae57780f74c78cdd5c06be4bded
diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index 51628f14ce76e..933ea0c8155af 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip
@@ -76,6 +76,8 @@ rgba64le            48f45b10503b7dd140329c3dd0d54c98
 uyvy422             3a237e8376264e0cfa78f8a3fdadec8a
 xyz12be             810644e008deb231850d779aaa27cc7e
 xyz12le             829701db461b43533cf9241e0743bc61
+ya16be              01fa2780505ce1bd187ae7f9dcc5fcc3
+ya16le              492f528782acf22769b0b633187be212
 ya8                 4299c6ca3b470a7d8a420e26eb485b1d
 yuv410p             c7adfe96c8e043a6cb9290c39bf8063c
 yuv411p             3fce29db403a25f81be39e01aaf6ff3a
@@ -127,6 +129,8 @@ yuva420p9le         aec21fa8f1088b7898cd80a30f382224
 yuva422p            39707b0dfdaadeefa20819080365db15
 yuva422p10be        53fbfe6d7eb01e2007003383c5d91850
 yuva422p10le        df1f95630ccd7bf05b95b6b3061cbeef
+yuva422p12be        6b13c0f628b4369c2ecabb3aaf02dbc6
+yuva422p12le        5413c3f022d30fbdd9f1d4e35468c8c4
 yuva422p16be        35ad91fa92b04e13d6b557d2f250ade1
 yuva422p16le        8fb93970118fde962f5dbcd156966722
 yuva422p9be         2b16b2dc102ad688a3023f30e3c6f9d9
@@ -134,6 +138,8 @@ yuva422p9le         6a23d290358691a9d8bab49582265764
 yuva444p            442a690385166bed3e785d9262c1c501
 yuva444p10be        bb6d52902c30f5cc63ddc3fbe3346bf5
 yuva444p10le        6e43f7c44e070fce492dcb1b038de85e
+yuva444p12be        437e75242255bcaeb853c35284f3b58c
+yuva444p12le        5864b489dff8cd98f46ca86b32f357e2
 yuva444p16be        b8801dccf64b3eadc2a5b5db67ae0b0f
 yuva444p16le        8e72ae66754badf5d1eeb094e6bf0ddc
 yuva444p9be         bcd845394351ca6d15e947342802957d
diff --git a/tests/ref/fate/gifenc-bgr4_byte b/tests/ref/fate/gifenc-bgr4_byte
index 43d72e245a1ad..2cf3b7f93cd61 100644
--- a/tests/ref/fate/gifenc-bgr4_byte
+++ b/tests/ref/fate/gifenc-bgr4_byte
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      508, 0xa1b80fc0, S=1,        1, 0x00010001
-0,          1,          1,        1,      213, 0x4f554bd7, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,          2,          2,        1,      131, 0x283b2988, S=2,        1, 0x00010001,     1024, 0xae3a7c81
-0,          3,          3,        1,      384, 0xc4fea72a, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,          4,          4,        1,      381, 0x050ba2b8, S=2,        1, 0x00010001,     1024, 0x9e4a7c81
-0,          5,          5,        1,      430, 0x00cfb2ae, S=2,        1, 0x00010001,     1024, 0x9e4a7c81
-0,          6,          6,        1,      518, 0xc8e5d827, S=2,        1, 0x00010001,     1024, 0x9e4a7c81
-0,          7,          7,        1,      535, 0x326ce62a, S=2,        1, 0x00010001,     1024, 0x9e4a7c81
-0,          8,          8,        1,      438, 0x34d6b7c0, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,          9,          9,        1,      923, 0x9fb1a37c, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         10,         10,        1,      694, 0xf20449a5, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         11,         11,        1,     1194, 0x67cd2ab5, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         12,         12,        1,     1291, 0x1d23539d, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         13,         13,        1,     1245, 0x065f32e6, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         14,         14,        1,     1330, 0x83ec51a4, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         15,         15,        1,     1276, 0x2acf38dc, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         16,         16,        1,     1475, 0x4cd197ef, S=2,        1, 0x00010001,     1024, 0xb6327c81
-0,         17,         17,        1,     1784, 0xd1e84ae6, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         18,         18,        1,     1675, 0x092dfa86, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         19,         19,        1,     1509, 0x639aaa00, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         20,         20,        1,     1705, 0xfd3719d5, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         21,         21,        1,     1745, 0x8a761db4, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         22,         22,        1,     1642, 0x18830245, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         23,         23,        1,     1718, 0x3c8d1ebe, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         24,         24,        1,     1900, 0x2ea879d1, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         25,         25,        1,     1807, 0x02b35230, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         26,         26,        1,     1915, 0x22d48344, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         27,         27,        1,     2100, 0x55fcd063, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         28,         28,        1,     2700, 0x7cc5f08b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         29,         29,        1,     2673, 0xb997a80d, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         30,         30,        1,     2895, 0xab69484d, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         31,         31,        1,     3257, 0xf753cf24, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         32,         32,        1,     3179, 0x34f2c13b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         33,         33,        1,     3296, 0x7c06e72f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         34,         34,        1,     3600, 0x4ca67634, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         35,         35,        1,     3699, 0xabe89fe3, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         36,         36,        1,     3814, 0x1869d3f4, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         37,         37,        1,     3627, 0x19bd7da7, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         38,         38,        1,     2950, 0x048a6055, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         39,         39,        1,     3086, 0x64ec8fc2, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         40,         40,        1,     3094, 0x1a388553, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         41,         41,        1,     3456, 0x01432c82, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         42,         42,        1,     4108, 0xf9505c66, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         43,         43,        1,     4217, 0x7f985ba4, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         44,         44,        1,     3613, 0xd0684d83, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         45,         45,        1,     3910, 0x0070e692, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         46,         46,        1,     4461, 0x5cc9e33d, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         47,         47,        1,     4593, 0x33a32dd1, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         48,         48,        1,     4822, 0x59549883, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         49,         49,        1,     5398, 0xb7bac31e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         50,         50,        1,     5266, 0x21c695aa, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         51,         51,        1,     5416, 0xf305e3ed, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         52,         52,        1,     5519, 0x857d071f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         53,         53,        1,     5701, 0x8f885c9c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         54,         54,        1,     6160, 0x48523e83, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         55,         55,        1,     6233, 0x8fd2511e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         56,         56,        1,     5911, 0x92d4c516, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         57,         57,        1,     5997, 0xbd7cfa15, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         58,         58,        1,     5946, 0x8f5fedff, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         59,         59,        1,     6468, 0x45c0cb8c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         60,         60,        1,     6737, 0x4e1e39ac, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         61,         61,        1,     6275, 0x1d5e8f4c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         62,         62,        1,     6641, 0x844b3aad, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         63,         63,        1,     6378, 0x52568640, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         64,         64,        1,     6257, 0xfabc585f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         65,         65,        1,     6908, 0xf261701c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         66,         66,        1,     7230, 0xb4f524ce, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         67,         67,        1,     7556, 0x89c1a712, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         68,         68,        1,     7413, 0x553970a4, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         69,         69,        1,     7476, 0x24d2a761, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         70,         70,        1,     7596, 0xf072e431, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         71,         71,        1,     7756, 0x131205c0, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         72,         72,        1,     8015, 0xf4536a7f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         73,         73,        1,     8128, 0xba80be2b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         74,         74,        1,     8101, 0x44ceb3a2, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         75,         75,        1,     7863, 0x55043dfd, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         76,         76,        1,     7960, 0x38399182, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         77,         77,        1,     8238, 0x1d52ecf3, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         78,         78,        1,     8321, 0xd8d24a5c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         79,         79,        1,     8562, 0x4a0cc02b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         80,         80,        1,     8746, 0x2db40da7, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         81,         81,        1,     8578, 0x46f9a4c1, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         82,         82,        1,     8878, 0xf58d5a19, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         83,         83,        1,     9077, 0x78de57f6, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         84,         84,        1,     9310, 0x8c10f77a, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         85,         85,        1,     9394, 0x741f431e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         86,         86,        1,     9161, 0x6f499587, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         87,         87,        1,     9462, 0x628936c3, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         88,         88,        1,     9650, 0x4cb4936e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         89,         89,        1,     9701, 0x5e069c40, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         90,         90,        1,     9523, 0x66a13c83, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         91,         91,        1,     9891, 0x43ea0e93, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         92,         92,        1,    10005, 0x96a849e7, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         93,         93,        1,    10038, 0x68032d25, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         94,         94,        1,    10086, 0xef59458d, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         95,         95,        1,    10438, 0x3466fed0, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         96,         96,        1,    10583, 0x8bdd5477, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         97,         97,        1,    10581, 0x69d27fee, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         98,         98,        1,    10807, 0xde62d6e3, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,         99,         99,        1,    11111, 0x34eb4c13, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        100,        100,        1,    11194, 0x584f6b73, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        101,        101,        1,    11240, 0xc90ba13f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        102,        102,        1,    11483, 0x59c4f3c5, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        103,        103,        1,    11680, 0xc62c5bc1, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        104,        104,        1,    11785, 0xc9bab793, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        105,        105,        1,    11436, 0xc9c40809, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        106,        106,        1,    11928, 0x4b77c9a7, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        107,        107,        1,    11932, 0x722abcbe, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        108,        108,        1,    12281, 0x0d136f53, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        109,        109,        1,    12334, 0x04a47f78, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        110,        110,        1,    12452, 0xa02db188, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        111,        111,        1,    12695, 0x1a813b2e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        112,        112,        1,    12668, 0x81b24f79, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        113,        113,        1,    12957, 0x4da59f8c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        114,        114,        1,    13054, 0x7abedf5a, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        115,        115,        1,    13147, 0x138f2bbd, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        116,        116,        1,    13171, 0x43c1195f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        117,        117,        1,    13198, 0x2c8d58d4, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        118,        118,        1,    13211, 0x12c36193, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        119,        119,        1,    13210, 0xfe496107, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        120,        120,        1,    13467, 0x4d8ea128, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        121,        121,        1,    13665, 0x94caddde, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        122,        122,        1,    13692, 0xe38febd9, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        123,        123,        1,    13821, 0xee592e62, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        124,        124,        1,    13946, 0xceb09235, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        125,        125,        1,    14063, 0x7361d2f5, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        126,        126,        1,    14124, 0x226bcac1, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        127,        127,        1,    14331, 0x0649512b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        128,        128,        1,    14469, 0x0d7da45b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        129,        129,        1,    14536, 0x73cca242, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        130,        130,        1,    14608, 0x1f3dd14e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        131,        131,        1,    14898, 0xd13d258e, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        132,        132,        1,    14978, 0xfa049fea, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        133,        133,        1,    15142, 0x1dfad60c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        134,        134,        1,    15129, 0x5962bae7, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        135,        135,        1,    15243, 0x2c2c113b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        136,        136,        1,    15337, 0x3cab623b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        137,        137,        1,    15638, 0xbff3a100, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        138,        138,        1,    15912, 0x13bf1fb2, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        139,        139,        1,    16041, 0x01134246, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        140,        140,        1,    16228, 0xe2f80035, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        141,        141,        1,    16262, 0xc8d3ea51, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        142,        142,        1,    16371, 0xe7da07f2, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        143,        143,        1,    16661, 0x10ada592, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        144,        144,        1,    16917, 0xbfb717e5, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        145,        145,        1,    17149, 0x4074ca41, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        146,        146,        1,    17172, 0xf749b49f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        147,        147,        1,    17315, 0x2abea8a0, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        148,        148,        1,    17397, 0x14f71122, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        149,        149,        1,    17431, 0xce49f2d3, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        150,        150,        1,    17576, 0x7c6552ad, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        151,        151,        1,    17764, 0x1d198d60, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        152,        152,        1,    17826, 0xe1727f57, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        153,        153,        1,    17918, 0xb78d9b9f, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        154,        154,        1,    17823, 0xc9fabf19, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        155,        155,        1,    18142, 0xeb5b21a9, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        156,        156,        1,    18257, 0x7b38822c, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        157,        157,        1,    18337, 0xd395c279, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        158,        158,        1,    18293, 0x6c3b3766, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        159,        159,        1,    18418, 0x2abcbcf8, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        160,        160,        1,    18607, 0x79424730, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        161,        161,        1,    18916, 0x8707bbc6, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        162,        162,        1,    19073, 0xd82c03f6, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        163,        163,        1,    19168, 0xb7d6fe27, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        164,        164,        1,    19210, 0x79f301eb, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        165,        165,        1,    19398, 0x0a5663c6, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        166,        166,        1,    19480, 0x4fe09e5b, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        167,        167,        1,    19659, 0xab971088, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        168,        168,        1,    19672, 0x2e331553, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        169,        169,        1,    19936, 0x2eea628a, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        170,        170,        1,    19975, 0xd6bb9ab2, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        171,        171,        1,    20021, 0xf7e98dc5, S=2,        1, 0x00010001,     1024, 0xde0a7c81
-0,        172,        172,        1,    20060, 0x20017807, S=2,        1, 0x00010001,     1024, 0xde0a7c81
+0,          0,          0,        1,     1297, 0x53e8b1c1
+0,          1,          1,        1,      221, 0x52d24d05, F=0x0
+0,          2,          2,        1,      139, 0xc9e32ab4, F=0x0
+0,          3,          3,        1,      392, 0x9244a858, F=0x0
+0,          4,          4,        1,      389, 0xc5bea3e0, F=0x0
+0,          5,          5,        1,      438, 0xfa2ab3d6, F=0x0
+0,          6,          6,        1,      526, 0x281ed94f, F=0x0
+0,          7,          7,        1,      543, 0xa53ee752, F=0x0
+0,          8,          8,        1,      446, 0x41d0b8ee, F=0x0
+0,          9,          9,        1,      931, 0xe8efa4aa, F=0x0
+0,         10,         10,        1,      702, 0x2d1c4ad3, F=0x0
+0,         11,         11,        1,     1202, 0xf0cc2be3, F=0x0
+0,         12,         12,        1,     1299, 0x189f54cb, F=0x0
+0,         13,         13,        1,     1253, 0xcb883414, F=0x0
+0,         14,         14,        1,     1338, 0xad6a52d2, F=0x0
+0,         15,         15,        1,     1284, 0x14993a0a, F=0x0
+0,         16,         16,        1,     1483, 0x216c991d, F=0x0
+0,         17,         17,        1,     1792, 0x58eb4c1e, F=0x0
+0,         18,         18,        1,     1683, 0x0b49fbbe, F=0x0
+0,         19,         19,        1,     1517, 0x9b57ab38, F=0x0
+0,         20,         20,        1,     1713, 0x23f21b0d, F=0x0
+0,         21,         21,        1,     1753, 0xe1e21eec, F=0x0
+0,         22,         22,        1,     1650, 0xf258037d, F=0x0
+0,         23,         23,        1,     1726, 0x73111ff6, F=0x0
+0,         24,         24,        1,     1908, 0x430b7b09, F=0x0
+0,         25,         25,        1,     1815, 0xa5af5368, F=0x0
+0,         26,         26,        1,     1923, 0x497f847c, F=0x0
+0,         27,         27,        1,     2108, 0x5e2ed19b, F=0x0
+0,         28,         28,        1,     2708, 0x6064f1c3, F=0x0
+0,         29,         29,        1,     2681, 0x7c4ea945, F=0x0
+0,         30,         30,        1,     2903, 0x7cbf4985, F=0x0
+0,         31,         31,        1,     3265, 0x81f7d05c, F=0x0
+0,         32,         32,        1,     3187, 0x6077c273, F=0x0
+0,         33,         33,        1,     3304, 0x3632e867, F=0x0
+0,         34,         34,        1,     3608, 0x7961776c, F=0x0
+0,         35,         35,        1,     3707, 0x515aa11b, F=0x0
+0,         36,         36,        1,     3822, 0x4a03d52c, F=0x0
+0,         37,         37,        1,     3635, 0x67607edf, F=0x0
+0,         38,         38,        1,     2958, 0x18e8618d, F=0x0
+0,         39,         39,        1,     3094, 0x1f1990fa, F=0x0
+0,         40,         40,        1,     3102, 0xde16868b, F=0x0
+0,         41,         41,        1,     3464, 0x7e6f2dba, F=0x0
+0,         42,         42,        1,     4116, 0x91585d9e, F=0x0
+0,         43,         43,        1,     4225, 0x9c785cdc, F=0x0
+0,         44,         44,        1,     3621, 0x0d0a4ebb, F=0x0
+0,         45,         45,        1,     3918, 0xa70ae7ca, F=0x0
+0,         46,         46,        1,     4469, 0xa318e475, F=0x0
+0,         47,         47,        1,     4601, 0x1ae12f09, F=0x0
+0,         48,         48,        1,     4830, 0x57b999bb, F=0x0
+0,         49,         49,        1,     5406, 0x744cc456, F=0x0
+0,         50,         50,        1,     5274, 0x3d6996e2, F=0x0
+0,         51,         51,        1,     5424, 0xc587e525, F=0x0
+0,         52,         52,        1,     5527, 0xd5870857, F=0x0
+0,         53,         53,        1,     5709, 0xbd715dd4, F=0x0
+0,         54,         54,        1,     6168, 0xa5c13fbb, F=0x0
+0,         55,         55,        1,     6241, 0x46485256, F=0x0
+0,         56,         56,        1,     5919, 0xc0bcc64e, F=0x0
+0,         57,         57,        1,     6005, 0x5443fb4d, F=0x0
+0,         58,         58,        1,     5954, 0xe7efef37, F=0x0
+0,         59,         59,        1,     6476, 0x1aadccc4, F=0x0
+0,         60,         60,        1,     6745, 0x6af23ae4, F=0x0
+0,         61,         61,        1,     6283, 0x07049084, F=0x0
+0,         62,         62,        1,     6649, 0x2c1f3be5, F=0x0
+0,         63,         63,        1,     6386, 0xb9848778, F=0x0
+0,         64,         64,        1,     6265, 0xce725997, F=0x0
+0,         65,         65,        1,     6916, 0xdfac7154, F=0x0
+0,         66,         66,        1,     7238, 0x2ace2606, F=0x0
+0,         67,         67,        1,     7564, 0x8cf9a84a, F=0x0
+0,         68,         68,        1,     7421, 0xaa1a71dc, F=0x0
+0,         69,         69,        1,     7484, 0xc67ba899, F=0x0
+0,         70,         70,        1,     7604, 0x2479e569, F=0x0
+0,         71,         71,        1,     7764, 0x0a1906f8, F=0x0
+0,         72,         72,        1,     8023, 0x27206bb7, F=0x0
+0,         73,         73,        1,     8136, 0x7705bf63, F=0x0
+0,         74,         74,        1,     8109, 0xe05cb4da, F=0x0
+0,         75,         75,        1,     7871, 0xce733f35, F=0x0
+0,         76,         76,        1,     7968, 0x27ef92ba, F=0x0
+0,         77,         77,        1,     8246, 0x5fe7ee2b, F=0x0
+0,         78,         78,        1,     8329, 0x809e4b94, F=0x0
+0,         79,         79,        1,     8570, 0x179fc163, F=0x0
+0,         80,         80,        1,     8754, 0xdb870edf, F=0x0
+0,         81,         81,        1,     8586, 0x280ca5f9, F=0x0
+0,         82,         82,        1,     8886, 0x445e5b51, F=0x0
+0,         83,         83,        1,     9085, 0xba37592e, F=0x0
+0,         84,         84,        1,     9318, 0xe970f8b2, F=0x0
+0,         85,         85,        1,     9402, 0x37ee4456, F=0x0
+0,         86,         86,        1,     9169, 0x171196bf, F=0x0
+0,         87,         87,        1,     9470, 0x793837fb, F=0x0
+0,         88,         88,        1,     9658, 0x489294a6, F=0x0
+0,         89,         89,        1,     9709, 0x980c9d78, F=0x0
+0,         90,         90,        1,     9531, 0xc7a83dbb, F=0x0
+0,         91,         91,        1,     9899, 0x658f0fcb, F=0x0
+0,         92,         92,        1,    10013, 0x434c4b1f, F=0x0
+0,         93,         93,        1,    10046, 0x3cdf2e5d, F=0x0
+0,         94,         94,        1,    10094, 0xfeb546c5, F=0x0
+0,         95,         95,        1,    10446, 0xf0d10017, F=0x0
+0,         96,         96,        1,    10591, 0xf90f55af, F=0x0
+0,         97,         97,        1,    10589, 0xd4948126, F=0x0
+0,         98,         98,        1,    10815, 0x5cb2d81b, F=0x0
+0,         99,         99,        1,    11119, 0x25ca4d4b, F=0x0
+0,        100,        100,        1,    11202, 0xae566cab, F=0x0
+0,        101,        101,        1,    11248, 0x5731a277, F=0x0
+0,        102,        102,        1,    11491, 0x1021f4fd, F=0x0
+0,        103,        103,        1,    11688, 0x6cb05cf9, F=0x0
+0,        104,        104,        1,    11793, 0xf036b8cb, F=0x0
+0,        105,        105,        1,    11444, 0x46d90941, F=0x0
+0,        106,        106,        1,    11936, 0x204acadf, F=0x0
+0,        107,        107,        1,    11940, 0x4bddbdf6, F=0x0
+0,        108,        108,        1,    12289, 0x902d708b, F=0x0
+0,        109,        109,        1,    12342, 0xc85680b0, F=0x0
+0,        110,        110,        1,    12460, 0xf3beb2c0, F=0x0
+0,        111,        111,        1,    12703, 0x96493c66, F=0x0
+0,        112,        112,        1,    12676, 0xdc9250b1, F=0x0
+0,        113,        113,        1,    12965, 0x08dba0c4, F=0x0
+0,        114,        114,        1,    13062, 0xac2ce092, F=0x0
+0,        115,        115,        1,    13155, 0xb6552cf5, F=0x0
+0,        116,        116,        1,    13179, 0x03d61a97, F=0x0
+0,        117,        117,        1,    13206, 0x0d8a5a0c, F=0x0
+0,        118,        118,        1,    13219, 0x039862cb, F=0x0
+0,        119,        119,        1,    13218, 0xede6623f, F=0x0
+0,        120,        120,        1,    13475, 0x7672a260, F=0x0
+0,        121,        121,        1,    13673, 0xaf0ddf16, F=0x0
+0,        122,        122,        1,    13700, 0x1ec9ed11, F=0x0
+0,        123,        123,        1,    13829, 0xc6cb2f9a, F=0x0
+0,        124,        124,        1,    13954, 0x3f89936d, F=0x0
+0,        125,        125,        1,    14071, 0x72d2d42d, F=0x0
+0,        126,        126,        1,    14132, 0x6c34cbf9, F=0x0
+0,        127,        127,        1,    14339, 0x4c695263, F=0x0
+0,        128,        128,        1,    14477, 0xfbcda593, F=0x0
+0,        129,        129,        1,    14544, 0xb3d3a37a, F=0x0
+0,        130,        130,        1,    14616, 0xb704d286, F=0x0
+0,        131,        131,        1,    14906, 0xca9226c6, F=0x0
+0,        132,        132,        1,    14986, 0x54e8a122, F=0x0
+0,        133,        133,        1,    15150, 0x40bed744, F=0x0
+0,        134,        134,        1,    15137, 0x6c4ebc1f, F=0x0
+0,        135,        135,        1,    15251, 0xca081273, F=0x0
+0,        136,        136,        1,    15345, 0x4d266373, F=0x0
+0,        137,        137,        1,    15646, 0x3f64a238, F=0x0
+0,        138,        138,        1,    15920, 0xe12020ea, F=0x0
+0,        139,        139,        1,    16049, 0x6bbb437e, F=0x0
+0,        140,        140,        1,    16236, 0x31a6016d, F=0x0
+0,        141,        141,        1,    16270, 0x40f1eb89, F=0x0
+0,        142,        142,        1,    16379, 0xe4d0092a, F=0x0
+0,        143,        143,        1,    16669, 0x6f22a6ca, F=0x0
+0,        144,        144,        1,    16925, 0x564a191d, F=0x0
+0,        145,        145,        1,    17157, 0xf1c7cb79, F=0x0
+0,        146,        146,        1,    17180, 0xc4b3b5d7, F=0x0
+0,        147,        147,        1,    17323, 0xa670a9d8, F=0x0
+0,        148,        148,        1,    17405, 0xf499125a, F=0x0
+0,        149,        149,        1,    17439, 0xd76af40b, F=0x0
+0,        150,        150,        1,    17584, 0x364d53e5, F=0x0
+0,        151,        151,        1,    17772, 0xbc218e98, F=0x0
+0,        152,        152,        1,    17834, 0xcc19808f, F=0x0
+0,        153,        153,        1,    17926, 0x12639cd7, F=0x0
+0,        154,        154,        1,    17831, 0xb0f9c051, F=0x0
+0,        155,        155,        1,    18150, 0x574022e1, F=0x0
+0,        156,        156,        1,    18265, 0x73458364, F=0x0
+0,        157,        157,        1,    18345, 0x2d31c3b1, F=0x0
+0,        158,        158,        1,    18301, 0x9028389e, F=0x0
+0,        159,        159,        1,    18426, 0xe701be30, F=0x0
+0,        160,        160,        1,    18615, 0x1bfd4868, F=0x0
+0,        161,        161,        1,    18924, 0xa269bcfe, F=0x0
+0,        162,        162,        1,    19081, 0xb2f5052e, F=0x0
+0,        163,        163,        1,    19176, 0x0676ff5f, F=0x0
+0,        164,        164,        1,    19218, 0xfbb40323, F=0x0
+0,        165,        165,        1,    19406, 0x714664fe, F=0x0
+0,        166,        166,        1,    19488, 0x1acf9f93, F=0x0
+0,        167,        167,        1,    19667, 0x50bd11c0, F=0x0
+0,        168,        168,        1,    19680, 0xe322168b, F=0x0
+0,        169,        169,        1,    19944, 0x25b763c2, F=0x0
+0,        170,        170,        1,    19983, 0xfd109bea, F=0x0
+0,        171,        171,        1,    20029, 0x565d8efd, F=0x0
+0,        172,        172,        1,    20068, 0xadee793f, F=0x0
diff --git a/tests/ref/fate/gifenc-bgr8 b/tests/ref/fate/gifenc-bgr8
index 2aede83c73126..58767a6b04543 100644
--- a/tests/ref/fate/gifenc-bgr8
+++ b/tests/ref/fate/gifenc-bgr8
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      552, 0x271a2dd3, S=1,        1, 0x00010001
-0,          1,          1,        1,      297, 0x90168a95, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          2,          2,        1,      438, 0x91efce1b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          3,          3,        1,      450, 0x7c2dcfad, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          4,          4,        1,      547, 0xc131fd3b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          5,          5,        1,      614, 0x68182006, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          6,          6,        1,      642, 0x78bb1f5f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          7,          7,        1,      660, 0x35c033a2, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          8,          8,        1,      821, 0xaf30790b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,          9,          9,        1,     1157, 0x741c2da1, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         10,         10,        1,      179, 0x3a27517c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         11,         11,        1,     1333, 0x5ee76f3c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         12,         12,        1,     1638, 0x5f640e86, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         13,         13,        1,     1531, 0xccb8e437, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         14,         14,        1,     1720, 0xc95d45ec, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         15,         15,        1,     1910, 0x56cc831e, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         16,         16,        1,     2124, 0x9cc8e130, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         17,         17,        1,     2248, 0x05a325b1, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         18,         18,        1,     2311, 0xdc633703, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         19,         19,        1,     2408, 0x91c26f3e, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         20,         20,        1,     2601, 0x8cf3c157, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         21,         21,        1,     2687, 0x8f6400e6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         22,         22,        1,     2784, 0xaa880e55, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         23,         23,        1,     2884, 0x46f546f6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         24,         24,        1,     2982, 0x807c7ad5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         25,         25,        1,     3101, 0xbcc89bec, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         26,         26,        1,     3253, 0xd032f3fa, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         27,         27,        1,     3329, 0xe4d42430, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         28,         28,        1,     3572, 0xf8058aa0, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         29,         29,        1,     3807, 0x3d2af9f3, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         30,         30,        1,     2750, 0x814d1c33, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         31,         31,        1,     4031, 0x3b077006, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         32,         32,        1,     3025, 0x86729c1c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         33,         33,        1,     4295, 0xf71b0b38, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         34,         34,        1,     2044, 0x5adcb93b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         35,         35,        1,     3212, 0xcf79eeed, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         36,         36,        1,     2292, 0xb4386334, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         37,         37,        1,     3633, 0x0010992f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         38,         38,        1,     3552, 0x23697490, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         39,         39,        1,     3690, 0x62afdbb8, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         40,         40,        1,     1559, 0x5baef54a, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         41,         41,        1,      954, 0xca75ca79, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         42,         42,        1,      273, 0x3687799b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         43,         43,        1,      930, 0x29f3b0c4, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         44,         44,        1,      271, 0x305e8094, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         45,         45,        1,      196, 0xf5ab51ee, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         46,         46,        1,     4299, 0x67ec0d55, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         47,         47,        1,     4895, 0xb394406c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         48,         48,        1,     4928, 0x233919d7, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         49,         49,        1,     4941, 0x58a357da, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         50,         50,        1,     4154, 0x21f2ac33, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         51,         51,        1,     4678, 0xab3cc050, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         52,         52,        1,     4741, 0x1974b581, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         53,         53,        1,     4982, 0x891456d5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         54,         54,        1,     5179, 0x860fc6a1, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         55,         55,        1,     5046, 0xce9183d3, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         56,         56,        1,     5140, 0xa6d7b9af, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         57,         57,        1,     4301, 0x03b6ef3f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         58,         58,        1,     5079, 0xa8d59e01, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         59,         59,        1,     5284, 0xea34e3b3, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         60,         60,        1,     5426, 0x556a15cd, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         61,         61,        1,     4645, 0x061e8936, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         62,         62,        1,     5263, 0x7536cf7d, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         63,         63,        1,     5221, 0x9fbac3ca, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         64,         64,        1,     5217, 0x02269bd2, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         65,         65,        1,     5395, 0x120fff66, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         66,         66,        1,     5220, 0x77cedcc5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         67,         67,        1,     5704, 0xba42dd96, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         68,         68,        1,     5636, 0xcb91a25b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         69,         69,        1,     5818, 0x8dc0df92, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         70,         70,        1,     5763, 0x51d5d5f0, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         71,         71,        1,     6116, 0x09558b48, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         72,         72,        1,     6069, 0x41926817, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         73,         73,        1,     5796, 0x7fbeda44, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         74,         74,        1,     5999, 0xe07d3770, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         75,         75,        1,     6220, 0x6607b06f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         76,         76,        1,     6374, 0x7628e533, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         77,         77,        1,     6465, 0xfe956b15, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         78,         78,        1,     7019, 0x6c9a1aef, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         79,         79,        1,     7255, 0x5fa5c1bf, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         80,         80,        1,     8197, 0xf11d6ef2, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         81,         81,        1,     8358, 0x027279e8, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         82,         82,        1,     7708, 0x607f8e8b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         83,         83,        1,     7412, 0x6bb2105f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         84,         84,        1,     7541, 0xfdc02154, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         85,         85,        1,     7948, 0x916ecd8b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         86,         86,        1,     8408, 0x1f97d414, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         87,         87,        1,     8056, 0x9cbf159c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         88,         88,        1,     7401, 0x2625addb, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         89,         89,        1,     7494, 0x2877eacb, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         90,         90,        1,     7806, 0xe32574a3, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         91,         91,        1,     7768, 0x25ed7ee7, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         92,         92,        1,     7749, 0x6d8e978e, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         93,         93,        1,     8047, 0xec4b150c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         94,         94,        1,     7618, 0x88cf30d5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         95,         95,        1,     7979, 0x0eb1cf2a, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         96,         96,        1,    12062, 0xb49d9125, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         97,         97,        1,    12317, 0x2d8fd6e9, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         98,         98,        1,    12217, 0x9b3be549, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,         99,         99,        1,    11227, 0x067e9118, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        100,        100,        1,    11108, 0x5e5b0afd, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        101,        101,        1,    11366, 0xb38e8d15, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        102,        102,        1,    11896, 0xeb3e35ca, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        103,        103,        1,    11479, 0xbf7581e9, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        104,        104,        1,    13395, 0x415b38d8, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        105,        105,        1,    12913, 0x61544631, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        106,        106,        1,    13864, 0xd39fe768, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        107,        107,        1,    13551, 0x76c167d1, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        108,        108,        1,    14041, 0x2f206888, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        109,        109,        1,    14144, 0x9ec030d3, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        110,        110,        1,    14277, 0xa84b3a9b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        111,        111,        1,    14424, 0xf5f1e06e, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        112,        112,        1,    14689, 0xbca0adb5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        113,        113,        1,    14598, 0xc1d45745, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        114,        114,        1,    15213, 0x8f3080fc, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        115,        115,        1,    15425, 0xb0aa8f59, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        116,        116,        1,    15595, 0x1406e5d5, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        117,        117,        1,    15598, 0x48ec7d08, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        118,        118,        1,    15863, 0x5381db7b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        119,        119,        1,    15717, 0xb87a1b87, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        120,        120,        1,    16078, 0x5bab2453, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        121,        121,        1,    16225, 0xa1f88113, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        122,        122,        1,    16135, 0x6af2f4e1, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        123,        123,        1,    16661, 0xf02a3343, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        124,        124,        1,    16619, 0xc71935a4, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        125,        125,        1,    16829, 0x29849844, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        126,        126,        1,    16944, 0x3423ae77, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        127,        127,        1,    17119, 0x609b4409, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        128,        128,        1,    17150, 0xf85dfd31, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        129,        129,        1,    17321, 0x38eccb10, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        130,        130,        1,    17395, 0x0ba08b85, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        131,        131,        1,    17666, 0x6fbc0264, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        132,        132,        1,    17730, 0x3dcc64a6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        133,        133,        1,    17934, 0xb539974b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        134,        134,        1,    17944, 0x2214ec94, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        135,        135,        1,    18238, 0x70f9ff1d, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        136,        136,        1,    18391, 0x4b149209, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        137,        137,        1,    18543, 0x45a1c02f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        138,        138,        1,    18939, 0x2789a88c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        139,        139,        1,    19145, 0x5daafd7a, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        140,        140,        1,    19120, 0x565f80e6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        141,        141,        1,    19130, 0xff70cc21, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        142,        142,        1,    19494, 0xbfa284db, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        143,        143,        1,    19534, 0x3d40743b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        144,        144,        1,    19747, 0x33c9b108, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        145,        145,        1,    20114, 0x9d223e36, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        146,        146,        1,    20257, 0xe7bdaf43, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        147,        147,        1,    20370, 0x0c5f1970, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        148,        148,        1,    20292, 0x6986d20e, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        149,        149,        1,    20491, 0xd88e4c08, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        150,        150,        1,    20647, 0x1aefaffc, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        151,        151,        1,    20666, 0x43e4aaaa, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        152,        152,        1,    21007, 0xa7ca3ef0, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        153,        153,        1,    21058, 0x06814351, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        154,        154,        1,    21153, 0x3c852b10, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        155,        155,        1,    21078, 0x8df15855, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        156,        156,        1,    21458, 0xd3a531d6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        157,        157,        1,    21669, 0x88baca53, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        158,        158,        1,    21581, 0xd692fa1f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        159,        159,        1,    21654, 0x30fb9061, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        160,        160,        1,    21987, 0xe7646d8b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        161,        161,        1,    22205, 0x0fc55b6a, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        162,        162,        1,    22475, 0x4bc4c032, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        163,        163,        1,    22490, 0x58ca23f6, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        164,        164,        1,    22460, 0xf9ceb0ac, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        165,        165,        1,    22861, 0xb05f0f84, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        166,        166,        1,    22746, 0x0df23a5c, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        167,        167,        1,    23165, 0xbd7147ad, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        168,        168,        1,    23273, 0x9781a34f, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        169,        169,        1,    23211, 0x69c7606b, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        170,        170,        1,    23648, 0xdafde037, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        171,        171,        1,    23675, 0x2a2147ed, S=2,        1, 0x00010001,     1024, 0xf351799f
-0,        172,        172,        1,    23874, 0x12c184b6, S=2,        1, 0x00010001,     1024, 0xf351799f
+0,          0,          0,        1,     1341, 0xe4e2af18
+0,          1,          1,        1,      305, 0xefa98bbd, F=0x0
+0,          2,          2,        1,      446, 0x9499cf43, F=0x0
+0,          3,          3,        1,      458, 0x8cb7d0d5, F=0x0
+0,          4,          4,        1,      555, 0x41f2fe63, F=0x0
+0,          5,          5,        1,      622, 0x3651212e, F=0x0
+0,          6,          6,        1,      650, 0x67542087, F=0x0
+0,          7,          7,        1,      668, 0x392934ca, F=0x0
+0,          8,          8,        1,      829, 0x6cd07a33, F=0x0
+0,          9,          9,        1,     1165, 0xb64b2ec9, F=0x0
+0,         10,         10,        1,      187, 0x114a52a4, F=0x0
+0,         11,         11,        1,     1341, 0x6ca57064, F=0x0
+0,         12,         12,        1,     1646, 0xcdd90fae, F=0x0
+0,         13,         13,        1,     1539, 0xbf75e55f, F=0x0
+0,         14,         14,        1,     1728, 0x96b14714, F=0x0
+0,         15,         15,        1,     1918, 0xffd08446, F=0x0
+0,         16,         16,        1,     2132, 0x3d5ae258, F=0x0
+0,         17,         17,        1,     2256, 0x359526d9, F=0x0
+0,         18,         18,        1,     2319, 0x553c382b, F=0x0
+0,         19,         19,        1,     2416, 0x7ac37066, F=0x0
+0,         20,         20,        1,     2609, 0x552bc27f, F=0x0
+0,         21,         21,        1,     2695, 0xbb0c020e, F=0x0
+0,         22,         22,        1,     2792, 0x46670f7d, F=0x0
+0,         23,         23,        1,     2892, 0x5674481e, F=0x0
+0,         24,         24,        1,     2990, 0x015a7bfd, F=0x0
+0,         25,         25,        1,     3109, 0xc73e9d14, F=0x0
+0,         26,         26,        1,     3261, 0x8a77f522, F=0x0
+0,         27,         27,        1,     3337, 0xf6f92558, F=0x0
+0,         28,         28,        1,     3580, 0x23408bc8, F=0x0
+0,         29,         29,        1,     3815, 0x781dfb1b, F=0x0
+0,         30,         30,        1,     2758, 0xf5cd1d5b, F=0x0
+0,         31,         31,        1,     4039, 0x7909712e, F=0x0
+0,         32,         32,        1,     3033, 0x39089d44, F=0x0
+0,         33,         33,        1,     4303, 0x667b0c60, F=0x0
+0,         34,         34,        1,     2052, 0x9edfba63, F=0x0
+0,         35,         35,        1,     3220, 0x5a56f015, F=0x0
+0,         36,         36,        1,     2300, 0x1719645c, F=0x0
+0,         37,         37,        1,     3641, 0x71c49a57, F=0x0
+0,         38,         38,        1,     3560, 0x377575b8, F=0x0
+0,         39,         39,        1,     3698, 0x165adce0, F=0x0
+0,         40,         40,        1,     1567, 0x6ecbf672, F=0x0
+0,         41,         41,        1,      962, 0x21eccba1, F=0x0
+0,         42,         42,        1,      281, 0x7a5a7ac3, F=0x0
+0,         43,         43,        1,      938, 0x659bb1ec, F=0x0
+0,         44,         44,        1,      279, 0x71e181bc, F=0x0
+0,         45,         45,        1,      204, 0xe0765316, F=0x0
+0,         46,         46,        1,     4307, 0xdbdd0e7d, F=0x0
+0,         47,         47,        1,     4903, 0xd8d24194, F=0x0
+0,         48,         48,        1,     4936, 0x6e9f1aff, F=0x0
+0,         49,         49,        1,     4949, 0xb3115902, F=0x0
+0,         50,         50,        1,     4162, 0xee2cad5b, F=0x0
+0,         51,         51,        1,     4686, 0xd583c178, F=0x0
+0,         52,         52,        1,     4749, 0x8c93b6a9, F=0x0
+0,         53,         53,        1,     4990, 0x12f957fd, F=0x0
+0,         54,         54,        1,     5187, 0xf3bcc7c9, F=0x0
+0,         55,         55,        1,     5054, 0xa27684fb, F=0x0
+0,         56,         56,        1,     5148, 0xe76cbad7, F=0x0
+0,         57,         57,        1,     4309, 0x79f7f067, F=0x0
+0,         58,         58,        1,     5087, 0xa2e29f29, F=0x0
+0,         59,         59,        1,     5292, 0xd158e4db, F=0x0
+0,         60,         60,        1,     5434, 0xe0be16f5, F=0x0
+0,         61,         61,        1,     4653, 0x0a3d8a5e, F=0x0
+0,         62,         62,        1,     5271, 0x4412d0a5, F=0x0
+0,         63,         63,        1,     5229, 0x3e06c4f2, F=0x0
+0,         64,         64,        1,     5225, 0x9bc39cfa, F=0x0
+0,         65,         65,        1,     5403, 0x798b009d, F=0x0
+0,         66,         66,        1,     5228, 0x14f2dded, F=0x0
+0,         67,         67,        1,     5712, 0x8724debe, F=0x0
+0,         68,         68,        1,     5644, 0x49d3a383, F=0x0
+0,         69,         69,        1,     5826, 0xde72e0ba, F=0x0
+0,         70,         70,        1,     5771, 0x62efd718, F=0x0
+0,         71,         71,        1,     6124, 0xb2a68c70, F=0x0
+0,         72,         72,        1,     6077, 0xb48b693f, F=0x0
+0,         73,         73,        1,     5804, 0xb700db6c, F=0x0
+0,         74,         74,        1,     6007, 0x02953898, F=0x0
+0,         75,         75,        1,     6228, 0x87a7b197, F=0x0
+0,         76,         76,        1,     6382, 0x49e7e65b, F=0x0
+0,         77,         77,        1,     6473, 0x3b9b6c3d, F=0x0
+0,         78,         78,        1,     7027, 0x2a4e1c17, F=0x0
+0,         79,         79,        1,     7263, 0x2e48c2e7, F=0x0
+0,         80,         80,        1,     8205, 0x013b701a, F=0x0
+0,         81,         81,        1,     8366, 0xcca97b10, F=0x0
+0,         82,         82,        1,     7716, 0x3b088fb3, F=0x0
+0,         83,         83,        1,     7420, 0xefdd1187, F=0x0
+0,         84,         84,        1,     7549, 0x1731227c, F=0x0
+0,         85,         85,        1,     7956, 0x8186ceb3, F=0x0
+0,         86,         86,        1,     8416, 0x23add53c, F=0x0
+0,         87,         87,        1,     8064, 0x09c616c4, F=0x0
+0,         88,         88,        1,     7409, 0x9d98af03, F=0x0
+0,         89,         89,        1,     7502, 0x0b81ebf3, F=0x0
+0,         90,         90,        1,     7814, 0x2f0d75cb, F=0x0
+0,         91,         91,        1,     7776, 0x45d6800f, F=0x0
+0,         92,         92,        1,     7757, 0x777f98b6, F=0x0
+0,         93,         93,        1,     8055, 0x4eea1634, F=0x0
+0,         94,         94,        1,     7626, 0xfb3931fd, F=0x0
+0,         95,         95,        1,     7987, 0x22a1d052, F=0x0
+0,         96,         96,        1,    12070, 0x3aa2924d, F=0x0
+0,         97,         97,        1,    12325, 0xda6cd811, F=0x0
+0,         98,         98,        1,    12225, 0xd478e671, F=0x0
+0,         99,         99,        1,    11235, 0xc6c09240, F=0x0
+0,        100,        100,        1,    11116, 0x95050c25, F=0x0
+0,        101,        101,        1,    11374, 0x14a68e3d, F=0x0
+0,        102,        102,        1,    11904, 0xb14436f2, F=0x0
+0,        103,        103,        1,    11487, 0xa3358311, F=0x0
+0,        104,        104,        1,    13403, 0xccf33a00, F=0x0
+0,        105,        105,        1,    12921, 0xbf7e4759, F=0x0
+0,        106,        106,        1,    13872, 0x7dace890, F=0x0
+0,        107,        107,        1,    13559, 0xb6c868f9, F=0x0
+0,        108,        108,        1,    14049, 0xa5d569b0, F=0x0
+0,        109,        109,        1,    14152, 0x8c9c31fb, F=0x0
+0,        110,        110,        1,    14285, 0x2ffe3bc3, F=0x0
+0,        111,        111,        1,    14432, 0x27abe196, F=0x0
+0,        112,        112,        1,    14697, 0x20d1aedd, F=0x0
+0,        113,        113,        1,    14606, 0xbcbe586d, F=0x0
+0,        114,        114,        1,    15221, 0x515f8224, F=0x0
+0,        115,        115,        1,    15433, 0x68089081, F=0x0
+0,        116,        116,        1,    15603, 0x8ff4e6fd, F=0x0
+0,        117,        117,        1,    15606, 0xc8527e30, F=0x0
+0,        118,        118,        1,    15871, 0x056ddca3, F=0x0
+0,        119,        119,        1,    15725, 0xc1871caf, F=0x0
+0,        120,        120,        1,    16086, 0x063e257b, F=0x0
+0,        121,        121,        1,    16233, 0xf683823b, F=0x0
+0,        122,        122,        1,    16143, 0x576df609, F=0x0
+0,        123,        123,        1,    16669, 0x3d02346b, F=0x0
+0,        124,        124,        1,    16627, 0xe35236cc, F=0x0
+0,        125,        125,        1,    16837, 0x389c996c, F=0x0
+0,        126,        126,        1,    16952, 0xc833af9f, F=0x0
+0,        127,        127,        1,    17127, 0xbf124531, F=0x0
+0,        128,        128,        1,    17158, 0x7abbfe59, F=0x0
+0,        129,        129,        1,    17329, 0x8102cc38, F=0x0
+0,        130,        130,        1,    17403, 0xa9468cad, F=0x0
+0,        131,        131,        1,    17674, 0x46d8038c, F=0x0
+0,        132,        132,        1,    17738, 0x5ee865ce, F=0x0
+0,        133,        133,        1,    17942, 0xc2449873, F=0x0
+0,        134,        134,        1,    17952, 0x3aafedbc, F=0x0
+0,        135,        135,        1,    18246, 0xdd930054, F=0x0
+0,        136,        136,        1,    18399, 0x68a59331, F=0x0
+0,        137,        137,        1,    18551, 0x1301c157, F=0x0
+0,        138,        138,        1,    18947, 0xbed8a9b4, F=0x0
+0,        139,        139,        1,    19153, 0xe338fea2, F=0x0
+0,        140,        140,        1,    19128, 0xbf05820e, F=0x0
+0,        141,        141,        1,    19138, 0x73b5cd49, F=0x0
+0,        142,        142,        1,    19502, 0xd8d68603, F=0x0
+0,        143,        143,        1,    19542, 0x84b47563, F=0x0
+0,        144,        144,        1,    19755, 0x7194b230, F=0x0
+0,        145,        145,        1,    20122, 0x83633f5e, F=0x0
+0,        146,        146,        1,    20265, 0x7365b06b, F=0x0
+0,        147,        147,        1,    20378, 0x1aaf1a98, F=0x0
+0,        148,        148,        1,    20300, 0x1da6d336, F=0x0
+0,        149,        149,        1,    20499, 0x72d54d30, F=0x0
+0,        150,        150,        1,    20655, 0x6996b124, F=0x0
+0,        151,        151,        1,    20674, 0xa883abd2, F=0x0
+0,        152,        152,        1,    21015, 0x96cf4018, F=0x0
+0,        153,        153,        1,    21066, 0x307e4479, F=0x0
+0,        154,        154,        1,    21161, 0xd45a2c38, F=0x0
+0,        155,        155,        1,    21086, 0xcf0e597d, F=0x0
+0,        156,        156,        1,    21466, 0xcc4032fe, F=0x0
+0,        157,        157,        1,    21677, 0x755ccb7b, F=0x0
+0,        158,        158,        1,    21589, 0x5d74fb47, F=0x0
+0,        159,        159,        1,    21662, 0x0c459189, F=0x0
+0,        160,        160,        1,    21995, 0x43d46eb3, F=0x0
+0,        161,        161,        1,    22213, 0x68455c92, F=0x0
+0,        162,        162,        1,    22483, 0xdc83c15a, F=0x0
+0,        163,        163,        1,    22498, 0xfae1251e, F=0x0
+0,        164,        164,        1,    22468, 0x7944b1d4, F=0x0
+0,        165,        165,        1,    22869, 0xff8c10ac, F=0x0
+0,        166,        166,        1,    22754, 0xd8183b84, F=0x0
+0,        167,        167,        1,    23173, 0x6c3c48d5, F=0x0
+0,        168,        168,        1,    23281, 0xc32ca477, F=0x0
+0,        169,        169,        1,    23219, 0x4dc26193, F=0x0
+0,        170,        170,        1,    23656, 0xb85ee15f, F=0x0
+0,        171,        171,        1,    23683, 0x26ba4915, F=0x0
+0,        172,        172,        1,    23882, 0xf57285de, F=0x0
diff --git a/tests/ref/fate/gifenc-gray b/tests/ref/fate/gifenc-gray
index 7d999c70b2f56..324aff49bdf38 100644
--- a/tests/ref/fate/gifenc-gray
+++ b/tests/ref/fate/gifenc-gray
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      579, 0x0d0e3ab8, S=1,        1, 0x00010001
-0,          1,          1,        1,      150, 0x178b3a8c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          2,          2,        1,      155, 0x941743f5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          3,          3,        1,      144, 0x68c73711, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          4,          4,        1,      152, 0xaf9a3f2e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          5,          5,        1,      136, 0x68593d85, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          6,          6,        1,      134, 0x0dcb373f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          7,          7,        1,      129, 0x3baf3279, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          8,          8,        1,      123, 0x9c963148, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,          9,          9,        1,      123, 0x5c272d6b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         10,         10,        1,      150, 0x5f8d41aa, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         11,         11,        1,      134, 0x6f582fee, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         12,         12,        1,      134, 0x85d53038, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         13,         13,        1,      123, 0x6d2a2cb2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         14,         14,        1,      127, 0x1e78327b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         15,         15,        1,      119, 0xbafc2c31, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         16,         16,        1,      138, 0x57553638, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         17,         17,        1,      140, 0xf7423adb, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         18,         18,        1,      122, 0x7e592f8b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         19,         19,        1,      123, 0xaa7d313c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         20,         20,        1,      140, 0x4fd63b34, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         21,         21,        1,      123, 0x67753163, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         22,         22,        1,      123, 0x02193147, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         23,         23,        1,      124, 0xa85131e9, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         24,         24,        1,      122, 0xef8731e2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         25,         25,        1,      122, 0x06d432c9, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         26,         26,        1,      123, 0xcc8831cd, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         27,         27,        1,      118, 0xa1d33166, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         28,         28,        1,      159, 0xcc8c454c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         29,         29,        1,      140, 0x8a0231ad, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         30,         30,        1,      163, 0xe78248d2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         31,         31,        1,      142, 0x3b293489, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         32,         32,        1,      170, 0x5f504b12, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         33,         33,        1,      146, 0x38a53693, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         34,         34,        1,      132, 0xb18a3499, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         35,         35,        1,      113, 0x55182bda, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         36,         36,        1,      132, 0xaced3333, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         37,         37,        1,      120, 0x9ffe2e4f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         38,         38,        1,      135, 0x6223351e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         39,         39,        1,      123, 0x269b3058, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         40,         40,        1,      119, 0x17052def, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         41,         41,        1,      119, 0x36da2ee2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         42,         42,        1,      120, 0x984e31be, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         43,         43,        1,      114, 0xfd382c9d, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         44,         44,        1,      125, 0x926a36c6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         45,         45,        1,      117, 0xbceb3183, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         46,         46,        1,      116, 0xf4c72d82, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         47,         47,        1,      124, 0x0c19343c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         48,         48,        1,      117, 0x1f032eb1, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         49,         49,        1,      135, 0x31a437e6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         50,         50,        1,      131, 0x4c1735fe, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         51,         51,        1,      122, 0xb7603463, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         52,         52,        1,      122, 0x7f5e34e1, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         53,         53,        1,      124, 0x9562350f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         54,         54,        1,      126, 0x18b33759, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         55,         55,        1,      117, 0x748f3243, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         56,         56,        1,      109, 0x72832fe7, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         57,         57,        1,      120, 0x748a2e38, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         58,         58,        1,      120, 0x61f82fb2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         59,         59,        1,      122, 0x2a6b3282, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         60,         60,        1,      116, 0x8b542de6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         61,         61,        1,      119, 0xf33c318e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         62,         62,        1,      116, 0xff182f36, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         63,         63,        1,      119, 0xeb9e2fcc, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         64,         64,        1,      118, 0xe82d304e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         65,         65,        1,      137, 0x98303d30, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         66,         66,        1,      149, 0x01123fff, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         67,         67,        1,      115, 0x4ca92f75, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         68,         68,        1,      131, 0xf4193bc0, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         69,         69,        1,      115, 0xda5e2f30, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         70,         70,        1,      100, 0x9ba32a58, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         71,         71,        1,      109, 0xa47e2c91, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         72,         72,        1,      120, 0x22452fd6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         73,         73,        1,      116, 0xd3c52c26, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         74,         74,        1,      106, 0x95b42c9f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         75,         75,        1,       96, 0xfdc12639, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         76,         76,        1,       99, 0x210f251b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         77,         77,        1,      119, 0x173b341c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         78,         78,        1,      119, 0x3bca2f29, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         79,         79,        1,      213, 0x9e905d4c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         80,         80,        1,      209, 0xa0015e94, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         81,         81,        1,      120, 0x36762bd4, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         82,         82,        1,      119, 0x019b2edc, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         83,         83,        1,      124, 0x211d30e7, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         84,         84,        1,      125, 0x538732ff, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         85,         85,        1,      123, 0x2887308a, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         86,         86,        1,      119, 0x7ff930f4, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         87,         87,        1,      119, 0xa50c2e16, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         88,         88,        1,      107, 0x9ed02cea, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         89,         89,        1,      119, 0xc234332a, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         90,         90,        1,      115, 0x38353092, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         91,         91,        1,      162, 0x6cda4644, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         92,         92,        1,      124, 0x2f683081, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         93,         93,        1,      116, 0x72952d04, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         94,         94,        1,       84, 0x1a532301, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         95,         95,        1,      176, 0xfb3c5400, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         96,         96,        1,      137, 0x253132d1, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         97,         97,        1,      179, 0x2b38528b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         98,         98,        1,      150, 0xbe413cbe, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,         99,         99,        1,      140, 0x9e93392a, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        100,        100,        1,      129, 0x577e331e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        101,        101,        1,      146, 0x16ff3924, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        102,        102,        1,      133, 0x756a3163, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        103,        103,        1,      190, 0x3e865b77, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        104,        104,        1,      159, 0xdf393fc8, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        105,        105,        1,      188, 0x84be5168, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        106,        106,        1,      163, 0x4c0e41f0, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        107,        107,        1,      144, 0x5fda3792, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        108,        108,        1,      136, 0x028c3800, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        109,        109,        1,      150, 0x75d43a8d, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        110,        110,        1,      134, 0x81123999, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        111,        111,        1,      198, 0x0a875baa, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        112,        112,        1,      169, 0xfdd7458c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        113,        113,        1,      210, 0x9b195be4, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        114,        114,        1,      174, 0x0a424a76, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        115,        115,        1,      137, 0xb1b535fd, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        116,        116,        1,      122, 0x4d3f327b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        117,        117,        1,      152, 0x5e423b0c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        118,        118,        1,      137, 0xd13a39f7, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        119,        119,        1,      156, 0x40864321, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        120,        120,        1,      140, 0xbe1e393c, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        121,        121,        1,      179, 0xaf204635, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        122,        122,        1,      116, 0x5ac83123, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        123,        123,        1,      118, 0x22bc2ec5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        124,        124,        1,      123, 0xc9b5302d, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        125,        125,        1,      125, 0x5cee3077, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        126,        126,        1,      194, 0xccc159ca, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        127,        127,        1,      122, 0x4d243229, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        128,        128,        1,      124, 0x948f330b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        129,        129,        1,      133, 0xd53c35ca, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        130,        130,        1,      126, 0xc5543710, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        131,        131,        1,      208, 0x6cf15ea2, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        132,        132,        1,      131, 0xa8d33505, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        133,        133,        1,      114, 0x0ae53001, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        134,        134,        1,      129, 0xe9ff37c4, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        135,        135,        1,      120, 0x02623359, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        136,        136,        1,      164, 0x9dc545e5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        137,        137,        1,      245, 0xc170715a, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        138,        138,        1,      215, 0xc93d5fbe, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        139,        139,        1,      225, 0x14866349, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        140,        140,        1,      123, 0x70cd2b64, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        141,        141,        1,      124, 0xe9002fb5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        142,        142,        1,      125, 0x106e309b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        143,        143,        1,      122, 0x050e32b0, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        144,        144,        1,      224, 0xf548614f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        145,        145,        1,      239, 0x125c6ade, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        146,        146,        1,      127, 0x398734b6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        147,        147,        1,      126, 0x2ff431e5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        148,        148,        1,      124, 0x9583313b, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        149,        149,        1,      126, 0xc1fc3692, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        150,        150,        1,      123, 0xd0bf3170, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        151,        151,        1,      117, 0x651f3032, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        152,        152,        1,      119, 0x268a3078, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        153,        153,        1,      117, 0x9e4d3283, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        154,        154,        1,      149, 0x8f1043ba, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        155,        155,        1,      127, 0x352338bc, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        156,        156,        1,      113, 0xf877314e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        157,        157,        1,      128, 0x88103a62, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        158,        158,        1,      111, 0xbf0630d9, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        159,        159,        1,      146, 0x159c44f7, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        160,        160,        1,      237, 0x4e45662e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        161,        161,        1,      233, 0x8f9e6354, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        162,        162,        1,      160, 0x9c3f431f, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        163,        163,        1,      125, 0xbd2b33c6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        164,        164,        1,      131, 0x3ecd3ba5, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        165,        165,        1,      231, 0xdf286db6, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        166,        166,        1,      153, 0xb6da408d, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        167,        167,        1,      126, 0x6741365e, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        168,        168,        1,      113, 0x658f2c90, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        169,        169,        1,      125, 0xc0033320, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        170,        170,        1,      122, 0xe38a2db1, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        171,        171,        1,      145, 0x29d63e83, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
-0,        172,        172,        1,      171, 0xc0e44b70, S=2,        1, 0x00010001,     1024, 0xc2f67c9f
+0,          0,          0,        1,     1368, 0x6cf0befd
+0,          1,          1,        1,      158, 0xcd173bb4, F=0x0
+0,          2,          2,        1,      163, 0x4f7a451d, F=0x0
+0,          3,          3,        1,      152, 0x17723839, F=0x0
+0,          4,          4,        1,      160, 0x67854056, F=0x0
+0,          5,          5,        1,      144, 0x0dc43ead, F=0x0
+0,          6,          6,        1,      142, 0xb0d73867, F=0x0
+0,          7,          7,        1,      137, 0xd8f333a1, F=0x0
+0,          8,          8,        1,      131, 0x32f93270, F=0x0
+0,          9,          9,        1,      131, 0xf27b2e93, F=0x0
+0,         10,         10,        1,      158, 0x152842d2, F=0x0
+0,         11,         11,        1,      142, 0x12733116, F=0x0
+0,         12,         12,        1,      142, 0x28f03160, F=0x0
+0,         13,         13,        1,      131, 0x038d2dda, F=0x0
+0,         14,         14,        1,      135, 0xb96c33a3, F=0x0
+0,         15,         15,        1,      127, 0x4cbf2d59, F=0x0
+0,         16,         16,        1,      146, 0xff013760, F=0x0
+0,         17,         17,        1,      148, 0xa14d3c03, F=0x0
+0,         18,         18,        1,      130, 0x139430b3, F=0x0
+0,         19,         19,        1,      131, 0x40e03264, F=0x0
+0,         20,         20,        1,      148, 0xf9d23c5c, F=0x0
+0,         21,         21,        1,      131, 0xfdc9328b, F=0x0
+0,         22,         22,        1,      131, 0x986d326f, F=0x0
+0,         23,         23,        1,      132, 0x3fdc3311, F=0x0
+0,         24,         24,        1,      130, 0x84c2330a, F=0x0
+0,         25,         25,        1,      130, 0x9c0033f1, F=0x0
+0,         26,         26,        1,      131, 0x62eb32f5, F=0x0
+0,         27,         27,        1,      126, 0x326e328e, F=0x0
+0,         28,         28,        1,      167, 0x8c8f4674, F=0x0
+0,         29,         29,        1,      148, 0x340d32d5, F=0x0
+0,         30,         30,        1,      171, 0xac2549fa, F=0x0
+0,         31,         31,        1,      150, 0xe77535b1, F=0x0
+0,         32,         32,        1,      178, 0x2c0b4c3a, F=0x0
+0,         33,         33,        1,      154, 0xe99137bb, F=0x0
+0,         34,         34,        1,      140, 0x525535c1, F=0x0
+0,         35,         35,        1,      121, 0xdfdc2d02, F=0x0
+0,         36,         36,        1,      140, 0x4db8345b, F=0x0
+0,         37,         37,        1,      128, 0x32e92f77, F=0x0
+0,         38,         38,        1,      143, 0x06663646, F=0x0
+0,         39,         39,        1,      131, 0xbcef3180, F=0x0
+0,         40,         40,        1,      127, 0xa8b92f17, F=0x0
+0,         41,         41,        1,      127, 0xc88e300a, F=0x0
+0,         42,         42,        1,      128, 0x2b3932e6, F=0x0
+0,         43,         43,        1,      122, 0x89332dc5, F=0x0
+0,         44,         44,        1,      133, 0x2b1d37ee, F=0x0
+0,         45,         45,        1,      125, 0x4c5e32ab, F=0x0
+0,         46,         46,        1,      124, 0x83122eaa, F=0x0
+0,         47,         47,        1,      132, 0xa3953564, F=0x0
+0,         48,         48,        1,      125, 0xae672fd9, F=0x0
+0,         49,         49,        1,      143, 0xd5d8390e, F=0x0
+0,         50,         50,        1,      139, 0xebab3726, F=0x0
+0,         51,         51,        1,      130, 0x4c9b358b, F=0x0
+0,         52,         52,        1,      130, 0x14993609, F=0x0
+0,         53,         53,        1,      132, 0x2ced3637, F=0x0
+0,         54,         54,        1,      134, 0xb27f3881, F=0x0
+0,         55,         55,        1,      125, 0x0402336b, F=0x0
+0,         56,         56,        1,      117, 0xf8a7310f, F=0x0
+0,         57,         57,        1,      128, 0x07752f60, F=0x0
+0,         58,         58,        1,      128, 0xf4d430da, F=0x0
+0,         59,         59,        1,      130, 0xbf9733aa, F=0x0
+0,         60,         60,        1,      124, 0x199f2f0e, F=0x0
+0,         61,         61,        1,      127, 0x84ff32b6, F=0x0
+0,         62,         62,        1,      124, 0x8d63305e, F=0x0
+0,         63,         63,        1,      127, 0x7d6130f4, F=0x0
+0,         64,         64,        1,      126, 0x78c83176, F=0x0
+0,         65,         65,        1,      145, 0x3ec33e58, F=0x0
+0,         66,         66,        1,      157, 0xb5764127, F=0x0
+0,         67,         67,        1,      123, 0xd9bd309d, F=0x0
+0,         68,         68,        1,      139, 0x93bc3ce8, F=0x0
+0,         69,         69,        1,      123, 0x67813058, F=0x0
+0,         70,         70,        1,      108, 0x176e2b80, F=0x0
+0,         71,         71,        1,      117, 0x2ab12db9, F=0x0
+0,         72,         72,        1,      128, 0xb52130fe, F=0x0
+0,         73,         73,        1,      124, 0x62102d4e, F=0x0
+0,         74,         74,        1,      114, 0x186f2dc7, F=0x0
+0,         75,         75,        1,      104, 0x74ec2761, F=0x0
+0,         76,         76,        1,      107, 0x9ba32643, F=0x0
+0,         77,         77,        1,      127, 0xa8ef3544, F=0x0
+0,         78,         78,        1,      127, 0xcd7e3051, F=0x0
+0,         79,         79,        1,      221, 0x9d035e74, F=0x0
+0,         80,         80,        1,      217, 0x99d45fbc, F=0x0
+0,         81,         81,        1,      128, 0xc9522cfc, F=0x0
+0,         82,         82,        1,      127, 0x934f3004, F=0x0
+0,         83,         83,        1,      132, 0xb899320f, F=0x0
+0,         84,         84,        1,      133, 0xec2b3427, F=0x0
+0,         85,         85,        1,      131, 0xbedb31b2, F=0x0
+0,         86,         86,        1,      127, 0x11bc321c, F=0x0
+0,         87,         87,        1,      127, 0x36cf2f3e, F=0x0
+0,         88,         88,        1,      115, 0x22b32e12, F=0x0
+0,         89,         89,        1,      127, 0x53f73452, F=0x0
+0,         90,         90,        1,      123, 0xc54931ba, F=0x0
+0,         91,         91,        1,      170, 0x3055476c, F=0x0
+0,         92,         92,        1,      132, 0xc6e431a9, F=0x0
+0,         93,         93,        1,      124, 0x00e02e2c, F=0x0
+0,         94,         94,        1,       92, 0x838f2429, F=0x0
+0,         95,         95,        1,      184, 0xcee75528, F=0x0
+0,         96,         96,        1,      145, 0xcbb533f9, F=0x0
+0,         97,         97,        1,      187, 0x025b53b3, F=0x0
+0,         98,         98,        1,      158, 0x73dc3de6, F=0x0
+0,         99,         99,        1,      148, 0x489e3a52, F=0x0
+0,        100,        100,        1,      137, 0xf4c23446, F=0x0
+0,        101,        101,        1,      154, 0xc7eb3a4c, F=0x0
+0,        102,        102,        1,      141, 0x175d328b, F=0x0
+0,        103,        103,        1,      198, 0x22615c9f, F=0x0
+0,        104,        104,        1,      167, 0x9f3c40f0, F=0x0
+0,        105,        105,        1,      196, 0x66495290, F=0x0
+0,        106,        106,        1,      171, 0x10b14318, F=0x0
+0,        107,        107,        1,      152, 0x0e8538ba, F=0x0
+0,        108,        108,        1,      144, 0xa7e83928, F=0x0
+0,        109,        109,        1,      158, 0x2b6f3bb5, F=0x0
+0,        110,        110,        1,      142, 0x242d3ac1, F=0x0
+0,        111,        111,        1,      206, 0xf7935cd2, F=0x0
+0,        112,        112,        1,      177, 0xc96a46b4, F=0x0
+0,        113,        113,        1,      218, 0x96145d0c, F=0x0
+0,        114,        114,        1,      182, 0xdb8e4b9e, F=0x0
+0,        115,        115,        1,      145, 0x58483725, F=0x0
+0,        116,        116,        1,      130, 0xe26b33a3, F=0x0
+0,        117,        117,        1,      160, 0x162d3c34, F=0x0
+0,        118,        118,        1,      145, 0x77cd3b1f, F=0x0
+0,        119,        119,        1,      164, 0xfd024449, F=0x0
+0,        120,        120,        1,      148, 0x68293a64, F=0x0
+0,        121,        121,        1,      187, 0x8643475d, F=0x0
+0,        122,        122,        1,      124, 0xe904324b, F=0x0
+0,        123,        123,        1,      126, 0xb3482fed, F=0x0
+0,        124,        124,        1,      131, 0x60183155, F=0x0
+0,        125,        125,        1,      133, 0xf592319f, F=0x0
+0,        126,        126,        1,      202, 0xb53c5af2, F=0x0
+0,        127,        127,        1,      130, 0xe2503351, F=0x0
+0,        128,        128,        1,      132, 0x2c1a3433, F=0x0
+0,        129,        129,        1,      141, 0x772f36f2, F=0x0
+0,        130,        130,        1,      134, 0x5f2f3838, F=0x0
+0,        131,        131,        1,      216, 0x659c5fca, F=0x0
+0,        132,        132,        1,      139, 0x4876362d, F=0x0
+0,        133,        133,        1,      122, 0x96d13129, F=0x0
+0,        134,        134,        1,      137, 0x875238ec, F=0x0
+0,        135,        135,        1,      128, 0x953e3481, F=0x0
+0,        136,        136,        1,      172, 0x6390470d, F=0x0
+0,        137,        137,        1,      253, 0xe4e37282, F=0x0
+0,        138,        138,        1,      223, 0xca0060e6, F=0x0
+0,        139,        139,        1,      233, 0x20d96471, F=0x0
+0,        140,        140,        1,      131, 0x07302c8c, F=0x0
+0,        141,        141,        1,      132, 0x808b30dd, F=0x0
+0,        142,        142,        1,      133, 0xa91231c3, F=0x0
+0,        143,        143,        1,      130, 0x9a3a33d8, F=0x0
+0,        144,        144,        1,      232, 0x00826277, F=0x0
+0,        145,        145,        1,      247, 0x2edf6c06, F=0x0
+0,        146,        146,        1,      135, 0xd47b35de, F=0x0
+0,        147,        147,        1,      134, 0xc9c0330d, F=0x0
+0,        148,        148,        1,      132, 0x2d0e3263, F=0x0
+0,        149,        149,        1,      134, 0x5bd737ba, F=0x0
+0,        150,        150,        1,      131, 0x67223298, F=0x0
+0,        151,        151,        1,      125, 0xf483315a, F=0x0
+0,        152,        152,        1,      127, 0xb83e31a0, F=0x0
+0,        153,        153,        1,      125, 0x2dc033ab, F=0x0
+0,        154,        154,        1,      157, 0x438344e2, F=0x0
+0,        155,        155,        1,      135, 0xd01739e4, F=0x0
+0,        156,        156,        1,      121, 0x834a3276, F=0x0
+0,        157,        157,        1,      136, 0x243b3b8a, F=0x0
+0,        158,        158,        1,      119, 0x47893201, F=0x0
+0,        159,        159,        1,      154, 0xc688461f, F=0x0
+0,        160,        160,        1,      245, 0x68786756, F=0x0
+0,        161,        161,        1,      241, 0xa531647c, F=0x0
+0,        162,        162,        1,      168, 0x5d6a4447, F=0x0
+0,        163,        163,        1,      133, 0x55de34ee, F=0x0
+0,        164,        164,        1,      139, 0xde613ccd, F=0x0
+0,        165,        165,        1,      239, 0xf26b6ede, F=0x0
+0,        166,        166,        1,      161, 0x6fed41b5, F=0x0
+0,        167,        167,        1,      134, 0x011c3786, F=0x0
+0,        168,        168,        1,      121, 0xf0532db8, F=0x0
+0,        169,        169,        1,      133, 0x58b63448, F=0x0
+0,        170,        170,        1,      130, 0x78c52ed9, F=0x0
+0,        171,        171,        1,      153, 0xd99a3fab, F=0x0
+0,        172,        172,        1,      179, 0x8ec74c98, F=0x0
diff --git a/tests/ref/fate/gifenc-pal8 b/tests/ref/fate/gifenc-pal8
index 00ccc5464b5b5..3ed39357f6d6c 100644
--- a/tests/ref/fate/gifenc-pal8
+++ b/tests/ref/fate/gifenc-pal8
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      552, 0x271a2dd3, S=2,     1024, 0xec907a9e,        1, 0x00010001
-0,          1,          1,        1,      297, 0x90168a95, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          2,          2,        1,      438, 0x91efce1b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          3,          3,        1,      450, 0x7c2dcfad, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          4,          4,        1,      547, 0xc131fd3b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          5,          5,        1,      614, 0x68182006, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          6,          6,        1,      642, 0x78bb1f5f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          7,          7,        1,      660, 0x35c033a2, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          8,          8,        1,      821, 0xaf30790b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,          9,          9,        1,     1157, 0x741c2da1, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         10,         10,        1,      179, 0x3a27517c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         11,         11,        1,     1333, 0x5ee76f3c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         12,         12,        1,     1638, 0x5f640e86, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         13,         13,        1,     1531, 0xccb8e437, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         14,         14,        1,     1720, 0xc95d45ec, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         15,         15,        1,     1910, 0x56cc831e, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         16,         16,        1,     2124, 0x9cc8e130, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         17,         17,        1,     2248, 0x05a325b1, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         18,         18,        1,     2311, 0xdc633703, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         19,         19,        1,     2408, 0x91c26f3e, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         20,         20,        1,     2601, 0x8cf3c157, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         21,         21,        1,     2687, 0x8f6400e6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         22,         22,        1,     2784, 0xaa880e55, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         23,         23,        1,     2884, 0x46f546f6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         24,         24,        1,     2982, 0x807c7ad5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         25,         25,        1,     3101, 0xbcc89bec, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         26,         26,        1,     3253, 0xd032f3fa, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         27,         27,        1,     3329, 0xe4d42430, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         28,         28,        1,     3572, 0xf8058aa0, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         29,         29,        1,     3807, 0x3d2af9f3, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         30,         30,        1,     2750, 0x814d1c33, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         31,         31,        1,     4031, 0x3b077006, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         32,         32,        1,     3025, 0x86729c1c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         33,         33,        1,     4295, 0xf71b0b38, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         34,         34,        1,     2044, 0x5adcb93b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         35,         35,        1,     3212, 0xcf79eeed, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         36,         36,        1,     2292, 0xb4386334, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         37,         37,        1,     3633, 0x0010992f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         38,         38,        1,     3552, 0x23697490, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         39,         39,        1,     3690, 0x62afdbb8, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         40,         40,        1,     1559, 0x5baef54a, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         41,         41,        1,      954, 0xca75ca79, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         42,         42,        1,      273, 0x3687799b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         43,         43,        1,      930, 0x29f3b0c4, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         44,         44,        1,      271, 0x305e8094, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         45,         45,        1,      196, 0xf5ab51ee, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         46,         46,        1,     4299, 0x67ec0d55, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         47,         47,        1,     4895, 0xb394406c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         48,         48,        1,     4928, 0x233919d7, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         49,         49,        1,     4941, 0x58a357da, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         50,         50,        1,     4154, 0x21f2ac33, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         51,         51,        1,     4678, 0xab3cc050, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         52,         52,        1,     4741, 0x1974b581, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         53,         53,        1,     4982, 0x891456d5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         54,         54,        1,     5179, 0x860fc6a1, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         55,         55,        1,     5046, 0xce9183d3, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         56,         56,        1,     5140, 0xa6d7b9af, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         57,         57,        1,     4301, 0x03b6ef3f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         58,         58,        1,     5079, 0xa8d59e01, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         59,         59,        1,     5284, 0xea34e3b3, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         60,         60,        1,     5426, 0x556a15cd, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         61,         61,        1,     4645, 0x061e8936, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         62,         62,        1,     5263, 0x7536cf7d, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         63,         63,        1,     5221, 0x9fbac3ca, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         64,         64,        1,     5217, 0x02269bd2, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         65,         65,        1,     5395, 0x120fff66, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         66,         66,        1,     5220, 0x77cedcc5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         67,         67,        1,     5704, 0xba42dd96, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         68,         68,        1,     5636, 0xcb91a25b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         69,         69,        1,     5818, 0x8dc0df92, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         70,         70,        1,     5763, 0x51d5d5f0, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         71,         71,        1,     6116, 0x09558b48, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         72,         72,        1,     6069, 0x41926817, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         73,         73,        1,     5796, 0x7fbeda44, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         74,         74,        1,     5999, 0xe07d3770, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         75,         75,        1,     6220, 0x6607b06f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         76,         76,        1,     6374, 0x7628e533, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         77,         77,        1,     6465, 0xfe956b15, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         78,         78,        1,     7019, 0x6c9a1aef, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         79,         79,        1,     7255, 0x5fa5c1bf, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         80,         80,        1,     8197, 0xf11d6ef2, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         81,         81,        1,     8358, 0x027279e8, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         82,         82,        1,     7708, 0x607f8e8b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         83,         83,        1,     7412, 0x6bb2105f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         84,         84,        1,     7541, 0xfdc02154, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         85,         85,        1,     7948, 0x916ecd8b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         86,         86,        1,     8408, 0x1f97d414, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         87,         87,        1,     8056, 0x9cbf159c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         88,         88,        1,     7401, 0x2625addb, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         89,         89,        1,     7494, 0x2877eacb, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         90,         90,        1,     7806, 0xe32574a3, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         91,         91,        1,     7768, 0x25ed7ee7, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         92,         92,        1,     7749, 0x6d8e978e, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         93,         93,        1,     8047, 0xec4b150c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         94,         94,        1,     7618, 0x88cf30d5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         95,         95,        1,     7979, 0x0eb1cf2a, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         96,         96,        1,    12062, 0xb49d9125, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         97,         97,        1,    12317, 0x2d8fd6e9, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         98,         98,        1,    12217, 0x9b3be549, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,         99,         99,        1,    11227, 0x067e9118, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        100,        100,        1,    11108, 0x5e5b0afd, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        101,        101,        1,    11366, 0xb38e8d15, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        102,        102,        1,    11896, 0xeb3e35ca, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        103,        103,        1,    11479, 0xbf7581e9, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        104,        104,        1,    13395, 0x415b38d8, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        105,        105,        1,    12913, 0x61544631, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        106,        106,        1,    13864, 0xd39fe768, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        107,        107,        1,    13551, 0x76c167d1, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        108,        108,        1,    14041, 0x2f206888, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        109,        109,        1,    14144, 0x9ec030d3, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        110,        110,        1,    14277, 0xa84b3a9b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        111,        111,        1,    14424, 0xf5f1e06e, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        112,        112,        1,    14689, 0xbca0adb5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        113,        113,        1,    14598, 0xc1d45745, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        114,        114,        1,    15213, 0x8f3080fc, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        115,        115,        1,    15425, 0xb0aa8f59, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        116,        116,        1,    15595, 0x1406e5d5, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        117,        117,        1,    15598, 0x48ec7d08, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        118,        118,        1,    15863, 0x5381db7b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        119,        119,        1,    15717, 0xb87a1b87, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        120,        120,        1,    16078, 0x5bab2453, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        121,        121,        1,    16225, 0xa1f88113, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        122,        122,        1,    16135, 0x6af2f4e1, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        123,        123,        1,    16661, 0xf02a3343, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        124,        124,        1,    16619, 0xc71935a4, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        125,        125,        1,    16829, 0x29849844, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        126,        126,        1,    16944, 0x3423ae77, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        127,        127,        1,    17119, 0x609b4409, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        128,        128,        1,    17150, 0xf85dfd31, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        129,        129,        1,    17321, 0x38eccb10, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        130,        130,        1,    17395, 0x0ba08b85, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        131,        131,        1,    17666, 0x6fbc0264, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        132,        132,        1,    17730, 0x3dcc64a6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        133,        133,        1,    17934, 0xb539974b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        134,        134,        1,    17944, 0x2214ec94, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        135,        135,        1,    18238, 0x70f9ff1d, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        136,        136,        1,    18391, 0x4b149209, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        137,        137,        1,    18543, 0x45a1c02f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        138,        138,        1,    18939, 0x2789a88c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        139,        139,        1,    19145, 0x5daafd7a, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        140,        140,        1,    19120, 0x565f80e6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        141,        141,        1,    19130, 0xff70cc21, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        142,        142,        1,    19494, 0xbfa284db, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        143,        143,        1,    19534, 0x3d40743b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        144,        144,        1,    19747, 0x33c9b108, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        145,        145,        1,    20114, 0x9d223e36, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        146,        146,        1,    20257, 0xe7bdaf43, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        147,        147,        1,    20370, 0x0c5f1970, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        148,        148,        1,    20292, 0x6986d20e, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        149,        149,        1,    20491, 0xd88e4c08, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        150,        150,        1,    20647, 0x1aefaffc, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        151,        151,        1,    20666, 0x43e4aaaa, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        152,        152,        1,    21007, 0xa7ca3ef0, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        153,        153,        1,    21058, 0x06814351, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        154,        154,        1,    21153, 0x3c852b10, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        155,        155,        1,    21078, 0x8df15855, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        156,        156,        1,    21458, 0xd3a531d6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        157,        157,        1,    21669, 0x88baca53, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        158,        158,        1,    21581, 0xd692fa1f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        159,        159,        1,    21654, 0x30fb9061, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        160,        160,        1,    21987, 0xe7646d8b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        161,        161,        1,    22205, 0x0fc55b6a, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        162,        162,        1,    22475, 0x4bc4c032, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        163,        163,        1,    22490, 0x58ca23f6, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        164,        164,        1,    22460, 0xf9ceb0ac, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        165,        165,        1,    22861, 0xb05f0f84, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        166,        166,        1,    22746, 0x0df23a5c, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        167,        167,        1,    23165, 0xbd7147ad, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        168,        168,        1,    23273, 0x9781a34f, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        169,        169,        1,    23211, 0x69c7606b, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        170,        170,        1,    23648, 0xdafde037, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        171,        171,        1,    23675, 0x2a2147ed, S=2,     1024, 0xf351799f,        1, 0x00010001
-0,        172,        172,        1,    23874, 0x12c184b6, S=2,     1024, 0xf351799f,        1, 0x00010001
+0,          0,          0,        1,     2109, 0x39642b3d
+0,          1,          1,        1,      305, 0xefa98bbd, F=0x0
+0,          2,          2,        1,      446, 0x9499cf43, F=0x0
+0,          3,          3,        1,      458, 0x8cb7d0d5, F=0x0
+0,          4,          4,        1,      555, 0x41f2fe63, F=0x0
+0,          5,          5,        1,      622, 0x3651212e, F=0x0
+0,          6,          6,        1,      650, 0x67542087, F=0x0
+0,          7,          7,        1,      668, 0x392934ca, F=0x0
+0,          8,          8,        1,      829, 0x6cd07a33, F=0x0
+0,          9,          9,        1,     1165, 0xb64b2ec9, F=0x0
+0,         10,         10,        1,      187, 0x114a52a4, F=0x0
+0,         11,         11,        1,     1341, 0x6ca57064, F=0x0
+0,         12,         12,        1,     1646, 0xcdd90fae, F=0x0
+0,         13,         13,        1,     1539, 0xbf75e55f, F=0x0
+0,         14,         14,        1,     1728, 0x96b14714, F=0x0
+0,         15,         15,        1,     1918, 0xffd08446, F=0x0
+0,         16,         16,        1,     2132, 0x3d5ae258, F=0x0
+0,         17,         17,        1,     2256, 0x359526d9, F=0x0
+0,         18,         18,        1,     2319, 0x553c382b, F=0x0
+0,         19,         19,        1,     2416, 0x7ac37066, F=0x0
+0,         20,         20,        1,     2609, 0x552bc27f, F=0x0
+0,         21,         21,        1,     2695, 0xbb0c020e, F=0x0
+0,         22,         22,        1,     2792, 0x46670f7d, F=0x0
+0,         23,         23,        1,     2892, 0x5674481e, F=0x0
+0,         24,         24,        1,     2990, 0x015a7bfd, F=0x0
+0,         25,         25,        1,     3109, 0xc73e9d14, F=0x0
+0,         26,         26,        1,     3261, 0x8a77f522, F=0x0
+0,         27,         27,        1,     3337, 0xf6f92558, F=0x0
+0,         28,         28,        1,     3580, 0x23408bc8, F=0x0
+0,         29,         29,        1,     3815, 0x781dfb1b, F=0x0
+0,         30,         30,        1,     2758, 0xf5cd1d5b, F=0x0
+0,         31,         31,        1,     4039, 0x7909712e, F=0x0
+0,         32,         32,        1,     3033, 0x39089d44, F=0x0
+0,         33,         33,        1,     4303, 0x667b0c60, F=0x0
+0,         34,         34,        1,     2052, 0x9edfba63, F=0x0
+0,         35,         35,        1,     3220, 0x5a56f015, F=0x0
+0,         36,         36,        1,     2300, 0x1719645c, F=0x0
+0,         37,         37,        1,     3641, 0x71c49a57, F=0x0
+0,         38,         38,        1,     3560, 0x377575b8, F=0x0
+0,         39,         39,        1,     3698, 0x165adce0, F=0x0
+0,         40,         40,        1,     1567, 0x6ecbf672, F=0x0
+0,         41,         41,        1,      962, 0x21eccba1, F=0x0
+0,         42,         42,        1,      281, 0x7a5a7ac3, F=0x0
+0,         43,         43,        1,      938, 0x659bb1ec, F=0x0
+0,         44,         44,        1,      279, 0x71e181bc, F=0x0
+0,         45,         45,        1,      204, 0xe0765316, F=0x0
+0,         46,         46,        1,     4307, 0xdbdd0e7d, F=0x0
+0,         47,         47,        1,     4903, 0xd8d24194, F=0x0
+0,         48,         48,        1,     4936, 0x6e9f1aff, F=0x0
+0,         49,         49,        1,     4949, 0xb3115902, F=0x0
+0,         50,         50,        1,     4162, 0xee2cad5b, F=0x0
+0,         51,         51,        1,     4686, 0xd583c178, F=0x0
+0,         52,         52,        1,     4749, 0x8c93b6a9, F=0x0
+0,         53,         53,        1,     4990, 0x12f957fd, F=0x0
+0,         54,         54,        1,     5187, 0xf3bcc7c9, F=0x0
+0,         55,         55,        1,     5054, 0xa27684fb, F=0x0
+0,         56,         56,        1,     5148, 0xe76cbad7, F=0x0
+0,         57,         57,        1,     4309, 0x79f7f067, F=0x0
+0,         58,         58,        1,     5087, 0xa2e29f29, F=0x0
+0,         59,         59,        1,     5292, 0xd158e4db, F=0x0
+0,         60,         60,        1,     5434, 0xe0be16f5, F=0x0
+0,         61,         61,        1,     4653, 0x0a3d8a5e, F=0x0
+0,         62,         62,        1,     5271, 0x4412d0a5, F=0x0
+0,         63,         63,        1,     5229, 0x3e06c4f2, F=0x0
+0,         64,         64,        1,     5225, 0x9bc39cfa, F=0x0
+0,         65,         65,        1,     5403, 0x798b009d, F=0x0
+0,         66,         66,        1,     5228, 0x14f2dded, F=0x0
+0,         67,         67,        1,     5712, 0x8724debe, F=0x0
+0,         68,         68,        1,     5644, 0x49d3a383, F=0x0
+0,         69,         69,        1,     5826, 0xde72e0ba, F=0x0
+0,         70,         70,        1,     5771, 0x62efd718, F=0x0
+0,         71,         71,        1,     6124, 0xb2a68c70, F=0x0
+0,         72,         72,        1,     6077, 0xb48b693f, F=0x0
+0,         73,         73,        1,     5804, 0xb700db6c, F=0x0
+0,         74,         74,        1,     6007, 0x02953898, F=0x0
+0,         75,         75,        1,     6228, 0x87a7b197, F=0x0
+0,         76,         76,        1,     6382, 0x49e7e65b, F=0x0
+0,         77,         77,        1,     6473, 0x3b9b6c3d, F=0x0
+0,         78,         78,        1,     7027, 0x2a4e1c17, F=0x0
+0,         79,         79,        1,     7263, 0x2e48c2e7, F=0x0
+0,         80,         80,        1,     8205, 0x013b701a, F=0x0
+0,         81,         81,        1,     8366, 0xcca97b10, F=0x0
+0,         82,         82,        1,     7716, 0x3b088fb3, F=0x0
+0,         83,         83,        1,     7420, 0xefdd1187, F=0x0
+0,         84,         84,        1,     7549, 0x1731227c, F=0x0
+0,         85,         85,        1,     7956, 0x8186ceb3, F=0x0
+0,         86,         86,        1,     8416, 0x23add53c, F=0x0
+0,         87,         87,        1,     8064, 0x09c616c4, F=0x0
+0,         88,         88,        1,     7409, 0x9d98af03, F=0x0
+0,         89,         89,        1,     7502, 0x0b81ebf3, F=0x0
+0,         90,         90,        1,     7814, 0x2f0d75cb, F=0x0
+0,         91,         91,        1,     7776, 0x45d6800f, F=0x0
+0,         92,         92,        1,     7757, 0x777f98b6, F=0x0
+0,         93,         93,        1,     8055, 0x4eea1634, F=0x0
+0,         94,         94,        1,     7626, 0xfb3931fd, F=0x0
+0,         95,         95,        1,     7987, 0x22a1d052, F=0x0
+0,         96,         96,        1,    12070, 0x3aa2924d, F=0x0
+0,         97,         97,        1,    12325, 0xda6cd811, F=0x0
+0,         98,         98,        1,    12225, 0xd478e671, F=0x0
+0,         99,         99,        1,    11235, 0xc6c09240, F=0x0
+0,        100,        100,        1,    11116, 0x95050c25, F=0x0
+0,        101,        101,        1,    11374, 0x14a68e3d, F=0x0
+0,        102,        102,        1,    11904, 0xb14436f2, F=0x0
+0,        103,        103,        1,    11487, 0xa3358311, F=0x0
+0,        104,        104,        1,    13403, 0xccf33a00, F=0x0
+0,        105,        105,        1,    12921, 0xbf7e4759, F=0x0
+0,        106,        106,        1,    13872, 0x7dace890, F=0x0
+0,        107,        107,        1,    13559, 0xb6c868f9, F=0x0
+0,        108,        108,        1,    14049, 0xa5d569b0, F=0x0
+0,        109,        109,        1,    14152, 0x8c9c31fb, F=0x0
+0,        110,        110,        1,    14285, 0x2ffe3bc3, F=0x0
+0,        111,        111,        1,    14432, 0x27abe196, F=0x0
+0,        112,        112,        1,    14697, 0x20d1aedd, F=0x0
+0,        113,        113,        1,    14606, 0xbcbe586d, F=0x0
+0,        114,        114,        1,    15221, 0x515f8224, F=0x0
+0,        115,        115,        1,    15433, 0x68089081, F=0x0
+0,        116,        116,        1,    15603, 0x8ff4e6fd, F=0x0
+0,        117,        117,        1,    15606, 0xc8527e30, F=0x0
+0,        118,        118,        1,    15871, 0x056ddca3, F=0x0
+0,        119,        119,        1,    15725, 0xc1871caf, F=0x0
+0,        120,        120,        1,    16086, 0x063e257b, F=0x0
+0,        121,        121,        1,    16233, 0xf683823b, F=0x0
+0,        122,        122,        1,    16143, 0x576df609, F=0x0
+0,        123,        123,        1,    16669, 0x3d02346b, F=0x0
+0,        124,        124,        1,    16627, 0xe35236cc, F=0x0
+0,        125,        125,        1,    16837, 0x389c996c, F=0x0
+0,        126,        126,        1,    16952, 0xc833af9f, F=0x0
+0,        127,        127,        1,    17127, 0xbf124531, F=0x0
+0,        128,        128,        1,    17158, 0x7abbfe59, F=0x0
+0,        129,        129,        1,    17329, 0x8102cc38, F=0x0
+0,        130,        130,        1,    17403, 0xa9468cad, F=0x0
+0,        131,        131,        1,    17674, 0x46d8038c, F=0x0
+0,        132,        132,        1,    17738, 0x5ee865ce, F=0x0
+0,        133,        133,        1,    17942, 0xc2449873, F=0x0
+0,        134,        134,        1,    17952, 0x3aafedbc, F=0x0
+0,        135,        135,        1,    18246, 0xdd930054, F=0x0
+0,        136,        136,        1,    18399, 0x68a59331, F=0x0
+0,        137,        137,        1,    18551, 0x1301c157, F=0x0
+0,        138,        138,        1,    18947, 0xbed8a9b4, F=0x0
+0,        139,        139,        1,    19153, 0xe338fea2, F=0x0
+0,        140,        140,        1,    19128, 0xbf05820e, F=0x0
+0,        141,        141,        1,    19138, 0x73b5cd49, F=0x0
+0,        142,        142,        1,    19502, 0xd8d68603, F=0x0
+0,        143,        143,        1,    19542, 0x84b47563, F=0x0
+0,        144,        144,        1,    19755, 0x7194b230, F=0x0
+0,        145,        145,        1,    20122, 0x83633f5e, F=0x0
+0,        146,        146,        1,    20265, 0x7365b06b, F=0x0
+0,        147,        147,        1,    20378, 0x1aaf1a98, F=0x0
+0,        148,        148,        1,    20300, 0x1da6d336, F=0x0
+0,        149,        149,        1,    20499, 0x72d54d30, F=0x0
+0,        150,        150,        1,    20655, 0x6996b124, F=0x0
+0,        151,        151,        1,    20674, 0xa883abd2, F=0x0
+0,        152,        152,        1,    21015, 0x96cf4018, F=0x0
+0,        153,        153,        1,    21066, 0x307e4479, F=0x0
+0,        154,        154,        1,    21161, 0xd45a2c38, F=0x0
+0,        155,        155,        1,    21086, 0xcf0e597d, F=0x0
+0,        156,        156,        1,    21466, 0xcc4032fe, F=0x0
+0,        157,        157,        1,    21677, 0x755ccb7b, F=0x0
+0,        158,        158,        1,    21589, 0x5d74fb47, F=0x0
+0,        159,        159,        1,    21662, 0x0c459189, F=0x0
+0,        160,        160,        1,    21995, 0x43d46eb3, F=0x0
+0,        161,        161,        1,    22213, 0x68455c92, F=0x0
+0,        162,        162,        1,    22483, 0xdc83c15a, F=0x0
+0,        163,        163,        1,    22498, 0xfae1251e, F=0x0
+0,        164,        164,        1,    22468, 0x7944b1d4, F=0x0
+0,        165,        165,        1,    22869, 0xff8c10ac, F=0x0
+0,        166,        166,        1,    22754, 0xd8183b84, F=0x0
+0,        167,        167,        1,    23173, 0x6c3c48d5, F=0x0
+0,        168,        168,        1,    23281, 0xc32ca477, F=0x0
+0,        169,        169,        1,    23219, 0x4dc26193, F=0x0
+0,        170,        170,        1,    23656, 0xb85ee15f, F=0x0
+0,        171,        171,        1,    23683, 0x26ba4915, F=0x0
+0,        172,        172,        1,    23882, 0xf57285de, F=0x0
diff --git a/tests/ref/fate/gifenc-rgb4_byte b/tests/ref/fate/gifenc-rgb4_byte
index 4377312b77fc3..c99c983f87b99 100644
--- a/tests/ref/fate/gifenc-rgb4_byte
+++ b/tests/ref/fate/gifenc-rgb4_byte
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      508, 0xf04a113b, S=1,        1, 0x00010001
-0,          1,          1,        1,      213, 0x23c24d3d, S=2,        1, 0x00010001,     1024, 0xf7700427
-0,          2,          2,        1,      131, 0x56d22a39, S=2,        1, 0x00010001,     1024, 0x03730427
-0,          3,          3,        1,      384, 0xb1d8a4bd, S=2,        1, 0x00010001,     1024, 0xf7700427
-0,          4,          4,        1,      381, 0x37a3a2c9, S=2,        1, 0x00010001,     1024, 0xf3740427
-0,          5,          5,        1,      430, 0x162bb3d3, S=2,        1, 0x00010001,     1024, 0xf3740427
-0,          6,          6,        1,      518, 0x195bd738, S=2,        1, 0x00010001,     1024, 0xf3740427
-0,          7,          7,        1,      535, 0x12cde6b7, S=2,        1, 0x00010001,     1024, 0xf3740427
-0,          8,          8,        1,      438, 0xa653b946, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,          9,          9,        1,      923, 0xd2e2a35f, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         10,         10,        1,      694, 0xe1cf4a1f, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         11,         11,        1,     1194, 0xa6152c8a, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         12,         12,        1,     1291, 0x94d25581, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         13,         13,        1,     1245, 0x5b483525, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         14,         14,        1,     1330, 0xfb5351c8, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         15,         15,        1,     1276, 0x6f403914, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         16,         16,        1,     1475, 0xbf459755, S=2,        1, 0x00010001,     1024, 0x0b6b0427
-0,         17,         17,        1,     1784, 0xe9954aa7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         18,         18,        1,     1675, 0x219dfaf8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         19,         19,        1,     1509, 0xd7f5abbe, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         20,         20,        1,     1705, 0x44a01729, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         21,         21,        1,     1745, 0x31ff1f89, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         22,         22,        1,     1642, 0x55420147, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         23,         23,        1,     1718, 0x68ef1cb8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         24,         24,        1,     1900, 0xd7737a09, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         25,         25,        1,     1807, 0x4f6c5140, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         26,         26,        1,     1915, 0x976d80e6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         27,         27,        1,     2100, 0x0ae6d1ce, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         28,         28,        1,     2700, 0x7a89f104, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         29,         29,        1,     2673, 0xf6b6a71d, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         30,         30,        1,     2895, 0x9079484b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         31,         31,        1,     3257, 0x0b0cd125, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         32,         32,        1,     3179, 0x3ee2c161, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         33,         33,        1,     3296, 0x6230e506, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         34,         34,        1,     3600, 0x021775d7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         35,         35,        1,     3699, 0xfb03a043, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         36,         36,        1,     3814, 0x96a8d57e, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         37,         37,        1,     3627, 0x33a37f8f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         38,         38,        1,     2950, 0x50806197, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         39,         39,        1,     3086, 0x72068d4c, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         40,         40,        1,     3094, 0x2880861f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         41,         41,        1,     3456, 0x6d232a96, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         42,         42,        1,     4108, 0x46d75ebb, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         43,         43,        1,     4217, 0x04a258f4, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         44,         44,        1,     3613, 0x667f4ff8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         45,         45,        1,     3910, 0x8f37e73e, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         46,         46,        1,     4461, 0x5db9e0bf, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         47,         47,        1,     4593, 0x883f2f49, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         48,         48,        1,     4822, 0x03d99b73, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         49,         49,        1,     5398, 0x39f7bff4, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         50,         50,        1,     5266, 0xd5ab9630, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         51,         51,        1,     5416, 0x5876e16f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         52,         52,        1,     5519, 0x30ed05d8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         53,         53,        1,     5701, 0x5bae5af7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         54,         54,        1,     6160, 0x98364177, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         55,         55,        1,     6233, 0x52a05075, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         56,         56,        1,     5911, 0x04bfc46a, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         57,         57,        1,     5997, 0xf1e6f586, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         58,         58,        1,     5946, 0xe6f3f055, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         59,         59,        1,     6468, 0xc8a3cf61, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         60,         60,        1,     6737, 0xc27b3b79, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         61,         61,        1,     6275, 0x84d88e2b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         62,         62,        1,     6641, 0xb44b3534, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         63,         63,        1,     6378, 0x3965888b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         64,         64,        1,     6257, 0x12115750, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         65,         65,        1,     6908, 0x57137217, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         66,         66,        1,     7230, 0xbacc24ee, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         67,         67,        1,     7556, 0x1aa2a694, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         68,         68,        1,     7413, 0xbc9e7718, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         69,         69,        1,     7476, 0xb2a1aba0, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         70,         70,        1,     7596, 0x3301e56d, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         71,         71,        1,     7756, 0x8f2504f8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         72,         72,        1,     8015, 0xd4146c80, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         73,         73,        1,     8128, 0x11b2bf4c, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         74,         74,        1,     8101, 0xc627adbe, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         75,         75,        1,     7863, 0xe99f3f3b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         76,         76,        1,     7960, 0x4bc091b8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         77,         77,        1,     8238, 0x1086ea8a, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         78,         78,        1,     8321, 0x3a404791, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         79,         79,        1,     8562, 0xcbdcc01e, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         80,         80,        1,     8746, 0xec190b22, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         81,         81,        1,     8578, 0x12e7a4e8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         82,         82,        1,     8878, 0x51c05771, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         83,         83,        1,     9077, 0xe12b589b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         84,         84,        1,     9310, 0xde3bf881, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         85,         85,        1,     9394, 0x1eba46cc, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         86,         86,        1,     9161, 0x7c359911, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         87,         87,        1,     9462, 0xccda3664, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         88,         88,        1,     9650, 0x6e6292fc, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         89,         89,        1,     9701, 0x08909b95, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         90,         90,        1,     9523, 0xe61b38bb, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         91,         91,        1,     9891, 0x96b90b98, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         92,         92,        1,    10005, 0x2db84c80, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         93,         93,        1,    10038, 0x37e52a72, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         94,         94,        1,    10086, 0x135a43e4, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         95,         95,        1,    10438, 0x472c0372, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         96,         96,        1,    10583, 0xcf4c5862, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         97,         97,        1,    10581, 0xce658137, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         98,         98,        1,    10807, 0x3954dad9, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,         99,         99,        1,    11111, 0x5f8d504f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        100,        100,        1,    11194, 0x3c7e6a77, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        101,        101,        1,    11240, 0x5112a0a3, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        102,        102,        1,    11483, 0xaf10f4fa, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        103,        103,        1,    11680, 0x44a25971, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        104,        104,        1,    11785, 0x7350b5db, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        105,        105,        1,    11436, 0xe3170ad5, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        106,        106,        1,    11928, 0x13d8c885, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        107,        107,        1,    11932, 0xecb5bdf7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        108,        108,        1,    12281, 0x18bb76d5, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        109,        109,        1,    12334, 0x16147fc3, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        110,        110,        1,    12452, 0x61a8b3d7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        111,        111,        1,    12695, 0x8b703e74, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        112,        112,        1,    12668, 0x19505176, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        113,        113,        1,    12957, 0x3b839f0d, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        114,        114,        1,    13054, 0xb8a5e3db, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        115,        115,        1,    13147, 0xdf5c2e68, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        116,        116,        1,    13171, 0x15961ca2, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        117,        117,        1,    13198, 0xfd855718, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        118,        118,        1,    13211, 0x1a625e31, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        119,        119,        1,    13210, 0x246661c9, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        120,        120,        1,    13467, 0xfcaaa461, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        121,        121,        1,    13665, 0x8100dbf2, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        122,        122,        1,    13692, 0xddd1eab9, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        123,        123,        1,    13821, 0xc70e2af0, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        124,        124,        1,    13946, 0xe15d9134, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        125,        125,        1,    14063, 0xf652d232, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        126,        126,        1,    14124, 0x756ccc81, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        127,        127,        1,    14331, 0x56d64fe8, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        128,        128,        1,    14469, 0x4c3faa7f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        129,        129,        1,    14536, 0xad02a19b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        130,        130,        1,    14608, 0x0971d168, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        131,        131,        1,    14898, 0x1a6827b3, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        132,        132,        1,    14978, 0xf9709fef, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        133,        133,        1,    15142, 0x3598da63, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        134,        134,        1,    15129, 0x062fb976, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        135,        135,        1,    15243, 0x0a6a12f9, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        136,        136,        1,    15337, 0x0f9a65d6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        137,        137,        1,    15638, 0xf7bc9ef5, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        138,        138,        1,    15912, 0x2d5b26bb, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        139,        139,        1,    16041, 0xbfaf4857, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        140,        140,        1,    16228, 0xdac701f0, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        141,        141,        1,    16262, 0xcd0ae5e4, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        142,        142,        1,    16371, 0x9d4f0e73, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        143,        143,        1,    16661, 0xd37ba990, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        144,        144,        1,    16917, 0xd5b01774, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        145,        145,        1,    17149, 0x435ecdd4, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        146,        146,        1,    17172, 0x045fb234, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        147,        147,        1,    17315, 0xc5ddadab, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        148,        148,        1,    17397, 0xff8e15b6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        149,        149,        1,    17431, 0x6832f8c0, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        150,        150,        1,    17576, 0x5c2a5445, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        151,        151,        1,    17764, 0x609f8c3b, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        152,        152,        1,    17826, 0x538c8532, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        153,        153,        1,    17918, 0x84fc9a95, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        154,        154,        1,    17823, 0x788fbada, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        155,        155,        1,    18142, 0x56881e47, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        156,        156,        1,    18257, 0xa35b86cf, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        157,        157,        1,    18337, 0x82ddbc21, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        158,        158,        1,    18293, 0xf0d838d6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        159,        159,        1,    18418, 0x7ed8bba6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        160,        160,        1,    18607, 0xccea47f6, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        161,        161,        1,    18916, 0x880ebd63, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        162,        162,        1,    19073, 0x055f02e3, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        163,        163,        1,    19168, 0xcc2c02d7, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        164,        164,        1,    19210, 0xa538ffc1, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        165,        165,        1,    19398, 0x4777644d, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        166,        166,        1,    19480, 0xcb2aa0fa, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        167,        167,        1,    19659, 0xe3c1122d, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        168,        168,        1,    19672, 0x1d1e193f, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        169,        169,        1,    19936, 0xcd036346, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        170,        170,        1,    19975, 0x96529b21, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        171,        171,        1,    20021, 0xcdaf8bb5, S=2,        1, 0x00010001,     1024, 0xecb30526
-0,        172,        172,        1,    20060, 0x1cea7784, S=2,        1, 0x00010001,     1024, 0xecb30526
+0,          0,          0,        1,     1297, 0x5618fe71
+0,          1,          1,        1,      221, 0x230c4e66, F=0x0
+0,          2,          2,        1,      139, 0xf87a2b65, F=0x0
+0,          3,          3,        1,      392, 0x7794a5e6, F=0x0
+0,          4,          4,        1,      389, 0xf856a3f1, F=0x0
+0,          5,          5,        1,      438, 0x0f95b4fb, F=0x0
+0,          6,          6,        1,      526, 0x7885d860, F=0x0
+0,          7,          7,        1,      543, 0x859fe7df, F=0x0
+0,          8,          8,        1,      446, 0xb34dba74, F=0x0
+0,          9,          9,        1,      931, 0x1c2fa48d, F=0x0
+0,         10,         10,        1,      702, 0x1ce74b4d, F=0x0
+0,         11,         11,        1,     1202, 0x2f232db8, F=0x0
+0,         12,         12,        1,     1299, 0x904e56af, F=0x0
+0,         13,         13,        1,     1253, 0x20803653, F=0x0
+0,         14,         14,        1,     1338, 0x24e052f6, F=0x0
+0,         15,         15,        1,     1284, 0x590a3a42, F=0x0
+0,         16,         16,        1,     1483, 0x93e09883, F=0x0
+0,         17,         17,        1,     1792, 0x70984bdf, F=0x0
+0,         18,         18,        1,     1683, 0x23b9fc30, F=0x0
+0,         19,         19,        1,     1517, 0x0fc1acf6, F=0x0
+0,         20,         20,        1,     1713, 0x6b4c1861, F=0x0
+0,         21,         21,        1,     1753, 0x896b20c1, F=0x0
+0,         22,         22,        1,     1650, 0x2f26027f, F=0x0
+0,         23,         23,        1,     1726, 0x9f731df0, F=0x0
+0,         24,         24,        1,     1908, 0xebd67b41, F=0x0
+0,         25,         25,        1,     1815, 0xf2685278, F=0x0
+0,         26,         26,        1,     1923, 0xbe18821e, F=0x0
+0,         27,         27,        1,     2108, 0x1318d306, F=0x0
+0,         28,         28,        1,     2708, 0x5e28f23c, F=0x0
+0,         29,         29,        1,     2681, 0xb96da855, F=0x0
+0,         30,         30,        1,     2903, 0x61cf4983, F=0x0
+0,         31,         31,        1,     3265, 0x95a1d25d, F=0x0
+0,         32,         32,        1,     3187, 0x6a67c299, F=0x0
+0,         33,         33,        1,     3304, 0x1c5ce63e, F=0x0
+0,         34,         34,        1,     3608, 0x2ed2770f, F=0x0
+0,         35,         35,        1,     3707, 0xa075a17b, F=0x0
+0,         36,         36,        1,     3822, 0xc842d6b6, F=0x0
+0,         37,         37,        1,     3635, 0x814680c7, F=0x0
+0,         38,         38,        1,     2958, 0x64de62cf, F=0x0
+0,         39,         39,        1,     3094, 0x2c338e84, F=0x0
+0,         40,         40,        1,     3102, 0xec5e8757, F=0x0
+0,         41,         41,        1,     3464, 0xea4f2bce, F=0x0
+0,         42,         42,        1,     4116, 0xded05ff3, F=0x0
+0,         43,         43,        1,     4225, 0x21825a2c, F=0x0
+0,         44,         44,        1,     3621, 0xa3125130, F=0x0
+0,         45,         45,        1,     3918, 0x35e0e876, F=0x0
+0,         46,         46,        1,     4469, 0xa408e1f7, F=0x0
+0,         47,         47,        1,     4601, 0x6f7d3081, F=0x0
+0,         48,         48,        1,     4830, 0x023e9cab, F=0x0
+0,         49,         49,        1,     5406, 0xf67ac12c, F=0x0
+0,         50,         50,        1,     5274, 0xf14e9768, F=0x0
+0,         51,         51,        1,     5424, 0x2af8e2a7, F=0x0
+0,         52,         52,        1,     5527, 0x80f70710, F=0x0
+0,         53,         53,        1,     5709, 0x89975c2f, F=0x0
+0,         54,         54,        1,     6168, 0xf5a542af, F=0x0
+0,         55,         55,        1,     6241, 0x091651ad, F=0x0
+0,         56,         56,        1,     5919, 0x32a7c5a2, F=0x0
+0,         57,         57,        1,     6005, 0x88adf6be, F=0x0
+0,         58,         58,        1,     5954, 0x3f92f18d, F=0x0
+0,         59,         59,        1,     6476, 0x9d90d099, F=0x0
+0,         60,         60,        1,     6745, 0xdf4f3cb1, F=0x0
+0,         61,         61,        1,     6283, 0x6e7e8f63, F=0x0
+0,         62,         62,        1,     6649, 0x5c1f366c, F=0x0
+0,         63,         63,        1,     6386, 0xa09389c3, F=0x0
+0,         64,         64,        1,     6265, 0xe5b85888, F=0x0
+0,         65,         65,        1,     6916, 0x445e734f, F=0x0
+0,         66,         66,        1,     7238, 0x30a52626, F=0x0
+0,         67,         67,        1,     7564, 0x1ddaa7cc, F=0x0
+0,         68,         68,        1,     7421, 0x118e7850, F=0x0
+0,         69,         69,        1,     7484, 0x5459acd8, F=0x0
+0,         70,         70,        1,     7604, 0x66f9e6a5, F=0x0
+0,         71,         71,        1,     7764, 0x862c0630, F=0x0
+0,         72,         72,        1,     8023, 0x06e16db8, F=0x0
+0,         73,         73,        1,     8136, 0xce28c084, F=0x0
+0,         74,         74,        1,     8109, 0x61c4aef6, F=0x0
+0,         75,         75,        1,     7871, 0x631d4073, F=0x0
+0,         76,         76,        1,     7968, 0x3b7692f0, F=0x0
+0,         77,         77,        1,     8246, 0x531bebc2, F=0x0
+0,         78,         78,        1,     8329, 0xe1fd48c9, F=0x0
+0,         79,         79,        1,     8570, 0x996fc156, F=0x0
+0,         80,         80,        1,     8754, 0x99fb0c5a, F=0x0
+0,         81,         81,        1,     8586, 0xf3eba620, F=0x0
+0,         82,         82,        1,     8886, 0xa08258a9, F=0x0
+0,         83,         83,        1,     9085, 0x229359d3, F=0x0
+0,         84,         84,        1,     9318, 0x3baaf9b9, F=0x0
+0,         85,         85,        1,     9402, 0xe27a4804, F=0x0
+0,         86,         86,        1,     9169, 0x23fd9a49, F=0x0
+0,         87,         87,        1,     9470, 0xe389379c, F=0x0
+0,         88,         88,        1,     9658, 0x6a409434, F=0x0
+0,         89,         89,        1,     9709, 0x42969ccd, F=0x0
+0,         90,         90,        1,     9531, 0x473139f3, F=0x0
+0,         91,         91,        1,     9899, 0xb85e0cd0, F=0x0
+0,         92,         92,        1,    10013, 0xda4d4db8, F=0x0
+0,         93,         93,        1,    10046, 0x0cc12baa, F=0x0
+0,         94,         94,        1,    10094, 0x22b6451c, F=0x0
+0,         95,         95,        1,    10446, 0x03a604aa, F=0x0
+0,         96,         96,        1,    10591, 0x3c8d599a, F=0x0
+0,         97,         97,        1,    10589, 0x3936826f, F=0x0
+0,         98,         98,        1,    10815, 0xb795dc11, F=0x0
+0,         99,         99,        1,    11119, 0x506c5187, F=0x0
+0,        100,        100,        1,    11202, 0x92856baf, F=0x0
+0,        101,        101,        1,    11248, 0xdf29a1db, F=0x0
+0,        102,        102,        1,    11491, 0x656df632, F=0x0
+0,        103,        103,        1,    11688, 0xeb175aa9, F=0x0
+0,        104,        104,        1,    11793, 0x99ccb713, F=0x0
+0,        105,        105,        1,    11444, 0x602c0c0d, F=0x0
+0,        106,        106,        1,    11936, 0xe89cc9bd, F=0x0
+0,        107,        107,        1,    11940, 0xc668bf2f, F=0x0
+0,        108,        108,        1,    12289, 0x9bd5780d, F=0x0
+0,        109,        109,        1,    12342, 0xd9c680fb, F=0x0
+0,        110,        110,        1,    12460, 0xb539b50f, F=0x0
+0,        111,        111,        1,    12703, 0x07473fac, F=0x0
+0,        112,        112,        1,    12676, 0x743052ae, F=0x0
+0,        113,        113,        1,    12965, 0xf6aaa045, F=0x0
+0,        114,        114,        1,    13062, 0xea13e513, F=0x0
+0,        115,        115,        1,    13155, 0x82312fa0, F=0x0
+0,        116,        116,        1,    13179, 0xd59c1dda, F=0x0
+0,        117,        117,        1,    13206, 0xde825850, F=0x0
+0,        118,        118,        1,    13219, 0x0b375f69, F=0x0
+0,        119,        119,        1,    13218, 0x14036301, F=0x0
+0,        120,        120,        1,    13475, 0x259da599, F=0x0
+0,        121,        121,        1,    13673, 0x9b43dd2a, F=0x0
+0,        122,        122,        1,    13700, 0x190bebf1, F=0x0
+0,        123,        123,        1,    13829, 0x9f802c28, F=0x0
+0,        124,        124,        1,    13954, 0x5236926c, F=0x0
+0,        125,        125,        1,    14071, 0xf5c3d36a, F=0x0
+0,        126,        126,        1,    14132, 0xbf35cdb9, F=0x0
+0,        127,        127,        1,    14339, 0x9cf65120, F=0x0
+0,        128,        128,        1,    14477, 0x3a9eabb7, F=0x0
+0,        129,        129,        1,    14544, 0xed09a2d3, F=0x0
+0,        130,        130,        1,    14616, 0xa138d2a0, F=0x0
+0,        131,        131,        1,    14906, 0x13bd28eb, F=0x0
+0,        132,        132,        1,    14986, 0x5454a127, F=0x0
+0,        133,        133,        1,    15150, 0x585cdb9b, F=0x0
+0,        134,        134,        1,    15137, 0x191bbaae, F=0x0
+0,        135,        135,        1,    15251, 0xa8461431, F=0x0
+0,        136,        136,        1,    15345, 0x2015670e, F=0x0
+0,        137,        137,        1,    15646, 0x772da02d, F=0x0
+0,        138,        138,        1,    15920, 0xfabc27f3, F=0x0
+0,        139,        139,        1,    16049, 0x2a66498f, F=0x0
+0,        140,        140,        1,    16236, 0x29750328, F=0x0
+0,        141,        141,        1,    16270, 0x4528e71c, F=0x0
+0,        142,        142,        1,    16379, 0x9a450fab, F=0x0
+0,        143,        143,        1,    16669, 0x31ffaac8, F=0x0
+0,        144,        144,        1,    16925, 0x6c4318ac, F=0x0
+0,        145,        145,        1,    17157, 0xf4b1cf0c, F=0x0
+0,        146,        146,        1,    17180, 0xd1bab36c, F=0x0
+0,        147,        147,        1,    17323, 0x419eaee3, F=0x0
+0,        148,        148,        1,    17405, 0xdf3f16ee, F=0x0
+0,        149,        149,        1,    17439, 0x7153f9f8, F=0x0
+0,        150,        150,        1,    17584, 0x1612557d, F=0x0
+0,        151,        151,        1,    17772, 0xffa78d73, F=0x0
+0,        152,        152,        1,    17834, 0x3e33866a, F=0x0
+0,        153,        153,        1,    17926, 0xdfc39bcd, F=0x0
+0,        154,        154,        1,    17831, 0x5f8ebc12, F=0x0
+0,        155,        155,        1,    18150, 0xc25e1f7f, F=0x0
+0,        156,        156,        1,    18265, 0x9b688807, F=0x0
+0,        157,        157,        1,    18345, 0xdc6abd59, F=0x0
+0,        158,        158,        1,    18301, 0x14d43a0e, F=0x0
+0,        159,        159,        1,    18426, 0x3b2cbcde, F=0x0
+0,        160,        160,        1,    18615, 0x6fa5492e, F=0x0
+0,        161,        161,        1,    18924, 0xa370be9b, F=0x0
+0,        162,        162,        1,    19081, 0xe019041b, F=0x0
+0,        163,        163,        1,    19176, 0x1acc040f, F=0x0
+0,        164,        164,        1,    19218, 0x27080108, F=0x0
+0,        165,        165,        1,    19406, 0xae676585, F=0x0
+0,        166,        166,        1,    19488, 0x9619a232, F=0x0
+0,        167,        167,        1,    19667, 0x88e71365, F=0x0
+0,        168,        168,        1,    19680, 0xd20d1a77, F=0x0
+0,        169,        169,        1,    19944, 0xc3d0647e, F=0x0
+0,        170,        170,        1,    19983, 0xbca79c59, F=0x0
+0,        171,        171,        1,    20029, 0x2c238ced, F=0x0
+0,        172,        172,        1,    20068, 0xaad778bc, F=0x0
diff --git a/tests/ref/fate/gifenc-rgb8 b/tests/ref/fate/gifenc-rgb8
index 40e7a39b60e22..c5ff394d993ce 100644
--- a/tests/ref/fate/gifenc-rgb8
+++ b/tests/ref/fate/gifenc-rgb8
@@ -3,176 +3,176 @@
 #codec_id 0: gif
 #dimensions 0: 217x217
 #sar 0: 0/1
-0,          0,          0,        1,      552, 0x47602c6c, S=1,        1, 0x00010001
-0,          1,          1,        1,      297, 0x49dd8847, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          2,          2,        1,      438, 0x4776d352, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          3,          3,        1,      450, 0x2254d187, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          4,          4,        1,      547, 0xe16104bc, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          5,          5,        1,      614, 0x0fdc2027, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          6,          6,        1,      642, 0xa0af1edf, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          7,          7,        1,      660, 0xd0763931, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          8,          8,        1,      821, 0xc38f7fac, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,          9,          9,        1,     1157, 0x4c112ecd, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         10,         10,        1,      179, 0x0690541c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         11,         11,        1,     1333, 0x216f70a7, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         12,         12,        1,     1638, 0x901c093d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         13,         13,        1,     1531, 0xc9bae5ff, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         14,         14,        1,     1720, 0xce854743, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         15,         15,        1,     1910, 0x2690866d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         16,         16,        1,     2124, 0xa586dad0, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         17,         17,        1,     2248, 0x9ddc2a88, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         18,         18,        1,     2311, 0xd64235af, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         19,         19,        1,     2408, 0xe2a66cc9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         20,         20,        1,     2601, 0xeab6c267, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         21,         21,        1,     2687, 0xfe1d0311, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         22,         22,        1,     2784, 0xca600dee, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         23,         23,        1,     2884, 0xc7134b99, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         24,         24,        1,     2982, 0x0b1e7825, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         25,         25,        1,     3101, 0x3e029e0e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         26,         26,        1,     3253, 0x846af678, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         27,         27,        1,     3329, 0x29a81b71, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         28,         28,        1,     3572, 0xa3e08a52, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         29,         29,        1,     3807, 0x18e1fed2, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         30,         30,        1,     2750, 0xff6e1f9e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         31,         31,        1,     4031, 0x6d4f7329, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         32,         32,        1,     3025, 0xb43c9e94, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         33,         33,        1,     4295, 0xc1850a80, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         34,         34,        1,     2044, 0x0440c072, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         35,         35,        1,     3212, 0xe91af08f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         36,         36,        1,     2292, 0x6765633e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         37,         37,        1,     3633, 0xac779aa3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         38,         38,        1,     3552, 0xed2c75b2, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         39,         39,        1,     3690, 0x2020dd0d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         40,         40,        1,     1559, 0x596ef330, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         41,         41,        1,      954, 0xac12c9c5, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         42,         42,        1,      273, 0x138c7831, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         43,         43,        1,      930, 0xf1c3ae3f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         44,         44,        1,      271, 0x921a80af, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         45,         45,        1,      196, 0xa5de5322, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         46,         46,        1,     4299, 0x5bac0d86, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         47,         47,        1,     4895, 0xc43639a6, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         48,         48,        1,     4928, 0xf17d13e8, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         49,         49,        1,     4941, 0x71915520, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         50,         50,        1,     4154, 0xc860b8a6, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         51,         51,        1,     4678, 0x2651c339, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         52,         52,        1,     4741, 0xffd6bb45, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         53,         53,        1,     4982, 0x132c5977, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         54,         54,        1,     5179, 0x97aac3a1, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         55,         55,        1,     5046, 0x836a80cd, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         56,         56,        1,     5140, 0xa725c1e7, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         57,         57,        1,     4301, 0x0203f239, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         58,         58,        1,     5079, 0xb2e7a2de, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         59,         59,        1,     5284, 0xb757dfe1, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         60,         60,        1,     5426, 0xf9f11e57, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         61,         61,        1,     4645, 0xf0f289e1, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         62,         62,        1,     5263, 0x8617d7e9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         63,         63,        1,     5221, 0x26e3ca43, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         64,         64,        1,     5217, 0x90989cfb, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         65,         65,        1,     5395, 0xe29a01cb, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         66,         66,        1,     5220, 0xe2dee355, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         67,         67,        1,     5704, 0xcfbcd55e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         68,         68,        1,     5636, 0x7fc2a1e5, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         69,         69,        1,     5818, 0x6090ebbd, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         70,         70,        1,     5763, 0xc110c791, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         71,         71,        1,     6116, 0xb4ee8e30, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         72,         72,        1,     6069, 0x21b263db, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         73,         73,        1,     5796, 0x2514df52, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         74,         74,        1,     5999, 0x1c3c3701, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         75,         75,        1,     6220, 0x8340b150, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         76,         76,        1,     6374, 0x00d8eaa5, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         77,         77,        1,     6465, 0x74c4778a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         78,         78,        1,     7019, 0xdb1a28a3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         79,         79,        1,     7255, 0x1e19b76e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         80,         80,        1,     8197, 0x26bc6a79, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         81,         81,        1,     8358, 0x118781e0, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         82,         82,        1,     7708, 0xfc0c963d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         83,         83,        1,     7412, 0xdcc311ee, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         84,         84,        1,     7541, 0x4d2819c1, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         85,         85,        1,     7948, 0xf12eca3d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         86,         86,        1,     8408, 0x43add468, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         87,         87,        1,     8056, 0x2d162377, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         88,         88,        1,     7401, 0x26ebb649, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         89,         89,        1,     7494, 0x35fcf9ae, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         90,         90,        1,     7806, 0x4238723d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         91,         91,        1,     7768, 0xb01e795a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         92,         92,        1,     7749, 0x6ab39c12, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         93,         93,        1,     8047, 0x0e5f24aa, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         94,         94,        1,     7618, 0xd787340f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         95,         95,        1,     7979, 0x0824c4df, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         96,         96,        1,    12062, 0xc46d9d92, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         97,         97,        1,    12317, 0x1314dc0c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         98,         98,        1,    12217, 0x78c2ed30, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,         99,         99,        1,    11227, 0x2a578eb9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        100,        100,        1,    11108, 0x4eaa068c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        101,        101,        1,    11366, 0x48f8993f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        102,        102,        1,    11896, 0x32414841, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        103,        103,        1,    11479, 0xeaa38225, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        104,        104,        1,    13395, 0xaa9d4c72, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        105,        105,        1,    12913, 0x28854353, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        106,        106,        1,    13864, 0x663df630, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        107,        107,        1,    13551, 0xf7ba7be7, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        108,        108,        1,    14041, 0x2dc071b9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        109,        109,        1,    14144, 0x33a03d1d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        110,        110,        1,    14277, 0x6bda5935, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        111,        111,        1,    14424, 0xa696efd8, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        112,        112,        1,    14689, 0x8e3ad12c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        113,        113,        1,    14598, 0x544668b4, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        114,        114,        1,    15213, 0x60009558, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        115,        115,        1,    15425, 0x86e5adf4, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        116,        116,        1,    15595, 0x878d09b9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        117,        117,        1,    15598, 0x10daabc4, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        118,        118,        1,    15863, 0x2462016c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        119,        119,        1,    15717, 0xe05041c4, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        120,        120,        1,    16078, 0x7c8f3a8c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        121,        121,        1,    16225, 0x9771a52e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        122,        122,        1,    16135, 0x2dfc1692, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        123,        123,        1,    16661, 0x09c96d7e, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        124,        124,        1,    16619, 0xc4735b56, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        125,        125,        1,    16829, 0x589dc13f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        126,        126,        1,    16944, 0x997cd18f, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        127,        127,        1,    17119, 0x6c396b60, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        128,        128,        1,    17150, 0x8e603d31, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        129,        129,        1,    17321, 0x0bbcee5a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        130,        130,        1,    17395, 0x99f0c974, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        131,        131,        1,    17666, 0x37184223, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        132,        132,        1,    17730, 0xa0d385b3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        133,        133,        1,    17934, 0xb22cc97d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        134,        134,        1,    17944, 0x0cd309c6, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        135,        135,        1,    18238, 0x6b7e3237, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        136,        136,        1,    18391, 0x4df3c48a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        137,        137,        1,    18543, 0x90a2f238, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        138,        138,        1,    18939, 0xc57dda5b, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        139,        139,        1,    19145, 0x1267294a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        140,        140,        1,    19120, 0xeac6a9c3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        141,        141,        1,    19130, 0x31f3edbc, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        142,        142,        1,    19494, 0x3259a2f3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        143,        143,        1,    19534, 0xda22a752, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        144,        144,        1,    19747, 0x8805c379, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        145,        145,        1,    20114, 0xaaf96864, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        146,        146,        1,    20257, 0x7223da26, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        147,        147,        1,    20370, 0x08ef382a, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        148,        148,        1,    20292, 0x4b47f207, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        149,        149,        1,    20491, 0xeedd6d1c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        150,        150,        1,    20647, 0xb0d1dd45, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        151,        151,        1,    20666, 0x382cc8a4, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        152,        152,        1,    21007, 0x398f4f7d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        153,        153,        1,    21058, 0xd6616a9d, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        154,        154,        1,    21153, 0x988749db, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        155,        155,        1,    21078, 0x1b328059, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        156,        156,        1,    21458, 0x6348529c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        157,        157,        1,    21669, 0xcf63e2de, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        158,        158,        1,    21581, 0x1fc021af, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        159,        159,        1,    21654, 0x899dab18, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        160,        160,        1,    21987, 0x634086fe, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        161,        161,        1,    22205, 0x617a7335, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        162,        162,        1,    22475, 0x9fa2e01c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        163,        163,        1,    22490, 0x7dc5376c, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        164,        164,        1,    22460, 0x33e6bbfe, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        165,        165,        1,    22861, 0x18993510, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        166,        166,        1,    22746, 0xdff85615, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        167,        167,        1,    23165, 0xf0ac66a3, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        168,        168,        1,    23273, 0x13869ad9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        169,        169,        1,    23211, 0xd30b6205, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        170,        170,        1,    23648, 0xa0cef01b, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        171,        171,        1,    23675, 0x760460b9, S=2,        1, 0x00010001,     1024, 0xcfc8799f
-0,        172,        172,        1,    23874, 0xacf998c5, S=2,        1, 0x00010001,     1024, 0xcfc8799f
+0,          0,          0,        1,     1341, 0xaa85adb1
+0,          1,          1,        1,      305, 0xa970896f, F=0x0
+0,          2,          2,        1,      446, 0x4a20d47a, F=0x0
+0,          3,          3,        1,      458, 0x32ded2af, F=0x0
+0,          4,          4,        1,      555, 0x622205e4, F=0x0
+0,          5,          5,        1,      622, 0xde06214f, F=0x0
+0,          6,          6,        1,      650, 0x8f482007, F=0x0
+0,          7,          7,        1,      668, 0xd3df3a59, F=0x0
+0,          8,          8,        1,      829, 0x812f80d4, F=0x0
+0,          9,          9,        1,     1165, 0x8e402ff5, F=0x0
+0,         10,         10,        1,      187, 0xdda45544, F=0x0
+0,         11,         11,        1,     1341, 0x2f2d71cf, F=0x0
+0,         12,         12,        1,     1646, 0xfe910a65, F=0x0
+0,         13,         13,        1,     1539, 0xbc77e727, F=0x0
+0,         14,         14,        1,     1728, 0x9bd9486b, F=0x0
+0,         15,         15,        1,     1918, 0xcf948795, F=0x0
+0,         16,         16,        1,     2132, 0x4618dbf8, F=0x0
+0,         17,         17,        1,     2256, 0xcdce2bb0, F=0x0
+0,         18,         18,        1,     2319, 0x4f1b36d7, F=0x0
+0,         19,         19,        1,     2416, 0xcba76df1, F=0x0
+0,         20,         20,        1,     2609, 0xb2eec38f, F=0x0
+0,         21,         21,        1,     2695, 0x29d40439, F=0x0
+0,         22,         22,        1,     2792, 0x663f0f16, F=0x0
+0,         23,         23,        1,     2892, 0xd6924cc1, F=0x0
+0,         24,         24,        1,     2990, 0x8bed794d, F=0x0
+0,         25,         25,        1,     3109, 0x48789f36, F=0x0
+0,         26,         26,        1,     3261, 0x3eaff7a0, F=0x0
+0,         27,         27,        1,     3337, 0x3bcd1c99, F=0x0
+0,         28,         28,        1,     3580, 0xcf0c8b7a, F=0x0
+0,         29,         29,        1,     3815, 0x53d40009, F=0x0
+0,         30,         30,        1,     2758, 0x73fd20c6, F=0x0
+0,         31,         31,        1,     4039, 0xab517451, F=0x0
+0,         32,         32,        1,     3033, 0x66d29fbc, F=0x0
+0,         33,         33,        1,     4303, 0x30e50ba8, F=0x0
+0,         34,         34,        1,     2052, 0x4843c19a, F=0x0
+0,         35,         35,        1,     3220, 0x73f7f1b7, F=0x0
+0,         36,         36,        1,     2300, 0xca376466, F=0x0
+0,         37,         37,        1,     3641, 0x1e3a9bcb, F=0x0
+0,         38,         38,        1,     3560, 0x014776da, F=0x0
+0,         39,         39,        1,     3698, 0xd3bcde35, F=0x0
+0,         40,         40,        1,     1567, 0x6c8bf458, F=0x0
+0,         41,         41,        1,      962, 0x0389caed, F=0x0
+0,         42,         42,        1,      281, 0x575f7959, F=0x0
+0,         43,         43,        1,      938, 0x2d7aaf67, F=0x0
+0,         44,         44,        1,      279, 0xd39d81d7, F=0x0
+0,         45,         45,        1,      204, 0x90a9544a, F=0x0
+0,         46,         46,        1,     4307, 0xcf9d0eae, F=0x0
+0,         47,         47,        1,     4903, 0xe9743ace, F=0x0
+0,         48,         48,        1,     4936, 0x3cf21510, F=0x0
+0,         49,         49,        1,     4949, 0xcbff5648, F=0x0
+0,         50,         50,        1,     4162, 0x94a9b9ce, F=0x0
+0,         51,         51,        1,     4686, 0x5098c461, F=0x0
+0,         52,         52,        1,     4749, 0x7304bc6d, F=0x0
+0,         53,         53,        1,     4990, 0x9d025a9f, F=0x0
+0,         54,         54,        1,     5187, 0x0566c4c9, F=0x0
+0,         55,         55,        1,     5054, 0x574f81f5, F=0x0
+0,         56,         56,        1,     5148, 0xe7bac30f, F=0x0
+0,         57,         57,        1,     4309, 0x7844f361, F=0x0
+0,         58,         58,        1,     5087, 0xacf4a406, F=0x0
+0,         59,         59,        1,     5292, 0x9e7be109, F=0x0
+0,         60,         60,        1,     5434, 0x85541f7f, F=0x0
+0,         61,         61,        1,     4653, 0xf5118b09, F=0x0
+0,         62,         62,        1,     5271, 0x54f3d911, F=0x0
+0,         63,         63,        1,     5229, 0xc520cb6b, F=0x0
+0,         64,         64,        1,     5225, 0x2a449e23, F=0x0
+0,         65,         65,        1,     5403, 0x4a2502f3, F=0x0
+0,         66,         66,        1,     5228, 0x8002e47d, F=0x0
+0,         67,         67,        1,     5712, 0x9c9ed686, F=0x0
+0,         68,         68,        1,     5644, 0xfdf5a30d, F=0x0
+0,         69,         69,        1,     5826, 0xb142ece5, F=0x0
+0,         70,         70,        1,     5771, 0xd22ac8b9, F=0x0
+0,         71,         71,        1,     6124, 0x5e4e8f58, F=0x0
+0,         72,         72,        1,     6077, 0x94ab6503, F=0x0
+0,         73,         73,        1,     5804, 0x5c56e07a, F=0x0
+0,         74,         74,        1,     6007, 0x3e453829, F=0x0
+0,         75,         75,        1,     6228, 0xa4e0b278, F=0x0
+0,         76,         76,        1,     6382, 0xd488ebcd, F=0x0
+0,         77,         77,        1,     6473, 0xb1bb78b2, F=0x0
+0,         78,         78,        1,     7027, 0x98ce29cb, F=0x0
+0,         79,         79,        1,     7263, 0xecadb896, F=0x0
+0,         80,         80,        1,     8205, 0x36cb6ba1, F=0x0
+0,         81,         81,        1,     8366, 0xdbbe8308, F=0x0
+0,         82,         82,        1,     7716, 0xd6959765, F=0x0
+0,         83,         83,        1,     7420, 0x60fd1316, F=0x0
+0,         84,         84,        1,     7549, 0x668a1ae9, F=0x0
+0,         85,         85,        1,     7956, 0xe146cb65, F=0x0
+0,         86,         86,        1,     8416, 0x47c3d590, F=0x0
+0,         87,         87,        1,     8064, 0x9a0e249f, F=0x0
+0,         88,         88,        1,     7409, 0x9e5eb771, F=0x0
+0,         89,         89,        1,     7502, 0x1906fad6, F=0x0
+0,         90,         90,        1,     7814, 0x8e117365, F=0x0
+0,         91,         91,        1,     7776, 0xd0077a82, F=0x0
+0,         92,         92,        1,     7757, 0x74a49d3a, F=0x0
+0,         93,         93,        1,     8055, 0x70ef25d2, F=0x0
+0,         94,         94,        1,     7626, 0x4a003537, F=0x0
+0,         95,         95,        1,     7987, 0x1c14c607, F=0x0
+0,         96,         96,        1,    12070, 0x4a729eba, F=0x0
+0,         97,         97,        1,    12325, 0xbff1dd34, F=0x0
+0,         98,         98,        1,    12225, 0xb1ffee58, F=0x0
+0,         99,         99,        1,    11235, 0xea998fe1, F=0x0
+0,        100,        100,        1,    11116, 0x855407b4, F=0x0
+0,        101,        101,        1,    11374, 0xaa019a67, F=0x0
+0,        102,        102,        1,    11904, 0xf8384969, F=0x0
+0,        103,        103,        1,    11487, 0xce63834d, F=0x0
+0,        104,        104,        1,    13403, 0x36444d9a, F=0x0
+0,        105,        105,        1,    12921, 0x86af447b, F=0x0
+0,        106,        106,        1,    13872, 0x104af758, F=0x0
+0,        107,        107,        1,    13559, 0x37d07d0f, F=0x0
+0,        108,        108,        1,    14049, 0xa47572e1, F=0x0
+0,        109,        109,        1,    14152, 0x217c3e45, F=0x0
+0,        110,        110,        1,    14285, 0xf37e5a5d, F=0x0
+0,        111,        111,        1,    14432, 0xd841f100, F=0x0
+0,        112,        112,        1,    14697, 0xf25cd254, F=0x0
+0,        113,        113,        1,    14606, 0x4f3069dc, F=0x0
+0,        114,        114,        1,    15221, 0x222f9680, F=0x0
+0,        115,        115,        1,    15433, 0x3e43af1c, F=0x0
+0,        116,        116,        1,    15603, 0x038a0ae1, F=0x0
+0,        117,        117,        1,    15606, 0x9040acec, F=0x0
+0,        118,        118,        1,    15871, 0xd63f0294, F=0x0
+0,        119,        119,        1,    15725, 0xe95d42ec, F=0x0
+0,        120,        120,        1,    16086, 0x27223bb4, F=0x0
+0,        121,        121,        1,    16233, 0xebfca656, F=0x0
+0,        122,        122,        1,    16143, 0x1a7717ba, F=0x0
+0,        123,        123,        1,    16669, 0x56926ea6, F=0x0
+0,        124,        124,        1,    16627, 0xe0ac5c7e, F=0x0
+0,        125,        125,        1,    16837, 0x67b5c267, F=0x0
+0,        126,        126,        1,    16952, 0x2d9bd2b7, F=0x0
+0,        127,        127,        1,    17127, 0xcab06c88, F=0x0
+0,        128,        128,        1,    17158, 0x10be3e59, F=0x0
+0,        129,        129,        1,    17329, 0x53d2ef82, F=0x0
+0,        130,        130,        1,    17403, 0x37a5ca9c, F=0x0
+0,        131,        131,        1,    17674, 0x0e34434b, F=0x0
+0,        132,        132,        1,    17738, 0xc1ef86db, F=0x0
+0,        133,        133,        1,    17942, 0xbf37caa5, F=0x0
+0,        134,        134,        1,    17952, 0x256e0aee, F=0x0
+0,        135,        135,        1,    18246, 0xd818335f, F=0x0
+0,        136,        136,        1,    18399, 0x6b84c5b2, F=0x0
+0,        137,        137,        1,    18551, 0x5e02f360, F=0x0
+0,        138,        138,        1,    18947, 0x5cdbdb83, F=0x0
+0,        139,        139,        1,    19153, 0x97f52a72, F=0x0
+0,        140,        140,        1,    19128, 0x537baaeb, F=0x0
+0,        141,        141,        1,    19138, 0xa629eee4, F=0x0
+0,        142,        142,        1,    19502, 0x4b8da41b, F=0x0
+0,        143,        143,        1,    19542, 0x21a5a87a, F=0x0
+0,        144,        144,        1,    19755, 0xc5d0c4a1, F=0x0
+0,        145,        145,        1,    20122, 0x913a698c, F=0x0
+0,        146,        146,        1,    20265, 0xfdbcdb4e, F=0x0
+0,        147,        147,        1,    20378, 0x173f3952, F=0x0
+0,        148,        148,        1,    20300, 0xff58f32f, F=0x0
+0,        149,        149,        1,    20499, 0x89246e44, F=0x0
+0,        150,        150,        1,    20655, 0xff78de6d, F=0x0
+0,        151,        151,        1,    20674, 0x9ccbc9cc, F=0x0
+0,        152,        152,        1,    21015, 0x289450a5, F=0x0
+0,        153,        153,        1,    21066, 0x006d6bc5, F=0x0
+0,        154,        154,        1,    21161, 0x306b4b03, F=0x0
+0,        155,        155,        1,    21086, 0x5c4f8181, F=0x0
+0,        156,        156,        1,    21466, 0x5be353c4, F=0x0
+0,        157,        157,        1,    21677, 0xbc05e406, F=0x0
+0,        158,        158,        1,    21589, 0xa69322d7, F=0x0
+0,        159,        159,        1,    21662, 0x64e7ac40, F=0x0
+0,        160,        160,        1,    21995, 0xbfa18826, F=0x0
+0,        161,        161,        1,    22213, 0xb9fa745d, F=0x0
+0,        162,        162,        1,    22483, 0x3070e144, F=0x0
+0,        163,        163,        1,    22498, 0x1feb3894, F=0x0
+0,        164,        164,        1,    22468, 0xb34dbd26, F=0x0
+0,        165,        165,        1,    22869, 0x67c63638, F=0x0
+0,        166,        166,        1,    22754, 0xaa2d573d, F=0x0
+0,        167,        167,        1,    23173, 0x9f7767cb, F=0x0
+0,        168,        168,        1,    23281, 0x3f319c01, F=0x0
+0,        169,        169,        1,    23219, 0xb706632d, F=0x0
+0,        170,        170,        1,    23656, 0x7e2ff143, F=0x0
+0,        171,        171,        1,    23683, 0x729d61e1, F=0x0
+0,        172,        172,        1,    23882, 0x8fb999ed, F=0x0
diff --git a/tests/ref/fate/h264-timecode b/tests/ref/fate/h264-timecode
new file mode 100644
index 0000000000000..b78f700c6d298
--- /dev/null
+++ b/tests/ref/fate/h264-timecode
@@ -0,0 +1,305 @@
+#tb 0: 1/30
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 352x288
+#sar 0: 128/117
+0,          0,          0,        1,   152064, 0x70684c80
+0,          1,          1,        1,   152064, 0xb5c8b300
+0,          2,          2,        1,   152064, 0x5777ac60
+0,          3,          3,        1,   152064, 0xb27646a5
+0,          4,          4,        1,   152064, 0x20bd98ec
+0,          5,          5,        1,   152064, 0xcf5ac1b0
+0,          6,          6,        1,   152064, 0x85a42952
+0,          7,          7,        1,   152064, 0xc25aa530
+0,          8,          8,        1,   152064, 0x97b14be9
+0,          9,          9,        1,   152064, 0xf67ec91a
+0,         10,         10,        1,   152064, 0x3890d6a3
+0,         11,         11,        1,   152064, 0xc52c8467
+0,         12,         12,        1,   152064, 0x30a7af36
+0,         13,         13,        1,   152064, 0x27528a98
+0,         14,         14,        1,   152064, 0x245c08c5
+0,         15,         15,        1,   152064, 0x7e0220f3
+0,         16,         16,        1,   152064, 0x4b254c89
+0,         17,         17,        1,   152064, 0x1586e3e5
+0,         18,         18,        1,   152064, 0x594dfc58
+0,         19,         19,        1,   152064, 0x85ba9c8e
+0,         20,         20,        1,   152064, 0x1e235100
+0,         21,         21,        1,   152064, 0xa02c6a72
+0,         22,         22,        1,   152064, 0xd1166fb6
+0,         23,         23,        1,   152064, 0xcc9b1546
+0,         24,         24,        1,   152064, 0x55e35a35
+0,         25,         25,        1,   152064, 0xea63e2ae
+0,         26,         26,        1,   152064, 0x936a1802
+0,         27,         27,        1,   152064, 0x354a749c
+0,         28,         28,        1,   152064, 0x5cd0f246
+0,         29,         29,        1,   152064, 0x0376e69b
+0,         30,         30,        1,   152064, 0x5af5fb61
+0,         31,         31,        1,   152064, 0x9a053ab8
+0,         32,         32,        1,   152064, 0x57cbbfcc
+0,         33,         33,        1,   152064, 0x81f19e93
+0,         34,         34,        1,   152064, 0x0812953d
+0,         35,         35,        1,   152064, 0x0ae2a166
+0,         36,         36,        1,   152064, 0x193125b8
+0,         37,         37,        1,   152064, 0xab7eca7b
+0,         38,         38,        1,   152064, 0x91ff1870
+0,         39,         39,        1,   152064, 0x8f522dde
+0,         40,         40,        1,   152064, 0x98faab46
+0,         41,         41,        1,   152064, 0xa2119231
+0,         42,         42,        1,   152064, 0xfe591321
+0,         43,         43,        1,   152064, 0x6c8a1bf5
+0,         44,         44,        1,   152064, 0x857c925c
+0,         45,         45,        1,   152064, 0xe81a77f2
+0,         46,         46,        1,   152064, 0x08234e83
+0,         47,         47,        1,   152064, 0x76cb39f6
+0,         48,         48,        1,   152064, 0x26168d25
+0,         49,         49,        1,   152064, 0x4dd3b273
+0,         50,         50,        1,   152064, 0xd6e8398e
+0,         51,         51,        1,   152064, 0x55986a57
+0,         52,         52,        1,   152064, 0x9c2768fb
+0,         53,         53,        1,   152064, 0x03517efe
+0,         54,         54,        1,   152064, 0x3a48451f
+0,         55,         55,        1,   152064, 0x1f6d6b87
+0,         56,         56,        1,   152064, 0x0917fb2a
+0,         57,         57,        1,   152064, 0x0f49e7a9
+0,         58,         58,        1,   152064, 0x3c56d4e1
+0,         59,         59,        1,   152064, 0x487cca35
+0,         60,         60,        1,   152064, 0x5c6b8b1c
+0,         61,         61,        1,   152064, 0x767d8a34
+0,         62,         62,        1,   152064, 0xcd8d692a
+0,         63,         63,        1,   152064, 0x788b3ebf
+0,         64,         64,        1,   152064, 0x4cae3852
+0,         65,         65,        1,   152064, 0x1150f0aa
+0,         66,         66,        1,   152064, 0x9d4b3366
+0,         67,         67,        1,   152064, 0xedcb8863
+0,         68,         68,        1,   152064, 0x2c09ca8c
+0,         69,         69,        1,   152064, 0x20930842
+0,         70,         70,        1,   152064, 0xd653b16f
+0,         71,         71,        1,   152064, 0x41f38d77
+0,         72,         72,        1,   152064, 0xa5f69360
+0,         73,         73,        1,   152064, 0xf0f5ce27
+0,         74,         74,        1,   152064, 0xf2a6246c
+0,         75,         75,        1,   152064, 0x7e76fabc
+0,         76,         76,        1,   152064, 0xf76e1982
+0,         77,         77,        1,   152064, 0x40c1be5a
+0,         78,         78,        1,   152064, 0x132ca50e
+0,         79,         79,        1,   152064, 0xae0c69ed
+0,         80,         80,        1,   152064, 0x5f775778
+0,         81,         81,        1,   152064, 0x62bb9790
+0,         82,         82,        1,   152064, 0x8b448e83
+0,         83,         83,        1,   152064, 0xcc35d9fe
+0,         84,         84,        1,   152064, 0x51560127
+0,         85,         85,        1,   152064, 0xb915829b
+0,         86,         86,        1,   152064, 0x3a3f2b0c
+0,         87,         87,        1,   152064, 0x4e2d2260
+0,         88,         88,        1,   152064, 0x9fdb7567
+0,         89,         89,        1,   152064, 0xe34b2f4e
+0,         90,         90,        1,   152064, 0x8650ec13
+0,         91,         91,        1,   152064, 0xdff3e299
+0,         92,         92,        1,   152064, 0x100f8f0c
+0,         93,         93,        1,   152064, 0xa9aff101
+0,         94,         94,        1,   152064, 0xa80add4c
+0,         95,         95,        1,   152064, 0xa7994880
+0,         96,         96,        1,   152064, 0xc74ecb79
+0,         97,         97,        1,   152064, 0xbada663d
+0,         98,         98,        1,   152064, 0xff7f0592
+0,         99,         99,        1,   152064, 0x44731be5
+0,        100,        100,        1,   152064, 0x1a61f9ac
+0,        101,        101,        1,   152064, 0x848ace19
+0,        102,        102,        1,   152064, 0x22858567
+0,        103,        103,        1,   152064, 0x2b3a9ba7
+0,        104,        104,        1,   152064, 0x02889774
+0,        105,        105,        1,   152064, 0x29a54516
+0,        106,        106,        1,   152064, 0x737f2833
+0,        107,        107,        1,   152064, 0x28b5a183
+0,        108,        108,        1,   152064, 0xaff9112a
+0,        109,        109,        1,   152064, 0x0a7652b5
+0,        110,        110,        1,   152064, 0x03fa3e91
+0,        111,        111,        1,   152064, 0x9deade68
+0,        112,        112,        1,   152064, 0xb9af1a27
+0,        113,        113,        1,   152064, 0xe9f07f00
+0,        114,        114,        1,   152064, 0x1b03894a
+0,        115,        115,        1,   152064, 0xf89e26c5
+0,        116,        116,        1,   152064, 0x6d6b5508
+0,        117,        117,        1,   152064, 0x735ce75d
+0,        118,        118,        1,   152064, 0x30017005
+0,        119,        119,        1,   152064, 0x606ad5ab
+0,        120,        120,        1,   152064, 0xb442ac30
+0,        121,        121,        1,   152064, 0xac321998
+0,        122,        122,        1,   152064, 0x4507990b
+0,        123,        123,        1,   152064, 0xe40f986d
+0,        124,        124,        1,   152064, 0xc9840540
+0,        125,        125,        1,   152064, 0x74cfbc82
+0,        126,        126,        1,   152064, 0x1ac9744b
+0,        127,        127,        1,   152064, 0x8ac2a889
+0,        128,        128,        1,   152064, 0x3074a1bc
+0,        129,        129,        1,   152064, 0x389ae633
+0,        130,        130,        1,   152064, 0xaadb4325
+0,        131,        131,        1,   152064, 0x7d1a91b5
+0,        132,        132,        1,   152064, 0xaa047ddc
+0,        133,        133,        1,   152064, 0xe5cafebc
+0,        134,        134,        1,   152064, 0x24314a0c
+0,        135,        135,        1,   152064, 0x530cfa1c
+0,        136,        136,        1,   152064, 0x3f973f68
+0,        137,        137,        1,   152064, 0xf51d3e20
+0,        138,        138,        1,   152064, 0x24aca84c
+0,        139,        139,        1,   152064, 0x96b411e9
+0,        140,        140,        1,   152064, 0x6d046ea3
+0,        141,        141,        1,   152064, 0x9237974f
+0,        142,        142,        1,   152064, 0x0a808964
+0,        143,        143,        1,   152064, 0x9d6ad957
+0,        144,        144,        1,   152064, 0x9d6381ea
+0,        145,        145,        1,   152064, 0xfeceab64
+0,        146,        146,        1,   152064, 0x7fa00e6f
+0,        147,        147,        1,   152064, 0x635ac444
+0,        148,        148,        1,   152064, 0xf0db3036
+0,        149,        149,        1,   152064, 0xc5ddef73
+0,        150,        150,        1,   152064, 0x7fea7516
+0,        151,        151,        1,   152064, 0x7f3f7460
+0,        152,        152,        1,   152064, 0x446dfa20
+0,        153,        153,        1,   152064, 0x5d7167c4
+0,        154,        154,        1,   152064, 0xf9da05b7
+0,        155,        155,        1,   152064, 0xc007383d
+0,        156,        156,        1,   152064, 0xbf461f08
+0,        157,        157,        1,   152064, 0xf722508f
+0,        158,        158,        1,   152064, 0x2699fa56
+0,        159,        159,        1,   152064, 0xa49ca6d8
+0,        160,        160,        1,   152064, 0x58f70dfd
+0,        161,        161,        1,   152064, 0x391383db
+0,        162,        162,        1,   152064, 0xb859f2fd
+0,        163,        163,        1,   152064, 0xbb77d0a7
+0,        164,        164,        1,   152064, 0xd4c9881d
+0,        165,        165,        1,   152064, 0xb46d7272
+0,        166,        166,        1,   152064, 0x78237e5e
+0,        167,        167,        1,   152064, 0xbcd9f633
+0,        168,        168,        1,   152064, 0x17e09080
+0,        169,        169,        1,   152064, 0x4a9bdacf
+0,        170,        170,        1,   152064, 0x600c972f
+0,        171,        171,        1,   152064, 0x858e399a
+0,        172,        172,        1,   152064, 0xf9ef200d
+0,        173,        173,        1,   152064, 0x6aec0fda
+0,        174,        174,        1,   152064, 0x4d7ba9a8
+0,        175,        175,        1,   152064, 0x0df5dbdb
+0,        176,        176,        1,   152064, 0x77d598f8
+0,        177,        177,        1,   152064, 0x7d78c129
+0,        178,        178,        1,   152064, 0xf6b79ad2
+0,        179,        179,        1,   152064, 0x2b458750
+0,        180,        180,        1,   152064, 0xdbec9727
+0,        181,        181,        1,   152064, 0xcb073a1a
+0,        182,        182,        1,   152064, 0xa95e913a
+0,        183,        183,        1,   152064, 0x5ca9da6e
+0,        184,        184,        1,   152064, 0x82e09caf
+0,        185,        185,        1,   152064, 0x319f59c5
+0,        186,        186,        1,   152064, 0x11003b19
+0,        187,        187,        1,   152064, 0xcdfc5077
+0,        188,        188,        1,   152064, 0xa56fc40d
+0,        189,        189,        1,   152064, 0x3d2425dc
+0,        190,        190,        1,   152064, 0x907f51d3
+0,        191,        191,        1,   152064, 0xc52dc2dc
+0,        192,        192,        1,   152064, 0xea800778
+0,        193,        193,        1,   152064, 0xc0b022f9
+0,        194,        194,        1,   152064, 0x106b4ea2
+0,        195,        195,        1,   152064, 0x50c6cbf2
+0,        196,        196,        1,   152064, 0x480711b5
+0,        197,        197,        1,   152064, 0x1954bca7
+0,        198,        198,        1,   152064, 0x7894a1c1
+0,        199,        199,        1,   152064, 0xaa39601a
+0,        200,        200,        1,   152064, 0x07652fa2
+0,        201,        201,        1,   152064, 0x84ac1bce
+0,        202,        202,        1,   152064, 0x89104737
+0,        203,        203,        1,   152064, 0x832bf2b0
+0,        204,        204,        1,   152064, 0x45fa87f4
+0,        205,        205,        1,   152064, 0xde5b6e82
+0,        206,        206,        1,   152064, 0x8d88f89b
+0,        207,        207,        1,   152064, 0xba6488c8
+0,        208,        208,        1,   152064, 0xd9bc3312
+0,        209,        209,        1,   152064, 0xdba30d10
+0,        210,        210,        1,   152064, 0xd208cb34
+0,        211,        211,        1,   152064, 0x0642aadc
+0,        212,        212,        1,   152064, 0xf392e67a
+0,        213,        213,        1,   152064, 0xec6041d0
+0,        214,        214,        1,   152064, 0x52463e92
+0,        215,        215,        1,   152064, 0x218174a8
+0,        216,        216,        1,   152064, 0x9408f728
+0,        217,        217,        1,   152064, 0xabd31db7
+0,        218,        218,        1,   152064, 0x3e72f003
+0,        219,        219,        1,   152064, 0x638e603b
+0,        220,        220,        1,   152064, 0xf1f896c7
+0,        221,        221,        1,   152064, 0x786554ff
+0,        222,        222,        1,   152064, 0x9bb909f5
+0,        223,        223,        1,   152064, 0x726cf59e
+0,        224,        224,        1,   152064, 0xc18c15a1
+0,        225,        225,        1,   152064, 0x45ea8f83
+0,        226,        226,        1,   152064, 0xcb88e67a
+0,        227,        227,        1,   152064, 0x18d09432
+0,        228,        228,        1,   152064, 0x99d02a0a
+0,        229,        229,        1,   152064, 0x7ddc3691
+0,        230,        230,        1,   152064, 0x47710c00
+0,        231,        231,        1,   152064, 0xe28646c7
+0,        232,        232,        1,   152064, 0xe8a2a4e5
+0,        233,        233,        1,   152064, 0xed19f345
+0,        234,        234,        1,   152064, 0xceffaf7f
+0,        235,        235,        1,   152064, 0x8d116def
+0,        236,        236,        1,   152064, 0xccb68ae8
+0,        237,        237,        1,   152064, 0x3529b3db
+0,        238,        238,        1,   152064, 0x529911b8
+0,        239,        239,        1,   152064, 0x3a676438
+0,        240,        240,        1,   152064, 0x18508f5d
+0,        241,        241,        1,   152064, 0x4577d18b
+0,        242,        242,        1,   152064, 0x420f5881
+0,        243,        243,        1,   152064, 0x60341b86
+0,        244,        244,        1,   152064, 0x2f51de6a
+0,        245,        245,        1,   152064, 0xc70bbf8d
+0,        246,        246,        1,   152064, 0xc1ff63f7
+0,        247,        247,        1,   152064, 0x2dc1662b
+0,        248,        248,        1,   152064, 0x1bbb3b70
+0,        249,        249,        1,   152064, 0x74f44ec2
+0,        250,        250,        1,   152064, 0x9b93084e
+0,        251,        251,        1,   152064, 0x1493f82d
+0,        252,        252,        1,   152064, 0x069d9869
+0,        253,        253,        1,   152064, 0xc9a4f706
+0,        254,        254,        1,   152064, 0xf80092ed
+0,        255,        255,        1,   152064, 0xdc347577
+0,        256,        256,        1,   152064, 0x1df12299
+0,        257,        257,        1,   152064, 0x40d19951
+0,        258,        258,        1,   152064, 0xfb63dbf1
+0,        259,        259,        1,   152064, 0x9153714c
+0,        260,        260,        1,   152064, 0x6cfd514c
+0,        261,        261,        1,   152064, 0xc0ef7bf3
+0,        262,        262,        1,   152064, 0x5fce6828
+0,        263,        263,        1,   152064, 0xe7d0074d
+0,        264,        264,        1,   152064, 0x9e3f7351
+0,        265,        265,        1,   152064, 0x3a0c5d56
+0,        266,        266,        1,   152064, 0xd5581f3c
+0,        267,        267,        1,   152064, 0x9a4ec0d1
+0,        268,        268,        1,   152064, 0x150b9a54
+0,        269,        269,        1,   152064, 0x950eb994
+0,        270,        270,        1,   152064, 0xda31e3bf
+0,        271,        271,        1,   152064, 0x14ff5d3c
+0,        272,        272,        1,   152064, 0xd593bafc
+0,        273,        273,        1,   152064, 0xd4cf7c58
+0,        274,        274,        1,   152064, 0x2be70997
+0,        275,        275,        1,   152064, 0xe551703b
+0,        276,        276,        1,   152064, 0x7adaf447
+0,        277,        277,        1,   152064, 0x0435ea0f
+0,        278,        278,        1,   152064, 0x87e5bba1
+0,        279,        279,        1,   152064, 0xea1fdf88
+0,        280,        280,        1,   152064, 0xaea5b4c4
+0,        281,        281,        1,   152064, 0x32f79e89
+0,        282,        282,        1,   152064, 0xcd5694bc
+0,        283,        283,        1,   152064, 0x6b12830f
+0,        284,        284,        1,   152064, 0xaf681652
+0,        285,        285,        1,   152064, 0x3b26e20b
+0,        286,        286,        1,   152064, 0x2a9eee33
+0,        287,        287,        1,   152064, 0x8d5fe982
+0,        288,        288,        1,   152064, 0xa4cb5d02
+0,        289,        289,        1,   152064, 0x867dd0b0
+0,        290,        290,        1,   152064, 0x23c885e9
+0,        291,        291,        1,   152064, 0x99fd7b2b
+0,        292,        292,        1,   152064, 0xa710e871
+0,        293,        293,        1,   152064, 0x3ecbaaeb
+0,        294,        294,        1,   152064, 0x3d1c7de2
+0,        295,        295,        1,   152064, 0x378935f3
+0,        296,        296,        1,   152064, 0xce893553
+0,        297,        297,        1,   152064, 0xa834374c
+0,        298,        298,        1,   152064, 0x665094f4
+0,        299,        299,        1,   152064, 0x3fee89c6
diff --git a/tests/ref/fate/mxf-d10-user-comments b/tests/ref/fate/mxf-d10-user-comments
new file mode 100644
index 0000000000000..e78765020cfa5
--- /dev/null
+++ b/tests/ref/fate/mxf-d10-user-comments
@@ -0,0 +1 @@
+b659c1204f8d04e2a5607af083590dca
diff --git a/tests/ref/fate/mxf-opatom-user-comments b/tests/ref/fate/mxf-opatom-user-comments
new file mode 100644
index 0000000000000..1834b9e074374
--- /dev/null
+++ b/tests/ref/fate/mxf-opatom-user-comments
@@ -0,0 +1 @@
+892cf02e44bf7d61b6d6f01e41db9375
diff --git a/tests/ref/fate/mxf-user-comments b/tests/ref/fate/mxf-user-comments
new file mode 100644
index 0000000000000..4b734a0f851f4
--- /dev/null
+++ b/tests/ref/fate/mxf-user-comments
@@ -0,0 +1 @@
+683bacb0105e5bc5bbf46aa430c644d1
diff --git a/tests/ref/fate/prores-alpha b/tests/ref/fate/prores-alpha
index f451d83ae9008..d5e3e6877d64d 100644
--- a/tests/ref/fate/prores-alpha
+++ b/tests/ref/fate/prores-alpha
@@ -3,5 +3,5 @@
 #codec_id 0: rawvideo
 #dimensions 0: 1920x1080
 #sar 0: 0/1
-0,          0,          0,        1, 16588800, 0x8e4dac48
-0,          1,          1,        1, 16588800, 0x8e4dac48
+0,          0,          0,        1, 16588800, 0xb035f658
+0,          1,          1,        1, 16588800, 0xb035f658
diff --git a/tests/ref/fate/prores-alpha_skip b/tests/ref/fate/prores-alpha_skip
index a17cef810c19e..09691310ed371 100644
--- a/tests/ref/fate/prores-alpha_skip
+++ b/tests/ref/fate/prores-alpha_skip
@@ -3,5 +3,5 @@
 #codec_id 0: rawvideo
 #dimensions 0: 1920x1080
 #sar 0: 0/1
-0,          0,          0,        1, 12441600, 0xf11685dd
-0,          1,          1,        1, 12441600, 0xf11685dd
+0,          0,          0,        1, 12441600, 0x65e009b8
+0,          1,          1,        1, 12441600, 0x65e009b8
diff --git a/tests/ref/fate/prores-metadata b/tests/ref/fate/prores-metadata
new file mode 100644
index 0000000000000..56153c3946347
--- /dev/null
+++ b/tests/ref/fate/prores-metadata
@@ -0,0 +1 @@
+3492734b4bd0dd097cd9f264457c8345
diff --git a/tests/ref/fate/prores-transparency b/tests/ref/fate/prores-transparency
index 7b3efc6335911..4e8815283461e 100644
--- a/tests/ref/fate/prores-transparency
+++ b/tests/ref/fate/prores-transparency
@@ -9,6 +9,6 @@
 #sample_rate 1: 48000
 #channel_layout 1: 3
 #channel_layout_name 1: stereo
-0,          0,          0,        1, 16588800, 0x7163b01a
+0,          0,          0,        1, 16588800, 0xcfb3d806
 1,          0,          0,     1024,     4096, 0x00000000
 1,       1024,       1024,      896,     3584, 0x00000000
diff --git a/tests/ref/fate/prores-transparency_skip b/tests/ref/fate/prores-transparency_skip
index 5c98d3e4383b2..82c180d174373 100644
--- a/tests/ref/fate/prores-transparency_skip
+++ b/tests/ref/fate/prores-transparency_skip
@@ -9,6 +9,6 @@
 #sample_rate 1: 48000
 #channel_layout 1: 3
 #channel_layout_name 1: stereo
-0,          0,          0,        1, 12441600, 0x627d1548
+0,          0,          0,        1, 12441600, 0x74f53304
 1,          0,          0,     1024,     4096, 0x00000000
 1,       1024,       1024,      896,     3584, 0x00000000
diff --git a/tests/ref/fate/qtrle-32bit b/tests/ref/fate/qtrle-32bit
index aaaf8d64701af..5e2b58dad4688 100644
--- a/tests/ref/fate/qtrle-32bit
+++ b/tests/ref/fate/qtrle-32bit
@@ -3,29 +3,29 @@
 #codec_id 0: rawvideo
 #dimensions 0: 720x480
 #sar 0: 0/1
-0,          0,          0,        1,  1036800, 0x2a90d062
-0,          1,          1,        1,  1036800, 0x6565aded
-0,          2,          2,        1,  1036800, 0xf0b587d2
-0,          3,          3,        1,  1036800, 0xf0b4e53f
-0,          4,          4,        1,  1036800, 0x5ba4b96a
-0,          5,          5,        1,  1036800, 0x501df9c1
-0,          6,          6,        1,  1036800, 0xcf45b940
-0,          7,          7,        1,  1036800, 0xa454df07
-0,          8,          8,        1,  1036800, 0xc504d152
-0,          9,          9,        1,  1036800, 0xd90ecac7
-0,         10,         10,        1,  1036800, 0xe30368df
-0,         11,         11,        1,  1036800, 0x0ca35522
-0,         12,         12,        1,  1036800, 0xe76b8d43
-0,         13,         13,        1,  1036800, 0x7c85a447
-0,         14,         14,        1,  1036800, 0x3e2d1b5f
-0,         15,         15,        1,  1036800, 0x230fa5a6
-0,         16,         16,        1,  1036800, 0x4fad025e
-0,         17,         17,        1,  1036800, 0x7d3366ae
-0,         18,         18,        1,  1036800, 0xa83720f7
-0,         19,         19,        1,  1036800, 0x5dbd13b1
-0,         20,         20,        1,  1036800, 0xd0ebd56d
-0,         21,         21,        1,  1036800, 0x4d7c67f3
-0,         22,         22,        1,  1036800, 0x226baa3f
-0,         23,         23,        1,  1036800, 0xc0e93acf
-0,         24,         24,        1,  1036800, 0x5a466c17
-0,         25,         25,        1,  1036800, 0xfdb7d2ea
+0,          0,          0,        1,  1382400, 0xfe6e6fd3
+0,          1,          1,        1,  1382400, 0x3c7809c8
+0,          2,          2,        1,  1382400, 0x06901a04
+0,          3,          3,        1,  1382400, 0xd7d7c910
+0,          4,          4,        1,  1382400, 0x017e8fda
+0,          5,          5,        1,  1382400, 0xab9ee842
+0,          6,          6,        1,  1382400, 0x08615b93
+0,          7,          7,        1,  1382400, 0x105cb681
+0,          8,          8,        1,  1382400, 0xd9dd224f
+0,          9,          9,        1,  1382400, 0x058cabbf
+0,         10,         10,        1,  1382400, 0x73424fc0
+0,         11,         11,        1,  1382400, 0x39d0a78b
+0,         12,         12,        1,  1382400, 0x956d1393
+0,         13,         13,        1,  1382400, 0xe4aff472
+0,         14,         14,        1,  1382400, 0x83ff20ce
+0,         15,         15,        1,  1382400, 0xb53383df
+0,         16,         16,        1,  1382400, 0x181e55e8
+0,         17,         17,        1,  1382400, 0x0716c90c
+0,         18,         18,        1,  1382400, 0x092aae25
+0,         19,         19,        1,  1382400, 0x0a23b4cb
+0,         20,         20,        1,  1382400, 0xe72000b0
+0,         21,         21,        1,  1382400, 0x0b8d02e8
+0,         22,         22,        1,  1382400, 0xec2a3b0d
+0,         23,         23,        1,  1382400, 0x4ddc4e19
+0,         24,         24,        1,  1382400, 0xc4656abb
+0,         25,         25,        1,  1382400, 0x60d78517
diff --git a/tests/ref/fate/source b/tests/ref/fate/source
index 4b9467aa77766..8e12582ce8d57 100644
--- a/tests/ref/fate/source
+++ b/tests/ref/fate/source
@@ -26,6 +26,7 @@ compat/avisynth/avxsynth_c.h
 compat/avisynth/windowsPorts/basicDataTypeConversions.h
 compat/avisynth/windowsPorts/windows2linux.h
 compat/cuda/dynlink_loader.h
+compat/djgpp/math.h
 compat/float/float.h
 compat/float/limits.h
 Use of av_clip() where av_clip_uintp2() could be used:
diff --git a/tests/ref/fate/sub-jacosub b/tests/ref/fate/sub-jacosub
index a30fe4a196018..5f282cdcf64e4 100644
--- a/tests/ref/fate/sub-jacosub
+++ b/tests/ref/fate/sub-jacosub
@@ -10,14 +10,14 @@ Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10
 
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-Dialogue: 0,0:00:00.12,0:00:04.36,Default,,0,0,0,,{\an5}JACOsub\N\NThis script demonstrates some of the capabilities of JACOsub.
-Dialogue: 0,0:00:04.12,0:00:14.86,Default,,0,0,0,,{\an8}Text may be positioned at the top,
-Dialogue: 0,0:00:05.12,0:00:17.46,Default,,0,0,0,,{\an5}middle,
-Dialogue: 0,0:00:06.12,0:00:20.06,Default,,0,0,0,,{\an2}or bottom of the screen.
-Dialogue: 0,0:00:08.12,0:00:27.36,Default,,0,0,0,,{\an5}{this is a comment} (And, you just saw, {another comment} timing ranges for different lines of text.
-Dialogue: 0,0:00:11.12,0:00:35.86,Default,,0,0,0,,{\an1}Within margin constraints\Nthat you set, text may be\Nleft justified,
-Dialogue: 0,0:00:13.62,0:00:42.11,Default,,0,0,0,,{\an2}{the JC is redundant - it's the default}center\Njustified,
-Dialogue: 0,0:00:14.87,0:00:45.86,Default,,0,0,0,,{\an3}and also\Nright justified.
-Dialogue: 0,0:00:22.42,0:01:12.76,Default,,0,0,0,,Text may appear in different styles\N(Normal, {\b1}Bold{\r}, {\i1}Italic{\r})
-Dialogue: 0,0:01:16.12,0:03:53.36,Default,,0,0,0,,{\an5}\N\NAt that time, you may press any key to return to the Editor.
-Dialogue: 0,0:01:16.12,0:03:53.36,Default,,0,0,0,,OK, this script will be finished when the screen goes blank.
+Dialogue: 0,0:00:00.12,0:00:04.12,Default,,0,0,0,,{\an5}JACOsub\N\NThis script demonstrates some of the capabilities of JACOsub.
+Dialogue: 0,0:00:04.12,0:00:06.62,Default,,0,0,0,,{\an8}Text may be positioned at the top,
+Dialogue: 0,0:00:05.12,0:00:07.22,Default,,0,0,0,,{\an5}middle,
+Dialogue: 0,0:00:06.12,0:00:07.82,Default,,0,0,0,,{\an2}or bottom of the screen.
+Dialogue: 0,0:00:08.12,0:00:11.12,Default,,0,0,0,,{\an5}{this is a comment} (And, you just saw, {another comment} timing ranges for different lines of text.
+Dialogue: 0,0:00:11.12,0:00:13.62,Default,,0,0,0,,{\an1}Within margin constraints\Nthat you set, text may be\Nleft justified,
+Dialogue: 0,0:00:13.62,0:00:14.87,Default,,0,0,0,,{\an2}{the JC is redundant - it's the default}center\Njustified,
+Dialogue: 0,0:00:14.87,0:00:16.12,Default,,0,0,0,,{\an3}and also\Nright justified.
+Dialogue: 0,0:00:22.42,0:00:27.92,Default,,0,0,0,,Text may appear in different styles\N(Normal, {\b1}Bold{\r}, {\i1}Italic{\r})
+Dialogue: 0,0:01:16.12,0:01:21.12,Default,,0,0,0,,{\an5}\N\NAt that time, you may press any key to return to the Editor.
+Dialogue: 0,0:01:16.12,0:01:21.12,Default,,0,0,0,,OK, this script will be finished when the screen goes blank.
diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query
index 451c7d83b9197..6c41a86e1e362 100644
--- a/tests/ref/fate/sws-pixdesc-query
+++ b/tests/ref/fate/sws-pixdesc-query
@@ -93,10 +93,14 @@ isNBPS:
   yuva420p9le
   yuva422p10be
   yuva422p10le
+  yuva422p12be
+  yuva422p12le
   yuva422p9be
   yuva422p9le
   yuva444p10be
   yuva444p10le
+  yuva444p12be
+  yuva444p12le
   yuva444p9be
   yuva444p9le
 
@@ -158,9 +162,11 @@ isBE:
   yuva420p16be
   yuva420p9be
   yuva422p10be
+  yuva422p12be
   yuva422p16be
   yuva422p9be
   yuva444p10be
+  yuva444p12be
   yuva444p16be
   yuva444p9be
 
@@ -233,6 +239,8 @@ isYUV:
   yuva422p
   yuva422p10be
   yuva422p10le
+  yuva422p12be
+  yuva422p12le
   yuva422p16be
   yuva422p16le
   yuva422p9be
@@ -240,6 +248,8 @@ isYUV:
   yuva444p
   yuva444p10be
   yuva444p10le
+  yuva444p12be
+  yuva444p12le
   yuva444p16be
   yuva444p16le
   yuva444p9be
@@ -312,6 +322,8 @@ isPlanarYUV:
   yuva422p
   yuva422p10be
   yuva422p10le
+  yuva422p12be
+  yuva422p12le
   yuva422p16be
   yuva422p16le
   yuva422p9be
@@ -319,6 +331,8 @@ isPlanarYUV:
   yuva444p
   yuva444p10be
   yuva444p10le
+  yuva444p12be
+  yuva444p12le
   yuva444p16be
   yuva444p16le
   yuva444p9be
@@ -581,6 +595,8 @@ ALPHA:
   yuva422p
   yuva422p10be
   yuva422p10le
+  yuva422p12be
+  yuva422p12le
   yuva422p16be
   yuva422p16le
   yuva422p9be
@@ -588,6 +604,8 @@ ALPHA:
   yuva444p
   yuva444p10be
   yuva444p10le
+  yuva444p12be
+  yuva444p12le
   yuva444p16be
   yuva444p16le
   yuva444p9be
@@ -739,6 +757,8 @@ Planar:
   yuva422p
   yuva422p10be
   yuva422p10le
+  yuva422p12be
+  yuva422p12le
   yuva422p16be
   yuva422p16le
   yuva422p9be
@@ -746,6 +766,8 @@ Planar:
   yuva444p
   yuva444p10be
   yuva444p10le
+  yuva444p12be
+  yuva444p12le
   yuva444p16be
   yuva444p16le
   yuva444p9be
diff --git a/tests/ref/fate/vc1test_smm0005 b/tests/ref/fate/vc1test_smm0005
new file mode 100644
index 0000000000000..0c189ca558906
--- /dev/null
+++ b/tests/ref/fate/vc1test_smm0005
@@ -0,0 +1,29 @@
+#tb 0: 1/1
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 720x480
+#sar 0: 0/1
+0,          0,          0,        1,   518400, 0xfc2e6c0f
+0,          1,          1,        1,   518400, 0xb6fa68ba
+0,          2,          2,        1,   518400, 0x0c13a4c9
+0,          3,          3,        1,   518400, 0x351e940d
+0,          4,          4,        1,   518400, 0x07ad35cb
+0,          5,          5,        1,   518400, 0x0c2b1413
+0,          6,          6,        1,   518400, 0xf770b86b
+0,          7,          7,        1,   518400, 0xb1ec5f47
+0,          8,          8,        1,   518400, 0xee591da8
+0,          9,          9,        1,   518400, 0x18046c55
+0,         10,         10,        1,   518400, 0xcfe7bb62
+0,         11,         11,        1,   518400, 0x260e7ecd
+0,         12,         12,        1,   518400, 0x3a3034cc
+0,         13,         13,        1,   518400, 0xee7800dd
+0,         14,         14,        1,   518400, 0xdc4176b4
+0,         15,         15,        1,   518400, 0x7bb037db
+0,         16,         16,        1,   518400, 0x4d4721d1
+0,         17,         17,        1,   518400, 0xbd5cc3b4
+0,         18,         18,        1,   518400, 0x678acc34
+0,         19,         19,        1,   518400, 0xb3ffba44
+0,         20,         20,        1,   518400, 0xda9ebefb
+0,         21,         21,        1,   518400, 0xb810bc59
+0,         22,         22,        1,   518400, 0xc79fc02d
+0,         23,         23,        1,   518400, 0x28d9ad0d
diff --git a/tests/ref/fate/vc1test_smm0015 b/tests/ref/fate/vc1test_smm0015
new file mode 100644
index 0000000000000..3183d6b7ab0fe
--- /dev/null
+++ b/tests/ref/fate/vc1test_smm0015
@@ -0,0 +1,30 @@
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 720x576
+#sar 0: 0/1
+0,          0,          0,        1,   622080, 0x68c84e70
+0,          1,          1,        1,   622080, 0xc29f7155
+0,          2,          2,        1,   622080, 0x78900210
+0,          3,          3,        1,   622080, 0x4d44c353
+0,          4,          4,        1,   622080, 0xd230b3dd
+0,          5,          5,        1,   622080, 0xd72a029c
+0,          6,          6,        1,   622080, 0xc76acb50
+0,          7,          7,        1,   622080, 0xf90816ab
+0,          8,          8,        1,   622080, 0x56996b77
+0,          9,          9,        1,   622080, 0x1784890f
+0,         10,         10,        1,   622080, 0xbc4c81f2
+0,         11,         11,        1,   622080, 0xab5f02ac
+0,         12,         12,        1,   622080, 0x539bb59c
+0,         13,         13,        1,   622080, 0xa34bf120
+0,         14,         14,        1,   622080, 0xa671eaac
+0,         15,         15,        1,   622080, 0x51d77a74
+0,         16,         16,        1,   622080, 0x6d15c7b0
+0,         17,         17,        1,   622080, 0x1bf7535a
+0,         18,         18,        1,   622080, 0x8ed2e73c
+0,         19,         19,        1,   622080, 0x9f4f86a6
+0,         20,         20,        1,   622080, 0x95e05b22
+0,         21,         21,        1,   622080, 0x29c570a0
+0,         22,         22,        1,   622080, 0xc612f057
+0,         23,         23,        1,   622080, 0xe782d106
+0,         24,         24,        1,   622080, 0xeee9a3b7
diff --git a/tests/ref/fate/vp60 b/tests/ref/fate/vp60
index 4becf2a8e2fee..2381c2775a116 100644
--- a/tests/ref/fate/vp60
+++ b/tests/ref/fate/vp60
@@ -18,114 +18,114 @@
 0,         12,         12,        1,    55296, 0xe76b7df7
 0,         13,         13,        1,    55296, 0x5a049f33
 0,         14,         14,        1,    55296, 0xc83d9b90
-0,         15,         15,        1,    55296, 0x567877b8
-0,         16,         16,        1,    55296, 0x334c7f6e
-0,         17,         17,        1,    55296, 0x9317945c
-0,         18,         18,        1,    55296, 0xf032831e
-0,         19,         19,        1,    55296, 0x7b6c8d2c
-0,         20,         20,        1,    55296, 0x37109fd6
-0,         21,         21,        1,    55296, 0xe9b0b61b
-0,         22,         22,        1,    55296, 0x7385dae8
-0,         23,         23,        1,    55296, 0x74a8a9f5
-0,         24,         24,        1,    55296, 0xbcd2e218
-0,         25,         25,        1,    55296, 0x0aa6c623
-0,         26,         26,        1,    55296, 0x2224d6d6
-0,         27,         27,        1,    55296, 0x8c8ee4d9
-0,         28,         28,        1,    55296, 0x0d4ceccc
-0,         29,         29,        1,    55296, 0x623f10c7
-0,         30,         30,        1,    55296, 0x13a61f8f
-0,         31,         31,        1,    55296, 0x5343fa8d
-0,         32,         32,        1,    55296, 0x21fef1b5
-0,         33,         33,        1,    55296, 0x380de6b4
-0,         34,         34,        1,    55296, 0x04bedfd3
-0,         35,         35,        1,    55296, 0x428cf510
-0,         36,         36,        1,    55296, 0xbca8c214
-0,         37,         37,        1,    55296, 0x947faa34
-0,         38,         38,        1,    55296, 0x70769f45
-0,         39,         39,        1,    55296, 0xcb9483ad
+0,         15,         15,        1,    55296, 0x464d77d6
+0,         16,         16,        1,    55296, 0x725d7fa2
+0,         17,         17,        1,    55296, 0xc30494d5
+0,         18,         18,        1,    55296, 0x5687839f
+0,         19,         19,        1,    55296, 0x38be8df5
+0,         20,         20,        1,    55296, 0x62afa0ca
+0,         21,         21,        1,    55296, 0x683ab733
+0,         22,         22,        1,    55296, 0xccbedc72
+0,         23,         23,        1,    55296, 0x43c4abc5
+0,         24,         24,        1,    55296, 0xf2f2e3f5
+0,         25,         25,        1,    55296, 0x5fb8c813
+0,         26,         26,        1,    55296, 0x7814d907
+0,         27,         27,        1,    55296, 0xbb87e71a
+0,         28,         28,        1,    55296, 0x41c6ef34
+0,         29,         29,        1,    55296, 0x3f041373
+0,         30,         30,        1,    55296, 0x14b62281
+0,         31,         31,        1,    55296, 0x9a41fddb
+0,         32,         32,        1,    55296, 0x8961f556
+0,         33,         33,        1,    55296, 0x98edea61
+0,         34,         34,        1,    55296, 0x434ae3dd
+0,         35,         35,        1,    55296, 0x0aa4fa23
+0,         36,         36,        1,    55296, 0x0b8bc77f
+0,         37,         37,        1,    55296, 0x79dfafbc
+0,         38,         38,        1,    55296, 0x199ea4da
+0,         39,         39,        1,    55296, 0xd270896d
 0,         40,         40,        1,    55296, 0xac4ea82b
-0,         41,         41,        1,    55296, 0xa3816977
-0,         42,         42,        1,    55296, 0xcfd54ec4
-0,         43,         43,        1,    55296, 0x97743f0e
-0,         44,         44,        1,    55296, 0x4cb4424d
-0,         45,         45,        1,    55296, 0x0b503c11
-0,         46,         46,        1,    55296, 0x879f333a
-0,         47,         47,        1,    55296, 0x6ff9eb8f
-0,         48,         48,        1,    55296, 0x7cd6e5af
-0,         49,         49,        1,    55296, 0x44e2c36f
-0,         50,         50,        1,    55296, 0x4e8993fb
-0,         51,         51,        1,    55296, 0xf0bb9664
-0,         52,         52,        1,    55296, 0xde608458
-0,         53,         53,        1,    55296, 0xb3017f01
-0,         54,         54,        1,    55296, 0x2e096579
-0,         55,         55,        1,    55296, 0xd7295790
-0,         56,         56,        1,    55296, 0xc40b81cb
-0,         57,         57,        1,    55296, 0x53a86e41
-0,         58,         58,        1,    55296, 0x74142f89
-0,         59,         59,        1,    55296, 0x2a1428ce
-0,         60,         60,        1,    55296, 0x5d0c2852
-0,         61,         61,        1,    55296, 0x162058a4
-0,         62,         62,        1,    55296, 0x4e8c6ce8
-0,         63,         63,        1,    55296, 0x1d382af2
-0,         64,         64,        1,    55296, 0x35dd2b75
-0,         65,         65,        1,    55296, 0x1e4c205f
-0,         66,         66,        1,    55296, 0x74a22383
-0,         67,         67,        1,    55296, 0x6ddb237d
-0,         68,         68,        1,    55296, 0xd290263b
-0,         69,         69,        1,    55296, 0xc778249f
+0,         41,         41,        1,    55296, 0x770b6984
+0,         42,         42,        1,    55296, 0x378b4f1a
+0,         43,         43,        1,    55296, 0xe8253faf
+0,         44,         44,        1,    55296, 0xc8224326
+0,         45,         45,        1,    55296, 0x22b33d1b
+0,         46,         46,        1,    55296, 0x83c33475
+0,         47,         47,        1,    55296, 0xbd2aed59
+0,         48,         48,        1,    55296, 0xe058e7e6
+0,         49,         49,        1,    55296, 0x575fc611
+0,         50,         50,        1,    55296, 0x218196dd
+0,         51,         51,        1,    55296, 0x0882998c
+0,         52,         52,        1,    55296, 0x380887dd
+0,         53,         53,        1,    55296, 0x833f82c8
+0,         54,         54,        1,    55296, 0x9db9697b
+0,         55,         55,        1,    55296, 0xb8dd5be8
+0,         56,         56,        1,    55296, 0x1df1869b
+0,         57,         57,        1,    55296, 0xe0bd7399
+0,         58,         58,        1,    55296, 0xc2413536
+0,         59,         59,        1,    55296, 0x90bb2ebf
+0,         60,         60,        1,    55296, 0x6fe72e97
+0,         61,         61,        1,    55296, 0x41385f50
+0,         62,         62,        1,    55296, 0x360973b0
+0,         63,         63,        1,    55296, 0x211d31c4
+0,         64,         64,        1,    55296, 0xe40632a9
+0,         65,         65,        1,    55296, 0xc4052794
+0,         66,         66,        1,    55296, 0x2ff82adf
+0,         67,         67,        1,    55296, 0x7d272ac3
+0,         68,         68,        1,    55296, 0x37512dcc
+0,         69,         69,        1,    55296, 0x66832c7f
 0,         70,         70,        1,    55296, 0xbc1046fb
 0,         71,         71,        1,    55296, 0xf44d470f
 0,         72,         72,        1,    55296, 0x28d85a11
-0,         73,         73,        1,    55296, 0xa68953b6
-0,         74,         74,        1,    55296, 0x02593ce5
-0,         75,         75,        1,    55296, 0x61be53d5
-0,         76,         76,        1,    55296, 0x4c503c54
-0,         77,         77,        1,    55296, 0x3d3e60f7
-0,         78,         78,        1,    55296, 0xec876b9d
-0,         79,         79,        1,    55296, 0x5b5f59e4
-0,         80,         80,        1,    55296, 0xbd2d5f84
-0,         81,         81,        1,    55296, 0xaa7a6410
-0,         82,         82,        1,    55296, 0xaa196189
-0,         83,         83,        1,    55296, 0x81365cca
-0,         84,         84,        1,    55296, 0xa85f6861
-0,         85,         85,        1,    55296, 0xcb46562e
-0,         86,         86,        1,    55296, 0x1b935862
-0,         87,         87,        1,    55296, 0x80a45a60
-0,         88,         88,        1,    55296, 0x8e8aabba
-0,         89,         89,        1,    55296, 0x38939b53
-0,         90,         90,        1,    55296, 0x4f397c22
-0,         91,         91,        1,    55296, 0x7d0d8476
-0,         92,         92,        1,    55296, 0x943e8044
-0,         93,         93,        1,    55296, 0xabc6b323
-0,         94,         94,        1,    55296, 0x87dfb605
-0,         95,         95,        1,    55296, 0x5ca89202
+0,         73,         73,        1,    55296, 0xbdb153c7
+0,         74,         74,        1,    55296, 0x87093d2d
+0,         75,         75,        1,    55296, 0x132c5459
+0,         76,         76,        1,    55296, 0x293e3cff
+0,         77,         77,        1,    55296, 0x1a2c61a2
+0,         78,         78,        1,    55296, 0xc9756c48
+0,         79,         79,        1,    55296, 0xd90f5a89
+0,         80,         80,        1,    55296, 0x83eb6053
+0,         81,         81,        1,    55296, 0x713864df
+0,         82,         82,        1,    55296, 0xf0ef6273
+0,         83,         83,        1,    55296, 0x24875dc4
+0,         84,         84,        1,    55296, 0x094a696e
+0,         85,         85,        1,    55296, 0xe2d95761
+0,         86,         86,        1,    55296, 0x33265995
+0,         87,         87,        1,    55296, 0xc24b5bb9
+0,         88,         88,        1,    55296, 0xc130ad3d
+0,         89,         89,        1,    55296, 0x42389d02
+0,         90,         90,        1,    55296, 0x372b7dbd
+0,         91,         91,        1,    55296, 0x8645860d
+0,         92,         92,        1,    55296, 0x218581dc
+0,         93,         93,        1,    55296, 0xaea2b4e7
+0,         94,         94,        1,    55296, 0x3b14b7fa
+0,         95,         95,        1,    55296, 0x7b7d93f7
 0,         96,         96,        1,    55296, 0x61bc9b27
 0,         97,         97,        1,    55296, 0x1e4baa30
-0,         98,         98,        1,    55296, 0xd8a7adb0
+0,         98,         98,        1,    55296, 0x2a6dadac
 0,         99,         99,        1,    55296, 0x0d0aa8fb
-0,        100,        100,        1,    55296, 0x1f1ba33c
-0,        101,        101,        1,    55296, 0xa000a80b
-0,        102,        102,        1,    55296, 0xb49dd332
-0,        103,        103,        1,    55296, 0x6b8ac499
-0,        104,        104,        1,    55296, 0x9636ed15
-0,        105,        105,        1,    55296, 0xa152f03d
-0,        106,        106,        1,    55296, 0x47a8cfc7
-0,        107,        107,        1,    55296, 0x9f94c82a
-0,        108,        108,        1,    55296, 0xe208d626
-0,        109,        109,        1,    55296, 0x28cc0616
-0,        110,        110,        1,    55296, 0xc545179e
-0,        111,        111,        1,    55296, 0xd38e05af
-0,        112,        112,        1,    55296, 0x25d6ed99
-0,        113,        113,        1,    55296, 0x7a6bf86e
-0,        114,        114,        1,    55296, 0xbb3bfbcd
-0,        115,        115,        1,    55296, 0x33de2984
-0,        116,        116,        1,    55296, 0xd5b10c27
-0,        117,        117,        1,    55296, 0x19e31f78
-0,        118,        118,        1,    55296, 0xf62f1a4f
-0,        119,        119,        1,    55296, 0x3f792203
-0,        120,        120,        1,    55296, 0xe4ed6202
-0,        121,        121,        1,    55296, 0xee265136
-0,        122,        122,        1,    55296, 0x408af73c
+0,        100,        100,        1,    55296, 0x3c6aa33a
+0,        101,        101,        1,    55296, 0xbd4fa809
+0,        102,        102,        1,    55296, 0x55edd382
+0,        103,        103,        1,    55296, 0x6a9cc4df
+0,        104,        104,        1,    55296, 0x49a5ed7f
+0,        105,        105,        1,    55296, 0x0c33f0b3
+0,        106,        106,        1,    55296, 0xd5e4d037
+0,        107,        107,        1,    55296, 0x2ddfc89a
+0,        108,        108,        1,    55296, 0xb39ed6c6
+0,        109,        109,        1,    55296, 0x8fe606e2
+0,        110,        110,        1,    55296, 0x10ce18ec
+0,        111,        111,        1,    55296, 0xe2110705
+0,        112,        112,        1,    55296, 0x1a00ef28
+0,        113,        113,        1,    55296, 0x9c90fa21
+0,        114,        114,        1,    55296, 0xfafffdbe
+0,        115,        115,        1,    55296, 0x741e2bc8
+0,        116,        116,        1,    55296, 0xf5b00ece
+0,        117,        117,        1,    55296, 0x3b2e224d
+0,        118,        118,        1,    55296, 0x29e81d73
+0,        119,        119,        1,    55296, 0xe3412580
+0,        120,        120,        1,    55296, 0x3c2865dc
+0,        121,        121,        1,    55296, 0xd4b5553b
+0,        122,        122,        1,    55296, 0x1ad8fa27
 0,        123,        123,        1,    55296, 0xc1533ef5
 0,        124,        124,        1,    55296, 0xf671f85d
 0,        125,        125,        1,    55296, 0xae2670e0
diff --git a/tests/ref/fate/vp61 b/tests/ref/fate/vp61
index 2d3beea189d63..13146c4071baa 100644
--- a/tests/ref/fate/vp61
+++ b/tests/ref/fate/vp61
@@ -4,122 +4,122 @@
 #dimensions 0: 112x112
 #sar 0: 0/1
 0,          0,          0,        1,    18816, 0xc3fe9fc7
-0,          1,          1,        1,    18816, 0x6ddf972f
-0,          2,          2,        1,    18816, 0x72808b6e
-0,          3,          3,        1,    18816, 0x8f09857f
-0,          4,          4,        1,    18816, 0xe8027c00
-0,          5,          5,        1,    18816, 0x308670cf
-0,          6,          6,        1,    18816, 0x0e656170
-0,          7,          7,        1,    18816, 0x594e54a4
-0,          8,          8,        1,    18816, 0x36944b05
-0,          9,          9,        1,    18816, 0x87013a34
-0,         10,         10,        1,    18816, 0xc0f32f0d
-0,         11,         11,        1,    18816, 0x911f1951
-0,         12,         12,        1,    18816, 0xad590d59
-0,         13,         13,        1,    18816, 0x943afff0
-0,         14,         14,        1,    18816, 0x7f5ef719
-0,         15,         15,        1,    18816, 0x889feafc
-0,         16,         16,        1,    18816, 0x4334e12b
-0,         17,         17,        1,    18816, 0xd080cc67
-0,         18,         18,        1,    18816, 0xc3c1c04c
-0,         19,         19,        1,    18816, 0x816bae4b
-0,         20,         20,        1,    18816, 0xed23a5c7
-0,         21,         21,        1,    18816, 0x86689c2f
-0,         22,         22,        1,    18816, 0x63408c52
-0,         23,         23,        1,    18816, 0x399c79d6
-0,         24,         24,        1,    18816, 0xf0ff63bf
-0,         25,         25,        1,    18816, 0xa6185353
-0,         26,         26,        1,    18816, 0xe33d46fc
-0,         27,         27,        1,    18816, 0xd58d3c6d
-0,         28,         28,        1,    18816, 0xc94a27ea
-0,         29,         29,        1,    18816, 0x62f31c59
-0,         30,         30,        1,    18816, 0x71880825
-0,         31,         31,        1,    18816, 0xa6ce01d7
-0,         32,         32,        1,    18816, 0xa1d4fc06
-0,         33,         33,        1,    18816, 0xc208f570
-0,         34,         34,        1,    18816, 0xc862e637
-0,         35,         35,        1,    18816, 0xcf9ed93a
-0,         36,         36,        1,    18816, 0x85a8cbcc
-0,         37,         37,        1,    18816, 0x650ac6c1
-0,         38,         38,        1,    18816, 0xb418c12b
-0,         39,         39,        1,    18816, 0x9fe5b412
-0,         40,         40,        1,    18816, 0x80f6a7c1
-0,         41,         41,        1,    18816, 0x283299e4
-0,         42,         42,        1,    18816, 0x15429202
-0,         43,         43,        1,    18816, 0x9f0f8c8a
-0,         44,         44,        1,    18816, 0x8e828811
-0,         45,         45,        1,    18816, 0xaac67993
-0,         46,         46,        1,    18816, 0x8f3b6f4f
-0,         47,         47,        1,    18816, 0x0b125f95
-0,         48,         48,        1,    18816, 0xb4e75d14
-0,         49,         49,        1,    18816, 0x1bac5933
-0,         50,         50,        1,    18816, 0x300b521b
-0,         51,         51,        1,    18816, 0x51174590
-0,         52,         52,        1,    18816, 0x03df3d70
-0,         53,         53,        1,    18816, 0x338a344a
-0,         54,         54,        1,    18816, 0x45ad328d
-0,         55,         55,        1,    18816, 0x2d4e321a
-0,         56,         56,        1,    18816, 0x15932563
-0,         57,         57,        1,    18816, 0x9b4f1c76
-0,         58,         58,        1,    18816, 0x8e31153c
-0,         59,         59,        1,    18816, 0xfb391185
-0,         60,         60,        1,    18816, 0x93ee0cdc
-0,         61,         61,        1,    18816, 0xddeb0642
-0,         62,         62,        1,    18816, 0xda6cf529
-0,         63,         63,        1,    18816, 0xdbd6f085
-0,         64,         64,        1,    18816, 0x357aec81
-0,         65,         65,        1,    18816, 0x36eaecca
-0,         66,         66,        1,    18816, 0x6535ee02
-0,         67,         67,        1,    18816, 0xb7dfe466
-0,         68,         68,        1,    18816, 0x58d3d86b
-0,         69,         69,        1,    18816, 0xd8aad64b
-0,         70,         70,        1,    18816, 0x37ecd588
-0,         71,         71,        1,    18816, 0xe2f9cee4
-0,         72,         72,        1,    18816, 0xcd1ac93e
-0,         73,         73,        1,    18816, 0x18e1be81
-0,         74,         74,        1,    18816, 0xa05bb9d7
-0,         75,         75,        1,    18816, 0xe0ebb663
-0,         76,         76,        1,    18816, 0x7d61b39a
-0,         77,         77,        1,    18816, 0x01b8acb5
-0,         78,         78,        1,    18816, 0x7577aa8b
-0,         79,         79,        1,    18816, 0x6bbda4b5
-0,         80,         80,        1,    18816, 0xd0cc9b29
-0,         81,         81,        1,    18816, 0xb2858cbb
-0,         82,         82,        1,    18816, 0x93608c9d
-0,         83,         83,        1,    18816, 0x80c38e03
-0,         84,         84,        1,    18816, 0x37d6843c
-0,         85,         85,        1,    18816, 0xacc47b9a
-0,         86,         86,        1,    18816, 0xc4317178
-0,         87,         87,        1,    18816, 0xc92f6ebd
-0,         88,         88,        1,    18816, 0xc1217a3b
-0,         89,         89,        1,    18816, 0x03a37ccb
-0,         90,         90,        1,    18816, 0xf38c71a2
-0,         91,         91,        1,    18816, 0x68ff697d
-0,         92,         92,        1,    18816, 0x0fe358e5
-0,         93,         93,        1,    18816, 0x58455870
-0,         94,         94,        1,    18816, 0xc9075ce7
-0,         95,         95,        1,    18816, 0x16685773
-0,         96,         96,        1,    18816, 0x1b434c0e
-0,         97,         97,        1,    18816, 0x008e4c97
-0,         98,         98,        1,    18816, 0xb4d04f4f
-0,         99,         99,        1,    18816, 0xc8c94848
-0,        100,        100,        1,    18816, 0x64664191
-0,        101,        101,        1,    18816, 0xd591367f
-0,        102,        102,        1,    18816, 0xc70d3141
-0,        103,        103,        1,    18816, 0x8d492655
-0,        104,        104,        1,    18816, 0x7e7f22c8
-0,        105,        105,        1,    18816, 0x335d23f9
-0,        106,        106,        1,    18816, 0x0a7f22b6
-0,        107,        107,        1,    18816, 0x6cf51cb2
-0,        108,        108,        1,    18816, 0x312516e1
-0,        109,        109,        1,    18816, 0x8a3c0c7a
-0,        110,        110,        1,    18816, 0x997d0d20
-0,        111,        111,        1,    18816, 0xffbd117e
-0,        112,        112,        1,    18816, 0x855808ca
-0,        113,        113,        1,    18816, 0xe335fb94
-0,        114,        114,        1,    18816, 0x12e6f95c
-0,        115,        115,        1,    18816, 0x2d62f845
-0,        116,        116,        1,    18816, 0x7e63f591
-0,        117,        117,        1,    18816, 0x7463f175
-0,        118,        118,        1,    18816, 0x1521e0d2
-0,        119,        119,        1,    18816, 0x96a8dbce
+0,          1,          1,        1,    18816, 0xab429744
+0,          2,          2,        1,    18816, 0x1eec8b7d
+0,          3,          3,        1,    18816, 0x3817859f
+0,          4,          4,        1,    18816, 0x8f487c41
+0,          5,          5,        1,    18816, 0x4ce87138
+0,          6,          6,        1,    18816, 0x499361dd
+0,          7,          7,        1,    18816, 0xd26c5518
+0,          8,          8,        1,    18816, 0xfb1a4b88
+0,          9,          9,        1,    18816, 0x4ac63ac7
+0,         10,         10,        1,    18816, 0x39642fad
+0,         11,         11,        1,    18816, 0x4dd219f6
+0,         12,         12,        1,    18816, 0x6c500df7
+0,         13,         13,        1,    18816, 0x700e00b4
+0,         14,         14,        1,    18816, 0xbebaf7e0
+0,         15,         15,        1,    18816, 0x22c1ebf6
+0,         16,         16,        1,    18816, 0x723fe229
+0,         17,         17,        1,    18816, 0x9b22cd91
+0,         18,         18,        1,    18816, 0x45ddc17a
+0,         19,         19,        1,    18816, 0x9b33af88
+0,         20,         20,        1,    18816, 0xf9d1a70d
+0,         21,         21,        1,    18816, 0x9bfd9d84
+0,         22,         22,        1,    18816, 0x27048da1
+0,         23,         23,        1,    18816, 0x0e497b2d
+0,         24,         24,        1,    18816, 0x508264fd
+0,         25,         25,        1,    18816, 0xf6985495
+0,         26,         26,        1,    18816, 0xa5014869
+0,         27,         27,        1,    18816, 0x426e3de4
+0,         28,         28,        1,    18816, 0xd72129b9
+0,         29,         29,        1,    18816, 0x12e51e1c
+0,         30,         30,        1,    18816, 0x754109eb
+0,         31,         31,        1,    18816, 0x2fb5039e
+0,         32,         32,        1,    18816, 0x9407fdea
+0,         33,         33,        1,    18816, 0x59f2f752
+0,         34,         34,        1,    18816, 0xb448e831
+0,         35,         35,        1,    18816, 0xf31fdb2e
+0,         36,         36,        1,    18816, 0x6bedcdec
+0,         37,         37,        1,    18816, 0xbbfec8e6
+0,         38,         38,        1,    18816, 0x104ac345
+0,         39,         39,        1,    18816, 0xd387b629
+0,         40,         40,        1,    18816, 0x4187a9e7
+0,         41,         41,        1,    18816, 0x1c649c17
+0,         42,         42,        1,    18816, 0xcc869431
+0,         43,         43,        1,    18816, 0x8bc78ed1
+0,         44,         44,        1,    18816, 0x34de8a5f
+0,         45,         45,        1,    18816, 0x42727bd0
+0,         46,         46,        1,    18816, 0x6e9c7192
+0,         47,         47,        1,    18816, 0xd93761c1
+0,         48,         48,        1,    18816, 0x69455f50
+0,         49,         49,        1,    18816, 0xa35d5b8b
+0,         50,         50,        1,    18816, 0xe6635469
+0,         51,         51,        1,    18816, 0x285d47f9
+0,         52,         52,        1,    18816, 0xdb0a3fdf
+0,         53,         53,        1,    18816, 0x608336d0
+0,         54,         54,        1,    18816, 0x39e9353f
+0,         55,         55,        1,    18816, 0xad2034bd
+0,         56,         56,        1,    18816, 0xa0f527e7
+0,         57,         57,        1,    18816, 0x5ed71ef1
+0,         58,         58,        1,    18816, 0xd2df1798
+0,         59,         59,        1,    18816, 0x932513d3
+0,         60,         60,        1,    18816, 0xab600f1b
+0,         61,         61,        1,    18816, 0x665d087e
+0,         62,         62,        1,    18816, 0x8d71f765
+0,         63,         63,        1,    18816, 0xfc80f2ae
+0,         64,         64,        1,    18816, 0x885bee81
+0,         65,         65,        1,    18816, 0x6c4feec9
+0,         66,         66,        1,    18816, 0x5010f017
+0,         67,         67,        1,    18816, 0x63d5e683
+0,         68,         68,        1,    18816, 0x216ddab5
+0,         69,         69,        1,    18816, 0xb758d8c2
+0,         70,         70,        1,    18816, 0xeae4d7e5
+0,         71,         71,        1,    18816, 0x2553d137
+0,         72,         72,        1,    18816, 0xced6cb97
+0,         73,         73,        1,    18816, 0xd332c103
+0,         74,         74,        1,    18816, 0xe84bbc67
+0,         75,         75,        1,    18816, 0x21f7b90d
+0,         76,         76,        1,    18816, 0xbd88b648
+0,         77,         77,        1,    18816, 0xe7b8af59
+0,         78,         78,        1,    18816, 0x30f5ad3d
+0,         79,         79,        1,    18816, 0x79bfa7bb
+0,         80,         80,        1,    18816, 0xd5919e34
+0,         81,         81,        1,    18816, 0x43758fda
+0,         82,         82,        1,    18816, 0x93ff8fcc
+0,         83,         83,        1,    18816, 0xa444913f
+0,         84,         84,        1,    18816, 0xc41d878b
+0,         85,         85,        1,    18816, 0x526d7f09
+0,         86,         86,        1,    18816, 0x40307528
+0,         87,         87,        1,    18816, 0xf63d725c
+0,         88,         88,        1,    18816, 0x45587ddb
+0,         89,         89,        1,    18816, 0x37018069
+0,         90,         90,        1,    18816, 0x69567572
+0,         91,         91,        1,    18816, 0xa8086d54
+0,         92,         92,        1,    18816, 0x2ab65ccc
+0,         93,         93,        1,    18816, 0xa5475c5d
+0,         94,         94,        1,    18816, 0x87dc60e2
+0,         95,         95,        1,    18816, 0x54295b5d
+0,         96,         96,        1,    18816, 0xce424fe9
+0,         97,         97,        1,    18816, 0x99e0506d
+0,         98,         98,        1,    18816, 0x6ac55324
+0,         99,         99,        1,    18816, 0xa60f4c1a
+0,        100,        100,        1,    18816, 0x7eba456e
+0,        101,        101,        1,    18816, 0xab703a88
+0,        102,        102,        1,    18816, 0xb8263539
+0,        103,        103,        1,    18816, 0x829e2abb
+0,        104,        104,        1,    18816, 0x2e9f2736
+0,        105,        105,        1,    18816, 0x7a3d2885
+0,        106,        106,        1,    18816, 0x8023274f
+0,        107,        107,        1,    18816, 0x39bf2196
+0,        108,        108,        1,    18816, 0x21cf1bcb
+0,        109,        109,        1,    18816, 0x016911b9
+0,        110,        110,        1,    18816, 0x2a791237
+0,        111,        111,        1,    18816, 0x093f16a7
+0,        112,        112,        1,    18816, 0x07a90df3
+0,        113,        113,        1,    18816, 0x8de800ca
+0,        114,        114,        1,    18816, 0xe379fe68
+0,        115,        115,        1,    18816, 0x267afd69
+0,        116,        116,        1,    18816, 0x5b29fac1
+0,        117,        117,        1,    18816, 0x3606f6b6
+0,        118,        118,        1,    18816, 0xcbdce61e
+0,        119,        119,        1,    18816, 0x28ffe113
diff --git a/tests/ref/fate/vp6a b/tests/ref/fate/vp6a
index b4f33630f74cc..8fb2630ff3ffd 100644
--- a/tests/ref/fate/vp6a
+++ b/tests/ref/fate/vp6a
@@ -4,95 +4,95 @@
 #dimensions 0: 300x180
 #sar 0: 0/1
 0,          0,          0,        1,   135000, 0x9dceed6d
-0,          1,          1,        1,   135000, 0x47e5778d
-0,          2,          2,        1,   135000, 0x5de36599
-0,          3,          3,        1,   135000, 0x540d8079
-0,          4,          4,        1,   135000, 0xba9ea534
-0,          5,          5,        1,   135000, 0xa75088f8
-0,          6,          6,        1,   135000, 0x7d867559
-0,          7,          7,        1,   135000, 0xcc678fee
-0,          8,          8,        1,   135000, 0x79c590b9
-0,          9,          9,        1,   135000, 0x87789918
-0,         10,         10,        1,   135000, 0xaa939213
-0,         11,         11,        1,   135000, 0x3912916d
-0,         12,         12,        1,   135000, 0x41305d0b
-0,         13,         13,        1,   135000, 0x2686b5dd
-0,         14,         14,        1,   135000, 0xa69ae422
-0,         15,         15,        1,   135000, 0x998a3478
-0,         16,         16,        1,   135000, 0x5842768d
-0,         17,         17,        1,   135000, 0xf6a85b16
-0,         18,         18,        1,   135000, 0x7a5b2708
-0,         19,         19,        1,   135000, 0x8b2abb63
-0,         20,         20,        1,   135000, 0x7dc8468b
-0,         21,         21,        1,   135000, 0x04d85001
-0,         22,         22,        1,   135000, 0x83e3c647
-0,         23,         23,        1,   135000, 0xcddd687e
-0,         24,         24,        1,   135000, 0x818e785e
-0,         25,         25,        1,   135000, 0x3a915080
-0,         26,         26,        1,   135000, 0x953d603d
-0,         27,         27,        1,   135000, 0x79005ebf
-0,         28,         28,        1,   135000, 0x80afec75
-0,         29,         29,        1,   135000, 0xfc8e376b
-0,         30,         30,        1,   135000, 0xf957b7ef
-0,         31,         31,        1,   135000, 0xe878da44
-0,         32,         32,        1,   135000, 0xe68ecca3
-0,         33,         33,        1,   135000, 0x1a2cc7d3
-0,         34,         34,        1,   135000, 0x4f346a69
-0,         35,         35,        1,   135000, 0x7a0cf4ac
-0,         36,         36,        1,   135000, 0x6d4eee7a
-0,         37,         37,        1,   135000, 0xf0688cbd
-0,         38,         38,        1,   135000, 0xca4abbbc
-0,         39,         39,        1,   135000, 0x87669519
-0,         40,         40,        1,   135000, 0xd090e9d7
-0,         41,         41,        1,   135000, 0xd7f536c1
-0,         42,         42,        1,   135000, 0x353ede54
-0,         43,         43,        1,   135000, 0xbc8f5358
-0,         44,         44,        1,   135000, 0xb52cd59a
-0,         45,         45,        1,   135000, 0x0b882eba
-0,         46,         46,        1,   135000, 0xc544cd54
-0,         47,         47,        1,   135000, 0x31ca7e73
+0,          1,          1,        1,   135000, 0x1fc377a4
+0,          2,          2,        1,   135000, 0x0b4465d4
+0,          3,          3,        1,   135000, 0x136b8062
+0,          4,          4,        1,   135000, 0x4691a55c
+0,          5,          5,        1,   135000, 0x55bb8a19
+0,          6,          6,        1,   135000, 0xdbf67651
+0,          7,          7,        1,   135000, 0x6fb19113
+0,          8,          8,        1,   135000, 0x3edc9227
+0,          9,          9,        1,   135000, 0x53b39aff
+0,         10,         10,        1,   135000, 0x699e94b0
+0,         11,         11,        1,   135000, 0xeedd9388
+0,         12,         12,        1,   135000, 0x14055f96
+0,         13,         13,        1,   135000, 0x71fbb5fd
+0,         14,         14,        1,   135000, 0x6fb4e491
+0,         15,         15,        1,   135000, 0x35ca3482
+0,         16,         16,        1,   135000, 0x0c2a7530
+0,         17,         17,        1,   135000, 0x422c5581
+0,         18,         18,        1,   135000, 0x19eb2155
+0,         19,         19,        1,   135000, 0x07e1b114
+0,         20,         20,        1,   135000, 0xa10f3f81
+0,         21,         21,        1,   135000, 0x75684dcd
+0,         22,         22,        1,   135000, 0x1721c337
+0,         23,         23,        1,   135000, 0x3897667d
+0,         24,         24,        1,   135000, 0x1232769e
+0,         25,         25,        1,   135000, 0xec975059
+0,         26,         26,        1,   135000, 0xb2a46123
+0,         27,         27,        1,   135000, 0x052c5f72
+0,         28,         28,        1,   135000, 0x3087eb2f
+0,         29,         29,        1,   135000, 0xd1e0373a
+0,         30,         30,        1,   135000, 0x64dab704
+0,         31,         31,        1,   135000, 0xa44dd89e
+0,         32,         32,        1,   135000, 0x380ecae9
+0,         33,         33,        1,   135000, 0x8c6fc4ab
+0,         34,         34,        1,   135000, 0x02096903
+0,         35,         35,        1,   135000, 0x11edf432
+0,         36,         36,        1,   135000, 0x3585ee5f
+0,         37,         37,        1,   135000, 0xe1338c40
+0,         38,         38,        1,   135000, 0x5edfbd0c
+0,         39,         39,        1,   135000, 0x9420965c
+0,         40,         40,        1,   135000, 0x0caceb17
+0,         41,         41,        1,   135000, 0x3fdc36c3
+0,         42,         42,        1,   135000, 0x8a24df14
+0,         43,         43,        1,   135000, 0x5dc057b0
+0,         44,         44,        1,   135000, 0xdc5eda65
+0,         45,         45,        1,   135000, 0x60433612
+0,         46,         46,        1,   135000, 0x6a91d6c9
+0,         47,         47,        1,   135000, 0x53598734
 0,         48,         48,        1,   135000, 0xb1569ce9
-0,         49,         49,        1,   135000, 0x8bf4394f
-0,         50,         50,        1,   135000, 0xf413812a
-0,         51,         51,        1,   135000, 0xf2fa90ab
-0,         52,         52,        1,   135000, 0xdcd8b265
-0,         53,         53,        1,   135000, 0xa89cdba1
-0,         54,         54,        1,   135000, 0x212b59a5
-0,         55,         55,        1,   135000, 0x10c589c3
-0,         56,         56,        1,   135000, 0x432ab5b4
+0,         49,         49,        1,   135000, 0xf5e83a33
+0,         50,         50,        1,   135000, 0xebe18275
+0,         51,         51,        1,   135000, 0x98af9447
+0,         52,         52,        1,   135000, 0x3f03b765
+0,         53,         53,        1,   135000, 0x7423e0b8
+0,         54,         54,        1,   135000, 0x6c1b5faa
+0,         55,         55,        1,   135000, 0xebf98d52
+0,         56,         56,        1,   135000, 0xf3dfb8b6
 0,         57,         57,        1,   135000, 0x85a9634a
-0,         58,         58,        1,   135000, 0x10db5b87
-0,         59,         59,        1,   135000, 0x583145d9
-0,         60,         60,        1,   135000, 0x7d3a33bd
-0,         61,         61,        1,   135000, 0xcf592423
-0,         62,         62,        1,   135000, 0xb59728e5
-0,         63,         63,        1,   135000, 0x1eeca660
-0,         64,         64,        1,   135000, 0xff7bcc34
-0,         65,         65,        1,   135000, 0x0ef8f271
-0,         66,         66,        1,   135000, 0x8c9ca8ee
-0,         67,         67,        1,   135000, 0x8a7ece34
-0,         68,         68,        1,   135000, 0x7d4c3b5d
-0,         69,         69,        1,   135000, 0x99118f21
-0,         70,         70,        1,   135000, 0xd97fe7e2
-0,         71,         71,        1,   135000, 0xf93842f1
-0,         72,         72,        1,   135000, 0x35c912e8
-0,         73,         73,        1,   135000, 0x14e59e97
+0,         58,         58,        1,   135000, 0x4d425bb5
+0,         59,         59,        1,   135000, 0xfb7945ee
+0,         60,         60,        1,   135000, 0x593534c1
+0,         61,         61,        1,   135000, 0xe3fa2517
+0,         62,         62,        1,   135000, 0x893629e3
+0,         63,         63,        1,   135000, 0xdc3ca6ad
+0,         64,         64,        1,   135000, 0x16b1ce27
+0,         65,         65,        1,   135000, 0x8296f478
+0,         66,         66,        1,   135000, 0x9e9baaa3
+0,         67,         67,        1,   135000, 0x994ecd4a
+0,         68,         68,        1,   135000, 0x40f83b3c
+0,         69,         69,        1,   135000, 0x0de38f90
+0,         70,         70,        1,   135000, 0x5455ea6c
+0,         71,         71,        1,   135000, 0x053e41e8
+0,         72,         72,        1,   135000, 0x0fee1281
+0,         73,         73,        1,   135000, 0xa0c9a434
 0,         74,         74,        1,   135000, 0x8e4c19aa
-0,         75,         75,        1,   135000, 0x4adfbc53
-0,         76,         76,        1,   135000, 0x0613adde
-0,         77,         77,        1,   135000, 0x8db264ab
-0,         78,         78,        1,   135000, 0x3948b619
-0,         79,         79,        1,   135000, 0x843d7c02
-0,         80,         80,        1,   135000, 0x534fea34
+0,         75,         75,        1,   135000, 0x34bebc00
+0,         76,         76,        1,   135000, 0x6670ad6f
+0,         77,         77,        1,   135000, 0xdbba63fc
+0,         78,         78,        1,   135000, 0xe34fb839
+0,         79,         79,        1,   135000, 0xa3ce7eb1
+0,         80,         80,        1,   135000, 0xdec7ed7d
 0,         81,         81,        1,   135000, 0xdb7041bf
-0,         82,         82,        1,   135000, 0xd0ce1cce
-0,         83,         83,        1,   135000, 0x3c008335
-0,         84,         84,        1,   135000, 0xb699208f
-0,         85,         85,        1,   135000, 0xe07da3ca
-0,         86,         86,        1,   135000, 0x26331f41
-0,         87,         87,        1,   135000, 0x4e19fe83
-0,         88,         88,        1,   135000, 0xaa9a9e45
-0,         89,         89,        1,   135000, 0x336b7ed0
-0,         90,         90,        1,   135000, 0xc9bf7611
+0,         82,         82,        1,   135000, 0x2b1d1dc1
+0,         83,         83,        1,   135000, 0xaaa384c1
+0,         84,         84,        1,   135000, 0x37f42217
+0,         85,         85,        1,   135000, 0x928ba4da
+0,         86,         86,        1,   135000, 0x2b681f41
+0,         87,         87,        1,   135000, 0xcbe1ff84
+0,         88,         88,        1,   135000, 0x47949f24
+0,         89,         89,        1,   135000, 0x368b7fe5
+0,         90,         90,        1,   135000, 0x1cf4773b
 0,         91,         91,        1,   135000, 0x14c33a35
 0,         92,         92,        1,   135000, 0xdc08470e
diff --git a/tests/ref/fate/vp6a-skip_alpha b/tests/ref/fate/vp6a-skip_alpha
index e2c435f402c96..f092778021889 100644
--- a/tests/ref/fate/vp6a-skip_alpha
+++ b/tests/ref/fate/vp6a-skip_alpha
@@ -4,95 +4,95 @@
 #dimensions 0: 300x180
 #sar 0: 0/1
 0,          0,          0,        1,    81000, 0xcb92962d
-0,          1,          1,        1,    81000, 0xae381904
-0,          2,          2,        1,    81000, 0x1fcc0c75
-0,          3,          3,        1,    81000, 0x023f0c21
-0,          4,          4,        1,    81000, 0xad691402
-0,          5,          5,        1,    81000, 0x42390be0
-0,          6,          6,        1,    81000, 0xc1c10a4e
-0,          7,          7,        1,    81000, 0x9c0315ac
-0,          8,          8,        1,    81000, 0xc2a315a7
-0,          9,          9,        1,    81000, 0x3a631392
-0,         10,         10,        1,    81000, 0x11591414
-0,         11,         11,        1,    81000, 0x1a551125
-0,         12,         12,        1,    81000, 0x2e1efa4f
-0,         13,         13,        1,    81000, 0x4aa3f016
-0,         14,         14,        1,    81000, 0x74c029d8
-0,         15,         15,        1,    81000, 0xdee9a98b
-0,         16,         16,        1,    81000, 0xdf3502d5
-0,         17,         17,        1,    81000, 0x4653536b
-0,         18,         18,        1,    81000, 0x7f658c75
-0,         19,         19,        1,    81000, 0xab18ff13
-0,         20,         20,        1,    81000, 0xac2b8f3b
-0,         21,         21,        1,    81000, 0xd61ff094
-0,         22,         22,        1,    81000, 0x425bfc2b
-0,         23,         23,        1,    81000, 0x6be7ecd3
-0,         24,         24,        1,    81000, 0x0b0ee65b
-0,         25,         25,        1,    81000, 0x3c6f146b
-0,         26,         26,        1,    81000, 0x27c4e9c8
-0,         27,         27,        1,    81000, 0x174022c4
-0,         28,         28,        1,    81000, 0x3320fe81
-0,         29,         29,        1,    81000, 0x7a3c342e
-0,         30,         30,        1,    81000, 0x448b4346
-0,         31,         31,        1,    81000, 0xd285b23d
-0,         32,         32,        1,    81000, 0x852ed590
-0,         33,         33,        1,    81000, 0xc9d3df17
-0,         34,         34,        1,    81000, 0x4d23727b
-0,         35,         35,        1,    81000, 0x1fae66cd
-0,         36,         36,        1,    81000, 0x384d54ab
-0,         37,         37,        1,    81000, 0x2fee6ba3
-0,         38,         38,        1,    81000, 0xd7ad6f59
-0,         39,         39,        1,    81000, 0xaf5e3e76
-0,         40,         40,        1,    81000, 0x10fceda4
-0,         41,         41,        1,    81000, 0xb26df92b
-0,         42,         42,        1,    81000, 0xd6676e08
-0,         43,         43,        1,    81000, 0xff6b1b95
-0,         44,         44,        1,    81000, 0x6196d598
-0,         45,         45,        1,    81000, 0x833ebf1b
-0,         46,         46,        1,    81000, 0x7b085af1
-0,         47,         47,        1,    81000, 0xe8f583b4
+0,          1,          1,        1,    81000, 0x8fef1925
+0,          2,          2,        1,    81000, 0xf0350cb6
+0,          3,          3,        1,    81000, 0xa70a0c52
+0,          4,          4,        1,    81000, 0x21ef1490
+0,          5,          5,        1,    81000, 0x98bc0c96
+0,          6,          6,        1,    81000, 0x92380b27
+0,          7,          7,        1,    81000, 0xbba216cd
+0,          8,          8,        1,    81000, 0x92a8172b
+0,          9,          9,        1,    81000, 0xfbc21592
+0,         10,         10,        1,    81000, 0xdad416a1
+0,         11,         11,        1,    81000, 0xec4d13aa
+0,         12,         12,        1,    81000, 0xaf36fcff
+0,         13,         13,        1,    81000, 0xb4fcf056
+0,         14,         14,        1,    81000, 0xe3782a3f
+0,         15,         15,        1,    81000, 0x714daa0b
+0,         16,         16,        1,    81000, 0xe2770382
+0,         17,         17,        1,    81000, 0x553253ff
+0,         18,         18,        1,    81000, 0x928d8c46
+0,         19,         19,        1,    81000, 0x06c8fe82
+0,         20,         20,        1,    81000, 0xb8198f5a
+0,         21,         21,        1,    81000, 0x0029f118
+0,         22,         22,        1,    81000, 0x6fe7fc6f
+0,         23,         23,        1,    81000, 0x9165edde
+0,         24,         24,        1,    81000, 0xe76ae791
+0,         25,         25,        1,    81000, 0xa4dd145a
+0,         26,         26,        1,    81000, 0x2d7de9d8
+0,         27,         27,        1,    81000, 0xe102228b
+0,         28,         28,        1,    81000, 0xc57ffe0e
+0,         29,         29,        1,    81000, 0x324434cb
+0,         30,         30,        1,    81000, 0xedc0433e
+0,         31,         31,        1,    81000, 0xd42bb18a
+0,         32,         32,        1,    81000, 0xedb3d561
+0,         33,         33,        1,    81000, 0x5244de92
+0,         34,         34,        1,    81000, 0x0bb27280
+0,         35,         35,        1,    81000, 0xc6116736
+0,         36,         36,        1,    81000, 0x42f154e2
+0,         37,         37,        1,    81000, 0xffbd6bf9
+0,         38,         38,        1,    81000, 0x813170d0
+0,         39,         39,        1,    81000, 0x430c4040
+0,         40,         40,        1,    81000, 0x56d1eecb
+0,         41,         41,        1,    81000, 0xaa4afa12
+0,         42,         42,        1,    81000, 0x2c3d6fb8
+0,         43,         43,        1,    81000, 0xfedf1e3e
+0,         44,         44,        1,    81000, 0xf538d893
+0,         45,         45,        1,    81000, 0xcc81c3b5
+0,         46,         46,        1,    81000, 0x59b95fbc
+0,         47,         47,        1,    81000, 0xb4da87a0
 0,         48,         48,        1,    81000, 0x3426d5e4
-0,         49,         49,        1,    81000, 0x214069ed
-0,         50,         50,        1,    81000, 0x7dbdfd3f
-0,         51,         51,        1,    81000, 0xf19b3f45
-0,         52,         52,        1,    81000, 0x0f05c7e2
-0,         53,         53,        1,    81000, 0xba94e323
-0,         54,         54,        1,    81000, 0x0de7b0c2
-0,         55,         55,        1,    81000, 0xfcf93c55
-0,         56,         56,        1,    81000, 0x8a8dbd55
+0,         49,         49,        1,    81000, 0x8d066aae
+0,         50,         50,        1,    81000, 0x09effe79
+0,         51,         51,        1,    81000, 0xecc540ae
+0,         52,         52,        1,    81000, 0x845dc90c
+0,         53,         53,        1,    81000, 0x9c2fe4d2
+0,         54,         54,        1,    81000, 0x8887b277
+0,         55,         55,        1,    81000, 0x3bdc3ca9
+0,         56,         56,        1,    81000, 0x094fbe27
 0,         57,         57,        1,    81000, 0xddf22b97
-0,         58,         58,        1,    81000, 0x49a830ff
-0,         59,         59,        1,    81000, 0x82ab2a4b
-0,         60,         60,        1,    81000, 0xd23420e5
-0,         61,         61,        1,    81000, 0x7c1017d1
-0,         62,         62,        1,    81000, 0x9aa61b38
-0,         63,         63,        1,    81000, 0x2a724a18
-0,         64,         64,        1,    81000, 0xc18055f2
-0,         65,         65,        1,    81000, 0xecba3855
-0,         66,         66,        1,    81000, 0x0eed6b0f
-0,         67,         67,        1,    81000, 0x4be73816
-0,         68,         68,        1,    81000, 0xa681214e
-0,         69,         69,        1,    81000, 0x4958f83d
-0,         70,         70,        1,    81000, 0xca0f0d61
-0,         71,         71,        1,    81000, 0x3c453de1
-0,         72,         72,        1,    81000, 0xff60360a
-0,         73,         73,        1,    81000, 0xdcef0949
+0,         58,         58,        1,    81000, 0x31b23156
+0,         59,         59,        1,    81000, 0xf5bf2ad8
+0,         60,         60,        1,    81000, 0x4a9321c4
+0,         61,         61,        1,    81000, 0xdd1b18ca
+0,         62,         62,        1,    81000, 0x4ece1cdd
+0,         63,         63,        1,    81000, 0x6a9a4b53
+0,         64,         64,        1,    81000, 0x6624578b
+0,         65,         65,        1,    81000, 0x89273a0f
+0,         66,         66,        1,    81000, 0x9eb56c4c
+0,         67,         67,        1,    81000, 0xe60238f5
+0,         68,         68,        1,    81000, 0xbc4c228a
+0,         69,         69,        1,    81000, 0x3feefa08
+0,         70,         70,        1,    81000, 0x0d620f37
+0,         71,         71,        1,    81000, 0x93693fc8
+0,         72,         72,        1,    81000, 0xfc4b3848
+0,         73,         73,        1,    81000, 0xd9950bfb
 0,         74,         74,        1,    81000, 0xe5e3732d
-0,         75,         75,        1,    81000, 0x39747fd4
-0,         76,         76,        1,    81000, 0x6bec70e6
-0,         77,         77,        1,    81000, 0x7026a8c0
-0,         78,         78,        1,    81000, 0x92de5b61
-0,         79,         79,        1,    81000, 0x3f00507f
-0,         80,         80,        1,    81000, 0x5620c377
+0,         75,         75,        1,    81000, 0x53517fd2
+0,         76,         76,        1,    81000, 0xb57c70b0
+0,         77,         77,        1,    81000, 0x378ea87c
+0,         78,         78,        1,    81000, 0xfd2b5b58
+0,         79,         79,        1,    81000, 0x66d45077
+0,         80,         80,        1,    81000, 0x6e07c3f8
 0,         81,         81,        1,    81000, 0x39f5ed38
-0,         82,         82,        1,    81000, 0x6ee35d67
-0,         83,         83,        1,    81000, 0x4f99a409
-0,         84,         84,        1,    81000, 0x0a05b6ea
-0,         85,         85,        1,    81000, 0xd6c442d9
-0,         86,         86,        1,    81000, 0x0bb3d2f0
-0,         87,         87,        1,    81000, 0x6891c5b1
-0,         88,         88,        1,    81000, 0xf16ba9be
-0,         89,         89,        1,    81000, 0xba53528e
-0,         90,         90,        1,    81000, 0xc847de49
+0,         82,         82,        1,    81000, 0x55b05da7
+0,         83,         83,        1,    81000, 0x7f78a42c
+0,         84,         84,        1,    81000, 0x5139b79c
+0,         85,         85,        1,    81000, 0x4054437d
+0,         86,         86,        1,    81000, 0x0f9dd327
+0,         87,         87,        1,    81000, 0xa885c60e
+0,         88,         88,        1,    81000, 0x37abaa72
+0,         89,         89,        1,    81000, 0xdab25345
+0,         90,         90,        1,    81000, 0xbdf4df9d
 0,         91,         91,        1,    81000, 0xc5b2e2b0
 0,         92,         92,        1,    81000, 0xb0b497ff
diff --git a/tests/ref/fate/vp6f b/tests/ref/fate/vp6f
index 65e113174aba9..c76e077cbf429 100644
--- a/tests/ref/fate/vp6f
+++ b/tests/ref/fate/vp6f
@@ -4,176 +4,176 @@
 #dimensions 0: 112x80
 #sar 0: 0/1
 0,          0,          0,        1,    13440, 0x7cb0a22f
-0,          1,          1,        1,    13440, 0xdfcea6ba
-0,          2,          2,        1,    13440, 0x59b2a5da
-0,          3,          3,        1,    13440, 0x12f1b2d8
-0,          4,          4,        1,    13440, 0x280fb9f6
-0,          5,          5,        1,    13440, 0x7bace8b3
-0,          6,          6,        1,    13440, 0x4ec91480
-0,          7,          7,        1,    13440, 0xa8010450
-0,          8,          8,        1,    13440, 0x61d8fc46
-0,          9,          9,        1,    13440, 0x242bb24e
-0,         10,         10,        1,    13440, 0x88397a36
-0,         11,         11,        1,    13440, 0x10e15726
-0,         12,         12,        1,    13440, 0x3018438c
-0,         13,         13,        1,    13440, 0xbbb94c21
-0,         14,         14,        1,    13440, 0xfc3e5e2b
-0,         15,         15,        1,    13440, 0xeaa69354
-0,         16,         16,        1,    13440, 0x96f1cc01
-0,         17,         17,        1,    13440, 0x333fdaff
-0,         18,         18,        1,    13440, 0xb5230ed2
-0,         19,         19,        1,    13440, 0x59383446
-0,         20,         20,        1,    13440, 0x954939e6
-0,         21,         21,        1,    13440, 0x53813d2f
-0,         22,         22,        1,    13440, 0x3ca53600
-0,         23,         23,        1,    13440, 0x7b30227a
-0,         24,         24,        1,    13440, 0x5145bbfe
-0,         25,         25,        1,    13440, 0xa0979632
-0,         26,         26,        1,    13440, 0x08026e21
-0,         27,         27,        1,    13440, 0x3f456d1e
-0,         28,         28,        1,    13440, 0x7d036b62
-0,         29,         29,        1,    13440, 0x508085fb
-0,         30,         30,        1,    13440, 0x251dc193
-0,         31,         31,        1,    13440, 0xf3121c9b
-0,         32,         32,        1,    13440, 0xf5da772e
-0,         33,         33,        1,    13440, 0x8179ccf7
-0,         34,         34,        1,    13440, 0xd57ceeb3
-0,         35,         35,        1,    13440, 0xc8f2169c
-0,         36,         36,        1,    13440, 0xbf8296c3
-0,         37,         37,        1,    13440, 0xee1927d0
-0,         38,         38,        1,    13440, 0xdd84e8d1
-0,         39,         39,        1,    13440, 0x7be57be2
-0,         40,         40,        1,    13440, 0xae353f91
-0,         41,         41,        1,    13440, 0x3ae927f2
-0,         42,         42,        1,    13440, 0x417227c6
-0,         43,         43,        1,    13440, 0x32572bea
-0,         44,         44,        1,    13440, 0x8b9e4839
-0,         45,         45,        1,    13440, 0xad669441
-0,         46,         46,        1,    13440, 0xc9de99a6
-0,         47,         47,        1,    13440, 0xb3ffb88b
-0,         48,         48,        1,    13440, 0xb321b8a0
-0,         49,         49,        1,    13440, 0x2efdbf53
-0,         50,         50,        1,    13440, 0x9b7aa566
-0,         51,         51,        1,    13440, 0x563c8d60
-0,         52,         52,        1,    13440, 0xe3848ee8
-0,         53,         53,        1,    13440, 0xa84b8f1d
-0,         54,         54,        1,    13440, 0x52da9f9f
-0,         55,         55,        1,    13440, 0x2ed56d97
-0,         56,         56,        1,    13440, 0x4e8534c2
-0,         57,         57,        1,    13440, 0x318900a6
-0,         58,         58,        1,    13440, 0xda96de39
-0,         59,         59,        1,    13440, 0xaae7ac0b
+0,          1,          1,        1,    13440, 0xe38fa6b7
+0,          2,          2,        1,    13440, 0xe003a5c6
+0,          3,          3,        1,    13440, 0xe502b2f6
+0,          4,          4,        1,    13440, 0xb731ba56
+0,          5,          5,        1,    13440, 0x8168e8f5
+0,          6,          6,        1,    13440, 0x776d14fb
+0,          7,          7,        1,    13440, 0xb5d0049c
+0,          8,          8,        1,    13440, 0x3434fc98
+0,          9,          9,        1,    13440, 0xd219b282
+0,         10,         10,        1,    13440, 0xd2027a7c
+0,         11,         11,        1,    13440, 0xd50b5799
+0,         12,         12,        1,    13440, 0x091543c4
+0,         13,         13,        1,    13440, 0x26374c6b
+0,         14,         14,        1,    13440, 0x5b4f5e1e
+0,         15,         15,        1,    13440, 0x55ba92da
+0,         16,         16,        1,    13440, 0x1ff5cb4d
+0,         17,         17,        1,    13440, 0x0ccbda45
+0,         18,         18,        1,    13440, 0x5ff80e6a
+0,         19,         19,        1,    13440, 0x09df3417
+0,         20,         20,        1,    13440, 0xfbed3a0f
+0,         21,         21,        1,    13440, 0xd65c3c7f
+0,         22,         22,        1,    13440, 0xc8173576
+0,         23,         23,        1,    13440, 0xd5e521f8
+0,         24,         24,        1,    13440, 0xe440bb55
+0,         25,         25,        1,    13440, 0xcbeb95c8
+0,         26,         26,        1,    13440, 0x8cd66dba
+0,         27,         27,        1,    13440, 0x5f356cbc
+0,         28,         28,        1,    13440, 0x54e16ad7
+0,         29,         29,        1,    13440, 0x17ed8566
+0,         30,         30,        1,    13440, 0x1aa1c121
+0,         31,         31,        1,    13440, 0x64e31cac
+0,         32,         32,        1,    13440, 0x869677a4
+0,         33,         33,        1,    13440, 0x7df6cdba
+0,         34,         34,        1,    13440, 0x23f3ef45
+0,         35,         35,        1,    13440, 0x176217b4
+0,         36,         36,        1,    13440, 0x493897cb
+0,         37,         37,        1,    13440, 0x0591296d
+0,         38,         38,        1,    13440, 0xb58aeae8
+0,         39,         39,        1,    13440, 0x38fd7e58
+0,         40,         40,        1,    13440, 0x200c424c
+0,         41,         41,        1,    13440, 0x2ee42b14
+0,         42,         42,        1,    13440, 0xdcba2ae5
+0,         43,         43,        1,    13440, 0xbb102f41
+0,         44,         44,        1,    13440, 0x034d4b06
+0,         45,         45,        1,    13440, 0x62d99722
+0,         46,         46,        1,    13440, 0x174d9ce4
+0,         47,         47,        1,    13440, 0xf676bbc8
+0,         48,         48,        1,    13440, 0xe2e5bbef
+0,         49,         49,        1,    13440, 0x6ef0c2af
+0,         50,         50,        1,    13440, 0x8d08a917
+0,         51,         51,        1,    13440, 0x030d90e8
+0,         52,         52,        1,    13440, 0x4d6f9232
+0,         53,         53,        1,    13440, 0xb7a792f6
+0,         54,         54,        1,    13440, 0xa0daa330
+0,         55,         55,        1,    13440, 0xe9f870a3
+0,         56,         56,        1,    13440, 0xed4f36cb
+0,         57,         57,        1,    13440, 0x8a6f02c3
+0,         58,         58,        1,    13440, 0x663ee03d
+0,         59,         59,        1,    13440, 0x2571adef
 0,         60,         60,        1,    13440, 0x7533ad99
-0,         61,         61,        1,    13440, 0x4e70c2c9
-0,         62,         62,        1,    13440, 0x9ce5e3fa
-0,         63,         63,        1,    13440, 0xc788fbbc
-0,         64,         64,        1,    13440, 0xd36604a9
-0,         65,         65,        1,    13440, 0x246221a4
-0,         66,         66,        1,    13440, 0x290c5c2b
-0,         67,         67,        1,    13440, 0xde6c68ec
-0,         68,         68,        1,    13440, 0x56248dbf
-0,         69,         69,        1,    13440, 0x5b898cbd
-0,         70,         70,        1,    13440, 0x090574b9
-0,         71,         71,        1,    13440, 0x8df2814a
-0,         72,         72,        1,    13440, 0xd4a6b285
-0,         73,         73,        1,    13440, 0xa016e921
-0,         74,         74,        1,    13440, 0x7f93fdc1
-0,         75,         75,        1,    13440, 0xfd0dee6f
-0,         76,         76,        1,    13440, 0xef04ce0e
-0,         77,         77,        1,    13440, 0x7560bee3
-0,         78,         78,        1,    13440, 0x5a8cdc85
-0,         79,         79,        1,    13440, 0x4788f7bc
-0,         80,         80,        1,    13440, 0xc001e34d
-0,         81,         81,        1,    13440, 0xc687eb74
-0,         82,         82,        1,    13440, 0xbf20feba
-0,         83,         83,        1,    13440, 0xd32647a8
-0,         84,         84,        1,    13440, 0xe69a955a
-0,         85,         85,        1,    13440, 0x1b56951f
-0,         86,         86,        1,    13440, 0xd1977378
-0,         87,         87,        1,    13440, 0x1620357d
-0,         88,         88,        1,    13440, 0x2596116f
-0,         89,         89,        1,    13440, 0x7473feca
-0,         90,         90,        1,    13440, 0x7f92bb47
-0,         91,         91,        1,    13440, 0x6866a683
-0,         92,         92,        1,    13440, 0xe9b08d7e
-0,         93,         93,        1,    13440, 0xa3fd7546
-0,         94,         94,        1,    13440, 0xa4416522
-0,         95,         95,        1,    13440, 0xd8f5572e
-0,         96,         96,        1,    13440, 0xf5746dbd
-0,         97,         97,        1,    13440, 0x256a87c6
-0,         98,         98,        1,    13440, 0x722aa2c8
-0,         99,         99,        1,    13440, 0xb26de5f5
-0,        100,        100,        1,    13440, 0x117f0841
-0,        101,        101,        1,    13440, 0xda2d192c
-0,        102,        102,        1,    13440, 0xb022442d
-0,        103,        103,        1,    13440, 0xbc4044f2
-0,        104,        104,        1,    13440, 0x68b330da
-0,        105,        105,        1,    13440, 0xc07228cf
-0,        106,        106,        1,    13440, 0xaa3f3d44
-0,        107,        107,        1,    13440, 0x25867aad
-0,        108,        108,        1,    13440, 0xa3ecb432
-0,        109,        109,        1,    13440, 0x93ccdcbb
-0,        110,        110,        1,    13440, 0x8302fa4f
-0,        111,        111,        1,    13440, 0x2f960f33
-0,        112,        112,        1,    13440, 0x15d41d14
-0,        113,        113,        1,    13440, 0x636529d0
-0,        114,        114,        1,    13440, 0x11035be5
-0,        115,        115,        1,    13440, 0x9b6e9167
-0,        116,        116,        1,    13440, 0x7b01adc7
+0,         61,         61,        1,    13440, 0xe3aec2d4
+0,         62,         62,        1,    13440, 0x3368e416
+0,         63,         63,        1,    13440, 0xe2ecfbf5
+0,         64,         64,        1,    13440, 0x45010538
+0,         65,         65,        1,    13440, 0xc075222c
+0,         66,         66,        1,    13440, 0x5bf45cc4
+0,         67,         67,        1,    13440, 0xa06869b5
+0,         68,         68,        1,    13440, 0x1ff98e6f
+0,         69,         69,        1,    13440, 0x11c88d84
+0,         70,         70,        1,    13440, 0x95c275e8
+0,         71,         71,        1,    13440, 0x81a68285
+0,         72,         72,        1,    13440, 0xb903b402
+0,         73,         73,        1,    13440, 0xa9d2eb1c
+0,         74,         74,        1,    13440, 0x2c48ff65
+0,         75,         75,        1,    13440, 0xc024eff6
+0,         76,         76,        1,    13440, 0x41bacff0
+0,         77,         77,        1,    13440, 0x2cf9c144
+0,         78,         78,        1,    13440, 0x9e67de72
+0,         79,         79,        1,    13440, 0x3e74fa36
+0,         80,         80,        1,    13440, 0x9b9be609
+0,         81,         81,        1,    13440, 0x594eee16
+0,         82,         82,        1,    13440, 0x8a7c00d7
+0,         83,         83,        1,    13440, 0x56104a2a
+0,         84,         84,        1,    13440, 0xd94d97cc
+0,         85,         85,        1,    13440, 0x95d09887
+0,         86,         86,        1,    13440, 0xe02a769b
+0,         87,         87,        1,    13440, 0x847839d0
+0,         88,         88,        1,    13440, 0x47441606
+0,         89,         89,        1,    13440, 0x19b10373
+0,         90,         90,        1,    13440, 0x3ddfbf15
+0,         91,         91,        1,    13440, 0xf4fdab37
+0,         92,         92,        1,    13440, 0xfe9a92ad
+0,         93,         93,        1,    13440, 0xaa387bfb
+0,         94,         94,        1,    13440, 0xe57a6c85
+0,         95,         95,        1,    13440, 0x68ef5f30
+0,         96,         96,        1,    13440, 0x481d76f7
+0,         97,         97,        1,    13440, 0x5b0e9192
+0,         98,         98,        1,    13440, 0xebc1ac80
+0,         99,         99,        1,    13440, 0x13bfef3e
+0,        100,        100,        1,    13440, 0xb7b01291
+0,        101,        101,        1,    13440, 0xc8c72419
+0,        102,        102,        1,    13440, 0x9dca4f32
+0,        103,        103,        1,    13440, 0x67635158
+0,        104,        104,        1,    13440, 0x6c773e0d
+0,        105,        105,        1,    13440, 0x6c2f3777
+0,        106,        106,        1,    13440, 0x19574d97
+0,        107,        107,        1,    13440, 0xd9f48cc1
+0,        108,        108,        1,    13440, 0xd19dc87c
+0,        109,        109,        1,    13440, 0xa23cf2ab
+0,        110,        110,        1,    13440, 0x3f7611b6
+0,        111,        111,        1,    13440, 0x183d2723
+0,        112,        112,        1,    13440, 0xf79b3534
+0,        113,        113,        1,    13440, 0xcbba4280
+0,        114,        114,        1,    13440, 0x03777586
+0,        115,        115,        1,    13440, 0xea1aac72
+0,        116,        116,        1,    13440, 0xad27cae0
 0,        117,        117,        1,    13440, 0xa237e05d
-0,        118,        118,        1,    13440, 0xd2f4f134
-0,        119,        119,        1,    13440, 0x2052d368
-0,        120,        120,        1,    13440, 0x08f7ae0d
-0,        121,        121,        1,    13440, 0xa89185bc
-0,        122,        122,        1,    13440, 0xfa628236
-0,        123,        123,        1,    13440, 0xdf79848b
-0,        124,        124,        1,    13440, 0xd19a906f
-0,        125,        125,        1,    13440, 0x219f9324
-0,        126,        126,        1,    13440, 0x46509b6d
-0,        127,        127,        1,    13440, 0xc5d9a568
-0,        128,        128,        1,    13440, 0xb21aaaa8
-0,        129,        129,        1,    13440, 0x925a97ed
-0,        130,        130,        1,    13440, 0xc5e3557f
-0,        131,        131,        1,    13440, 0x7c57155a
-0,        132,        132,        1,    13440, 0x6b26d005
-0,        133,        133,        1,    13440, 0xfdc7b369
-0,        134,        134,        1,    13440, 0x99919fc2
-0,        135,        135,        1,    13440, 0xcfe889e4
-0,        136,        136,        1,    13440, 0xd1196856
-0,        137,        137,        1,    13440, 0xec8348c6
-0,        138,        138,        1,    13440, 0x5ede0d9a
-0,        139,        139,        1,    13440, 0x198ef66e
-0,        140,        140,        1,    13440, 0x62fcefdf
-0,        141,        141,        1,    13440, 0x7791f415
-0,        142,        142,        1,    13440, 0xfbdb0029
-0,        143,        143,        1,    13440, 0xdab12b01
-0,        144,        144,        1,    13440, 0x646b2d5f
-0,        145,        145,        1,    13440, 0x5410f52e
-0,        146,        146,        1,    13440, 0x7186eef8
-0,        147,        147,        1,    13440, 0xca251ef6
-0,        148,        148,        1,    13440, 0x757c3b43
-0,        149,        149,        1,    13440, 0x59ff4982
-0,        150,        150,        1,    13440, 0xbe8ff084
-0,        151,        151,        1,    13440, 0xc85a9e38
-0,        152,        152,        1,    13440, 0x541b9a19
-0,        153,        153,        1,    13440, 0x274893c9
-0,        154,        154,        1,    13440, 0x7634b5d2
-0,        155,        155,        1,    13440, 0x1bd8e10c
-0,        156,        156,        1,    13440, 0xa661dfb1
-0,        157,        157,        1,    13440, 0x9d01bf92
-0,        158,        158,        1,    13440, 0xcb1eb220
-0,        159,        159,        1,    13440, 0x0ce27d25
-0,        160,        160,        1,    13440, 0x523b594f
-0,        161,        161,        1,    13440, 0xf0a04c4f
-0,        162,        162,        1,    13440, 0x0f0ffc3d
-0,        163,        163,        1,    13440, 0xb0d8b778
-0,        164,        164,        1,    13440, 0x5137a642
-0,        165,        165,        1,    13440, 0xd213a552
-0,        166,        166,        1,    13440, 0xc2fbc9b1
-0,        167,        167,        1,    13440, 0xfc2ee379
-0,        168,        168,        1,    13440, 0xfb80f737
-0,        169,        169,        1,    13440, 0xd6cb2447
-0,        170,        170,        1,    13440, 0x124b606d
-0,        171,        171,        1,    13440, 0xf788a066
-0,        172,        172,        1,    13440, 0xa16eed6e
-0,        173,        173,        1,    13440, 0x73ff0f82
+0,        118,        118,        1,    13440, 0xb683f189
+0,        119,        119,        1,    13440, 0x2058d3cd
+0,        120,        120,        1,    13440, 0x715dae96
+0,        121,        121,        1,    13440, 0xd8f88677
+0,        122,        122,        1,    13440, 0x45818329
+0,        123,        123,        1,    13440, 0x69ad852f
+0,        124,        124,        1,    13440, 0x56369125
+0,        125,        125,        1,    13440, 0xdfde941b
+0,        126,        126,        1,    13440, 0xf1209c90
+0,        127,        127,        1,    13440, 0x82e6a6aa
+0,        128,        128,        1,    13440, 0xb0ceabd1
+0,        129,        129,        1,    13440, 0x05f899a6
+0,        130,        130,        1,    13440, 0x9ef95758
+0,        131,        131,        1,    13440, 0x63e716d9
+0,        132,        132,        1,    13440, 0x7cb0d149
+0,        133,        133,        1,    13440, 0x3585b4f1
+0,        134,        134,        1,    13440, 0x623ea1fc
+0,        135,        135,        1,    13440, 0x762b8c32
+0,        136,        136,        1,    13440, 0xc1b96add
+0,        137,        137,        1,    13440, 0x95e24baf
+0,        138,        138,        1,    13440, 0x74631106
+0,        139,        139,        1,    13440, 0x78b5fa14
+0,        140,        140,        1,    13440, 0x5f33f34a
+0,        141,        141,        1,    13440, 0x7e6ff799
+0,        142,        142,        1,    13440, 0x9f8303b0
+0,        143,        143,        1,    13440, 0x7c3d2eb9
+0,        144,        144,        1,    13440, 0x3c0530d2
+0,        145,        145,        1,    13440, 0xe7ecf960
+0,        146,        146,        1,    13440, 0x82c3f316
+0,        147,        147,        1,    13440, 0x008922b4
+0,        148,        148,        1,    13440, 0x0d263f5f
+0,        149,        149,        1,    13440, 0x57dc4dd0
+0,        150,        150,        1,    13440, 0xbcbbf4f1
+0,        151,        151,        1,    13440, 0x62e9a335
+0,        152,        152,        1,    13440, 0x65699f37
+0,        153,        153,        1,    13440, 0xe4b69939
+0,        154,        154,        1,    13440, 0xbd4ebb6c
+0,        155,        155,        1,    13440, 0xb7d6e71e
+0,        156,        156,        1,    13440, 0xfc0ae5b6
+0,        157,        157,        1,    13440, 0xe55ec65c
+0,        158,        158,        1,    13440, 0x2095b8fc
+0,        159,        159,        1,    13440, 0x479c8408
+0,        160,        160,        1,    13440, 0x2981604c
+0,        161,        161,        1,    13440, 0xa4e55397
+0,        162,        162,        1,    13440, 0x0bc703f8
+0,        163,        163,        1,    13440, 0xd43bbf89
+0,        164,        164,        1,    13440, 0xdcd6aeca
+0,        165,        165,        1,    13440, 0x07b1ad84
+0,        166,        166,        1,    13440, 0xfb17d20b
+0,        167,        167,        1,    13440, 0x0c4aebd9
+0,        168,        168,        1,    13440, 0x826effc4
+0,        169,        169,        1,    13440, 0x4ccf2ca4
+0,        170,        170,        1,    13440, 0xe4f968f3
+0,        171,        171,        1,    13440, 0xe1b4a93a
+0,        172,        172,        1,    13440, 0x8597f65e
+0,        173,        173,        1,    13440, 0xb4f7184f
diff --git a/tests/ref/lavf/yuv4mpeg b/tests/ref/lavf/yuv4mpeg
index 8c1566ea0d4fd..367b37621d846 100644
--- a/tests/ref/lavf/yuv4mpeg
+++ b/tests/ref/lavf/yuv4mpeg
@@ -1,2 +1,3 @@
 ec8178cb152f9cdbfd9cb724d977db2e *./tests/data/lavf/lavf.y4m
 3801808 ./tests/data/lavf/lavf.y4m
+./tests/data/lavf/lavf.y4m CRC=0x0a941f26
diff --git a/tests/ref/vsynth/vsynth1-prores b/tests/ref/vsynth/vsynth1-prores
index 14de8b9cf320e..65686aeca1653 100644
--- a/tests/ref/vsynth/vsynth1-prores
+++ b/tests/ref/vsynth/vsynth1-prores
@@ -1,4 +1,4 @@
-7ca7d2f9f5d8ac2ead691b1b6a70d409 *tests/data/fate/vsynth1-prores.mov
+ec1d32acb0f6035ca45ad7d395d4a586 *tests/data/fate/vsynth1-prores.mov
 5022821 tests/data/fate/vsynth1-prores.mov
 fb4a9e025d12afc0dbbca8d82831858f *tests/data/fate/vsynth1-prores.out.rawvideo
 stddev:    2.47 PSNR: 40.27 MAXDIFF:   31 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-prores_444 b/tests/ref/vsynth/vsynth1-prores_444
new file mode 100644
index 0000000000000..2f064d624d955
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-prores_444
@@ -0,0 +1,4 @@
+48b30df581b35bf3ce1bc335327323a6 *tests/data/fate/vsynth1-prores_444.mov
+7778954 tests/data/fate/vsynth1-prores_444.mov
+e0da52b5d58171294d1b299539801ae0 *tests/data/fate/vsynth1-prores_444.out.rawvideo
+stddev:    2.80 PSNR: 39.17 MAXDIFF:   44 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-prores_444_int b/tests/ref/vsynth/vsynth1-prores_444_int
new file mode 100644
index 0000000000000..db0d42f7cdb74
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-prores_444_int
@@ -0,0 +1,4 @@
+4fc38d6a1c3171f23713c24b1342a592 *tests/data/fate/vsynth1-prores_444_int.mov
+9940947 tests/data/fate/vsynth1-prores_444_int.mov
+732ceeb6887524e0aee98762fe50578b *tests/data/fate/vsynth1-prores_444_int.out.rawvideo
+stddev:    2.83 PSNR: 39.08 MAXDIFF:   45 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-prores_int b/tests/ref/vsynth/vsynth1-prores_int
new file mode 100644
index 0000000000000..470174559ad2e
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-prores_int
@@ -0,0 +1,4 @@
+c2e40555435e717dd24be0430b8c0cdf *tests/data/fate/vsynth1-prores_int.mov
+6308688 tests/data/fate/vsynth1-prores_int.mov
+164a4ca890695cf594293d1acec9463c *tests/data/fate/vsynth1-prores_int.out.rawvideo
+stddev:    2.66 PSNR: 39.62 MAXDIFF:   34 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-r210 b/tests/ref/vsynth/vsynth1-r210
index 825e1d4789685..11033080608b7 100644
--- a/tests/ref/vsynth/vsynth1-r210
+++ b/tests/ref/vsynth/vsynth1-r210
@@ -1,4 +1,4 @@
-1ea72f280b110ed65fc535c3438d27f9 *tests/data/fate/vsynth1-r210.avi
+1a522a30ddd8c2865a731a5659001717 *tests/data/fate/vsynth1-r210.avi
 22125252 tests/data/fate/vsynth1-r210.avi
-ecaafa9eec11b5e1453a63ed6d194eed *tests/data/fate/vsynth1-r210.out.rawvideo
-stddev:    3.23 PSNR: 37.94 MAXDIFF:   48 bytes:  7603200/  7603200
+b6444935d6c4d8c75fe63d5978f5b457 *tests/data/fate/vsynth1-r210.out.rawvideo
+stddev:    3.73 PSNR: 36.68 MAXDIFF:   48 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-prores b/tests/ref/vsynth/vsynth2-prores
index a1b4bb748fd42..63bc8a8f30be4 100644
--- a/tests/ref/vsynth/vsynth2-prores
+++ b/tests/ref/vsynth/vsynth2-prores
@@ -1,4 +1,4 @@
-aa57fd1221b7eefaf1f34f9d57d6a7cb *tests/data/fate/vsynth2-prores.mov
-3265056 tests/data/fate/vsynth2-prores.mov
-537b0ff66d7c8c3c12faa89d042e6a49 *tests/data/fate/vsynth2-prores.out.rawvideo
+27fa0f1ecb2dbe1510582ec6d01cc81d *tests/data/fate/vsynth2-prores.mov
+3260123 tests/data/fate/vsynth2-prores.mov
+416fa8773615889c70491452428d6710 *tests/data/fate/vsynth2-prores.out.rawvideo
 stddev:    1.38 PSNR: 45.29 MAXDIFF:   12 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-prores_444 b/tests/ref/vsynth/vsynth2-prores_444
new file mode 100644
index 0000000000000..513caaacc2e29
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-prores_444
@@ -0,0 +1,4 @@
+cdea76066b82eed873462c2da908a8a0 *tests/data/fate/vsynth2-prores_444.mov
+5219722 tests/data/fate/vsynth2-prores_444.mov
+e425b6af7afa51b5e64fc529528b3691 *tests/data/fate/vsynth2-prores_444.out.rawvideo
+stddev:    0.88 PSNR: 49.18 MAXDIFF:   14 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-prores_444_int b/tests/ref/vsynth/vsynth2-prores_444_int
new file mode 100644
index 0000000000000..bce8277b3cd21
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-prores_444_int
@@ -0,0 +1,4 @@
+4043270721dafd28c1cfa176a1c6916a *tests/data/fate/vsynth2-prores_444_int.mov
+6420787 tests/data/fate/vsynth2-prores_444_int.mov
+33a5db4f0423168d4ae4f1db3610928e *tests/data/fate/vsynth2-prores_444_int.out.rawvideo
+stddev:    0.93 PSNR: 48.73 MAXDIFF:   14 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-prores_int b/tests/ref/vsynth/vsynth2-prores_int
new file mode 100644
index 0000000000000..48a419b9bc2cd
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-prores_int
@@ -0,0 +1,4 @@
+9fef0da9e53a028e81545cf81ec2c3e7 *tests/data/fate/vsynth2-prores_int.mov
+4070996 tests/data/fate/vsynth2-prores_int.mov
+bef9e38387a1fbb1ce2e4401b6d41674 *tests/data/fate/vsynth2-prores_int.out.rawvideo
+stddev:    1.54 PSNR: 44.37 MAXDIFF:   13 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-r210 b/tests/ref/vsynth/vsynth2-r210
index dc5ff1849c233..5efd87d6f57c3 100644
--- a/tests/ref/vsynth/vsynth2-r210
+++ b/tests/ref/vsynth/vsynth2-r210
@@ -1,4 +1,4 @@
-2f928096d892ce0239832afc369e117c *tests/data/fate/vsynth2-r210.avi
+9a27c0c96f9e658d610d2590b61416a1 *tests/data/fate/vsynth2-r210.avi
 22125252 tests/data/fate/vsynth2-r210.avi
-2ade5f6167d7a4a1589e168ddbbc35d0 *tests/data/fate/vsynth2-r210.out.rawvideo
-stddev:    1.17 PSNR: 46.71 MAXDIFF:   15 bytes:  7603200/  7603200
+d43196c64fd611f6e9c046e0ef3e570e *tests/data/fate/vsynth2-r210.out.rawvideo
+stddev:    1.37 PSNR: 45.34 MAXDIFF:   14 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-prores b/tests/ref/vsynth/vsynth3-prores
index f63a7a736f21a..2998dc1d79ddd 100644
--- a/tests/ref/vsynth/vsynth3-prores
+++ b/tests/ref/vsynth/vsynth3-prores
@@ -1,4 +1,4 @@
-b060c59be88b4b089ece5ee8dc4f1c58 *tests/data/fate/vsynth3-prores.mov
+6afd345a8f799d0459229349a30497cd *tests/data/fate/vsynth3-prores.mov
 105367 tests/data/fate/vsynth3-prores.mov
 fff5e7ad21d78501c8fa4749bf4bf289 *tests/data/fate/vsynth3-prores.out.rawvideo
 stddev:    2.80 PSNR: 39.17 MAXDIFF:   27 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-prores_444 b/tests/ref/vsynth/vsynth3-prores_444
new file mode 100644
index 0000000000000..78ca0dd68a9e0
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-prores_444
@@ -0,0 +1,4 @@
+3319da492e560964bf92440a0d8612b3 *tests/data/fate/vsynth3-prores_444.mov
+159127 tests/data/fate/vsynth3-prores_444.mov
+025b48feb3d9a9652983ef71e6cb7e7c *tests/data/fate/vsynth3-prores_444.out.rawvideo
+stddev:    3.21 PSNR: 37.98 MAXDIFF:   41 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-prores_444_int b/tests/ref/vsynth/vsynth3-prores_444_int
new file mode 100644
index 0000000000000..36760adf9a77d
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-prores_444_int
@@ -0,0 +1,4 @@
+270045a731d4cb6ba253880021c87a63 *tests/data/fate/vsynth3-prores_444_int.mov
+184397 tests/data/fate/vsynth3-prores_444_int.mov
+a8852aa2841c2ce5f2aa86176ceda4ef *tests/data/fate/vsynth3-prores_444_int.out.rawvideo
+stddev:    3.24 PSNR: 37.91 MAXDIFF:   41 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-prores_int b/tests/ref/vsynth/vsynth3-prores_int
new file mode 100644
index 0000000000000..53dcfbd14d686
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-prores_int
@@ -0,0 +1,4 @@
+ee1f14b23eb9dee4d59b021d4ec041bf *tests/data/fate/vsynth3-prores_int.mov
+120484 tests/data/fate/vsynth3-prores_int.mov
+e5859ba47a99f9e53c1ddcaa68a8f8f8 *tests/data/fate/vsynth3-prores_int.out.rawvideo
+stddev:    2.92 PSNR: 38.81 MAXDIFF:   29 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-r210 b/tests/ref/vsynth/vsynth3-r210
index 75c424cf28665..253657cd85e6f 100644
--- a/tests/ref/vsynth/vsynth3-r210
+++ b/tests/ref/vsynth/vsynth3-r210
@@ -1,4 +1,4 @@
-229e700e0fab4e81481e99a70e00bec9 *tests/data/fate/vsynth3-r210.avi
+fd12f6dde75d0872ccf9012b342208de *tests/data/fate/vsynth3-r210.avi
 442052 tests/data/fate/vsynth3-r210.avi
-e1d882babc8754f7418aa91ce48f7ab0 *tests/data/fate/vsynth3-r210.out.rawvideo
-stddev:    3.48 PSNR: 37.28 MAXDIFF:   42 bytes:    86700/    86700
+a2c4e460ebede1109bd794b1b7b05a1f *tests/data/fate/vsynth3-r210.out.rawvideo
+stddev:    4.10 PSNR: 35.87 MAXDIFF:   48 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-prores b/tests/ref/vsynth/vsynth_lena-prores
index 5b8c3fe1b4116..41b8e16bf0461 100644
--- a/tests/ref/vsynth/vsynth_lena-prores
+++ b/tests/ref/vsynth/vsynth_lena-prores
@@ -1,4 +1,4 @@
-637f34b5fd81f072f76a967595fa6af7 *tests/data/fate/vsynth_lena-prores.mov
+952c0dfde2b3d238c2ef411782f309a6 *tests/data/fate/vsynth_lena-prores.mov
 2844076 tests/data/fate/vsynth_lena-prores.mov
 03fd29e3963716a09d232b6f817ecb57 *tests/data/fate/vsynth_lena-prores.out.rawvideo
 stddev:    1.31 PSNR: 45.77 MAXDIFF:   11 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-prores_444 b/tests/ref/vsynth/vsynth_lena-prores_444
new file mode 100644
index 0000000000000..2aac93f4fb291
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-prores_444
@@ -0,0 +1,4 @@
+9df5b55d8545ff162414499530749808 *tests/data/fate/vsynth_lena-prores_444.mov
+4734395 tests/data/fate/vsynth_lena-prores_444.mov
+a704e05e3e0a451edef7515b25a76bb8 *tests/data/fate/vsynth_lena-prores_444.out.rawvideo
+stddev:    0.81 PSNR: 49.88 MAXDIFF:    8 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-prores_444_int b/tests/ref/vsynth/vsynth_lena-prores_444_int
new file mode 100644
index 0000000000000..378b57d04dc60
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-prores_444_int
@@ -0,0 +1,4 @@
+02400504ef9b76cc58e8d964b9dd40a3 *tests/data/fate/vsynth_lena-prores_444_int.mov
+5696258 tests/data/fate/vsynth_lena-prores_444_int.mov
+466380156e4d2b811f4ffb9c5a8bca72 *tests/data/fate/vsynth_lena-prores_444_int.out.rawvideo
+stddev:    0.88 PSNR: 49.23 MAXDIFF:    9 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-prores_int b/tests/ref/vsynth/vsynth_lena-prores_int
new file mode 100644
index 0000000000000..bd6adaf211f67
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-prores_int
@@ -0,0 +1,4 @@
+5fada1996c2937546d11fda8b1e02188 *tests/data/fate/vsynth_lena-prores_int.mov
+3532698 tests/data/fate/vsynth_lena-prores_int.mov
+eb5caa9824ca294f403cd13f33c40f23 *tests/data/fate/vsynth_lena-prores_int.out.rawvideo
+stddev:    1.47 PSNR: 44.78 MAXDIFF:   12 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-r210 b/tests/ref/vsynth/vsynth_lena-r210
index 8fd1a666afcf0..0a113dc693ea5 100644
--- a/tests/ref/vsynth/vsynth_lena-r210
+++ b/tests/ref/vsynth/vsynth_lena-r210
@@ -1,4 +1,4 @@
-94874a48987fd401494f4d7ca8e1273b *tests/data/fate/vsynth_lena-r210.avi
+61fd53566d99b725e75212747b35893f *tests/data/fate/vsynth_lena-r210.avi
 22125252 tests/data/fate/vsynth_lena-r210.avi
-6ea4fcd93fc83defc8770e85b64b60bb *tests/data/fate/vsynth_lena-r210.out.rawvideo
-stddev:    0.70 PSNR: 51.12 MAXDIFF:   12 bytes:  7603200/  7603200
+4b7425191bb6a7fc4ca0dc649d9ba202 *tests/data/fate/vsynth_lena-r210.out.rawvideo
+stddev:    0.93 PSNR: 48.72 MAXDIFF:   11 bytes:  7603200/  7603200
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 0c7d34bea3320..c88828c4a955e 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -32,9 +32,6 @@ target_crcfile="$target_datadir/$this.crc"
 cleanfiles="$raw_dst $crcfile"
 trap 'rm -f -- $cleanfiles' EXIT
 
-mkdir -p "$datadir"
-mkdir -p "$outfile"
-
 [ "${V-0}" -gt 0 ] && echov=echov || echov=:
 
 echov(){
diff --git a/tools/Makefile b/tools/Makefile
index 3909e7cfdd9ad..b347caf82a243 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -5,7 +5,7 @@ TOOLS-$(CONFIG_ZLIB) += cws2fws
 tools/target_dec_%_fuzzer.o: tools/target_dec_fuzzer.c
 	$(COMPILE_C) -DFFMPEG_DECODER=$*
 
-OBJDIRS += tools
+OUTDIRS += tools
 
 clean::
 	$(RM) $(CLEANSUFFIXES:%=tools/%)
diff --git a/tools/target_dec_fate.list b/tools/target_dec_fate.list
new file mode 100644
index 0000000000000..5ad1e940c654c
--- /dev/null
+++ b/tools/target_dec_fate.list
@@ -0,0 +1,592 @@
+496/clusterfuzz-testcase-5805083497332736                                                       target_dec_jpegls_fuzzer
+498/clusterfuzz-testcase-6157986632302592                                                       target_dec_tiff_fuzzer
+500/clusterfuzz-testcase-6315221727576064                                                       target_dec_png_fuzzer
+501/clusterfuzz-testcase-5672752870588416                                                       target_dec_sipr_fuzzer
+503/clusterfuzz-testcase-6386429735206912                                                       target_dec_mp3_fuzzer
+508/clusterfuzz-testcase-6245747678773248                                                       target_dec_amrnb_fuzzer
+510/clusterfuzz-testcase-5737865715646464                                                       target_dec_dca_fuzzer
+540/clusterfuzz-testcase-5674546153652224                                                       target_dec_jpegls_fuzzer
+544/clusterfuzz-testcase-5936536407244800.f8bd9b24_8ba77916_70c2c7be_3df6a2ea_96cd9f14          target_dec_interplay_video_fuzzer
+546/clusterfuzz-testcase-4809433909559296                                                       target_dec_png_fuzzer
+555/clusterfuzz-testcase-5986646595993600                                                       target_dec_movtext_fuzzer
+559/clusterfuzz-testcase-6424225917173760                                                       target_dec_pictor_fuzzer
+607/clusterfuzz-testcase-5108792465293312                                                       target_dec_wavpack_fuzzer
+608/clusterfuzz-testcase-6039782863929344                                                       target_dec_mpeg2video_fuzzer
+609/clusterfuzz-testcase-4825202619842560                                                       target_dec_png_fuzzer
+610/clusterfuzz-testcase-4831030085156864                                                       target_dec_ac3_fuzzer
+611/clusterfuzz-testcase-5613455820193792                                                       target_dec_mjpeg_fuzzer
+612/clusterfuzz-testcase-4707817137111040                                                       target_dec_mpeg4_fuzzer
+614/clusterfuzz-testcase-4931860079575040                                                       target_dec_h264_fuzzer
+615/clusterfuzz-testcase-5488002644049920                                                       target_dec_h264_fuzzer
+616/clusterfuzz-testcase-5724692654587904                                                       target_dec_jpegls_fuzzer
+617/clusterfuzz-testcase-6413875723370496                                                       target_dec_subrip_fuzzer
+618/clusterfuzz-testcase-6594990333493248                                                       target_dec_h263_fuzzer
+619/clusterfuzz-testcase-5803914534322176                                                       target_dec_dvbsub_fuzzer
+622/clusterfuzz-testcase-5745722022428672                                                       target_dec_pictor_fuzzer
+626/clusterfuzz-testcase-4738718621499392                                                       target_dec_flac_fuzzer
+628/clusterfuzz-testcase-6187747641393152                                                       target_dec_flac_fuzzer
+629/clusterfuzz-testcase-6697457381539840                                                       target_dec_dca_fuzzer
+630/clusterfuzz-testcase-6608718928019456                                                       target_dec_rv40_fuzzer
+631/clusterfuzz-testcase-6725491035734016                                                       target_dec_mp3_fuzzer
+633/clusterfuzz-testcase-4553133554401280                                                       target_dec_dvvideo_fuzzer
+634/clusterfuzz-testcase-5285420445204480                                                       target_dec_h264_fuzzer
+637/clusterfuzz-testcase-5713159862091776                                                       target_dec_rv40_fuzzer
+639/clusterfuzz-testcase-5143866241974272                                                       target_dec_h263_fuzzer
+656/clusterfuzz-testcase-6463814516080640                                                       target_dec_mpeg4_fuzzer
+657/clusterfuzz-testcase-6674741433729024                                                       target_dec_jpegls_fuzzer
+658/clusterfuzz-testcase-6691260146384896                                                       target_dec_mpeg4_fuzzer
+659/clusterfuzz-testcase-5866673603084288                                                       target_dec_h263_fuzzer
+662/clusterfuzz-testcase-4898131432964096                                                       target_dec_rv30_fuzzer
+662/clusterfuzz-testcase-4898131432964096                                                       target_dec_rv40_fuzzer
+664/clusterfuzz-testcase-4917047475568640                                                       target_dec_vp6_fuzzer
+665/clusterfuzz-testcase-4863789881098240                                                       target_dec_mp2_fuzzer
+665/clusterfuzz-testcase-4863789881098240                                                       target_dec_mp3_fuzzer
+670/clusterfuzz-testcase-4852021066727424                                                       target_dec_h263_fuzzer
+671/clusterfuzz-testcase-4990381827555328                                                       target_dec_mpeg1video_fuzzer
+672/clusterfuzz-testcase-5595018867769344                                                       target_dec_eac3_fuzzer
+673/clusterfuzz-testcase-5948736536576000                                                       target_dec_flac_fuzzer
+674/clusterfuzz-testcase-6713275880308736                                                       target_dec_mpeg4_fuzzer
+675/clusterfuzz-testcase-6722971232108544                                                       target_dec_pictor_fuzzer
+677/clusterfuzz-testcase-6635120628858880                                                       target_dec_h264_fuzzer
+680/clusterfuzz-testcase-5416627266912256                                                       target_dec_dca_fuzzer
+681/clusterfuzz-testcase-5013323462475776                                                       target_dec_dca_fuzzer
+700/clusterfuzz-testcase-5660909504561152                                                       target_dec_vp6_fuzzer
+701/clusterfuzz-testcase-6594719951880192                                                       target_dec_mpeg4_fuzzer
+702/clusterfuzz-testcase-4553541576294400                                                       target_dec_vp5_fuzzer
+712/clusterfuzz-testcase-6647676227551232                                                       target_dec_h264_fuzzer
+713/clusterfuzz-testcase-4999324687663104                                                       target_dec_h264_fuzzer
+716/clusterfuzz-testcase-4890287480504320                                                       target_dec_mpeg4_fuzzer
+717/clusterfuzz-testcase-5434924129583104                                                       target_dec_wavpack_fuzzer
+719/clusterfuzz-testcase-6214837208088576                                                       target_dec_vp6f_fuzzer
+722/clusterfuzz-testcase-5711268868521984                                                       target_dec_dca_fuzzer
+723/clusterfuzz-testcase-6471394663596032                                                       target_dec_wavpack_fuzzer
+724/clusterfuzz-testcase-6738249571631104                                                       target_dec_pictor_fuzzer
+729/clusterfuzz-testcase-5154831595470848                                                       target_dec_wavpack_fuzzer
+730/clusterfuzz-testcase-5265113739165696                                                       target_dec_vp8_fuzzer
+731/clusterfuzz-testcase-5391628980191232                                                       target_dec_flac_fuzzer
+732/clusterfuzz-testcase-4872990070145024                                                       target_dec_dca_fuzzer
+733/clusterfuzz-testcase-4682158096515072                                                       target_dec_mjpeg_fuzzer
+734/clusterfuzz-testcase-4821293192970240                                                       target_dec_h264_fuzzer
+736/clusterfuzz-testcase-5580263943831552                                                       target_dec_mpeg4_fuzzer
+741/clusterfuzz-testcase-5869962004529152                                                       target_dec_movtext_fuzzer
+755/clusterfuzz-testcase-5369072516595712                                                       target_dec_h264_fuzzer
+758/clusterfuzz-testcase-4720832028868608                                                       target_dec_vp5_fuzzer
+761/clusterfuzz-testcase-5442222252097536                                                       target_dec_wavpack_fuzzer
+762/clusterfuzz-testcase-5927683747741696                                                       target_dec_dca_fuzzer
+763/clusterfuzz-testcase-6007567320875008                                                       target_dec_amrwb_fuzzer
+764/clusterfuzz-testcase-6273034652483584                                                       target_dec_mpeg2video_fuzzer
+766/clusterfuzz-testcase-4603047080624128                                                       target_dec_vp6_fuzzer
+767/clusterfuzz-testcase-6743603416137728                                                       target_dec_vp6f_fuzzer
+768/clusterfuzz-testcase-4807444305805312                                                       target_dec_rv40_fuzzer
+772/clusterfuzz-testcase-5453962780082176                                                       target_dec_h264_fuzzer
+773/clusterfuzz-testcase-6362160458366976                                                       target_dec_vp6f_fuzzer
+779/clusterfuzz-testcase-5568669545398272                                                       target_dec_mpeg4_fuzzer
+780/clusterfuzz-testcase-6393552642768896                                                       target_dec_gif_fuzzer
+807/clusterfuzz-testcase-6470061042696192                                                       target_dec_vp6f_fuzzer
+808/clusterfuzz-testcase-4715513349406720                                                       target_dec_wavpack_fuzzer
+809/clusterfuzz-testcase-6172687908995072                                                       target_dec_vp6f_fuzzer
+810/clusterfuzz-testcase-5249282825256960                                                       target_dec_targa_fuzzer
+811/clusterfuzz-testcase-6465493076541440                                                       target_dec_mjpeg_fuzzer
+822/clusterfuzz-testcase-4873433189974016                                                       target_dec_wavpack_fuzzer
+823/clusterfuzz-testcase-6727060074528768                                                       target_dec_pictor_fuzzer
+826/clusterfuzz-testcase-5316921379520512                                                       target_dec_tiff_fuzzer
+830/clusterfuzz-testcase-6253175327686656                                                       target_dec_mp2_fuzzer
+839/clusterfuzz-testcase-4871084446842880                                                       target_dec_wavpack_fuzzer
+842/clusterfuzz-testcase-6361547318231040                                                       target_dec_tiff_fuzzer
+847/clusterfuzz-testcase-5291877358108672                                                       target_dec_vp5_fuzzer
+848/clusterfuzz-testcase-5432155620507648                                                       target_dec_vp6f_fuzzer
+850/clusterfuzz-testcase-5721296509861888                                                       target_dec_vp6f_fuzzer
+857/clusterfuzz-testcase-5319093760557056                                                       target_dec_h264_fuzzer
+858/clusterfuzz-testcase-5168477042114560                                                       target_dec_h264_fuzzer
+861/clusterfuzz-testcase-5688284384591872                                                       target_dec_tiff_fuzzer
+864/clusterfuzz-testcase-4774385942528000                                                       target_dec_h264_fuzzer
+870/clusterfuzz-testcase-5649105424482304                                                       target_dec_mjpeg_fuzzer
+873/clusterfuzz-testcase-5714546230558720                                                       target_dec_mp3_fuzzer
+874/clusterfuzz-testcase-5252796175613952                                                       target_dec_tiff_fuzzer
+894/clusterfuzz-testcase-4841537823309824                                                       target_dec_wavpack_fuzzer
+898/clusterfuzz-testcase-6149765467209728                                                       target_dec_pictor_fuzzer
+902/clusterfuzz-testcase-4561155144024064                                                       target_dec_h264_fuzzer
+911/clusterfuzz-testcase-5415105606975488                                                       target_dec_h264_fuzzer
+936/clusterfuzz-testcase-4700061919346688                                                       target_dec_tiff_fuzzer
+938/clusterfuzz-testcase-4791735110598656                                                       target_dec_amrnb_fuzzer
+939/clusterfuzz-testcase-6515070404132864                                                       target_dec_h264_fuzzer
+940/clusterfuzz-testcase-5200378381467648                                                       target_dec_wavpack_fuzzer
+943/clusterfuzz-testcase-5114865297391616                                                       target_dec_jpegls_fuzzer
+943/clusterfuzz-testcase-5114865297391616                                                       target_dec_mjpeg_fuzzer
+945/clusterfuzz-testcase-6037937588273152                                                       target_dec_wavpack_fuzzer
+979/clusterfuzz-testcase-4940780542099456                                                       target_dec_h264_fuzzer
+1044/clusterfuzz-testcase-minimized-ffmpeg_AUDIO_AV_CODEC_ID_DTS_fuzzer-6135262067294208        target_dec_dca_fuzzer
+1072/clusterfuzz-testcase-6456688074817536                                                      target_dec_aac_fuzzer
+1080/clusterfuzz-testcase-5353236754071552                                                      target_dec_dvdsub_fuzzer
+1085/clusterfuzz-testcase-6089649833377792                                                      target_dec_tiff_fuzzer
+1133/clusterfuzz-testcase-minimized-ffmpeg_VIDEO_AV_CODEC_ID_MJPEG_fuzzer-4861925596856320      target_dec_mjpeg_fuzzer
+1136/clusterfuzz-testcase-6024209379622912                                                      target_dec_sipr_fuzzer
+1137/clusterfuzz-testcase-6711216560930816                                                      target_dec_jpegls_fuzzer
+1141/clusterfuzz-testcase-6659734767665152                                                      target_dec_amrnb_fuzzer
+1213/clusterfuzz-testcase-minimized-6022987469815808                                            target_dec_tiff_fuzzer
+1214/clusterfuzz-testcase-minimized-6130606599569408                                            target_dec_h264_fuzzer
+1271/clusterfuzz-testcase-minimized-6095220498235392                                            target_dec_targa_fuzzer
+1275/clusterfuzz-testcase-minimized-6718162017976320                                            target_dec_mdec_fuzzer
+1280/clusterfuzz-testcase-minimized-6102353767825408                                            target_dec_svq3_fuzzer
+1282/clusterfuzz-testcase-minimized-5400131681648640                                            target_dec_bmp_fuzzer
+1283/clusterfuzz-testcase-minimized-6221126759874560                                            target_dec_vp3_fuzzer
+1290/clusterfuzz-testcase-minimized-5815578902134784                                            target_dec_indeo2_fuzzer
+1292/clusterfuzz-testcase-minimized-5795512143839232                                            target_dec_flic_fuzzer
+1293/clusterfuzz-testcase-minimized-6054752074858496                                            target_dec_smc_fuzzer
+1298/clusterfuzz-testcase-minimized-5955580877340672                                            target_dec_mpeg4_fuzzer
+1305/clusterfuzz-testcase-minimized-5787235003662336                                            target_dec_amv_fuzzer
+1306/clusterfuzz-testcase-minimized-6152296217968640                                            target_dec_msvideo1_fuzzer
+1309/clusterfuzz-testcase-minimized-5754803370065920                                            target_dec_pcx_fuzzer
+1314/clusterfuzz-testcase-minimized-4621997222920192                                            target_dec_png_fuzzer
+1321/clusterfuzz-testcase-minimized-5875549597597696                                            target_dec_cinepak_fuzzer
+1322/clusterfuzz-testcase-minimized-4728193644756992                                            target_dec_png_fuzzer
+1335/clusterfuzz-testcase-minimized-5566961566089216                                            target_dec_cavs_fuzzer
+1336/clusterfuzz-testcase-minimized-4761381930795008                                            target_dec_pixlet_fuzzer
+1337/clusterfuzz-testcase-minimized-5212314171080704                                            target_dec_aac_fuzzer
+1338/clusterfuzz-testcase-minimized-6485546354343936                                            target_dec_wnv1_fuzzer
+1339/clusterfuzz-testcase-minimized-4614671485108224                                            target_dec_dss_sp_fuzzer
+1340/clusterfuzz-testcase-minimized-4669892148068352                                            target_dec_adpcm_g722_fuzzer
+1341/clusterfuzz-testcase-minimized-5441502618583040                                            target_dec_cdxl_fuzzer
+1342/clusterfuzz-testcase-minimized-5490842129137664                                            target_dec_nellymoser_fuzzer
+1344/clusterfuzz-testcase-minimized-5567131804499968                                            target_dec_zmbv_fuzzer
+1345/clusterfuzz-testcase-minimized-6062963045695488                                            target_dec_dfa_fuzzer
+1346/clusterfuzz-testcase-minimized-5776732600664064                                            target_dec_mdec_fuzzer
+1348/clusterfuzz-testcase-minimized-6195673642827776                                            target_dec_tiertexseqvideo_fuzzer
+1349/clusterfuzz-testcase-minimized-5370707196248064                                            target_dec_aac_fuzzer
+1351/clusterfuzz-testcase-minimized-5861971645693952                                            target_dec_indeo4_fuzzer
+1352/clusterfuzz-testcase-minimized-5757565017260032                                            target_dec_ac3_fixed_fuzzer
+1353/clusterfuzz-testcase-minimized-5208180449607680                                            target_dec_snow_fuzzer
+1354/clusterfuzz-testcase-minimized-5520132195483648                                            target_dec_sami_fuzzer
+1355/clusterfuzz-testcase-minimized-6662205472768000                                            target_dec_mlp_fuzzer
+1356/clusterfuzz-testcase-minimized-6008489086287872                                            target_dec_fic_fuzzer
+1360/clusterfuzz-testcase-minimized-5606472043986944                                            target_dec_clearvideo_fuzzer
+1362/clusterfuzz-testcase-minimized-6097275002552320                                            target_dec_opus_fuzzer
+1365/clusterfuzz-testcase-minimized-5624158450876416                                            target_dec_mimic_fuzzer
+1366/clusterfuzz-testcase-minimized-5958052211589120                                            target_dec_ppm_fuzzer
+1367/clusterfuzz-testcase-minimized-5714968823463936                                            target_dec_g723_1_fuzzer
+1368/clusterfuzz-testcase-minimized-4507293276176384                                            target_dec_dfa_fuzzer
+1369/clusterfuzz-testcase-minimized-5048908029886464                                            target_dec_webp_fuzzer
+1371/clusterfuzz-testcase-minimized-5770822591447040                                            target_dec_shorten_fuzzer
+1372/clusterfuzz-testcase-minimized-5712192982745088                                            target_dec_msa1_fuzzer
+1374/clusterfuzz-testcase-minimized-5692496346611712                                            target_dec_mpeg1video_fuzzer
+1375/clusterfuzz-testcase-minimized-6070134701555712                                            target_dec_hq_hqa_fuzzer
+1376/clusterfuzz-testcase-minimized-6361794975105024                                            target_dec_targa_y216_fuzzer
+1377/clusterfuzz-testcase-minimized-5487049807233024                                            target_dec_aac_fixed_fuzzer
+1378/clusterfuzz-testcase-minimized-5715088008806400                                            target_dec_cdxl_fuzzer
+1380/clusterfuzz-testcase-minimized-6501225451225088                                            target_dec_dds_fuzzer
+1381/clusterfuzz-testcase-minimized-5513944540119040                                            target_dec_msmpeg4v1_fuzzer
+1382/clusterfuzz-testcase-minimized-6013445293998080                                            target_dec_svq3_fuzzer
+1385/clusterfuzz-testcase-minimized-5552882663292928                                            target_dec_indeo4_fuzzer
+1386/clusterfuzz-testcase-minimized-5323086394032128                                            target_dec_txd_fuzzer
+1387/clusterfuzz-testcase-minimized-4802757766676480                                            target_dec_mts2_fuzzer
+1388/clusterfuzz-testcase-minimized-6680800936329216                                            target_dec_ra_144_fuzzer
+1389/clusterfuzz-testcase-minimized-5330877464707072                                            target_dec_shorten_fuzzer
+1390/clusterfuzz-testcase-minimized-5452757630713856                                            target_dec_magicyuv_fuzzer
+1391/clusterfuzz-testcase-minimized-4556900198776832                                            target_dec_indeo4_fuzzer
+1393/clusterfuzz-testcase-minimized-5948366791901184                                            target_dec_adpcm_g726_fuzzer
+1394/clusterfuzz-testcase-minimized-6493376885030912                                            target_dec_eamad_fuzzer
+1395/clusterfuzz-testcase-minimized-5330939741732864                                            target_dec_s302m_fuzzer
+1397/clusterfuzz-testcase-minimized-6369226291937280                                            target_dec_hevc_fuzzer
+1398/clusterfuzz-testcase-minimized-4576913622302720                                            target_dec_aac_fuzzer
+1399/clusterfuzz-testcase-minimized-4866094172995584                                            target_dec_xwd_fuzzer
+1401/clusterfuzz-testcase-minimized-6526248148795392                                            target_dec_wmv2_fuzzer
+1402/clusterfuzz-testcase-minimized-6302213041291264                                            target_dec_h264_fuzzer
+1403/clusterfuzz-testcase-minimized-4724820484816896                                            target_dec_ffv1_fuzzer
+1404/clusterfuzz-testcase-minimized-5000441286885376                                            target_dec_cavs_fuzzer
+1405/clusterfuzz-testcase-minimized-5011491835084800                                            target_dec_hqx_fuzzer
+1406/clusterfuzz-testcase-minimized-5064865125236736                                            target_dec_vp7_fuzzer
+1407/clusterfuzz-testcase-minimized-6044604124102656                                            target_dec_webp_fuzzer
+1408/clusterfuzz-testcase-minimized-6529985844084736                                            target_dec_dvbsub_fuzzer
+1409/clusterfuzz-testcase-minimized-5237365020819456                                            target_dec_dss_sp_fuzzer
+1410/clusterfuzz-testcase-minimized-6065423843852288                                            target_dec_magicyuv_fuzzer
+1411/clusterfuzz-testcase-minimized-5776085184675840                                            target_dec_bmv_video_fuzzer
+1412/clusterfuzz-testcase-minimized-6561308772139008                                            target_dec_g723_1_fuzzer
+1413/clusterfuzz-testcase-minimized-5923451770503168                                            target_dec_fic_fuzzer
+1415/clusterfuzz-testcase-minimized-6417783363469312                                            target_dec_dvbsub_fuzzer
+1416/clusterfuzz-testcase-minimized-5536862435278848                                            target_dec_indeo2_fuzzer
+1417/clusterfuzz-testcase-minimized-6606778030620672                                            target_dec_clearvideo_fuzzer
+1418/clusterfuzz-testcase-minimized-5934472438480896                                            target_dec_flac_fuzzer
+1419/clusterfuzz-testcase-minimized-6108700873850880                                            target_dec_snow_fuzzer
+1420/clusterfuzz-testcase-minimized-6059927359455232                                            target_dec_webp_fuzzer
+1421/clusterfuzz-testcase-minimized-6239947507892224                                            target_dec_cllc_fuzzer
+1422/clusterfuzz-testcase-minimized-5030993939398656                                            target_dec_scpr_fuzzer
+1423/clusterfuzz-testcase-minimized-5063889899225088                                            target_dec_tak_fuzzer
+1424/clusterfuzz-testcase-minimized-6088327159611392                                            target_dec_lagarith_fuzzer
+1425/clusterfuzz-testcase-minimized-6295712339853312                                            target_dec_lagarith_fuzzer
+1426/clusterfuzz-testcase-minimized-4774371304407040                                            target_dec_lagarith_fuzzer
+1427/clusterfuzz-testcase-minimized-5020737339392000                                            target_dec_cdxl_fuzzer
+1428/clusterfuzz-testcase-minimized-5263281793007616                                            target_dec_dds_fuzzer
+1429/clusterfuzz-testcase-minimized-5959951610544128                                            target_dec_svq3_fuzzer
+1434/clusterfuzz-testcase-minimized-6314998085189632                                            target_dec_webp_fuzzer
+1435/clusterfuzz-testcase-minimized-6483783723253760                                            target_dec_webp_fuzzer
+1437/clusterfuzz-testcase-minimized-4569970002362368                                            target_dec_y41p_fuzzer
+1438/clusterfuzz-testcase-minimized-4917542646710272                                            target_dec_cavs_fuzzer
+1439/clusterfuzz-testcase-minimized-4999148417843200                                            target_dec_g723_1_fuzzer
+1440/clusterfuzz-testcase-minimized-5785716111966208                                            target_dec_vp7_fuzzer
+1441/clusterfuzz-testcase-minimized-6223152357048320                                            target_dec_dss_sp_fuzzer
+1443/clusterfuzz-testcase-minimized-4826998612426752                                            target_dec_eatqi_fuzzer
+1446/clusterfuzz-testcase-minimized-5577409124368384                                            target_dec_truemotion1_fuzzer
+1453/clusterfuzz-testcase-minimized-5024976874766336                                            target_dec_wavpack_fuzzer
+1462/clusterfuzz-testcase-minimized-6558894463647744                                            target_dec_pixlet_fuzzer
+1464/clusterfuzz-testcase-minimized-4925445571084288                                            target_dec_mpeg2video_fuzzer
+1466/clusterfuzz-testcase-minimized-5961584419536896                                            target_dec_xpm_fuzzer
+1468/clusterfuzz-testcase-minimized-5235964056174592                                            target_dec_mimic_fuzzer
+1470/clusterfuzz-testcase-minimized-5404421666111488                                            target_dec_webp_fuzzer
+1471/clusterfuzz-testcase-minimized-6376460543590400                                            target_dec_aac_fixed_fuzzer
+1472/clusterfuzz-testcase-minimized-5677426430443520                                            target_dec_webp_fuzzer
+1473/clusterfuzz-testcase-minimized-5768907824562176                                            target_dec_dvbsub_fuzzer
+1478/clusterfuzz-testcase-minimized-5285486908145664                                            target_dec_scpr_fuzzer
+1479/clusterfuzz-testcase-minimized-6638493360979968                                            target_dec_cllc_fuzzer
+1480/clusterfuzz-testcase-minimized-5188321007370240                                            target_dec_msmpeg4v2_fuzzer
+1481/clusterfuzz-testcase-minimized-5264379509473280                                            target_dec_shorten_fuzzer
+1483/clusterfuzz-testcase-minimized-6386507814273024                                            target_dec_msa1_fuzzer
+1485/clusterfuzz-testcase-minimized-6639880215986176                                            target_dec_msa1_fuzzer
+1487/clusterfuzz-testcase-minimized-6288036495097856                                            target_dec_dirac_fuzzer
+1489/clusterfuzz-testcase-minimized-5075102901207040                                            target_dec_aac_fuzzer
+1503/clusterfuzz-testcase-minimized-5369271855087616                                            target_dec_wmv2_fuzzer
+1504/clusterfuzz-testcase-minimized-6249212138225664                                            target_dec_g723_1_fuzzer
+1505/clusterfuzz-testcase-minimized-4561688818876416                                            target_dec_dds_fuzzer
+1506/clusterfuzz-testcase-minimized-5401272918212608                                            target_dec_cavs_fuzzer
+1507/clusterfuzz-testcase-minimized-4955228300378112                                            target_dec_hq_hqa_fuzzer
+1508/clusterfuzz-testcase-minimized-5011336327069696                                            target_dec_fmvc_fuzzer
+1509/clusterfuzz-testcase-minimized-5129419876204544                                            target_dec_rscc_fuzzer
+1510/clusterfuzz-testcase-minimized-5826231746428928                                            target_dec_dds_fuzzer
+1511/clusterfuzz-testcase-minimized-5906663800307712                                            target_dec_ffv1_fuzzer
+1512/clusterfuzz-testcase-minimized-4713846423945216                                            target_dec_mlp_fuzzer
+1513/clusterfuzz-testcase-minimized-6246484833992704                                            target_dec_h264_fuzzer
+1514/clusterfuzz-testcase-minimized-6437666243477504                                            target_dec_indeo4_fuzzer
+1519/clusterfuzz-testcase-minimized-5286680976162816                                            target_dec_scpr_fuzzer
+1535/clusterfuzz-testcase-minimized-5826695535788032                                            target_dec_aac_fixed_fuzzer
+1536/clusterfuzz-testcase-minimized-5973925404082176                                            target_dec_webp_fuzzer
+1538/clusterfuzz-testcase-minimized-4696904925446144                                            target_dec_ac3_fuzzer
+1541/clusterfuzz-testcase-minimized-6403410590957568                                            target_dec_mlp_fuzzer
+1556/clusterfuzz-testcase-minimized-5027865978470400                                            target_dec_svq3_fuzzer
+1557/clusterfuzz-testcase-minimized-6535013757616128                                            target_dec_webp_fuzzer
+1559/clusterfuzz-testcase-minimized-5048096079740928                                            target_dec_ffv1_fuzzer
+1560/clusterfuzz-testcase-minimized-6011037813833728                                            target_dec_ffv1_fuzzer
+1567/clusterfuzz-testcase-minimized-5693653555085312                                            target_dec_g723_1_fuzzer
+1568/clusterfuzz-testcase-minimized-5944868608147456                                            target_dec_hqx_fuzzer
+1569/clusterfuzz-testcase-minimized-6328690508038144                                            target_dec_pixlet_fuzzer
+1570/clusterfuzz-testcase-minimized-6455337349545984                                            target_dec_ac3_fuzzer
+1572/clusterfuzz-testcase-minimized-4578773729017856                                            target_dec_mpeg4_fuzzer
+1576/clusterfuzz-testcase-minimized-5592896440893440                                            target_dec_tiff_fuzzer
+1604/clusterfuzz-testcase-minimized-5312060206350336                                            target_dec_svq3_fuzzer
+1609/clusterfuzz-testcase-minimized-5102163007111168                                            target_dec_g723_1_fuzzer
+1615/clusterfuzz-testcase-minimized-6625214647500800                                            target_dec_scpr_fuzzer
+1616/clusterfuzz-testcase-minimized-5119196578971648                                            target_dec_truemotion1_fuzzer
+1626/clusterfuzz-testcase-minimized-6416580571299840                                            target_dec_hq_hqa_fuzzer
+1630/clusterfuzz-testcase-minimized-6326111917047808                                            target_dec_tak_fuzzer
+1631/clusterfuzz-testcase-minimized-4861568200212480                                            target_dec_tiff_fuzzer
+1635/clusterfuzz-testcase-minimized-4992749856096256                                            target_dec_tak_fuzzer
+1636/clusterfuzz-testcase-minimized-5310494757879808                                            target_dec_mlp_fuzzer
+1637/clusterfuzz-testcase-minimized-5376582493405184                                            target_dec_flic_fuzzer
+1639/clusterfuzz-testcase-minimized-5693801463021568                                            target_dec_h264_fuzzer
+1643/clusterfuzz-testcase-minimized-6117573403869184                                            target_dec_fmvc_fuzzer
+1654/clusterfuzz-testcase-minimized-5151903795118080                                            target_dec_aac_fixed_fuzzer
+1655/clusterfuzz-testcase-minimized-5587079276789760                                            target_dec_rv40_fuzzer
+1656/clusterfuzz-testcase-minimized-5900404925661184                                            target_dec_aac_latm_fuzzer
+1657/clusterfuzz-testcase-minimized-4710000079405056                                            target_dec_dfa_fuzzer
+1658/clusterfuzz-testcase-minimized-4889937130291200                                            target_dec_mlp_fuzzer
+1659/clusterfuzz-testcase-minimized-5396490639900672                                            target_dec_wavpack_fuzzer
+1664/clusterfuzz-testcase-minimized-6587801187385344                                            target_dec_pixlet_fuzzer
+1669/clusterfuzz-testcase-minimized-5287529198649344                                            target_dec_fic_fuzzer
+1671/clusterfuzz-testcase-minimized-4759078033162240                                            target_dec_mimic_fuzzer
+1674/clusterfuzz-testcase-minimized-6092531563495424                                            target_dec_aac_fixed_fuzzer
+1681/clusterfuzz-testcase-minimized-5970545365483520                                            target_dec_aac_fixed_fuzzer
+1686/clusterfuzz-testcase-minimized-6282691643179008                                            target_dec_aac_fixed_fuzzer
+1699/clusterfuzz-testcase-minimized-6327177438035968                                            target_dec_mlp_fuzzer
+1702/clusterfuzz-testcase-minimized-5777869676478464                                            target_dec_ffv1_fuzzer
+1706/clusterfuzz-testcase-minimized-6112772670619648                                            target_dec_tak_fuzzer
+1707/clusterfuzz-testcase-minimized-6502767008940032                                            target_dec_escape124_fuzzer
+1708/clusterfuzz-testcase-minimized-5035111957397504                                            target_dec_mlp_fuzzer
+1709/clusterfuzz-testcase-minimized-4513580554649600                                            target_dec_aac_fixed_fuzzer
+1710/clusterfuzz-testcase-minimized-4837032931098624                                            target_dec_vp9_fuzzer
+1711/clusterfuzz-testcase-minimized-5248503515185152                                            target_dec_mlp_fuzzer
+1713/clusterfuzz-testcase-minimized-5791887476654080                                            target_dec_tak_fuzzer
+1716/clusterfuzz-testcase-minimized-4691012196761600                                            target_dec_aac_fixed_fuzzer
+1717/clusterfuzz-testcase-minimized-5491696676634624                                            target_dec_vmnc_fuzzer
+1719/clusterfuzz-testcase-minimized-6375090079924224                                            target_dec_tscc2_fuzzer
+1720/clusterfuzz-testcase-minimized-4952373438971904                                            target_dec_mlp_fuzzer
+1721/clusterfuzz-testcase-minimized-4719352135811072                                            target_dec_aac_fixed_fuzzer
+1723/clusterfuzz-testcase-minimized-5309409372667904                                            target_dec_mpeg4_fuzzer
+1724/clusterfuzz-testcase-minimized-4842395432648704                                            target_dec_thp_fuzzer
+1725/clusterfuzz-testcase-minimized-5132425044688896                                            target_dec_cavs_fuzzer
+1726/clusterfuzz-testcase-minimized-4509005575618560                                            target_dec_aac_fixed_fuzzer
+1727/clusterfuzz-testcase-minimized-5900685306494976                                            target_dec_mpeg4_fuzzer
+1731/clusterfuzz-testcase-minimized-5123972414832640                                            target_dec_mp3adu_fuzzer
+1734/clusterfuzz-testcase-minimized-5385630815092736                                            target_dec_indeo5_fuzzer
+1735/clusterfuzz-testcase-minimized-5350472347025408                                            target_dec_aac_fixed_fuzzer
+1737/clusterfuzz-testcase-minimized-5922321338466304                                            target_dec_mpeg4_fuzzer
+1738/clusterfuzz-testcase-minimized-6734814327603200                                            target_dec_aac_fixed_fuzzer
+1739/clusterfuzz-testcase-minimized-5399237707694080                                            target_dec_tak_fuzzer
+1743/clusterfuzz-testcase-minimized-4994834022531072                                            target_dec_tak_fuzzer
+1745/clusterfuzz-testcase-minimized-6160693365571584                                            target_dec_paf_video_fuzzer
+1746/clusterfuzz-testcase-minimized-6687393392361472                                            target_dec_asv2_fuzzer
+1747/clusterfuzz-testcase-minimized-6035451213250560                                            target_dec_xsub_fuzzer
+1748/clusterfuzz-testcase-minimized-6690208340770816                                            target_dec_ffv1_fuzzer
+1753/clusterfuzz-testcase-minimized-6205127620820992                                            target_dec_pbm_fuzzer
+1758/clusterfuzz-testcase-minimized-6054857184116736                                            target_dec_g723_1_fuzzer
+1762/clusterfuzz-testcase-minimized-5150981081792512                                            target_dec_aac_fixed_fuzzer
+1763/clusterfuzz-testcase-minimized-5191733576990720                                            target_dec_pam_fuzzer
+1764/clusterfuzz-testcase-minimized-5394243164045312                                            target_dec_lagarith_fuzzer
+1766/clusterfuzz-testcase-minimized-6562020075765760                                            target_dec_g723_1_fuzzer
+1767/clusterfuzz-testcase-minimized-6657181250224128                                            target_dec_pgm_fuzzer
+1770/clusterfuzz-testcase-minimized-5285511235108864                                            target_dec_aac_fixed_fuzzer
+1773/clusterfuzz-testcase-minimized-4832523987189760                                            target_dec_mjpeg_fuzzer
+1775/clusterfuzz-testcase-minimized-5330288148217856                                            target_dec_aac_fixed_fuzzer
+1776/clusterfuzz-testcase-minimized-6191258231898112                                            target_dec_wavpack_fuzzer
+1778/clusterfuzz-testcase-minimized-5128953268273152                                            target_dec_wavpack_fuzzer
+1781/clusterfuzz-testcase-minimized-4617176877105152                                            target_dec_dirac_fuzzer
+1785/clusterfuzz-testcase-minimized-6035918794260480                                            target_dec_ppm_fuzzer
+1802/clusterfuzz-testcase-minimized-5008293510512640                                            target_dec_cllc_fuzzer
+1807/clusterfuzz-testcase-minimized-6258676199325696                                            target_dec_wavpack_fuzzer
+1815/clusterfuzz-testcase-minimized-5237739320508416                                            target_dec_jpeg2000_fuzzer
+1817/clusterfuzz-testcase-minimized-5104230530547712                                            target_dec_subrip_fuzzer
+1818/clusterfuzz-testcase-minimized-5039166473633792                                            target_dec_smc_fuzzer
+1821/clusterfuzz-testcase-minimized-6050283782144000                                            target_dec_vp3_fuzzer
+1825/clusterfuzz-testcase-minimized-6002833050566656                                            target_dec_aac_fixed_fuzzer
+1826/clusterfuzz-testcase-minimized-5728569256837120                                            target_dec_clearvideo_fuzzer
+1829/clusterfuzz-testcase-minimized-5527165321871360                                            target_dec_pixlet_fuzzer
+1830/clusterfuzz-testcase-minimized-5828293733384192                                            target_dec_ra_144_fuzzer
+1832/clusterfuzz-testcase-minimized-6574546079449088                                            target_dec_mlp_fuzzer
+1839/clusterfuzz-testcase-minimized-6238490993885184                                            target_dec_indeo4_fuzzer
+1841/clusterfuzz-testcase-minimized-5858969564217344                                            target_dec_aac_fixed_fuzzer
+1845/clusterfuzz-testcase-minimized-5075974343360512                                            target_dec_wnv1_fuzzer
+1851/clusterfuzz-testcase-minimized-5692607495667712                                            target_dec_aac_fixed_fuzzer
+1853/clusterfuzz-testcase-minimized-5471155626442752                                            target_dec_wavpack_fuzzer
+1858/clusterfuzz-testcase-minimized-6450473802399744                                            target_dec_sheervideo_fuzzer
+1870/clusterfuzz-testcase-minimized-4686788029317120                                            target_dec_jpeg2000_fuzzer
+1871/clusterfuzz-testcase-minimized-5719950331215872                                            target_dec_snow_fuzzer
+1874/clusterfuzz-testcase-minimized-5037763613163520                                            target_dec_ylc_fuzzer
+1875/clusterfuzz-testcase-minimized-5536474562822144                                            target_dec_webp_fuzzer
+1878/clusterfuzz-testcase-minimized-6441918630199296                                            target_dec_aac_fixed_fuzzer
+1880/clusterfuzz-testcase-minimized-4900645322620928                                            target_dec_aac_fixed_fuzzer
+1882/clusterfuzz-testcase-minimized-5539735650959360                                            target_dec_aac_fuzzer
+1884/clusterfuzz-testcase-minimized-4637425835966464                                            target_dec_ra_144_fuzzer
+1885/clusterfuzz-testcase-minimized-5336328549957632                                            target_dec_ra_144_fuzzer
+1888/clusterfuzz-testcase-minimized-5237704826552320                                            target_dec_truemotion2_fuzzer
+1890/clusterfuzz-testcase-minimized-6329019509243904                                            target_dec_jpeg2000_fuzzer
+1891/clusterfuzz-testcase-minimized-6274417925554176                                            target_dec_dds_fuzzer
+1892/clusterfuzz-testcase-minimized-4519341733183488                                            target_dec_ansi_fuzzer
+1894/clusterfuzz-testcase-minimized-4716739789062144                                            target_dec_wavpack_fuzzer
+1898/clusterfuzz-testcase-minimized-5970744880136192                                            target_dec_wavpack_fuzzer
+1902/clusterfuzz-testcase-minimized-4762451407011840                                            target_dec_amrwb_fuzzer
+1903/clusterfuzz-testcase-minimized-5359318167715840                                            target_dec_cavs_fuzzer
+1906/clusterfuzz-testcase-minimized-4599315114754048                                            target_dec_pgmyuv_fuzzer
+1908/clusterfuzz-testcase-minimized-5392712477966336                                            target_dec_ra_144_fuzzer
+1909/clusterfuzz-testcase-minimized-6732072662073344                                            target_dec_hevc_fuzzer
+1917/clusterfuzz-testcase-minimized-5023221273329664                                            target_dec_cinepak_fuzzer
+1922/clusterfuzz-testcase-minimized-5561194112876544                                            target_dec_wavpack_fuzzer
+1925/clusterfuzz-testcase-minimized-5564569688735744                                            target_dec_cfhd_fuzzer
+1934/clusterfuzz-testcase-minimized-4659523174268928                                            target_dec_mp1_fuzzer
+1935/clusterfuzz-testcase-minimized-4939127826939904                                            target_dec_mp2float_fuzzer
+1938/clusterfuzz-testcase-minimized-6595305602547712                                            target_dec_mp3float_fuzzer
+1939/clusterfuzz-testcase-minimized-5544941956628480                                            target_dec_vp9_fuzzer
+1941/clusterfuzz-testcase-minimized-4719816059387904                                            target_dec_h264_fuzzer
+1942/clusterfuzz-testcase-minimized-4870171724349440                                            target_dec_h264_fuzzer
+1943/clusterfuzz-testcase-minimized-4912348974284800                                            target_dec_vp9_fuzzer
+1944/clusterfuzz-testcase-minimized-4957953339686912                                            target_dec_aac_fixed_fuzzer
+1946/clusterfuzz-testcase-minimized-5780475010351104                                            target_dec_vp9_fuzzer
+1947/clusterfuzz-testcase-minimized-6266250911023104                                            target_dec_vp9_fuzzer
+1948/clusterfuzz-testcase-minimized-6601933810827264                                            target_dec_rv40_fuzzer
+1949/clusterfuzz-testcase-minimized-6645980176842752                                            target_dec_svq3_fuzzer
+1967/clusterfuzz-testcase-minimized-5757031199801344                                            target_dec_wavpack_fuzzer
+2001/clusterfuzz-testcase-minimized-6187599389523968                                            target_dec_asv2_fuzzer
+2004/clusterfuzz-testcase-minimized-5533262866808832                                            target_dec_snow_fuzzer
+2005/clusterfuzz-testcase-minimized-5744226438479872                                            target_dec_aac_fixed_fuzzer
+2006/clusterfuzz-testcase-minimized-5766515037044736                                            target_dec_dxv_fuzzer
+2010/clusterfuzz-testcase-minimized-6209288450080768                                            target_dec_hevc_fuzzer
+2014/clusterfuzz-testcase-minimized-5186337030275072                                            target_dec_aac_fixed_fuzzer
+2038/clusterfuzz-testcase-minimized-4521466148159488                                            target_dec_wavpack_fuzzer
+2045/clusterfuzz-testcase-minimized-6751255865065472                                            target_dec_aac_fixed_fuzzer
+2065/clusterfuzz-testcase-minimized-6298930457346048                                            target_dec_qdraw_fuzzer
+2067/clusterfuzz-testcase-minimized-5578430902960128                                            target_dec_cavs_fuzzer
+2071/clusterfuzz-testcase-minimized-6036414271586304                                            target_dec_aac_fixed_fuzzer
+2076/clusterfuzz-testcase-minimized-6542640243802112                                            target_dec_tiff_fuzzer
+2079/clusterfuzz-testcase-minimized-5345861779324928                                            target_dec_tak_fuzzer
+2096/clusterfuzz-testcase-minimized-4901566068817920                                            target_dec_aac_fixed_fuzzer
+2097/clusterfuzz-testcase-minimized-5036861833609216                                            target_dec_mxpeg_fuzzer
+2100/clusterfuzz-testcase-minimized-4522961547558912                                            target_dec_paf_video_fuzzer
+2106/clusterfuzz-testcase-minimized-6136503639998464                                            target_dec_mpeg4_fuzzer
+2113/clusterfuzz-testcase-minimized-6510704959946752                                            target_dec_ac3_fixed_fuzzer
+2115/clusterfuzz-testcase-minimized-6594111748440064                                            target_dec_indeo4_fuzzer
+2127/clusterfuzz-testcase-minimized-6595787859427328                                            target_dec_subrip_fuzzer
+2131/clusterfuzz-testcase-minimized-4718045157130240                                            target_dec_shorten_fuzzer
+2134/clusterfuzz-testcase-minimized-4619258405322752                                            target_dec_wavpack_fuzzer
+2143/clusterfuzz-testcase-minimized-5482288060039168                                            target_dec_dvbsub_fuzzer
+2145/clusterfuzz-testcase-minimized-5866217724182528                                            target_dec_h264_fuzzer
+2154/clusterfuzz-testcase-minimized-4879971375906816                                            target_dec_hevc_fuzzer
+2159/clusterfuzz-testcase-minimized-5267945972301824                                            target_dec_h264_fuzzer
+2164/clusterfuzz-testcase-minimized-4715936172998656                                            target_dec_aac_fixed_fuzzer
+2169/clusterfuzz-testcase-minimized-5688641642823680                                            target_dec_cfhd_fuzzer
+2174/clusterfuzz-testcase-minimized-5739234533048320                                            target_dec_flic_fuzzer
+2175/clusterfuzz-testcase-minimized-5809657849315328                                            target_dec_ra_144_fuzzer
+2176/clusterfuzz-testcase-minimized-5908197216878592                                            target_dec_tiff_fuzzer
+2181/clusterfuzz-testcase-minimized-6314784322486272                                            target_dec_wavpack_fuzzer
+2192/clusterfuzz-testcase-minimized-5370387988742144                                            target_dec_mpeg4_fuzzer
+2195/clusterfuzz-testcase-minimized-4736721533009920                                            target_dec_aac_fixed_fuzzer
+2197/clusterfuzz-testcase-minimized-6010716676947968                                            target_dec_snow_fuzzer
+2204/clusterfuzz-testcase-minimized-5616756909408256                                            target_dec_mpeg4_fuzzer
+2208/clusterfuzz-testcase-minimized-5976593765761024                                            target_dec_jpeg2000_fuzzer
+2209/clusterfuzz-testcase-minimized-5012343912136704                                            target_dec_hevc_fuzzer
+2224/clusterfuzz-testcase-minimized-6208559949807616                                            target_dec_aac_fixed_fuzzer
+2225/clusterfuzz-testcase-minimized-5505632079708160                                            target_dec_jpeg2000_fuzzer
+2231/clusterfuzz-testcase-minimized-4565181982048256                                            target_dec_jpeg2000_fuzzer
+2233/clusterfuzz-testcase-minimized-5943031318446080                                            target_dec_truemotion2_fuzzer
+2234/clusterfuzz-testcase-minimized-6266896041115648                                            target_dec_tak_fuzzer
+2239/clusterfuzz-testcase-minimized-5639766592716800                                            target_dec_jpeg2000_fuzzer
+2243/clusterfuzz-testcase-minimized-4683988125876224                                            target_dec_vp9_fuzzer
+2247/clusterfuzz-testcase-minimized-5165385038954496                                            target_dec_vp9_fuzzer
+2249/clusterfuzz-testcase-minimized-5388542379294720                                            target_dec_gdv_fuzzer
+2250/clusterfuzz-testcase-minimized-5693382112313344                                            target_dec_hevc_fuzzer
+2254/clusterfuzz-testcase-minimized-4735977664806912                                            target_dec_vp9_fuzzer
+2255/clusterfuzz-testcase-minimized-4917394667470848                                            target_dec_vp9_fuzzer
+2257/clusterfuzz-testcase-minimized-5622708022804480                                            target_dec_vp9_fuzzer
+2258/clusterfuzz-testcase-minimized-5924773878038528                                            target_dec_vp9_fuzzer
+2263/clusterfuzz-testcase-minimized-4800359627227136                                            target_dec_hevc_fuzzer
+2271/clusterfuzz-testcase-minimized-5778297776504832                                            target_dec_jpeg2000_fuzzer
+2272/clusterfuzz-testcase-minimized-5059103858622464                                            target_dec_iff_ilbm_fuzzer
+2286/clusterfuzz-testcase-minimized-5711764169687040                                            target_dec_aac_fixed_fuzzer
+2291/clusterfuzz-testcase-minimized-5538453481586688                                            target_dec_wavpack_fuzzer
+2292/clusterfuzz-testcase-minimized-6156080415506432                                            target_dec_mpeg4_fuzzer
+2299/clusterfuzz-testcase-minimized-4843509351710720                                            target_dec_hevc_fuzzer
+2303/clusterfuzz-testcase-minimized-5529675273076736                                            target_dec_cfhd_fuzzer
+2306/clusterfuzz-testcase-minimized-5002997392211968                                            target_dec_cfhd_fuzzer
+2310/clusterfuzz-testcase-minimized-4534784887881728                                            target_dec_tiff_fuzzer
+2314/clusterfuzz-testcase-minimized-4519333877252096                                            target_dec_tak_fuzzer
+2331/clusterfuzz-testcase-minimized-6182185830711296                                            target_dec_wavpack_fuzzer
+2333/clusterfuzz-testcase-minimized-5223935677300736                                            target_dec_hevc_fuzzer
+2338/clusterfuzz-testcase-minimized-5153426541379584                                            target_dec_mpeg4_fuzzer
+2339/clusterfuzz-testcase-minimized-6663164320022528                                            target_dec_hevc_fuzzer
+2351/clusterfuzz-testcase-minimized-5359403240783872                                            target_dec_wavpack_fuzzer
+2365/clusterfuzz-testcase-minimized-6020421927305216                                            target_dec_jpeg2000_fuzzer
+2367/clusterfuzz-testcase-minimized-4648678897745920                                            target_dec_jpeg2000_fuzzer
+2377/clusterfuzz-testcase-minimized-6108505935183872                                            target_dec_wavpack_fuzzer
+2385/clusterfuzz-testcase-minimized-6594333576790016                                            target_dec_hevc_fuzzer
+2393/clusterfuzz-testcase-minimized-6128334993883136                                            target_dec_vb_fuzzer
+2395/clusterfuzz-testcase-minimized-6540529313513472                                            target_dec_cfhd_fuzzer
+2406/clusterfuzz-testcase-minimized-5294603055923200                                            target_dec_interplay_video_fuzzer
+2407/clusterfuzz-testcase-minimized-5858436027777024                                            target_dec_interplay_video_fuzzer
+2408/clusterfuzz-testcase-minimized-5432734438653952                                            target_dec_interplay_video_fuzzer
+2415/clusterfuzz-testcase-minimized-4672827619803136                                            target_dec_interplay_video_fuzzer
+2422/clusterfuzz-testcase-minimized-5242114713583616                                            target_dec_hevc_fuzzer
+2442/clusterfuzz-testcase-minimized-4985479546011648                                            target_dec_interplay_video_fuzzer
+2451/clusterfuzz-testcase-minimized-4781613957251072                                            target_dec_hevc_fuzzer
+2456/clusterfuzz-testcase-minimized-4822695051001856                                            target_dec_h264_fuzzer
+2467/clusterfuzz-testcase-minimized-4755798049685504                                            target_dec_interplay_video_fuzzer
+2478/clusterfuzz-testcase-minimized-4649584649306112                                            target_dec_aac_fuzzer
+2515/clusterfuzz-testcase-minimized-6197200012967936                                            target_dec_ylc_fuzzer
+2527/clusterfuzz-testcase-minimized-5260915396050944                                            target_dec_aac_fixed_fuzzer
+2533/clusterfuzz-testcase-minimized-5372857678823424                                            target_dec_thp_fuzzer
+2550/clusterfuzz-testcase-minimized-6275019871092736                                            target_dec_aac_fixed_fuzzer
+2568/clusterfuzz-testcase-minimized-4926115716005888                                            target_dec_magicyuv_fuzzer
+2576/clusterfuzz-testcase-minimized-6002596705730560                                            target_dec_hevc_fuzzer
+2577/clusterfuzz-testcase-minimized-4802472348483584                                            target_dec_shorten_fuzzer
+2578/clusterfuzz-testcase-minimized-5098313588146176                                            target_dec_h264_fuzzer
+2581/clusterfuzz-testcase-minimized-4681474395602944                                            target_dec_aac_fixed_fuzzer
+2614/clusterfuzz-testcase-minimized-5949228129976320                                            target_dec_dirac_fuzzer
+2634/clusterfuzz-testcase-minimized-4540890636877824                                            target_dec_ffv1_fuzzer
+2674/clusterfuzz-testcase-minimized-4999700518273024                                            target_dec_dirac_fuzzer
+2678/clusterfuzz-testcase-minimized-4702787684270080                                            target_dec_aac_fixed_fuzzer
+2698/clusterfuzz-testcase-minimized-4713541443518464                                            target_dec_ylc_fuzzer
+2699/clusterfuzz-testcase-minimized-5631303862976512                                            target_dec_aac_fixed_fuzzer
+2702/clusterfuzz-testcase-minimized-4511932591636480                                            target_dec_hevc_fuzzer
+2707/clusterfuzz-testcase-minimized-5179636394754048                                            target_dec_jpeg2000_fuzzer
+2708/clusterfuzz-testcase-minimized-5510405650644992                                            target_dec_pixlet_fuzzer
+2710/clusterfuzz-testcase-minimized-4750001420894208                                            target_dec_zmbv_fuzzer
+2711/clusterfuzz-testcase-minimized-4975142398590976                                            target_dec_tak_fuzzer
+2715/clusterfuzz-testcase-minimized-5099055292088320                                            target_dec_jpeg2000_fuzzer
+2729/clusterfuzz-testcase-minimized-5902915464069120                                            target_dec_dirac_fuzzer
+2737/clusterfuzz-testcase-minimized-4968639147016192                                            target_dec_dirac_fuzzer
+2739/clusterfuzz-testcase-minimized-6737297955356672                                            target_dec_dirac_fuzzer
+2742/clusterfuzz-testcase-minimized-5724322402402304                                            target_dec_dirac_fuzzer
+2743/clusterfuzz-testcase-minimized-5820652076400640                                            target_dec_snow_fuzzer
+2744/clusterfuzz-testcase-minimized-4672435653705728                                            target_dec_dirac_fuzzer
+2747/clusterfuzz-testcase-minimized-5108132302815232                                            target_dec_dirac_fuzzer
+2749/clusterfuzz-testcase-minimized-5298741273690112                                            target_dec_dirac_fuzzer
+2764/clusterfuzz-testcase-minimized-5382561922547712                                            target_dec_dirac_fuzzer
+2809/clusterfuzz-testcase-minimized-4785181833560064                                            target_dec_h264_fuzzer
+2815/clusterfuzz-testcase-minimized-6062914471460864                                            target_dec_hevc_fuzzer
+2817/clusterfuzz-testcase-minimized-5289691240726528                                            target_dec_h264_fuzzer
+2818/clusterfuzz-testcase-minimized-5062943676825600                                            target_dec_aac_fixed_fuzzer
+2819/clusterfuzz-testcase-minimized-4743700301217792                                            target_dec_dirac_fuzzer
+2826/clusterfuzz-testcase-minimized-5901511613743104                                            target_dec_mpeg4_fuzzer
+2834/clusterfuzz-testcase-minimized-5988039123795968                                            target_dec_ffv1_fuzzer
+2838/clusterfuzz-testcase-minimized-6260066086813696                                            target_dec_dirac_fuzzer
+2841/clusterfuzz-testcase-minimized-4869071805874176                                            target_dec_dirac_fuzzer
+2844/clusterfuzz-testcase-minimized-5561715838156800                                            target_dec_dirac_fuzzer
+2860/clusterfuzz-testcase-minimized-4672811689836544                                            target_dec_dirac_fuzzer
+2861/clusterfuzz-testcase-minimized-5361070510178304                                            target_dec_dirac_fuzzer
+2866/clusterfuzz-testcase-minimized-4581973265743872                                            target_dec_hevc_fuzzer
+2873/clusterfuzz-testcase-minimized-5924145713905664                                            target_dec_aac_latm_fuzzer
+2879/clusterfuzz-testcase-minimized-6317542639403008                                            target_dec_pixlet_fuzzer
+2891/clusterfuzz-testcase-minimized-5881795457318912                                            target_dec_h264_fuzzer
+2893/clusterfuzz-testcase-minimized-5809330567774208                                            target_dec_hevc_fuzzer
+#2914/clusterfuzz-testcase-minimized-4787845073993728                                            target_dec_mpeg4_fuzzer
+#2919/clusterfuzz-testcase-minimized-4828609145470976                                            target_dec_dnxhd_fuzzer
+#2925/clusterfuzz-testcase-minimized-4971717589991424                                            target_dec_dca_fuzzer
+#2926/clusterfuzz-testcase-minimized-4987110014582784                                            target_dec_gdv_fuzzer
+#2928/clusterfuzz-testcase-minimized-4992812120539136                                            target_dec_shorten_fuzzer
+#2931/clusterfuzz-testcase-minimized-5051106906341376                                            target_dec_aac_latm_fuzzer
+#2933/clusterfuzz-testcase-minimized-5124990208835584                                            target_dec_lagarith_fuzzer
+#2943/clusterfuzz-testcase-minimized-5430257156882432                                            target_dec_cavs_fuzzer
+#2953/clusterfuzz-testcase-minimized-5604124211019776                                            target_dec_thp_fuzzer
+#2962/clusterfuzz-testcase-minimized-5812616687517696                                            target_dec_mjpeg_fuzzer
+#2971/clusterfuzz-testcase-minimized-6130678276030464                                            target_dec_ffv1_fuzzer
+#2973/clusterfuzz-testcase-minimized-6244323446226944                                            target_dec_vp9_fuzzer
+#2992/clusterfuzz-testcase-minimized-6649611793989632                                            target_dec_comfortnoise_fuzzer
+3013/clusterfuzz-testcase-minimized-4644084197097472                                            target_dec_dirac_fuzzer
+3023/clusterfuzz-testcase-minimized-6421736130084864                                            target_dec_snow_fuzzer
+3024/clusterfuzz-testcase-minimized-5885660323905536                                            target_dec_fic_fuzzer
+3030/clusterfuzz-testcase-minimized-4649809254285312                                            target_dec_pixlet_fuzzer
+3042/clusterfuzz-testcase-minimized-5174210131394560                                            target_dec_jpeg2000_fuzzer
+3051/clusterfuzz-testcase-minimized-5745818336231424                                            target_dec_dvbsub_fuzzer
+3053/clusterfuzz-testcase-minimized-6355082062856192                                            target_dec_dirac_fuzzer
+#3073/clusterfuzz-testcase-minimized-6717666356101120                                            target_dec_jpegls_fuzzer
+3077/clusterfuzz-testcase-minimized-4684917524922368                                            target_dec_hevc_fuzzer
+3081/clusterfuzz-testcase-minimized-4807564879462400                                            target_dec_dirac_fuzzer
+3091/clusterfuzz-testcase-minimized-6229767969832960                                            target_dec_hevc_fuzzer
+3124/clusterfuzz-testcase-minimized-4546434357526528                                            target_dec_dirac_fuzzer
+#3142/clusterfuzz-testcase-minimized-5007853163118592                                            target_dec_snow_fuzzer
+#3147/clusterfuzz-testcase-minimized-4870592182353920                                            target_dec_clearvideo_fuzzer
+3175/clusterfuzz-testcase-minimized-4736774054084608                                            target_dec_hevc_fuzzer
+3191/clusterfuzz-testcase-minimized-5688798451073024                                            target_dec_aac_fixed_fuzzer
+3196/clusterfuzz-testcase-minimized-4528307146063872                                            target_dec_tak_fuzzer
+#3200/clusterfuzz-testcase-minimized-5750022136135680                                            target_dec_wmv2_fuzzer
+3202/clusterfuzz-testcase-minimized-4988291642294272                                            target_dec_tak_fuzzer
+3203/clusterfuzz-testcase-minimized-4514553595428864                                            target_dec_png_fuzzer
+#3218/clusterfuzz-testcase-minimized-5390672154591232                                            target_dec_dvbsub_fuzzer
+#3242/clusterfuzz-testcase-minimized-5811951672229888                                            target_dec_scpr_fuzzer
+3279/clusterfuzz-testcase-minimized-4564805744590848                                            target_dec_dirac_fuzzer
+#3291/clusterfuzz-testcase-minimized-4630024655208448                                            target_dec_dxv_fuzzer
+3295/clusterfuzz-testcase-minimized-4738998142500864                                            target_dec_dirac_fuzzer
+3336/clusterfuzz-testcase-minimized-5656839179993088                                            target_dec_truemotion2_fuzzer
+3348/clusterfuzz-testcase-minimized-4809500517203968                                            target_dec_svq3_fuzzer
+3361/clusterfuzz-testcase-minimized-5065842955911168                                            target_dec_ffv1_fuzzer
+3373/clusterfuzz-testcase-minimized-5604083912146944                                            target_dec_hevc_fuzzer
+3410/clusterfuzz-testcase-minimized-5313377960198144                                            target_dec_prores_fuzzer
+3416/clusterfuzz-testcase-minimized-6125587682820096                                            target_dec_h264_fuzzer
+3443/clusterfuzz-testcase-minimized-5369987105554432                                            target_dec_aac_fixed_fuzzer
+3444/clusterfuzz-testcase-minimized-6270352105668608                                            target_dec_aac_latm_fuzzer
+3453/clusterfuzz-testcase-minimized-5555554657239040                                            target_dec_mpeg4_fuzzer
+3463/clusterfuzz-testcase-minimized-5557381989662720                                            target_dec_tiff_fuzzer
+3482/clusterfuzz-testcase-minimized-5446915875405824                                            target_dec_prores_fuzzer
+3485/clusterfuzz-testcase-minimized-4940429332054016                                            target_dec_dirac_fuzzer
+3492/clusterfuzz-testcase-minimized-5784775283441664                                            target_dec_aac_fixed_fuzzer
+3512/clusterfuzz-testcase-minimized-4812747210489856                                            target_dec_snow_fuzzer
+3516/clusterfuzz-testcase-minimized-4608518562775040                                            target_dec_mpeg4_fuzzer
+3528/clusterfuzz-testcase-minimized-6283628420005888                                            target_dec_mpeg4_fuzzer
+#3529/clusterfuzz-testcase-minimized-5057068371279872                                            target_dec_paf_video_fuzzer
+3541/clusterfuzz-testcase-minimized-6469958596820992                                            target_dec_jpeg2000_fuzzer
+3547/clusterfuzz-testcase-minimized-6009386439802880                                            target_dec_aac_fixed_fuzzer
+3594/clusterfuzz-testcase-minimized-4650622935629824                                            target_dec_aac_fixed_fuzzer
+3612/clusterfuzz-testcase-minimized-6393461273001984                                            target_dec_xan_wc3_fuzzer
+3642/clusterfuzz-testcase-minimized-5443853801750528                                            target_dec_aac_fixed_fuzzer
+#3707/clusterfuzz-testcase-minimized-6465922706440192                                            target_dec_xan_wc3_fuzzer
+3787/clusterfuzz-testcase-minimized-5728764920070144                                            target_dec_exr_fuzzer
+3805/clusterfuzz-testcase-minimized-6578427831255040                                            target_dec_h264_fuzzer
+3902/clusterfuzz-testcase-minimized-6081926122176512                                            target_dec_exr_fuzzer
+3984/clusterfuzz-testcase-minimized-5265759929368576                                            target_dec_snow_fuzzer
+5264/clusterfuzz-testcase-minimized-4621956621008896                                            target_dec_indeo5_fuzzer
+7279/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_G2M_fuzzer-5977332473921536              target_dec_g2m_fuzzer
+10053/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_PROSUMER_fuzzer-5636993883570176        target_dec_prosumer_fuzzer
diff --git a/tools/target_dec_fate.sh b/tools/target_dec_fate.sh
new file mode 100755
index 0000000000000..1377b6b4e80b2
--- /dev/null
+++ b/tools/target_dec_fate.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+# * Copyright (C) 2018 Michael Niedermayer (michaelni@gmx.at)
+# *
+# * This file is part of FFmpeg.
+# *
+# * FFmpeg is free software; you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation; either version 2 of the License, or
+# * (at your option) any later version.
+# *
+# * FFmpeg is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with FFmpeg; if not, write to the Free Software
+# * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+set -e                          # exit as soon as a command fails outside a condition or an &&/|| list
+
+LC_ALL=C                        # NOTE(review): presumably pins the locale so the sed/grep parsing below is predictable - confirm
+export LC_ALL
+
+LIST=target_dec_fate.list       # testcase list, one "<issue>/<sample name>  <fuzzer target>" entry per line; opened relative to the current directory
+
+# Print the usage text on stdout, then end the script with exit status 0.
+show_help(){
+    printf '%s\n' \
+        'Usage: ./target_dec_fate.sh <directory> [<test to run>]' \
+        '' \
+        'directory       the directory into which sample files will be downloaded' \
+        'test to run     the number of the issue to test' \
+        'Note, some test samples may not yet be available to the public, also this' \
+        'script will not download samples which are already in the directory. So you' \
+        'may want to preserve its content between runs.'
+    exit 0
+}
+
+test -z "$1" && show_help                                                              # no directory argument: print usage only
+test ! -d "$1" && echo "$1 is not an accessible directory" && show_help                 # quoted so the path is printed verbatim (no word splitting / globbing)
+test ! -f target_dec_fate.sh && echo "$0 Must be run from its location" && show_help    # the steps below use ../config.h, "cd .." and tools/ relative paths
+grep 'CONFIG_OSSFUZZ 0' ../config.h && echo not configured for ossfuzz && show_help     # config.h reports CONFIG_OSSFUZZ as 0; grep also echoes the matching line
+
+#Download testcases
+#Each list line is "<issue>/<sample name>  <fuzzer target>"; the digits after the
+#last '-' of the sample name are the testcase id used in the download URL.
+while read -r LINE; do
+    ISSUE_NUM=$(echo $LINE | sed 's#/.*##')
+    FILE_ID=$(echo $LINE | sed 's#.*/clusterfuzz-testcase[a-zA-Z0-9_-]*-\([0-9]*\).*#\1#')
+    FILE=$(echo $LINE | sed 's# .*##')
+    #Samples already present in the directory are not fetched again
+    test -f "$1/$FILE" && { echo exists       $FILE; continue; }
+    #A '#' in the issue field marks the entry as disabled
+    case "$ISSUE_NUM" in *"#"*) echo disabled     $FILE; continue ;; esac
+    echo downloading  $FILE
+    mkdir -p "$1/$ISSUE_NUM"
+    wget -O "$1/$FILE" "https://oss-fuzz.com/download?testcase_id=$FILE_ID" || rm "$1/$FILE"    #remove the output file when the download fails
+done < "$LIST"
+
+#Collect one make target (tools/<fuzzer target>) per list line; no '#' filtering happens here
+TOOLS=""
+while read -r LINE; do
+    TOOL_ID=$(echo $LINE | sed 's#[^ ]* ##')
+    TOOLS="${TOOLS} tools/${TOOL_ID}"
+done < "$LIST"
+
+#Make <directory> absolute first: a relative one stops resolving after "cd ..",
+#so every sample would be skipped below and OK printed without running a test
+SAMPLES=`CDPATH= cd -- "$1" && pwd`
+cd ..
+make -j4 $TOOLS    #Build fuzzers; TOOLS is left unquoted on purpose, it is a list of targets
+
+#Run testcases; samples missing on disk are skipped, a failing fuzzer ends the script (set -e)
+while read -r LINE; do
+    TOOL_ID=`echo $LINE | sed 's#[^ ]* ##'`
+    FILE=`echo $LINE | sed 's# .*##'`
+    test -f "$SAMPLES/$FILE" || continue
+    "tools/$TOOL_ID" "$SAMPLES/$FILE"
+done < "tools/$LIST"
+
+echo OK
diff --git a/tools/target_dec_fuzzer.c b/tools/target_dec_fuzzer.c
index a94d5e6a92b94..2a6d525b73ec1 100644
--- a/tools/target_dec_fuzzer.c
+++ b/tools/target_dec_fuzzer.c
@@ -30,7 +30,7 @@
   * build the fuzz target.
     Choose the value of FFMPEG_CODEC (e.g. AV_CODEC_ID_DVD_SUBTITLE) and
     choose one of FUZZ_FFMPEG_VIDEO, FUZZ_FFMPEG_AUDIO, FUZZ_FFMPEG_SUBTITLE.
-    clang -fsanitize=address -fsanitize-coverage=trace-pc-guard,trace-cmp tools/target_dec_fuzzer.c -o target_dec_fuzzer -I.   -DFFMPEG_CODEC=AV_CODEC_ID_MPEG1VIDEO -DFUZZ_FFMPEG_VIDEO ../../libfuzzer/libFuzzer.a   -Llibavcodec -Llibavdevice -Llibavfilter -Llibavformat -Llibavresample -Llibavutil -Llibpostproc -Llibswscale -Llibswresample -Wl,--as-needed -Wl,-z,noexecstack -Wl,--warn-common -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil -ldl -lxcb -lxcb-shm -lxcb -lxcb-xfixes  -lxcb -lxcb-shape -lxcb -lX11 -lasound -lm -lbz2 -lz -pthread
+    clang -fsanitize=address -fsanitize-coverage=trace-pc-guard,trace-cmp tools/target_dec_fuzzer.c -o target_dec_fuzzer -I.   -DFFMPEG_CODEC=AV_CODEC_ID_MPEG1VIDEO -DFUZZ_FFMPEG_VIDEO ../../libfuzzer/libFuzzer.a   -Llibavcodec -Llibavdevice -Llibavfilter -Llibavformat -Llibavresample -Llibavutil -Llibpostproc -Llibswscale -Llibswresample -Wl,--as-needed -Wl,-z,noexecstack -Wl,--warn-common -Wl,-rpath-link=:libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil -ldl -lxcb -lxcb-shm -lxcb -lxcb-xfixes  -lxcb -lxcb-shape -lxcb -lX11 -lasound -lm -lbz2 -lz -pthread
   * create a corpus directory and put some samples there (empty dir is ok too):
     mkdir CORPUS && cp some-files CORPUS