LIF120 (MAX_LAG_BUFFERS/MAX_LAP_BUFFERS raised to 120), tune=image_perceptual_quality, 10-bit Butteraugli, etc

BlueSwordM · Feb 26, 2022 · d5cd41d · d5cd41d
1 parent 377b1c4
commit d5cd41d
Show file tree

Hide file tree

Showing 17 changed files with 423 additions and 149 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -541,7 +541,7 @@ if(CONFIG_AV1_ENCODER)
                                     ${LIBBROTLICOMMON_LIBRARIES})
       target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS})
     else()
-      pkg_check_modules(LIBJXL REQUIRED libjxl)
+      pkg_check_modules(LIBJXL REQUIRED libjxl libjxl_threads)
       target_link_libraries(aom PRIVATE ${LIBJXL_LDFLAGS} ${LIBJXL_LIBRARIES})
       target_include_directories(aom PRIVATE ${LIBJXL_INCLUDE_DIRS})
       if(LIBJXL_CFLAGS)

diff --git a/aom/aomcx.h b/aom/aomcx.h
@@ -1547,6 +1547,7 @@ typedef enum {
   AOM_TUNE_VMAF_MAX_GAIN = 6,
   AOM_TUNE_VMAF_NEG_MAX_GAIN = 7,
   AOM_TUNE_BUTTERAUGLI = 8,
+  AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY = 9,
 } aom_tune_metric;
 
 /*!\brief Distortion metric to use for RD optimization.

diff --git a/aom_dsp/butteraugli.c b/aom_dsp/butteraugli.c
@@ -11,17 +11,19 @@
 
 #include <assert.h>
 #include <jxl/butteraugli.h>
+#include <jxl/thread_parallel_runner.h>
 
 #include "aom_dsp/butteraugli.h"
 #include "aom_mem/aom_mem.h"
+#include "aom_ports/mem.h"
 #include "third_party/libyuv/include/libyuv/convert_argb.h"
 
 int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
                          const YV12_BUFFER_CONFIG *distorted, int bit_depth,
                          aom_matrix_coefficients_t matrix_coefficients,
                          aom_color_range_t color_range, float *dist_map) {
   (void)bit_depth;
-  assert(bit_depth == 8);
+  assert(bit_depth <= 10);
   const int width = source->y_crop_width;
   const int height = source->y_crop_height;
   const int ss_x = source->subsampling_x;
@@ -37,7 +39,7 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
   }
 
   const size_t stride_argb = width * 4;
-  const size_t buffer_size = height * stride_argb;
+  const size_t buffer_size = height * stride_argb * (bit_depth > 8 ? 2 : 1);
   uint8_t *src_argb = (uint8_t *)aom_malloc(buffer_size);
   uint8_t *distorted_argb = (uint8_t *)aom_malloc(buffer_size);
   if (!src_argb || !distorted_argb) {
@@ -46,39 +48,71 @@ int aom_calc_butteraugli(const YV12_BUFFER_CONFIG *source,
     return 0;
   }
 
+
   if (ss_x == 1 && ss_y == 1) {
-    I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
-                     source->uv_stride, source->v_buffer, source->uv_stride,
-                     src_argb, stride_argb, yuv_constants, width, height);
-    I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
-                     distorted->u_buffer, distorted->uv_stride,
-                     distorted->v_buffer, distorted->uv_stride, distorted_argb,
-                     stride_argb, yuv_constants, width, height);
+    if (bit_depth == 8) {
+      I420ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+                      source->uv_stride, source->v_buffer, source->uv_stride,
+                      src_argb, stride_argb, yuv_constants, width, height);
+      I420ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+                      distorted->u_buffer, distorted->uv_stride,
+                      distorted->v_buffer, distorted->uv_stride, distorted_argb,
+                      stride_argb, yuv_constants, width, height);
+    } else {
+      I010ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
+                      CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride,
+                      CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride,
+                      src_argb, stride_argb, yuv_constants, width, height);
+      I010ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride,
+                      CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride,
+                      CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride,
+                      distorted_argb, stride_argb, yuv_constants, width, height);
+    }
   } else if (ss_x == 1 && ss_y == 0) {
-    I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
-                     source->uv_stride, source->v_buffer, source->uv_stride,
-                     src_argb, stride_argb, yuv_constants, width, height);
-    I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
-                     distorted->u_buffer, distorted->uv_stride,
-                     distorted->v_buffer, distorted->uv_stride, distorted_argb,
-                     stride_argb, yuv_constants, width, height);
+    if (bit_depth == 8) {
+      I422ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+                      source->uv_stride, source->v_buffer, source->uv_stride,
+                      src_argb, stride_argb, yuv_constants, width, height);
+      I422ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+                      distorted->u_buffer, distorted->uv_stride,
+                      distorted->v_buffer, distorted->uv_stride, distorted_argb,
+                      stride_argb, yuv_constants, width, height);
+    } else {
+      I210ToARGBMatrix(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
+                      CONVERT_TO_SHORTPTR(source->u_buffer), source->uv_stride,
+                      CONVERT_TO_SHORTPTR(source->v_buffer), source->uv_stride,
+                      src_argb, stride_argb, yuv_constants, width, height);
+      I210ToARGBMatrix(CONVERT_TO_SHORTPTR(distorted->y_buffer), distorted->y_stride,
+                      CONVERT_TO_SHORTPTR(distorted->u_buffer), distorted->uv_stride,
+                      CONVERT_TO_SHORTPTR(distorted->v_buffer), distorted->uv_stride,
+                      distorted_argb, stride_argb, yuv_constants, width, height);
+    }
   } else if (ss_x == 0 && ss_y == 0) {
-    I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
-                     source->uv_stride, source->v_buffer, source->uv_stride,
-                     src_argb, stride_argb, yuv_constants, width, height);
-    I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
-                     distorted->u_buffer, distorted->uv_stride,
-                     distorted->v_buffer, distorted->uv_stride, distorted_argb,
-                     stride_argb, yuv_constants, width, height);
+    if (bit_depth == 8) {
+      I444ToARGBMatrix(source->y_buffer, source->y_stride, source->u_buffer,
+                      source->uv_stride, source->v_buffer, source->uv_stride,
+                      src_argb, stride_argb, yuv_constants, width, height);
+      I444ToARGBMatrix(distorted->y_buffer, distorted->y_stride,
+                      distorted->u_buffer, distorted->uv_stride,
+                      distorted->v_buffer, distorted->uv_stride, distorted_argb,
+                      stride_argb, yuv_constants, width, height);
+    } else {
+      return 0;
+    }
   } else {
     aom_free(src_argb);
     aom_free(distorted_argb);
     return 0;
   }
 
   JxlPixelFormat pixel_format = { 4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0 };
+  if (bit_depth == 10) {
+    pixel_format.data_type = JXL_TYPE_UINT16;
+  }
   JxlButteraugliApi *api = JxlButteraugliApiCreate(NULL);
-  JxlButteraugliApiSetHFAsymmetry(api, 0.8f);
+  JxlParallelRunner runner = JxlThreadParallelRunnerCreate(NULL, 6);
+  JxlButteraugliApiSetParallelRunner(api, JxlThreadParallelRunner, runner);
+  JxlButteraugliApiSetHFAsymmetry(api, 0.5f);
 
   JxlButteraugliResult *result = JxlButteraugliCompute(
       api, width, height, &pixel_format, src_argb, buffer_size, &pixel_format,

diff --git a/av1/arg_defs.c b/av1/arg_defs.c
@@ -47,6 +47,7 @@ static const struct arg_enum_list tuning_enum[] = {
   { "vmaf", AOM_TUNE_VMAF_MAX_GAIN },
   { "vmaf_neg", AOM_TUNE_VMAF_NEG_MAX_GAIN },
   { "butteraugli", AOM_TUNE_BUTTERAUGLI },
+  { "image_perceptual_quality", AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY },
   { NULL, 0 }
 };
 
@@ -535,8 +536,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
       ARG_DEF(NULL, "deltaq-mode", 1,
               "Delta qindex mode (0: off, 1: deltaq objective (default), "
               "2: deltaq placeholder, 3: key frame visual quality, 4: user "
-              "rating based visual quality optimization). "
-              "Currently this requires enable-tpl-model as a prerequisite."),
+              "rating based visual quality optimization, \n"
+              "                                        5: HDR deltaq optimization). "
+              "Currently, deltaq-mode=1 and 2 require enable-tpl-model as a prerequisite."),
   .deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1,
                              "Deltaq strength for"
                              " --deltaq-mode=4 (%)"),

diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
@@ -798,7 +798,8 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
   }
 #endif
 
-  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI);
+  RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR,
+              AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY);
 
   RANGE_CHECK(extra_cfg, dist_metric, AOM_DIST_METRIC_PSNR,
               AOM_DIST_METRIC_QM_PSNR);
@@ -878,9 +879,6 @@ static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx,
 
 #if CONFIG_TUNE_BUTTERAUGLI
   if (ctx->extra_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
-    if (img->bit_depth > 8) {
-      ERROR("Only 8 bit depth images supported in tune=butteraugli mode.");
-    }
     if (img->mc != 0 && img->mc != AOM_CICP_MC_BT_709 &&
         img->mc != AOM_CICP_MC_BT_601 && img->mc != AOM_CICP_MC_BT_470_B_G) {
       ERROR(

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
@@ -770,7 +770,8 @@ static int adjust_hdr_cb_deltaq(int base_qindex) {
   const double dcbQP = CHROMA_CB_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
   int dqpCb = (int)(dcbQP + (dcbQP < 0 ? -0.5 : 0.5));
   dqpCb = AOMMIN(0, dqpCb);
-  dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+  // dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+  dqpCb = (int)CLIP(dqpCb, -CHROMA_DQP_MAX, CHROMA_DQP_MAX);
   return dqpCb;
 }
 
@@ -780,7 +781,8 @@ static int adjust_hdr_cr_deltaq(int base_qindex) {
   const double dcrQP = CHROMA_CR_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
   int dqpCr = (int)(dcrQP + (dcrQP < 0 ? -0.5 : 0.5));
   dqpCr = AOMMIN(0, dqpCr);
-  dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+  //dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+  dqpCr = (int)CLIP(dqpCr, -CHROMA_DQP_MAX, CHROMA_DQP_MAX);
   return dqpCr;
 }
 

diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
@@ -852,7 +852,7 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
     x->content_state_sb.lighting_change = 0;
     x->content_state_sb.low_sumdiff = 0;
 
-    if (cpi->oxcf.mode == ALLINTRA) {
+    if (cpi->oxcf.mode == ALLINTRA || cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY) {
       x->intra_sb_rdmult_modifier = 128;
     }
 

diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
@@ -35,7 +35,8 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
   double num_of_mi = 0.0;
   double geom_mean_of_scale = 0.0;
 
-  assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM);
+  assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM ||
+         cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY);
 
   for (row = mi_row / num_mi_w;
        row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {

diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
@@ -2589,9 +2589,18 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
 #endif
 
 #if !CONFIG_RD_COMMAND
-  // Determine whether to use screen content tools using two fast encoding.
-  if (!cpi->sf.hl_sf.disable_extra_sc_testing)
+  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY) {
+    // Screen content optimizations are bad for Psy tuning,
+    // disable them and avoid the extra testing to speed us up.
+    FeatureFlags *const features = &cm->features;
+    features->allow_screen_content_tools = 0;
+    features->allow_intrabc = 0;
+    cpi->use_screen_content_tools = 0;
+    cpi->is_screen_content_type = 0;
+  } else if (!cpi->sf.hl_sf.disable_extra_sc_testing) {
+    // Determine whether to use screen content tools using two fast encodings.
     av1_determine_sc_tools_with_encoding(cpi, q);
+  }
 #endif  // !CONFIG_RD_COMMAND
 
 #if CONFIG_TUNE_VMAF
@@ -3481,7 +3490,8 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
     }
   }
 
-  if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM) {
+  if (oxcf->tune_cfg.tuning == AOM_TUNE_SSIM ||
+      oxcf->tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY) {
     av1_set_mb_ssim_rdmult_scaling(cpi);
   }
 

diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
@@ -88,11 +88,12 @@ extern "C" {
 #define TF_LOOKAHEAD_IDX_THR 7
 
 #define HDR_QP_LEVELS 10
-#define CHROMA_CB_QP_SCALE 1.04
-#define CHROMA_CR_QP_SCALE 1.04
+#define CHROMA_CB_QP_SCALE 1.39
+#define CHROMA_CR_QP_SCALE 1.39
 #define CHROMA_QP_SCALE -0.46
 #define CHROMA_QP_OFFSET 9.26
 #define QP_SCALE_FACTOR 2.0
+#define CHROMA_DQP_MAX 80
 #define DISABLE_HDR_LUMA_DELTAQ 1
 
 // Rational number with an int64 numerator

diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
@@ -1274,7 +1274,7 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) {
   // Loop through each 16x16 block.
   for (int row = 0; row < num_rows; ++row) {
     for (int col = 0; col < num_cols; ++col) {
-      double var = 0.0, num_of_var = 0.0;
+      double var = 0.0, num_of_var = 0.0, var_log = 0.0;
       const int index = row * num_cols + col;
 
       // Loop through each 8x8 block.
@@ -1291,31 +1291,89 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) {
           buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
           buf.stride = y_stride;
 
+          double blk_var;
           if (use_hbd) {
-            var += av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8,
-                                                      xd->bd);
+            blk_var = av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8,
+                                                         xd->bd);
           } else {
-            var += av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
+            blk_var = av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
           }
 
+          var_log += log(AOMMAX(blk_var, 1));
+          var += blk_var;
           num_of_var += 1.0;
         }
       }
-      var = var / num_of_var;
 
-      // Curve fitting with an exponential model on all 16x16 blocks from the
-      // midres dataset.
-      var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
+      if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY) {
+        var = exp(var_log / num_of_var);
+        const int cq_level = cpi->oxcf.rc_cfg.cq_level;
+        const double hq_level = 30 * 4;
+        const double delta =
+            cq_level < hq_level
+                ? 2.0 * (double)(hq_level - cq_level) / hq_level
+                : 10.0 * (double)(cq_level - hq_level) / (MAXQ - hq_level);
+        // Curve fitting with an exponential model on user rating dataset.
+        var = 39.126 * (1 - exp(-0.0009413 * var)) + 1.236 + delta;
+      } else {
+        var = var / num_of_var;
+        // Curve fitting with an exponential model on all 16x16 blocks from the
+        // midres dataset.
+        var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
+      }
       cpi->ssim_rdmult_scaling_factors[index] = var;
       log_sum += log(var);
     }
   }
-  log_sum = exp(log_sum / (double)(num_rows * num_cols));
 
-  for (int row = 0; row < num_rows; ++row) {
-    for (int col = 0; col < num_cols; ++col) {
-      const int index = row * num_cols + col;
-      cpi->ssim_rdmult_scaling_factors[index] /= log_sum;
+  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IMAGE_PERCEPTUAL_QUALITY &&
+      cpi->oxcf.q_cfg.deltaq_mode != NO_DELTA_Q) {
+    const int sb_size = cpi->common.seq_params->sb_size;
+    const int num_mi_w_sb = mi_size_wide[sb_size];
+    const int num_mi_h_sb = mi_size_high[sb_size];
+    const int num_cols_sb =
+        (mi_params->mi_cols + num_mi_w_sb - 1) / num_mi_w_sb;
+    const int num_rows_sb =
+        (mi_params->mi_rows + num_mi_h_sb - 1) / num_mi_h_sb;
+    const int num_blk_w = num_mi_w_sb / num_mi_w;
+    const int num_blk_h = num_mi_h_sb / num_mi_h;
+    assert(num_blk_w * num_mi_w == num_mi_w_sb);
+    assert(num_blk_h * num_mi_h == num_mi_h_sb);
+
+    for (int row = 0; row < num_rows_sb; ++row) {
+      for (int col = 0; col < num_cols_sb; ++col) {
+        double log_sum_sb = 0.0;
+        double blk_count = 0.0;
+        for (int blk_row = row * num_blk_h;
+             blk_row < (row + 1) * num_blk_h && blk_row < num_rows; ++blk_row) {
+          for (int blk_col = col * num_blk_w;
+               blk_col < (col + 1) * num_blk_w && blk_col < num_cols;
+               ++blk_col) {
+            const int index = blk_row * num_cols + blk_col;
+            log_sum_sb += log(cpi->ssim_rdmult_scaling_factors[index]);
+            blk_count += 1.0;
+          }
+        }
+        log_sum_sb = exp(log_sum_sb / blk_count);
+        for (int blk_row = row * num_blk_h;
+             blk_row < (row + 1) * num_blk_h && blk_row < num_rows; ++blk_row) {
+          for (int blk_col = col * num_blk_w;
+               blk_col < (col + 1) * num_blk_w && blk_col < num_cols;
+               ++blk_col) {
+            const int index = blk_row * num_cols + blk_col;
+            cpi->ssim_rdmult_scaling_factors[index] /= log_sum_sb;
+          }
+        }
+      }
+    }
+  } else {
+    log_sum = exp(log_sum / (double)(num_rows * num_cols));
+
+    for (int row = 0; row < num_rows; ++row) {
+      for (int col = 0; col < num_cols; ++col) {
+        const int index = row * num_cols + col;
+        cpi->ssim_rdmult_scaling_factors[index] /= log_sum;
+      }
     }
   }
 }

diff --git a/av1/encoder/lookahead.h b/av1/encoder/lookahead.h
@@ -25,8 +25,8 @@ extern "C" {
 #endif
 
 /*!\cond */
-#define MAX_LAG_BUFFERS 48
-#define MAX_LAP_BUFFERS 48
+#define MAX_LAG_BUFFERS 120
+#define MAX_LAP_BUFFERS 120
 #define MAX_TOTAL_BUFFERS (MAX_LAG_BUFFERS + MAX_LAP_BUFFERS)
 #define LAP_LAG_IN_FRAMES 17