Adding initial psycho-visual optimization content tune

BlueSwordM · Jan 22, 2022 · f46c52f · f46c52f
1 parent 9725f60
commit f46c52f
Show file tree

Hide file tree

Showing 17 changed files with 158 additions and 32 deletions.
diff --git a/aom/aomcx.h b/aom/aomcx.h
@@ -491,6 +491,8 @@ enum aome_enc_control_id {
    *  - AOM_CONTENT_DEFAULT = Regular video content (default)
    *  - AOM_CONTENT_SCREEN  = Screen capture content
    *  - AOM_CONTENT_FILM = Film content
+   *  - AOM_CONTENT_PSY = Psychovisual optimizations for video
+   *  - AOM_CONTENT_ANIMATION = Psychovisual optimizations for complex animation
    */
   AV1E_SET_TUNE_CONTENT = 43,
 
@@ -1509,6 +1511,8 @@ typedef enum {
   AOM_CONTENT_DEFAULT,
   AOM_CONTENT_SCREEN,
   AOM_CONTENT_FILM,
+  AOM_CONTENT_PSY,
+  AOM_CONTENT_ANIMATION,
   AOM_CONTENT_INVALID
 } aom_tune_content;
 

diff --git a/av1/arg_defs.c b/av1/arg_defs.c
@@ -94,6 +94,8 @@ static const struct arg_enum_list tune_content_enum[] = {
   { "default", AOM_CONTENT_DEFAULT },
   { "screen", AOM_CONTENT_SCREEN },
   { "film", AOM_CONTENT_FILM },
+  { "psy", AOM_CONTENT_PSY },
+  { "animation", AOM_CONTENT_ANIMATION },
   { NULL, 0 }
 };
 

diff --git a/av1/encoder/aq_variance.c b/av1/encoder/aq_variance.c
@@ -150,6 +150,33 @@ int av1_log_block_avg(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
   const int pic_h = cpi->common.height;
   const int bw = MI_SIZE * mi_size_wide[bs];
   const int bh = MI_SIZE * mi_size_high[bs];
+
+  sum = 0;
+  num_pix = 0;
+  avg = 0;
+  int row = mi_row << MI_SIZE_LOG2;
+  int col = mi_col << MI_SIZE_LOG2;
+  for (r = row; (r < (row + bh)) && (r < pic_h); r++) {
+    for (c = col; (c < (col + bw)) && (c < pic_w); c++) {
+      sum += *(x->plane[0].src.buf + r * x->plane[0].src.stride + c);
+      num_pix++;
+    }
+  }
+  if (num_pix != 0) {
+    avg = sum / num_pix;
+  }
+  return avg;
+}
+
+int av1_log_block_avg_hbd(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
+                          int mi_row, int mi_col) {
+  // This function returns the average of the luma block
+  unsigned int sum, avg, num_pix;
+  int r, c;
+  const int pic_w = cpi->common.width;
+  const int pic_h = cpi->common.height;
+  const int bw = MI_SIZE * mi_size_wide[bs];
+  const int bh = MI_SIZE * mi_size_high[bs];
   const uint16_t *x16 = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
 
   sum = 0;

diff --git a/av1/encoder/aq_variance.h b/av1/encoder/aq_variance.h
@@ -23,6 +23,8 @@ void av1_vaq_frame_setup(AV1_COMP *cpi);
 int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
 int av1_log_block_avg(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
                       int mi_row, int mi_col);
+int av1_log_block_avg_hbd(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
+                          int mi_row, int mi_col);
 int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi,
                                                 int block_var_level);
 int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,

diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
@@ -784,27 +784,15 @@ static int adjust_hdr_cr_deltaq(int base_qindex) {
   return dqpCr;
 }
 
-void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
+void av1_set_quantizer(AV1_COMP *const cpi, int min_qmlevel, int max_qmlevel,
                        int q, int enable_chroma_deltaq, int enable_hdr_deltaq) {
   // quantizer has to be reinitialized with av1_init_quantizer() if any
   // delta_q changes.
+  AV1_COMMON *const cm = &cpi->common;
   CommonQuantParams *quant_params = &cm->quant_params;
   quant_params->base_qindex = AOMMAX(cm->delta_q_info.delta_q_present_flag, q);
   quant_params->y_dc_delta_q = 0;
 
-  if (enable_chroma_deltaq) {
-    // TODO(aomedia:2717): need to design better delta
-    quant_params->u_dc_delta_q = 2;
-    quant_params->u_ac_delta_q = 2;
-    quant_params->v_dc_delta_q = 2;
-    quant_params->v_ac_delta_q = 2;
-  } else {
-    quant_params->u_dc_delta_q = 0;
-    quant_params->u_ac_delta_q = 0;
-    quant_params->v_dc_delta_q = 0;
-    quant_params->v_ac_delta_q = 0;
-  }
-
   // following section 8.3.2 in T-REC-H.Sup15 document
   // to apply to AV1 qindex in the range of [0, 255]
   if (enable_hdr_deltaq) {
@@ -817,6 +805,23 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
     }
   }
 
+  // TODO(aomedia:2717): need to design better delta
+  int adjustment = 0;
+  if (enable_chroma_deltaq) {
+    if ((cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY) ||
+        (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION)) {
+      // This will use -2 for 4:2:0, -1 for 4:2:2, and 0 for 4:4:4
+      int subsampling = cpi->source->subsampling_x + cpi->source->subsampling_y;
+      adjustment = -subsampling;
+    } else {
+      adjustment = 2;
+    }
+  }
+  quant_params->u_dc_delta_q = adjustment;
+  quant_params->u_ac_delta_q = adjustment;
+  quant_params->v_dc_delta_q = adjustment;
+  quant_params->v_ac_delta_q = adjustment;
+
   quant_params->qmatrix_level_y =
       aom_get_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel);
   quant_params->qmatrix_level_u =

diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h
@@ -105,7 +105,7 @@ void av1_init_quantizer(EncQuantDequantParams *const enc_quant_dequant_params,
                         const CommonQuantParams *quant_params,
                         aom_bit_depth_t bit_depth);
 
-void av1_set_quantizer(struct AV1Common *const cm, int min_qmlevel,
+void av1_set_quantizer(struct AV1_COMP *const cpi, int min_qmlevel,
                        int max_qmlevel, int q, int enable_chroma_deltaq,
                        int enable_hdr_deltaq);
 

diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
@@ -1058,7 +1058,8 @@ int av1_get_q_for_hdr(AV1_COMP *const cpi, MACROBLOCK *const x,
   return cm->quant_params.base_qindex;
 #else
   // calculate pixel average
-  const int block_luma_avg = av1_log_block_avg(cpi, x, bsize, mi_row, mi_col);
+  const int block_luma_avg =
+      av1_log_block_avg_hbd(cpi, x, bsize, mi_row, mi_col);
   // adjust offset based on average of the pixel block
   int offset = 0;
   for (int i = 0; i < HDR_QP_LEVELS; i++) {

diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
@@ -2415,7 +2415,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
       av1_scale_references(cpi, filter_scaler, phase_scaler, 1);
   }
 
-  av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
+  av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                     q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
   av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
   if ((q_cfg->deltaq_mode != NO_DELTA_Q) || q_cfg->enable_chroma_deltaq)
@@ -2431,7 +2431,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
       (cpi->rc.high_source_sad ||
        (cpi->ppi->use_svc && cpi->svc.high_source_sad_superframe))) {
     if (av1_encodedframe_overshoot_cbr(cpi, &q)) {
-      av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
+      av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                         q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
       av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
       if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
@@ -2666,7 +2666,7 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
       q = cpi->vbr_rc_info.q_index_list[cpi->gf_frame_index];
     }
 #endif
-    av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
+    av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                       q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
     av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
 

diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
@@ -772,6 +772,10 @@ BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
   if (oxcf->tool_cfg.superblock_size == AOM_SUPERBLOCK_SIZE_128X128) {
     return BLOCK_128X128;
   }
+  // Force 64x64 superblocks to improve psycho-visual quality in video content
+  if (oxcf->tune_cfg.content == AOM_CONTENT_PSY) {
+    return BLOCK_64X64;
+    }
 #if CONFIG_TFLITE
   if (oxcf->q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) return BLOCK_64X64;
 #endif
@@ -1043,7 +1047,7 @@ void av1_determine_sc_tools_with_encoding(AV1_COMP *cpi, const int q_orig) {
   // content tools, with a high q and fixed partition.
   for (int pass = 0; pass < 2; ++pass) {
     set_encoding_params_for_screen_content(cpi, pass);
-    av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel,
+    av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel,
                       q_for_screen_content_quick_run,
                       q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
     av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);

diff --git a/av1/encoder/encoder_utils.c.rej b/av1/encoder/encoder_utils.c.rej
@@ -0,0 +1,12 @@
+--- av1/encoder/encoder_utils.c
++++ av1/encoder/encoder_utils.c
+@@ -770,6 +770,9 @@ BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
+     return BLOCK_64X64;
+   if (oxcf->tool_cfg.superblock_size == AOM_SUPERBLOCK_SIZE_128X128)
+     return BLOCK_128X128;
++  //Force 64x64 superblock size to improve psycho-visual quality in video content
++  if (oxcf->tune_cfg.content == AOM_CONTENT_PSY)
++    return BLOCK_64X64;
+
+   // Force 64x64 superblock size to increase resolution in perceptual
+   // AQ mode.
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
@@ -1288,7 +1288,7 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
   cpi->rc.frames_to_key = INT_MAX;
 
   av1_set_quantizer(
-      cm, cpi->oxcf.q_cfg.qm_minlevel, cpi->oxcf.q_cfg.qm_maxlevel, qindex,
+      cpi, cpi->oxcf.q_cfg.qm_minlevel, cpi->oxcf.q_cfg.qm_maxlevel, qindex,
       cpi->oxcf.q_cfg.enable_chroma_deltaq, cpi->oxcf.q_cfg.enable_hdr_deltaq);
 
   av1_setup_block_planes(xd, seq_params->subsampling_x,

diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
@@ -92,6 +92,10 @@ DECLARE_ALIGNED(16, static const uint16_t,
 static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x,
                                        BLOCK_SIZE bs) {
   double threshold = 1.0 - (0.25 * cpi->oxcf.speed);
+  // PSY/ANIMATION tunes use full analysis at every speed, as a better reference
+  if ((cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY)
+  || (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION))
+    threshold = 1.0;
   // For non-positive threshold values, the comparison of source and
   // reconstructed variances with threshold evaluates to false
   // (src_var < threshold/rec_var < threshold) as these metrics are greater than
@@ -233,7 +237,8 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
     this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
 
     // Visual quality adjustment based on recon vs source variance.
-    if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
+    if (((cpi->oxcf.mode == ALLINTRA) || (cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY)
+    || (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION)) && (this_rd != INT64_MAX)) {
       this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
     }
 
@@ -1421,7 +1426,8 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
     this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
 
     // Visual quality adjustment based on recon vs source variance.
-    if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
+    if (((cpi->oxcf.mode == ALLINTRA) || (cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY)
+    || (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION)) && (this_rd != INT64_MAX)) {
       this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
     }
 

diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
@@ -584,9 +584,46 @@ static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
     assert(mbmi != NULL);
     if (aq_mode == VARIANCE_AQ) {
       if (cpi->vaq_refresh) {
-        const int energy = bsize <= BLOCK_16X16
-                               ? x->mb_energy
-                               : av1_log_block_var(cpi, x, bsize);
+        int energy = bsize <= BLOCK_16X16 ? x->mb_energy
+                                          : av1_log_block_var(cpi, x, bsize);
+
+        // Only activate low-luma-biased variance AQ when one of the psy tunes
+        // is enabled, for now
+        if ((cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY) ||
+            (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION)) {
+          // We want to allocate more bits to this block if it is dark.
+          // We will shift the segment by at most 2 in either direction
+          BitDepthInfo bd_info = get_bit_depth_info(&x->e_mbd);
+          int avg_brightness;
+          if (bd_info.use_highbitdepth_buf) {
+            avg_brightness =
+                av1_log_block_avg_hbd(cpi, x, bsize, mi_row, mi_col) >>
+                (bd_info.bit_depth - 8);
+          } else {
+            avg_brightness = av1_log_block_avg(cpi, x, bsize, mi_row, mi_col);
+          }
+          int adjustment = 0;
+          // These breakpoints were chosen semi-arbitrarily after some testing,
+          // and assuming a Limited color range.
+          if (avg_brightness < 45) {
+            adjustment = -2;
+          } else if (avg_brightness < 70) {
+            adjustment = -1;
+          } else if (avg_brightness > 215) {
+            adjustment = 2;
+          } else if (avg_brightness > 195) {
+            adjustment = 1;
+          }
+
+          if (energy + adjustment < 0) {
+            energy = 0;
+          } else if (energy + adjustment > 7) {
+            energy = 7;
+          } else {
+            energy = energy + adjustment;
+          }
+        }
+
         mbmi->segment_id = energy;
       }
       x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);

diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
@@ -32,10 +32,6 @@
 #include "av1/encoder/picklpf.h"
 #include "av1/encoder/pickrst.h"
 
-// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
-// When set to RESTORE_TYPES we allow switchable.
-static const RestorationType force_restore_type = RESTORE_TYPES;
-
 // Number of Wiener iterations
 #define NUM_WIENER_ITERS 5
 
@@ -103,6 +99,17 @@ static uint64_t var_restoration_unit(const RestorationTileLimits *limits,
       limits->v_end - limits->v_start);
 }
 
+// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
+// When set to RESTORE_TYPES we allow switchable.
+static inline RestorationType get_forced_restore_types(AV1EncoderConfig *oxcf) {
+  const TuneCfg *tune_params = &oxcf->tune_cfg;
+  if (tune_params->content == AOM_CONTENT_PSY) {
+    return RESTORE_SGRPROJ;
+  } else {
+    return RESTORE_TYPES;
+  }
+}
+
 typedef struct {
   // The best coefficients for Wiener or Sgrproj restoration
   WienerInfo wiener;
@@ -1775,6 +1782,8 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
   RestSearchCtxt rsc;
   const int plane_start = AOM_PLANE_Y;
   const int plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
+  const RestorationType force_restore_type =
+      get_forced_restore_types(&cpi->oxcf);
   for (int plane = plane_start; plane <= plane_end; ++plane) {
     init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi,
              &cpi->trial_frame_rst, &rsc);

diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
@@ -799,7 +799,21 @@ void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
   uint8_t *pred = tf_data->pred;
 
   // Factor to control the filering strength.
-  const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;
+  int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;
+
+  // Disable ARNR filtering for animation psycho-visual mode
+  // Note: This changes the rate control depending on the scene complexity
+  // and source luminance/variance. Not much of an issue for animated content,
+  // but it is one for real content
+  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_ANIMATION)
+    filter_strength = 0;
+
+  // We keep it at 0 for now until more testing is done, as filter_strength=1
+  // does allocate more bitrate in low-luma scenes. However, it makes stuff
+  // blurrier as well, so using a low-luma-biased variance AQ mode would be
+  // better for restoring balance and increasing general psy quality
+  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_PSY)
+    filter_strength = 0;
 
   // Do filtering.
   FRAME_DIFF *diff = &td->tf_data.diff;

diff --git a/av1/encoder/tune_butteraugli.c b/av1/encoder/tune_butteraugli.c
@@ -294,7 +294,7 @@ void av1_setup_butteraugli_rdmult(AV1_COMP *cpi) {
   // cpi->sf.part_sf.partition_search_type = FIXED_PARTITION;
   // cpi->sf.part_sf.fixed_partition_size = BLOCK_32X32;
 
-  av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q_index,
+  av1_set_quantizer(cpi, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q_index,
                     q_cfg->enable_chroma_deltaq);
   av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
   if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)

diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
@@ -1971,6 +1971,8 @@ static void search_tx_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
   const TxfmSearchParams *txfm_params = &x->txfm_search_params;
+  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+  const TuneCfg *tune_params = &oxcf->tune_cfg;
   int64_t best_rd = INT64_MAX;
   uint16_t best_eob = 0;
   TX_TYPE best_tx_type = DCT_DCT;
@@ -2054,6 +2056,7 @@ static void search_tx_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
   // TODO(any): Experiment with variance and mean based thresholds
   int use_transform_domain_distortion =
       (txfm_params->use_transform_domain_distortion > 0) &&
+      (tune_params->content != AOM_CONTENT_PSY) &&
       (block_mse_q8 >= txfm_params->tx_domain_dist_threshold) &&
       // Any 64-pt transforms only preserves half the coefficients.
       // Therefore transform domain distortion is not valid for these