From 91a60fdfa6226ff730c20423b4ee65be77f52ff8 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry"
Date: Thu, 11 Feb 2016 06:38:34 -0800
Subject: [PATCH] Fix some additional overflows in the 64x64 DCT.

Test #350 could overflow even with 4-point lapping.
Tests #200 and #242 could only overflow with 8-point lapping.

Again, this has exactly zero impact on measured MSE in dcttest.
There is a very small impact on the reported biases, but the first
 several significant figures of each one do not change.

Changes in metrics on ntt-short-1 seem to be mostly noise:

          RATE (%) DSNR (dB)
    PSNR  0.00470 -0.00014
 PSNRHVS  0.00603 -0.00028
    SSIM  0.00379 -0.00009
FASTSSIM -0.06955  0.00187
---
 src/dct.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/dct.c b/src/dct.c
index 13778216..88b8fd06 100644
--- a/src/dct.c
+++ b/src/dct.c
@@ -1369,9 +1369,9 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
      0.505367194937830 */ \
     OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \
     t6 += (t1*1035 + 1024) >> 11; \
-    /* 14699/16384 ~= Sqrt[2]*Sin[7*Pi/32] ~= 0.897167586342636 */ \
-    OD_DCT_OVERFLOW_CHECK(t6, 14699, 8192, 200); \
-    t1 -= (t6*14699 + 8192) >> 14; \
+    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] ~= 0.897167586342636 */ \
+    OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \
+    t1 -= (t6*3675 + 2048) >> 12; \
     /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] ~=
      0.103884567856159 */ \
     OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \
@@ -1531,8 +1531,8 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
     /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] ~=
      0.103884567856159 */ \
     t3 += (t4*851 + 4096) >> 13; \
-    /* 14699/16384 ~= Sqrt[2]*Sin[7*Pi/32] ~= 0.897167586342636 */ \
-    t4 += (t3*14699 + 8192) >> 14; \
+    /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] ~= 0.897167586342636 */ \
+    t4 += (t3*3675 + 2048) >> 12; \
     /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) ~=
      0.505367194937830 */ \
     t3 -= (t4*1035 + 1024) >> 11; \
@@ -1757,9 +1757,9 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
     /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
     OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \
     s6 += (s9*2485 + 4096) >> 13; \
-    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
-    OD_DCT_OVERFLOW_CHECK(s6, 18205, 16384, 242); \
-    s9 -= (s6*18205 + 16384) >> 15; \
+    /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+    OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \
+    s9 -= (s6*4551 + 4096) >> 13; \
     /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
     OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \
     s6 += (s9*2485 + 4096) >> 13; \
@@ -2011,8 +2011,8 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
     se -= (s1*3227 + 16384) >> 15; \
     /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
     s6 -= (s9*2485 + 4096) >> 13; \
-    /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
-    s9 += (s6*18205 + 16384) >> 15; \
+    /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
+    s9 += (s6*4551 + 4096) >> 13; \
     /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
     s6 -= (s9*2485 + 4096) >> 13; \
     /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \
@@ -3160,9 +3160,9 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
      0.6685570995525147 */ \
     OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
     t7 += (to*5477 + 4096) >> 13; \
-    /* 8339/32768 ~= Sqrt[2]*Sin[15*Pi/128] ~= 0.5089684416985407 */ \
-    OD_DCT_OVERFLOW_CHECK(t7, 8339, 8192, 350); \
-    to -= (t7*8339 + 8192) >> 14; \
+    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] ~= 0.5089684416985407 */ \
+    OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
+    to -= (t7*4169 + 4096) >> 13; \
     /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] ~=
      -0.6276441593165217 */ \
     OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
@@ -3413,8 +3413,8 @@ void od_bin_idct16x16(od_coeff *x, int xstride,
     /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] ~=
      -0.6276441593165217 */ \
     ts += (t3*2571 + 2048) >> 12; \
-    /* 8339/32768 ~= Sqrt[2]*Sin[15*Pi/128] ~= 0.5089684416985407 */ \
-    t3 += (ts*8339 + 8192) >> 14; \
+    /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] ~= 0.5089684416985407 */ \
+    t3 += (ts*4169 + 4096) >> 13; \
     /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] ~=
      0.6685570995525147 */ \
     ts -= (t3*5477 + 4096) >> 13; \