Skip to content

Commit

Permalink
添加变换模块接口
Browse files Browse the repository at this point in the history
  • Loading branch information
dujiangpku committed Dec 27, 2021
1 parent 6f4f114 commit 1dbd722
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 29 deletions.
36 changes: 18 additions & 18 deletions build/android/app/src/main/jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,33 @@ LOCAL_C_INCLUDES := $(LOCAL_PATH)/inc \
$(LOCAL_PATH)/test \
$(LOCAL_PATH)/src/armv8

uavs3e_srcs_c += $(LOCAL_PATH)/src/alf.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/alf.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/analyze.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/bitstream.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/bitstream.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_alf.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_cost.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_df.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_cost.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_df.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_ipred.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_itdq.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_mc.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_refman.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_sao.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_itdq.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_mc.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_refman.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_sao.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_tables.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_thread.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_thread_win32.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_thread_win32.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/com_util.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/entropy.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/inter.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/intra.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/inter.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/intra.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/lookahead.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/me.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/quant.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/ratectrl.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/sao.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/tables.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/me.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/quant.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/ratectrl.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/sao.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/tables.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/transform.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/uavs3e.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/util.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/uavs3e.c \
uavs3e_srcs_c += $(LOCAL_PATH)/src/util.c \
uavs3e_srcs_c += $(LOCAL_PATH)/test/utest.c

LOCAL_CFLAGS += -O0 -fPIC -std=gnu99
Expand Down
36 changes: 28 additions & 8 deletions build/android/app/src/main/jni/src/armv8/arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ void uavs3e_funs_init_arm64()
uavs3e_funs_handle.alf = uavs3e_alf_filter_block_arm64;

#else
//uavs3e_funs_handle.trans_dct2[1][1] = uavs3e_trans_dct2_w4_h4_arm64;
//uavs3e_funs_handle.trans_dct2[1][2] = uavs3e_trans_dct2_w4_h8_arm64;
//uavs3e_funs_handle.trans_dct2[1][3] = uavs3e_trans_dct2_w4_h16_arm64;
//uavs3e_funs_handle.trans_dct2[1][4] = uavs3e_trans_dct2_w4_h32_arm64;

//uavs3e_funs_handle.trans_dct2[2][1] = uavs3e_trans_dct2_w8_h4_arm64;
//uavs3e_funs_handle.trans_dct2[2][2] = uavs3e_trans_dct2_w8_h8_arm64;
//uavs3e_funs_handle.trans_dct2[2][3] = uavs3e_trans_dct2_w8_h16_arm64;
//uavs3e_funs_handle.trans_dct2[2][4] = uavs3e_trans_dct2_w8_h32_arm64;
//uavs3e_funs_handle.trans_dct2[2][5] = uavs3e_trans_dct2_w8_h64_arm64;

//uavs3e_funs_handle.trans_dct2[3][1] = uavs3e_trans_dct2_w16_h4_arm64;
//uavs3e_funs_handle.trans_dct2[3][2] = uavs3e_trans_dct2_w16_h8_arm64;
//uavs3e_funs_handle.trans_dct2[3][3] = uavs3e_trans_dct2_w16_h16_arm64;
//uavs3e_funs_handle.trans_dct2[3][4] = uavs3e_trans_dct2_w16_h32_arm64;
//uavs3e_funs_handle.trans_dct2[3][5] = uavs3e_trans_dct2_w16_h64_arm64;

//uavs3e_funs_handle.trans_dct2[4][1] = uavs3e_trans_dct2_w32_h4_arm64;
//uavs3e_funs_handle.trans_dct2[4][2] = uavs3e_trans_dct2_w32_h8_arm64;
//uavs3e_funs_handle.trans_dct2[4][3] = uavs3e_trans_dct2_w32_h16_arm64;
//uavs3e_funs_handle.trans_dct2[4][4] = uavs3e_trans_dct2_w32_h32_arm64;
//uavs3e_funs_handle.trans_dct2[4][5] = uavs3e_trans_dct2_w32_h64_arm64;

//uavs3e_funs_handle.trans_dct2[5][2] = uavs3e_trans_dct2_w64_h8_arm64;
//uavs3e_funs_handle.trans_dct2[5][3] = uavs3e_trans_dct2_w64_h16_arm64;
//uavs3e_funs_handle.trans_dct2[5][4] = uavs3e_trans_dct2_w64_h32_arm64;
//uavs3e_funs_handle.trans_dct2[5][5] = uavs3e_trans_dct2_w64_h64_arm64;

uavs3e_funs_handle.itrans_dct2[1][1] = uavs3e_itrans_dct2_h4_w4_arm64;
uavs3e_funs_handle.itrans_dct2[1][2] = uavs3e_itrans_dct2_h4_w8_arm64;
uavs3e_funs_handle.itrans_dct2[1][3] = uavs3e_itrans_dct2_h4_w16_arm64;
Expand Down Expand Up @@ -192,13 +220,6 @@ void uavs3e_funs_init_arm64()
uavs3e_funs_handle.cost_satd[1][2] = uavs3e_had_8x16_arm64;

/*
uavs3e_funs_handle.cost_var[0] = uavs3e_get_var_4_arm64;
uavs3e_funs_handle.cost_var[1] = uavs3e_get_var_8_arm64;
uavs3e_funs_handle.cost_var[2] = uavs3e_get_var_16_arm64;
uavs3e_funs_handle.cost_var[3] = uavs3e_get_var_32_arm64;
uavs3e_funs_handle.cost_var[4] = uavs3e_get_var_64_arm64;
uavs3e_funs_handle.cost_var[5] = uavs3e_get_var_128_arm64;
uavs3e_funs_handle.ssim_4x4x2_core = ssim_4x4x2_core;
uavs3e_funs_handle.ssim_end4 = ssim_end4;
Expand All @@ -218,7 +239,6 @@ void uavs3e_funs_init_arm64()
uavs3e_funs_handle.pel_avrg[4] = uavs3e_pel_avrg_64_arm64;
uavs3e_funs_handle.pel_avrg[5] = uavs3e_pel_avrg_128_arm64;
uavs3e_funs_handle.recon[5] = uavs3e_recon_w128_arm64;
*/
#endif

Expand Down
37 changes: 34 additions & 3 deletions build/android/app/src/main/jni/src/armv8/arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,40 @@ void uavs3e_itrans_dst7_pb4_arm64(s16 *coeff, s16 *block, int shift, int line, i
void uavs3e_itrans_dst7_pb8_arm64(s16 *coeff, s16 *block, int shift, int line, int max_tr_val, int min_tr_val, s8 *iT);
void uavs3e_itrans_dst7_pb16_arm64(s16 *coeff, s16 *block, int shift, int line, int max_tr_val, int min_tr_val, s8 *iT);

/*
 * DCT2 wrappers, one per block size. Every wrapper has the same signature:
 * a source buffer, a destination buffer and the bit depth.
 * The wN/hM suffix presumably encodes block width/height - confirm against callers.
 * No w64_h4 variant is declared.
 */
void uavs3e_trans_dct2_w4_h4_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w4_h8_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w4_h16_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w4_h32_arm64(s16 *src, s16 *dst, int bit_depth);

void uavs3e_trans_dct2_w8_h4_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w8_h8_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w8_h16_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w8_h32_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w8_h64_arm64(s16 *src, s16 *dst, int bit_depth);

void uavs3e_trans_dct2_w16_h4_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w16_h8_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w16_h16_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w16_h32_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w16_h64_arm64(s16 *src, s16 *dst, int bit_depth);

void uavs3e_trans_dct2_w32_h4_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w32_h8_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w32_h16_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w32_h32_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w32_h64_arm64(s16 *src, s16 *dst, int bit_depth);

void uavs3e_trans_dct2_w64_h8_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w64_h16_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w64_h32_arm64(s16 *src, s16 *dst, int bit_depth);
void uavs3e_trans_dct2_w64_h64_arm64(s16 *src, s16 *dst, int bit_depth);

/*
 * Per-size DCT2 kernels (pb4 .. pb64).
 * Arguments, in order: source, destination, line, limit_line, shift.
 * NOTE(review): the precise meaning of line / limit_line is defined by the
 * kernel implementations, which are not visible here - confirm there.
 */
void tx_dct2_pb4_arm64(s16* src, s16* dst, int line, int limit_line, int shift);
void tx_dct2_pb8_arm64(s16* src, s16* dst, int line, int limit_line, int shift);
void tx_dct2_pb16_arm64(s16* src, s16* dst, int line, int limit_line, int shift);
void tx_dct2_pb32_arm64(s16* src, s16* dst, int line, int limit_line, int shift);
void tx_dct2_pb64_arm64(s16* src, s16* dst, int line, int limit_line, int shift);

u32 uavs3e_get_sad_4_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, int height);
u32 uavs3e_get_sad_8_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, int height);
u32 uavs3e_get_sad_16_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, int height);
Expand Down Expand Up @@ -190,9 +224,6 @@ u32 uavs3e_had_8x8_arm64(pel *org, int s_org, pel *cur, int s_cur);
u32 uavs3e_had_16x8_arm64(pel *org, int s_org, pel *cur, int s_cur);
u32 uavs3e_had_8x16_arm64(pel *org, int s_org, pel *cur, int s_cur);

u64 uavs3e_var_8_arm64(pel* pix, int i_pix);
u64 uavs3e_var_16_arm64(pel* pix, int i_pix);

void uavs3e_pel_diff_4_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, s16 *p_resi, int i_resi, int height);
void uavs3e_pel_diff_8_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, s16 *p_resi, int i_resi, int height);
void uavs3e_pel_diff_16_arm64(pel *p_org, int i_org, pel *p_pred, int i_pred, s16 *p_resi, int i_resi, int height);
Expand Down
13 changes: 13 additions & 0 deletions build/android/app/src/main/jni/src/armv8/trans_dct2_arm64.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "def_arm64.S"
#if defined(__arm64__)

// When COMPILE_10BIT is 0 this branch is taken and it defines nothing.
#if !COMPILE_10BIT

#else
// Placeholder for tx_dct2_pb4_arm64: the body is a single ret, so the routine
// returns immediately without reading or writing any buffer.
function tx_dct2_pb4_arm64

ret


#endif
#endif
169 changes: 169 additions & 0 deletions build/android/app/src/main/jni/src/armv8/transform_arm64.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#include "arm64.h"
#if defined(__arm64__)

/* Apparently intended as an on/off switch for the DCT2 wrappers below. */
#define trans_test 0

/*
 * NOTE(review): trans_test is defined on the line above, so this #ifndef is
 * always false and everything up to the matching #endif is compiled out,
 * whatever value trans_test is given. If the macro is meant to toggle the
 * wrappers, `#if trans_test` would be the usual form - confirm intent before
 * changing, since enabling the wrappers requires the tx_dct2_pb*_arm64 kernels.
 */
#ifndef trans_test
/*
 * w4 x h4 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb4_arm64, src -> scratch, line = limit_line = 4, shift = bit_depth - 8
 *   pass 2: tx_dct2_pb4_arm64, scratch -> dst, line = limit_line = 4, shift = 7
 */
void uavs3e_trans_dct2_w4_h4_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 4, blk_h = 4 };
    const int pass1_shift = 0 + (bit_depth - 8);
    const int pass2_shift = 7;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb4_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb4_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w4 x h8 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb4_arm64, src -> scratch, line = limit_line = 8, shift = bit_depth - 8
 *   pass 2: tx_dct2_pb8_arm64, scratch -> dst, line = limit_line = 4, shift = 8
 */
void uavs3e_trans_dct2_w4_h8_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 4, blk_h = 8 };
    const int pass1_shift = 0 + (bit_depth - 8);
    const int pass2_shift = 8;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb4_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb8_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w4 x h16 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb4_arm64,  src -> scratch, line = limit_line = 16, shift = bit_depth - 8
 *   pass 2: tx_dct2_pb16_arm64, scratch -> dst, line = limit_line = 4,  shift = 9
 */
void uavs3e_trans_dct2_w4_h16_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 4, blk_h = 16 };
    const int pass1_shift = 0 + (bit_depth - 8);
    const int pass2_shift = 9;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb4_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb16_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w4 x h32 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb4_arm64,  src -> scratch, line = limit_line = 32, shift = bit_depth - 8
 *   pass 2: tx_dct2_pb32_arm64, scratch -> dst, line = limit_line = 4,  shift = 10
 */
void uavs3e_trans_dct2_w4_h32_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 4, blk_h = 32 };
    const int pass1_shift = 0 + (bit_depth - 8);
    const int pass2_shift = 10;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb4_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb32_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w8 x h4 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb8_arm64, src -> scratch, line = limit_line = 4, shift = 1 + bit_depth - 8
 *   pass 2: tx_dct2_pb4_arm64, scratch -> dst, line = limit_line = 8, shift = 7
 */
void uavs3e_trans_dct2_w8_h4_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 8, blk_h = 4 };
    const int pass1_shift = 1 + bit_depth - 8;
    const int pass2_shift = 7;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb8_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb4_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w8 x h8 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb8_arm64, src -> scratch, line = limit_line = 8, shift = 1 + bit_depth - 8
 *   pass 2: tx_dct2_pb8_arm64, scratch -> dst, line = limit_line = 8, shift = 8
 */
void uavs3e_trans_dct2_w8_h8_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 8, blk_h = 8 };
    const int pass1_shift = 1 + bit_depth - 8;
    const int pass2_shift = 8;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb8_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb8_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w8 x h16 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb8_arm64,  src -> scratch, line = limit_line = 16, shift = 1 + bit_depth - 8
 *   pass 2: tx_dct2_pb16_arm64, scratch -> dst, line = limit_line = 8,  shift = 9
 */
void uavs3e_trans_dct2_w8_h16_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 8, blk_h = 16 };
    const int pass1_shift = 1 + bit_depth - 8;
    const int pass2_shift = 9;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb8_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb16_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w8 x h32 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb8_arm64,  src -> scratch, line = limit_line = 32, shift = 1 + bit_depth - 8
 *   pass 2: tx_dct2_pb32_arm64, scratch -> dst, line = limit_line = 8,  shift = 10
 */
void uavs3e_trans_dct2_w8_h32_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 8, blk_h = 32 };
    const int pass1_shift = 1 + bit_depth - 8;
    const int pass2_shift = 10;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb8_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb32_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w8 x h64 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb8_arm64,  src -> scratch, line = limit_line = 64, shift = 1 + bit_depth - 8
 *   pass 2: tx_dct2_pb64_arm64, scratch -> dst, line = limit_line = 8,  shift = 11
 */
void uavs3e_trans_dct2_w8_h64_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 8, blk_h = 64 };
    const int pass1_shift = 1 + bit_depth - 8;
    const int pass2_shift = 11;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb8_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb64_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w16 x h4 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb16_arm64, src -> scratch, line = limit_line = 4,  shift = 2 + bit_depth - 8
 *   pass 2: tx_dct2_pb4_arm64,  scratch -> dst, line = limit_line = 16, shift = 7
 */
void uavs3e_trans_dct2_w16_h4_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 16, blk_h = 4 };
    const int pass1_shift = 2 + bit_depth - 8;
    const int pass2_shift = 7;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb16_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb4_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w16 x h8 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb16_arm64, src -> scratch, line = limit_line = 8,  shift = 2 + bit_depth - 8
 *   pass 2: tx_dct2_pb8_arm64,  scratch -> dst, line = limit_line = 16, shift = 8
 */
void uavs3e_trans_dct2_w16_h8_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 16, blk_h = 8 };
    const int pass1_shift = 2 + bit_depth - 8;
    const int pass2_shift = 8;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb16_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb8_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w16 x h16 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb16_arm64, src -> scratch, line = limit_line = 16, shift = 2 + bit_depth - 8
 *   pass 2: tx_dct2_pb16_arm64, scratch -> dst, line = limit_line = 16, shift = 9
 */
void uavs3e_trans_dct2_w16_h16_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 16, blk_h = 16 };
    const int pass1_shift = 2 + bit_depth - 8;
    const int pass2_shift = 9;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb16_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb16_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w16 x h32 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb16_arm64, src -> scratch, line = limit_line = 32, shift = 2 + bit_depth - 8
 *   pass 2: tx_dct2_pb32_arm64, scratch -> dst, line = limit_line = 16, shift = 10
 */
void uavs3e_trans_dct2_w16_h32_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 16, blk_h = 32 };
    const int pass1_shift = 2 + bit_depth - 8;
    const int pass2_shift = 10;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb16_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb32_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w16 x h64 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb16_arm64, src -> scratch, line = limit_line = 64, shift = 2 + bit_depth - 8
 *   pass 2: tx_dct2_pb64_arm64, scratch -> dst, line = limit_line = 16, shift = 11
 */
void uavs3e_trans_dct2_w16_h64_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 16, blk_h = 64 };
    const int pass1_shift = 2 + bit_depth - 8;
    const int pass2_shift = 11;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb16_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb64_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w32 x h4 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb32_arm64, src -> scratch, line = limit_line = 4,  shift = 3 + bit_depth - 8
 *   pass 2: tx_dct2_pb4_arm64,  scratch -> dst, line = limit_line = 32, shift = 7
 */
void uavs3e_trans_dct2_w32_h4_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 32, blk_h = 4 };
    const int pass1_shift = 3 + bit_depth - 8;
    const int pass2_shift = 7;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb32_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb4_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w32 x h8 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb32_arm64, src -> scratch, line = limit_line = 8,  shift = 3 + bit_depth - 8
 *   pass 2: tx_dct2_pb8_arm64,  scratch -> dst, line = limit_line = 32, shift = 8
 */
void uavs3e_trans_dct2_w32_h8_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 32, blk_h = 8 };
    const int pass1_shift = 3 + bit_depth - 8;
    const int pass2_shift = 8;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb32_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb8_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w32 x h16 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb32_arm64, src -> scratch, line = limit_line = 16, shift = 3 + bit_depth - 8
 *   pass 2: tx_dct2_pb16_arm64, scratch -> dst, line = limit_line = 32, shift = 9
 */
void uavs3e_trans_dct2_w32_h16_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 32, blk_h = 16 };
    const int pass1_shift = 3 + bit_depth - 8;
    const int pass2_shift = 9;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb32_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb16_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w32 x h32 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb32_arm64, src -> scratch, line = limit_line = 32, shift = 3 + bit_depth - 8
 *   pass 2: tx_dct2_pb32_arm64, scratch -> dst, line = limit_line = 32, shift = 10
 */
void uavs3e_trans_dct2_w32_h32_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 32, blk_h = 32 };
    const int pass1_shift = 3 + bit_depth - 8;
    const int pass2_shift = 10;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb32_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb32_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w32 x h64 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb32_arm64, src -> scratch, line = limit_line = 64, shift = 3 + bit_depth - 8
 *   pass 2: tx_dct2_pb64_arm64, scratch -> dst, line = limit_line = 32, shift = 11
 */
void uavs3e_trans_dct2_w32_h64_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 32, blk_h = 64 };
    const int pass1_shift = 3 + bit_depth - 8;
    const int pass2_shift = 11;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb32_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb64_arm64(scratch, dst, blk_w, blk_w, pass2_shift);
}

/*
 * w64 x h8 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb64_arm64, src -> scratch, line = limit_line = 8, shift = 4 + bit_depth - 8
 *   pass 2: tx_dct2_pb8_arm64,  scratch -> dst, line = 64, limit_line = 32, shift = 8
 * Note that the second pass passes limit_line = 32 rather than 64.
 */
void uavs3e_trans_dct2_w64_h8_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 64, blk_h = 8, pass2_limit = 32 };
    const int pass1_shift = 4 + bit_depth - 8;
    const int pass2_shift = 8;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb64_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb8_arm64(scratch, dst, blk_w, pass2_limit, pass2_shift);
}

/*
 * w64 x h16 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb64_arm64, src -> scratch, line = limit_line = 16, shift = 4 + bit_depth - 8
 *   pass 2: tx_dct2_pb16_arm64, scratch -> dst, line = 64, limit_line = 32, shift = 9
 * Note that the second pass passes limit_line = 32 rather than 64.
 */
void uavs3e_trans_dct2_w64_h16_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 64, blk_h = 16, pass2_limit = 32 };
    const int pass1_shift = 4 + bit_depth - 8;
    const int pass2_shift = 9;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb64_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb16_arm64(scratch, dst, blk_w, pass2_limit, pass2_shift);
}

/*
 * w64 x h32 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb64_arm64, src -> scratch, line = limit_line = 32, shift = 4 + bit_depth - 8
 *   pass 2: tx_dct2_pb32_arm64, scratch -> dst, line = 64, limit_line = 32, shift = 10
 * Note that the second pass passes limit_line = 32 rather than 64.
 */
void uavs3e_trans_dct2_w64_h32_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 64, blk_h = 32, pass2_limit = 32 };
    const int pass1_shift = 4 + bit_depth - 8;
    const int pass2_shift = 10;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb64_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb32_arm64(scratch, dst, blk_w, pass2_limit, pass2_shift);
}

/*
 * w64 x h64 DCT2 wrapper built from two kernel passes through a scratch buffer.
 *   pass 1: tx_dct2_pb64_arm64, src -> scratch, line = limit_line = 64, shift = 4 + bit_depth - 8
 *   pass 2: tx_dct2_pb64_arm64, scratch -> dst, line = 64, limit_line = 32, shift = 11
 * Note that the second pass passes limit_line = 32 rather than 64.
 */
void uavs3e_trans_dct2_w64_h64_arm64(s16 *src, s16 *dst, int bit_depth)
{
    enum { blk_w = 64, blk_h = 64, pass2_limit = 32 };
    const int pass1_shift = 4 + bit_depth - 8;
    const int pass2_shift = 11;
    ALIGNED_16(s16 scratch[blk_w * blk_h]);

    tx_dct2_pb64_arm64(src, scratch, blk_h, blk_h, pass1_shift);
    tx_dct2_pb64_arm64(scratch, dst, blk_w, pass2_limit, pass2_shift);
}
#endif

#endif
2 changes: 2 additions & 0 deletions build/android/app/src/main/jni/uavs3e_arm64.mk
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ uavs3e_srcs_arm += $(ARM64_SRC_PATH)/deblock_arm64.S
uavs3e_srcs_arm += $(ARM64_SRC_PATH)/alf_arm64.S
uavs3e_srcs_arm += $(ARM64_SRC_PATH)/intra_pred_arm64.S
uavs3e_srcs_arm += $(ARM64_SRC_PATH)/cost_arm64.S
uavs3e_srcs_arm += $(ARM64_SRC_PATH)/transform_arm64.c
uavs3e_srcs_arm += $(ARM64_SRC_PATH)/trans_dct2_arm64.S

0 comments on commit 1dbd722

Please sign in to comment.