Skip to content

Commit

Permalink
apply code-format changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
nihui authored and github-actions[bot] committed Nov 29, 2024
1 parent 310992b commit cd35861
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/layer/x86/convolution_3x3_winograd_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2141,7 +2141,7 @@ static void gemm_transB_packed_tile_int8(const Mat& AT_tile, const Mat& BT_tile,
_sum1 = _mm256_comp_dpwssd_epi32(_sum1, _pA0, _pB1);
_sum2 = _mm256_comp_dpwssd_epi32(_sum2, _pA1, _pB0);
_sum3 = _mm256_comp_dpwssd_epi32(_sum3, _pA1, _pB1);
-#else // __AVX2__
+#else // __AVX2__
__m128i _pA0 = _mm_loadu_si128((const __m128i*)pA);
__m128i _pB0 = _mm_loadu_si128((const __m128i*)pB);
__m128i _pB1 = _mm_loadu_si128((const __m128i*)(pB + 8));
Expand Down
4 changes: 2 additions & 2 deletions src/layer/x86/convolution_im2col_gemm_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -4807,7 +4807,7 @@ static void convolution_gemm_transB_packed_tile_int8(const Mat& AT_tile, const M

_sum0 = _mm256_comp_dpwssd_epi32(_sum0, _pA0, _pB0);
_sum1 = _mm256_comp_dpwssd_epi32(_sum1, _pA0, _pB1);
-#else // __AVX2__
+#else // __AVX2__
#if __SSE4_1__
_pA = _mm_cvtepi8_epi16(_pA);
#else
Expand Down Expand Up @@ -5309,7 +5309,7 @@ static void convolution_gemm_transB_packed_tile_int8(const Mat& AT_tile, const M
__m256i _pB0 = _mm256_cvtepi8_epi16(_pB);

_sum0 = _mm256_comp_dpwssd_epi32(_sum0, _pA0, _pB0);
-#else // __AVX2__
+#else // __AVX2__
#if __SSE4_1__
_pA = _mm_cvtepi8_epi16(_pA);
#else
Expand Down
12 changes: 6 additions & 6 deletions src/layer/x86/convolution_packed_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2826,7 +2826,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
_sum11 = _mm256_comp_dpwssd_epi32(_sum11, _mm256_shuffle_epi32(_rr1, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
_sum22 = _mm256_comp_dpwssd_epi32(_sum22, _mm256_shuffle_epi32(_rr2, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
_sum33 = _mm256_comp_dpwssd_epi32(_sum33, _mm256_shuffle_epi32(_rr3, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
-#else // __AVX2__
+#else // __AVX2__
__m128i _w01 = _mm_load_si128((const __m128i*)kptr);
__m128i _w23 = _mm_load_si128((const __m128i*)(kptr + 16));
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
Expand Down Expand Up @@ -3184,7 +3184,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
_sum11 = _mm256_comp_dpwssd_epi32(_sum11, _mm256_shuffle_epi32(_rr1, _MM_SHUFFLE(0, 0, 0, 0)), _w0);
_sum22 = _mm256_comp_dpwssd_epi32(_sum22, _mm256_shuffle_epi32(_rr0, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
_sum33 = _mm256_comp_dpwssd_epi32(_sum33, _mm256_shuffle_epi32(_rr1, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
-#else // __AVX2__
+#else // __AVX2__
__m128i _w01 = _mm_load_si128((const __m128i*)kptr);
__m128i _w23 = _mm_load_si128((const __m128i*)(kptr + 16));
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
Expand Down Expand Up @@ -3455,7 +3455,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const

_sum00 = _mm256_comp_dpwssd_epi32(_sum00, _mm256_shuffle_epi32(_rr0, _MM_SHUFFLE(0, 0, 0, 0)), _w0);
_sum11 = _mm256_comp_dpwssd_epi32(_sum11, _mm256_shuffle_epi32(_rr0, _MM_SHUFFLE(2, 2, 2, 2)), _w1);
-#else // __AVX2__
+#else // __AVX2__
__m128i _w01 = _mm_load_si128((const __m128i*)kptr);
__m128i _w23 = _mm_load_si128((const __m128i*)(kptr + 16));
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
Expand Down Expand Up @@ -3795,7 +3795,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
_sum11 = _mm256_comp_dpwssd_epi32(_sum11, _valval1, _w);
_sum22 = _mm256_comp_dpwssd_epi32(_sum22, _valval2, _w);
_sum33 = _mm256_comp_dpwssd_epi32(_sum33, _valval3, _w);
-#else // __AVX2__
+#else // __AVX2__
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
__m128i _w0 = _mm_unpacklo_epi8(_w01, _extw01);
__m128i _w1 = _mm_unpackhi_epi8(_w01, _extw01);
Expand Down Expand Up @@ -4064,7 +4064,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const

_sum0 = _mm256_comp_dpwssd_epi32(_sum0, _valval0, _w);
_sum1 = _mm256_comp_dpwssd_epi32(_sum1, _valval1, _w);
-#else // __AVX2__
+#else // __AVX2__
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
__m128i _w0 = _mm_unpacklo_epi8(_w01, _extw01);
__m128i _w1 = _mm_unpackhi_epi8(_w01, _extw01);
Expand Down Expand Up @@ -4262,7 +4262,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
__m256i _rr0 = _mm256_inserti128_si256(_mm256_castsi128_si256(_r0), _r0, 1);

_sum = _mm256_comp_dpwssd_epi32(_sum, _rr0, _w);
-#else // __AVX2__
+#else // __AVX2__
__m128i _extw01 = _mm_cmpgt_epi8(_mm_setzero_si128(), _w01);
__m128i _w0 = _mm_unpacklo_epi8(_w01, _extw01);
__m128i _w1 = _mm_unpackhi_epi8(_w01, _extw01);
Expand Down

0 comments on commit cd35861

Please sign in to comment.