From 1ead9b443d6d11e0cf34a302d39468536652751f Mon Sep 17 00:00:00 2001 From: MyPandaShaoxiang Date: Wed, 1 Aug 2018 13:00:20 +0800 Subject: [PATCH 1/2] fix normalize --- saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu b/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu index 52565092b..a2da2c765 100644 --- a/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu +++ b/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu @@ -9,7 +9,7 @@ template __global__ void normalize_kernel_no_across_spatial(const int size_in_channel, const int n,const int channels, \ const Dtype* scale, const Dtype* bottom_data, Dtype* top_data, const float eps, const int p){ - CUDA_KERNEL_LOOP(index, size_in_channel){ + CUDA_KERNEL_LOOP(index, size_in_channel * n){ float sqr_sum = 0.f; int num_index=index/size_in_channel; int index_in_channel=index%size_in_channel; @@ -26,17 +26,16 @@ __global__ void normalize_kernel_no_across_spatial(const int size_in_channel, co if (p == 1) { norm = 1.f / (sqr_sum + eps); } else { - norm = 1.f / (sqrtf(sqr_sum) + eps); + norm = 1.f / sqrtf(sqr_sum + eps); } - Dtype has_scale_norm=scale[0]*norm; for (int i = 0; i < channels; ++i) { if (has_scale) { if (shared) { top_data[data_index + i * size_in_channel] = \ - bottom_data[data_index + i * size_in_channel] * scale[0] * has_scale_norm; + bottom_data[data_index + i * size_in_channel] * scale[0] * norm } else { top_data[data_index + i * size_in_channel] = \ - bottom_data[data_index + i * size_in_channel] * scale[i] * has_scale_norm; + bottom_data[data_index + i * size_in_channel] * scale[i] * norm; } } else { top_data[data_index + i * size_in_channel] = \ From 8c3e59430511de85cf186532319b310605724c8c Mon Sep 17 00:00:00 2001 From: MyPandaShaoxiang Date: Wed, 1 Aug 2018 18:09:01 +0800 Subject: [PATCH 2/2] fix normalize --- saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu b/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu index a2da2c765..dea2492ba 100644 --- a/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu +++ b/saber/funcs/impl/cuda/base/cuda_c/saber_normalize.cu @@ -32,10 +32,10 @@ __global__ void normalize_kernel_no_across_spatial(const int size_in_channel, co if (has_scale) { if (shared) { top_data[data_index + i * size_in_channel] = \ - bottom_data[data_index + i * size_in_channel] * scale[0] * norm + bottom_data[data_index + i * size_in_channel] * scale[0] * norm; } else { top_data[data_index + i * size_in_channel] = \ - bottom_data[data_index + i * size_in_channel] * scale[i] * norm; + bottom_data[data_index + i * size_in_channel] * scale[i] * norm; } } else { top_data[data_index + i * size_in_channel] = \