Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for PyTorch 1.6 and CUDA 11. #71

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DCN/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .dcn_v2 import *
File renamed without changes.
30 changes: 27 additions & 3 deletions src/cpu/dcn_v2_cpu.cpp → DCN/src/cpu/dcn_v2_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,37 @@ std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input,
// gradient w.r.t. bias
// long m_ = channels_out;
// long k__ = height_out * width_out;
THFloatBlas_gemv('t', k_, m_, 1.0f,
// mv
// const CBLAS_LAYOUT Layout ,
// const CBLAS_TRANSPOSE trans ,
// const MKL_INT m , const MKL_INT n ,
// const float alpha , const float *a , const MKL_INT lda , const float *x , const MKL_INT incx , const float beta , float *y , const MKL_INT incy
//
// mm
// const CBLAS_LAYOUT Layout ,
// const CBLAS_TRANSPOSE transa , const CBLAS_TRANSPOSE transb ,
// const MKL_INT m , const MKL_INT n , const MKL_INT k ,
// const float alpha , const float *a , const MKL_INT lda , const float *b , const MKL_INT ldb , const float beta , float *c , const MKL_INT ldc
// void THFloatBlas_gemm(char, char, int64_t, int64_t, int64_t, float, float*, int64_t, float*, int64_t, float, float*, int64_t)
// the following are equivalent:
// ?gemv('T', M, N, a, A, M, X, 1, b, Y, 1)
// ?gemm('N','N', 1, N, M, a, X, 1, A, M, b, Y, 1)
// THFloatBlas_gemv('t', k_, m_, 1.0f,
// grad_output_n.data<scalar_t>(), k_,
// ones.data<scalar_t>(), 1, 1.0f,
// grad_bias.data<scalar_t>(), 1);
// auto a = 1.0f;
// auto A = grad_output_n.data<scalar_t>();
// auto X = ones.data<scalar_t>();
// auto Y = grad_bias.data<scalar_t>();
// Accumulate grad_bias via a (1 x k_) * (k_ x m_) GEMM, replacing the old
// gemv('t', k_, m_, ...) call (see equivalence note above:
// ?gemm('N','N', 1, N, M, a, X, 1, A, M, b, Y, 1)).
// Signature (13 args): transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc.
// BUG FIX: the previous call passed a leftover "ones.data..., 1, 1.0f," line
// from the removed gemv, yielding 16 arguments — it cannot compile. The CUDA
// counterpart (THCudaBlas_Sgemm below in this PR) shows the correct 13-arg form.
THFloatBlas_gemm('N', 'N', 1, m_, k_, 1.0f,
                 ones.data<scalar_t>(), 1,
                 grad_output_n.data<scalar_t>(), k_,
                 1.0f,
                 grad_bias.data<scalar_t>(), 1);
}

return {
grad_input, grad_offset, grad_mask, grad_weight, grad_bias
};
}
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
30 changes: 18 additions & 12 deletions src/cuda/dcn_v2_cuda.cu → DCN/src/cuda/dcn_v2_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ dcn_v2_cuda_forward(const at::Tensor &input,
const int block = 128;
const int grid = (batch + block - 1) / block;

createBatchGemmBuffer<<<grid, block, 0, THCState_getCurrentStream(state)>>>(
createBatchGemmBuffer<<<grid, block, 0, c10::cuda::getCurrentCUDAStream()>>>(
input_b, output_b,
columns_b, ones_b,
weight_b, bias_b,
Expand Down Expand Up @@ -136,7 +136,7 @@ dcn_v2_cuda_forward(const at::Tensor &input,
output_b, n_,
batch);

modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
input.data<scalar_t>(),
offset.data<scalar_t>(),
mask.data<scalar_t>(),
Expand Down Expand Up @@ -276,7 +276,7 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
columns.data<scalar_t>(), n);

// gradient w.r.t. input coordinate data
modulated_deformable_col2im_coord_cuda(THCState_getCurrentStream(state),
modulated_deformable_col2im_coord_cuda(c10::cuda::getCurrentCUDAStream(),
columns.data<scalar_t>(),
input_n.data<scalar_t>(),
offset_n.data<scalar_t>(),
Expand All @@ -288,7 +288,7 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
grad_offset_n.data<scalar_t>(),
grad_mask_n.data<scalar_t>());
// gradient w.r.t. input data
modulated_deformable_col2im_cuda(THCState_getCurrentStream(state),
modulated_deformable_col2im_cuda(c10::cuda::getCurrentCUDAStream(),
columns.data<scalar_t>(),
offset_n.data<scalar_t>(),
mask_n.data<scalar_t>(),
Expand All @@ -299,7 +299,7 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
grad_input_n.data<scalar_t>());

// gradient w.r.t. weight, dWeight should accumulate across the batch and group
modulated_deformable_im2col_cuda(THCState_getCurrentStream(state),
modulated_deformable_im2col_cuda(c10::cuda::getCurrentCUDAStream(),
input_n.data<scalar_t>(),
offset_n.data<scalar_t>(),
mask_n.data<scalar_t>(),
Expand All @@ -321,15 +321,21 @@ std::vector<at::Tensor> dcn_v2_cuda_backward(const at::Tensor &input,
// gradient w.r.t. bias
// long m_ = channels_out;
// long k__ = height_out * width_out;
THCudaBlas_Sgemv(state,
't',
k_, m_, 1.0f,
grad_output_n.data<scalar_t>(), k_,
ones.data<scalar_t>(), 1, 1.0f,
grad_bias.data<scalar_t>(), 1);
// THCudaBlas_Sgemm(state,
// 't', 'n',
// k_, m_, 1, 1.0f,
// grad_output_n.data<scalar_t>(), k_,
// ones.data<scalar_t>(), 1, 1.0f,
// grad_bias.data<scalar_t>(), 1);
THCudaBlas_Sgemm(state,
'N', 'N', 1, m_, k_, 1.0f,
ones.data<scalar_t>(), 1,
grad_output_n.data<scalar_t>(), k_,
1.0f,
grad_bias.data<scalar_t>(), 1);
}

return {
grad_input, grad_offset, grad_mask, grad_weight, grad_bias
};
}
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion testcuda.py → DCN/testcuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def check_pooling_zero_offset():


def check_gradient_dpooling():
input = torch.randn(2, 3, 5, 5).cuda() * 0.01
input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01
N = 4
batch_inds = torch.randint(2, (N, 1)).cuda().float()
x = torch.rand((N, 1)).cuda().float() * 15
Expand Down
Empty file removed __init__.py
Empty file.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "src")
extensions_dir = os.path.join(this_dir, "DCN", "src")

main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
Expand Down Expand Up @@ -68,4 +68,4 @@ def get_extensions():
# install_requires=requirements,
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
)