ENH: Remove CUDA code for compute capability 1

RTK does not support compute capability 1 since 1e75fbc.
RTKConsortium · Apr 21, 2024 · 4dfa489 · 4dfa489
1 parent 902db67
commit 4dfa489
Show file tree

Hide file tree

Showing 4 changed files with 47 additions and 347 deletions.
diff --git a/src/rtkCudaBackProjectionImageFilter.cu b/src/rtkCudaBackProjectionImageFilter.cu
@@ -51,7 +51,7 @@ __constant__ int3 c_volSize;
 //_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_( S T A R T )_
 //_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
 
-template <unsigned int vectorLength, bool isCylindrical>
+template <unsigned int VVectorLength, bool VIsCylindrical>
 __global__ void
 kernel_backProject(float * dev_vol_in, float * dev_vol_out, float radius, cudaTextureObject_t * dev_tex_proj)
 {
@@ -68,13 +68,13 @@ kernel_backProject(float * dev_vol_in, float * dev_vol_out, float radius, cudaTe
   itk::SizeValueType vol_idx = i + (j + k * c_volSize.y) * (c_volSize.x);
 
   float3 ip, pp;
-  float  voxel_data[vectorLength];
-  for (unsigned int c = 0; c < vectorLength; c++)
+  float  voxel_data[VVectorLength];
+  for (unsigned int c = 0; c < VVectorLength; c++)
     voxel_data[c] = 0.0f;
 
   for (unsigned int proj = 0; proj < c_projSize.z; proj++)
   {
-    if (isCylindrical)
+    if (VIsCylindrical)
     {
       // matrix multiply
       pp = matrix_multiply(make_float3(i, j, k), &(c_volIndexToProjPP[12 * proj]));
@@ -105,13 +105,13 @@ kernel_backProject(float * dev_vol_in, float * dev_vol_out, float radius, cudaTe
     }
 
     // Get texture point, clip left to GPU, and accumulate in voxel_data
-    for (unsigned int c = 0; c < vectorLength; c++)
+    for (unsigned int c = 0; c < VVectorLength; c++)
       voxel_data[c] += tex2DLayered<float>(dev_tex_proj[c], ip.x, ip.y, proj);
   }
 
   // Place it into the volume
-  for (unsigned int c = 0; c < vectorLength; c++)
-    dev_vol_out[vol_idx * vectorLength + c] = dev_vol_in[vol_idx * vectorLength + c] + voxel_data[c];
+  for (unsigned int c = 0; c < VVectorLength; c++)
+    dev_vol_out[vol_idx * VVectorLength + c] = dev_vol_in[vol_idx * VVectorLength + c] + voxel_data[c];
 }
 
 
@@ -134,14 +134,6 @@ CUDA_back_project(int          projSize[3],
                   double       radiusCylindricalDetector,
                   unsigned int vectorLength)
 {
-  // 2D layered texture requires CudaComputeCapability >= 2.0
-  int device;
-  cudaGetDevice(&device);
-  if (GetCudaComputeCapability(device).first <= 1)
-  {
-    itkGenericExceptionMacro(<< "RTK no longer supports GPUs with CudaComputeCapability < 2.0");
-  }
-
   // Copy the size of inputs into constant memory
   cudaMemcpyToSymbol(c_projSize, projSize, sizeof(int3));
   cudaMemcpyToSymbol(c_volSize, volSize, sizeof(int3));

diff --git a/src/rtkCudaFDKBackProjectionImageFilter.cu b/src/rtkCudaFDKBackProjectionImageFilter.cu
@@ -40,8 +40,7 @@
 #include <cuda.h>
 
 // T E X T U R E S ////////////////////////////////////////////////////////
-texture<float, cudaTextureType2DLayered>   tex_proj;
-texture<float, 3, cudaReadModeElementType> tex_proj_3D;
+texture<float, cudaTextureType2DLayered> tex_proj;
 
 // Constant memory
 __constant__ float c_matrices[SLAB_SIZE * 12]; // Can process stacks of at most SLAB_SIZE projections
@@ -53,48 +52,6 @@ __constant__ int3 c_vol_size;
 //_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_( S T A R T )_
 //_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_
 
-__global__ void
-kernel_fdk(float * dev_vol_in, float * dev_vol_out, unsigned int Blocks_Y)
-{
-  // CUDA 2.0 does not allow for a 3D grid, which severely
-  // limits the manipulation of large 3D arrays of data.  The
-  // following code is a hack to bypass this implementation
-  // limitation.
-  unsigned int       blockIdx_z = blockIdx.y / Blocks_Y;
-  unsigned int       blockIdx_y = blockIdx.y - __umul24(blockIdx_z, Blocks_Y);
-  itk::SizeValueType i = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
-  itk::SizeValueType j = __umul24(blockIdx_y, blockDim.y) + threadIdx.y;
-  itk::SizeValueType k = __umul24(blockIdx_z, blockDim.z) + threadIdx.z;
-
-  if (i >= c_vol_size.x || j >= c_vol_size.y || k >= c_vol_size.z)
-  {
-    return;
-  }
-
-  // Index row major into the volume
-  itk::SizeValueType vol_idx = i + (j + k * c_vol_size.y) * (c_vol_size.x);
-
-  float3 ip;
-  float  voxel_data = 0;
-
-  for (unsigned int proj = 0; proj < c_projSize.z; proj++)
-  {
-    // matrix multiply
-    ip = matrix_multiply(make_float3(i, j, k), &(c_matrices[12 * proj]));
-
-    // Change coordinate systems
-    ip.z = 1 / ip.z;
-    ip.x = ip.x * ip.z;
-    ip.y = ip.y * ip.z;
-
-    // Get texture point, clip left to GPU
-    voxel_data += tex3D(tex_proj_3D, ip.x, ip.y, proj + 0.5) * ip.z * ip.z;
-  }
-
-  // Place it into the volume
-  dev_vol_out[vol_idx] = dev_vol_in[vol_idx] + voxel_data;
-}
-
 __global__ void
 kernel_fdk_3Dgrid(float * dev_vol_in, float * dev_vol_out)
 {
@@ -146,9 +103,6 @@ CUDA_reconstruct_conebeam(int     proj_size[3],
                           float * dev_vol_out,
                           float * dev_proj)
 {
-  int device;
-  cudaGetDevice(&device);
-
   // Copy the size of inputs into constant memory
   cudaMemcpyToSymbol(c_projSize, proj_size, sizeof(int3));
   cudaMemcpyToSymbol(c_vol_size, vol_size, sizeof(int3));
@@ -163,25 +117,15 @@ CUDA_reconstruct_conebeam(int     proj_size[3],
   tex_proj.filterMode = cudaFilterModeLinear;
   tex_proj.normalized = false; // don't access with normalized texture coords
 
-  tex_proj_3D.addressMode[0] = cudaAddressModeBorder;
-  tex_proj_3D.addressMode[1] = cudaAddressModeBorder;
-  tex_proj_3D.addressMode[2] = cudaAddressModeBorder;
-  tex_proj_3D.filterMode = cudaFilterModeLinear;
-  tex_proj_3D.normalized = false; // don't access with normalized texture coords
-
   // Copy projection data to array, bind the array to the texture
   cudaExtent                   projExtent = make_cudaExtent(proj_size[0], proj_size[1], proj_size[2]);
   cudaArray *                  array_proj;
   static cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
   CUDA_CHECK_ERROR;
 
   // Allocate array for input projections, in order to bind them to
-  // either a 2D layered texture (requires GetCudaComputeCapability >= 2.0) or
-  // a 3D texture
-  if (GetCudaComputeCapability(device).first <= 1)
-    cudaMalloc3DArray((cudaArray **)&array_proj, &channelDesc, projExtent);
-  else
-    cudaMalloc3DArray((cudaArray **)&array_proj, &channelDesc, projExtent, cudaArrayLayered);
+  // a 2D layered texture
+  cudaMalloc3DArray((cudaArray **)&array_proj, &channelDesc, projExtent, cudaArrayLayered);
   CUDA_CHECK_ERROR;
 
   // Copy data to 3D array
@@ -205,39 +149,21 @@ CUDA_reconstruct_conebeam(int     proj_size[3],
 
   // Run kernels. Note: Projection data is passed via texture memory,
   // transform matrix is passed via constant memory
-  if (GetCudaComputeCapability(device).first <= 1)
-  {
-    // Compute block and grid sizes
-    dim3 dimGrid = dim3(blocksInX, blocksInY * blocksInZ);
-    dim3 dimBlock = dim3(tBlock_x, tBlock_y, tBlock_z);
-
-    // Bind the array of projections to a 3D texture
-    cudaBindTextureToArray(tex_proj_3D, (cudaArray *)array_proj, channelDesc);
-    CUDA_CHECK_ERROR;
 
-    kernel_fdk<<<dimGrid, dimBlock>>>(dev_vol_in, dev_vol_out, blocksInY);
-
-    // Unbind the image and projection matrix textures
-    cudaUnbindTexture(tex_proj_3D);
-    CUDA_CHECK_ERROR;
-  }
-  else
-  {
-    // Compute block and grid sizes
-    dim3 dimGrid = dim3(blocksInX, blocksInY, blocksInZ);
-    dim3 dimBlock = dim3(tBlock_x, tBlock_y, tBlock_z);
-    CUDA_CHECK_ERROR;
+  // Compute block and grid sizes
+  dim3 dimGrid = dim3(blocksInX, blocksInY, blocksInZ);
+  dim3 dimBlock = dim3(tBlock_x, tBlock_y, tBlock_z);
+  CUDA_CHECK_ERROR;
 
-    // Bind the array of projections to a 2D layered texture
-    cudaBindTextureToArray(tex_proj, (cudaArray *)array_proj, channelDesc);
-    CUDA_CHECK_ERROR;
+  // Bind the array of projections to a 2D layered texture
+  cudaBindTextureToArray(tex_proj, (cudaArray *)array_proj, channelDesc);
+  CUDA_CHECK_ERROR;
 
-    kernel_fdk_3Dgrid<<<dimGrid, dimBlock>>>(dev_vol_in, dev_vol_out);
+  kernel_fdk_3Dgrid<<<dimGrid, dimBlock>>>(dev_vol_in, dev_vol_out);
 
-    // Unbind the image and projection matrix textures
-    cudaUnbindTexture(tex_proj);
-    CUDA_CHECK_ERROR;
-  }
+  // Unbind the image and projection matrix textures
+  cudaUnbindTexture(tex_proj);
+  CUDA_CHECK_ERROR;
 
   // Cleanup
   cudaFreeArray((cudaArray *)array_proj);