trying to remove kernels.py

alex-rakowski · Nov 21, 2023 · 2f49043 · 2f49043
1 parent 7b4874b
commit 2f49043
Showing 1 changed file with 98 additions and 97 deletions.
diff --git a/py4DSTEM/braggvectors/kernels.py b/py4DSTEM/braggvectors/kernels.py
@@ -3,102 +3,103 @@
 if IN_COLAB is False:
     try:
         import cupy as cp
-    except Exception:
-        pass
-
-
-__all__ = ["kernels"]
-
-kernels = {}
-
-############################# multicorr kernels #################################
-
-import os
-
-with open(os.path.join(os.path.dirname(__file__), "multicorr_row_kernel.cu"), "r") as f:
-    kernels["multicorr_row_kernel"] = cp.RawKernel(f.read(), "multicorr_row_kernel")
-
-with open(os.path.join(os.path.dirname(__file__), "multicorr_col_kernel.cu"), "r") as f:
-    kernels["multicorr_col_kernel"] = cp.RawKernel(f.read(), "multicorr_col_kernel")
-
-
-############################# get_maximal_points ################################
-
-"""
-These kernels are approximately 50x faster than the np.roll approach used in the CPU version,
-per my testing with 1024x1024 pixels and float64 on a Jetson Xavier NX.
-The boundary conditions are slightly different in this version, in that pixels on the edge
-of the frame are always false. This simplifies the indexing, and since in the Braggdisk
-detection application an edgeBoundary is always applied in the case of subpixel detection,
-this is not considered a problem.
-"""
-
-maximal_pts_float32 = r"""
-extern "C" __global__
-void maximal_pts(const float *ar, bool *out, const double minAbsoluteIntensity, const long long sizex, const long long sizey, const long long N){
-    int tid = blockDim.x * blockIdx.x + threadIdx.x;
-    int x = tid / sizey;
-    int y = tid % sizey;
-    bool res = false;
-    if (tid < N && x>0 && x<(sizex-1) && y>0 && y<(sizey-1)) {
-        float val = ar[tid];
-
-        out[tid] = ( val > ar[tid + sizey]) &&
-                    (val > ar[tid - sizey]) &&
-                    (val > ar[tid + 1]) &&
-                    (val > ar[tid - 1]) &&
-                    (val > ar[tid - sizey - 1]) &&
-                    (val > ar[tid - sizey + 1]) &&
-                    (val > ar[tid + sizey - 1]) &&
-                    (val > ar[tid+sizey + 1] &&
-                    (val >= minAbsoluteIntensity));
-    }
-}
-"""
-
-kernels["maximal_pts_float32"] = cp.RawKernel(maximal_pts_float32, "maximal_pts")
-
-maximal_pts_float64 = r"""
-extern "C" __global__
-void maximal_pts(const double *ar, bool *out, const double minAbsoluteIntensity, const long long sizex, const long long sizey, const long long N){
-    int tid = blockDim.x * blockIdx.x + threadIdx.x;
-    int x = tid / sizey;
-    int y = tid % sizey;
-    bool res = false;
-    if (tid < N && x>0 && x<(sizex-1) && y>0 && y<(sizey-1)) {
-        double val = ar[tid];
-
-        out[tid] = ( val > ar[tid + sizey]) &&
-                    (val > ar[tid - sizey]) &&
-                    (val > ar[tid + 1]) &&
-                    (val > ar[tid - 1]) &&
-                    (val > ar[tid - sizey - 1]) &&
-                    (val > ar[tid - sizey + 1]) &&
-                    (val > ar[tid + sizey - 1]) &&
-                    (val > ar[tid+sizey + 1] &&
-                    (val >= minAbsoluteIntensity));
-    }
-}
-"""
-
-kernels["maximal_pts_float64"] = cp.RawKernel(maximal_pts_float64, "maximal_pts")
-
-
-################################ edge_boundary ######################################
-
-edge_boundary = r"""
-extern "C" __global__
-void edge_boundary(bool *ar, const long long edgeBoundary,
-                const long long sizex, const long long sizey, const long long N){
-    int tid = blockDim.x * blockIdx.x + threadIdx.x;
-    int x = tid % sizex;
-    int y = tid / sizey; // Floor divide
-    if (tid < N) {
-        if (x<edgeBoundary || x>(sizex-1-edgeBoundary) || y<edgeBoundary || y>(sizey-1-edgeBoundary)){
-            ar[tid] = false;
+
+
+
+        __all__ = ["kernels"]
+
+        kernels = {}
+
+        ############################# multicorr kernels #################################
+
+        import os
+
+        with open(os.path.join(os.path.dirname(__file__), "multicorr_row_kernel.cu"), "r") as f:
+            kernels["multicorr_row_kernel"] = cp.RawKernel(f.read(), "multicorr_row_kernel")
+
+        with open(os.path.join(os.path.dirname(__file__), "multicorr_col_kernel.cu"), "r") as f:
+            kernels["multicorr_col_kernel"] = cp.RawKernel(f.read(), "multicorr_col_kernel")
+
+
+        ############################# get_maximal_points ################################
+
+        """
+        These kernels are approximately 50x faster than the np.roll approach used in the CPU version,
+        per my testing with 1024x1024 pixels and float64 on a Jetson Xavier NX.
+        The boundary conditions are slightly different in this version, in that pixels on the edge
+        of the frame are always false. This simplifies the indexing, and since in the Braggdisk
+        detection application an edgeBoundary is always applied in the case of subpixel detection,
+        this is not considered a problem.
+        """
+
+        maximal_pts_float32 = r"""
+        extern "C" __global__
+        void maximal_pts(const float *ar, bool *out, const double minAbsoluteIntensity, const long long sizex, const long long sizey, const long long N){
+            int tid = blockDim.x * blockIdx.x + threadIdx.x;
+            int x = tid / sizey;
+            int y = tid % sizey;
+            bool res = false;
+            if (tid < N && x>0 && x<(sizex-1) && y>0 && y<(sizey-1)) {
+                float val = ar[tid];
+
+                out[tid] = ( val > ar[tid + sizey]) &&
+                            (val > ar[tid - sizey]) &&
+                            (val > ar[tid + 1]) &&
+                            (val > ar[tid - 1]) &&
+                            (val > ar[tid - sizey - 1]) &&
+                            (val > ar[tid - sizey + 1]) &&
+                            (val > ar[tid + sizey - 1]) &&
+                            (val > ar[tid+sizey + 1] &&
+                            (val >= minAbsoluteIntensity));
+            }
+        }
+        """
+
+        kernels["maximal_pts_float32"] = cp.RawKernel(maximal_pts_float32, "maximal_pts")
+
+        maximal_pts_float64 = r"""
+        extern "C" __global__
+        void maximal_pts(const double *ar, bool *out, const double minAbsoluteIntensity, const long long sizex, const long long sizey, const long long N){
+            int tid = blockDim.x * blockIdx.x + threadIdx.x;
+            int x = tid / sizey;
+            int y = tid % sizey;
+            bool res = false;
+            if (tid < N && x>0 && x<(sizex-1) && y>0 && y<(sizey-1)) {
+                double val = ar[tid];
+
+                out[tid] = ( val > ar[tid + sizey]) &&
+                            (val > ar[tid - sizey]) &&
+                            (val > ar[tid + 1]) &&
+                            (val > ar[tid - 1]) &&
+                            (val > ar[tid - sizey - 1]) &&
+                            (val > ar[tid - sizey + 1]) &&
+                            (val > ar[tid + sizey - 1]) &&
+                            (val > ar[tid+sizey + 1] &&
+                            (val >= minAbsoluteIntensity));
+            }
+        }
+        """
+
+        kernels["maximal_pts_float64"] = cp.RawKernel(maximal_pts_float64, "maximal_pts")
+
+
+        ################################ edge_boundary ######################################
+
+        edge_boundary = r"""
+        extern "C" __global__
+        void edge_boundary(bool *ar, const long long edgeBoundary,
+                        const long long sizex, const long long sizey, const long long N){
+            int tid = blockDim.x * blockIdx.x + threadIdx.x;
+            int x = tid % sizex;
+            int y = tid / sizey; // Floor divide
+            if (tid < N) {
+                if (x<edgeBoundary || x>(sizex-1-edgeBoundary) || y<edgeBoundary || y>(sizey-1-edgeBoundary)){
+                    ar[tid] = false;
+                }
+            }
         }
-    }
-}
-"""
+        """
 
-kernels["edge_boundary"] = cp.RawKernel(edge_boundary, "edge_boundary")
+        kernels["edge_boundary"] = cp.RawKernel(edge_boundary, "edge_boundary")
+    except Exception:
+        pass