vector_add_cuda.py
import numpy as np
import time
from numba import cuda

# Define vector size
N = 250_000_000

# CPU implementation of vector addition
def cpu_vector_add(a, b):
    return a + b

# CUDA kernel for vector addition: each thread adds one pair of elements
@cuda.jit
def gpu_vector_add(a, b, c):
    # Absolute position of this thread in the 1D grid
    idx = cuda.grid(1)
    # Guard: the grid may be slightly larger than the array
    if idx < a.size:
        c[idx] = a[idx] + b[idx]

# Create input vectors (float32, so each 250M-element array is ~1 GB on the host)
a = np.random.rand(N).astype(np.float32)
b = np.random.rand(N).astype(np.float32)
c = np.zeros(N, dtype=np.float32)

# Time the CPU vector addition
start = time.time()
c_cpu = cpu_vector_add(a, b)
cpu_time = time.time() - start
print(f"CPU time: {cpu_time:.5f} seconds")

# Allocate memory on the GPU and copy the inputs over
a_gpu = cuda.to_device(a)
b_gpu = cuda.to_device(b)
c_gpu = cuda.device_array(N, dtype=np.float32)  # match the float32 inputs (default would be float64)

# Time the GPU vector addition
threads_per_block = 1024
# Ceiling division so the grid covers all N elements
blocks_per_grid = (N + (threads_per_block - 1)) // threads_per_block
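# Note (not in the original script): the first launch of a @cuda.jit kernel
# triggers JIT compilation, which would otherwise be folded into the timing
# below. A warm-up launch with the same configuration excludes that cost.
gpu_vector_add[blocks_per_grid, threads_per_block](a_gpu, b_gpu, c_gpu)
cuda.synchronize()
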
start = time.time()
gpu_vector_add[blocks_per_grid, threads_per_block](a_gpu, b_gpu, c_gpu)
cuda.synchronize()  # kernel launches are asynchronous; wait before stopping the timer
gpu_time = time.time() - start

# Copy result back to the host
c_result = c_gpu.copy_to_host()
print(f"GPU time: {gpu_time:.5f} seconds")