-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_gpu_allgather.f90
105 lines (70 loc) · 1.94 KB
/
test_gpu_allgather.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
!> Micro-benchmark for MPI_Allgather on a default-integer buffer.
!>
!> Despite the program name, the collective exercised here is MPI_Allgather.
!> Two variants are timed and rank 0 prints the mean wall time per call:
!>   1. "CPU MPI": the host arrays are passed directly to MPI_Allgather.
!>   2. "GPU MPI": the arrays are placed in an OpenACC data region and passed
!>      from inside a host_data use_device region.
!>      NOTE(review): this presumably requires a device-aware MPI library;
!>      that cannot be confirmed from this file.
!>
!> Every rank contributes the first ll/nrank elements of buf; the gathered
!> result occupies the first nrank*(ll/nrank) elements of buf2.
program test_gpu_allreduce
  use mpi
  use, intrinsic :: iso_fortran_env, only: output_unit
  implicit none

  integer, parameter :: dp = selected_real_kind(15,300)
  integer, parameter :: ll = 20480000   ! number of elements in each buffer
  integer, parameter :: nn = 10         ! iterations per warm-up / timed loop

  integer :: comm
  integer :: irank
  integer :: nrank
  integer :: ierr
  integer :: ii
  integer :: chunk       ! elements contributed by each rank (ll/nrank)
  integer :: int_bytes   ! size of one default integer in bytes
  real(dp) :: t1
  real(dp) :: t2
  integer, allocatable :: buf(:)
  integer, allocatable :: buf2(:)

  call MPI_Init(ierr)
  comm = MPI_COMM_WORLD
  call MPI_Comm_rank(comm, irank, ierr)
  call MPI_Comm_size(comm, nrank, ierr)

  allocate(buf(ll))
  allocate(buf2(ll))
  buf(:) = 1
  ! Define the whole receive buffer: when ll is not a multiple of nrank the
  ! tail is never written by MPI_Allgather but is still copied to the device.
  buf2(:) = 0

  ! Integer division: any remainder of ll/nrank is simply not communicated.
  chunk = ll / nrank
  ! storage_size returns bits; avoids assuming a 4-byte default integer.
  int_bytes = storage_size(buf) / 8

  if (irank == 0) then
    write(output_unit,"(2X,A,I10)") "Number of ranks:", nrank
    write(output_unit,"(2X,A,I10)") "Number of bytes:", ll*int_bytes
    flush(output_unit)
  end if

  !!!!!!!!!!!!!!!!!!!!! CPU MPI !!!!!!!!!!!!!!!!!!!!!

  ! warm up
  do ii = 1, nn
    call MPI_Allgather(buf, chunk, MPI_INTEGER, buf2, chunk, MPI_INTEGER, comm, ierr)
  end do

  ! time
  ! Line the ranks up first so rank 0's interval does not include the time
  ! other ranks need to reach the timed loop.
  call MPI_Barrier(comm, ierr)
  t1 = MPI_Wtime()
  do ii = 1, nn
    call MPI_Allgather(buf, chunk, MPI_INTEGER, buf2, chunk, MPI_INTEGER, comm, ierr)
  end do
  t2 = MPI_Wtime()

  if (irank == 0) then
    write(output_unit,"(2X,A,F10.3,A)") "CPU MPI:", (t2-t1)/nn, "s"
    flush(output_unit)
  end if

  !!!!!!!!!!!!!!!!!!!!! GPU MPI !!!!!!!!!!!!!!!!!!!!!

  buf(:) = 1

  ! Both arrays are copied in on entry and copied back at "end data";
  ! those transfers lie outside the timed interval below.
  !$acc data copy(buf,buf2)

  ! warm up
  !$acc host_data use_device(buf,buf2)
  do ii = 1, nn
    call MPI_Allgather(buf, chunk, MPI_INTEGER, buf2, chunk, MPI_INTEGER, comm, ierr)
  end do
  !$acc end host_data

  ! time
  call MPI_Barrier(comm, ierr)
  t1 = MPI_Wtime()
  !$acc host_data use_device(buf,buf2)
  do ii = 1, nn
    call MPI_Allgather(buf, chunk, MPI_INTEGER, buf2, chunk, MPI_INTEGER, comm, ierr)
  end do
  !$acc end host_data
  t2 = MPI_Wtime()

  if (irank == 0) then
    write(output_unit,"(2X,A,F10.3,A)") "GPU MPI:", (t2-t1)/nn, "s"
    flush(output_unit)
  end if

  !$acc end data

  ! Release both buffers (the original freed only buf).
  deallocate(buf)
  deallocate(buf2)

  call MPI_Finalize(ierr)
end program test_gpu_allreduce