-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathCheckDivergence.F90
More file actions
108 lines (100 loc) · 3.23 KB
/
Copy pathCheckDivergence.F90
File metadata and controls
108 lines (100 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !
! FILE: CheckDivergence.F90 !
! CONTAINS: subroutine CheckDivergence !
! !
! PURPOSE: Check the maximum divergence of velocity !
! in the domain !
! !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Compute the largest absolute value of the discrete velocity divergence.
!
! For every cell (kc,jc,ic) of the local index range the quantity
!     (vz(kc,jc,ic+1)-vz(kc,jc,ic))*dz
!   + (vy(kc,jc+1,ic)-vy(kc,jc,ic))*dy
!   + (vx(kc+1,jc,ic)-vx(kc,jc,ic))*udx3m(kc)
! is evaluated and the maximum of its magnitude is kept.
!
! Build variants selected by the preprocessor:
!   CUDA only : one CUF kernel over xstart:xend using the *_d arrays.
!   hybrid    : a CUF kernel over xstart_gpu:xend_gpu on the *_d arrays
!               plus an OpenMP loop over xstart_cpu:xend_cpu on the host
!               arrays; the two partial maxima are merged afterwards.
!   otherwise : one OpenMP loop over xstart:xend on the host arrays.
!
! Arguments:
!   qmax (out) : maximum of abs(divergence) found locally, then passed
!                through MpiMaxRealScalar (defined elsewhere; presumably
!                a max-reduction across MPI ranks - confirm there).
subroutine CheckDivergence(qmax)
#if defined(USE_CUDA) && defined(USE_HYBRID)
  use cudafor
  use param, only: fp_kind, nxm,dz,dy, udx3m_h => udx3m, udx3m => udx3m_d
  use local_arrays, only: vy_h => vy, vy => vy_d, vx_h => vx, vx => vx_d, vz_h => vz, vz => vz_d
#elif defined (USE_CUDA)
  use cudafor
  use param, only: fp_kind, nxm,dz,dy, udx3m => udx3m_d
  use local_arrays, only: vy => vy_d, vx => vx_d, vz => vz_d
#else
  use param, only: fp_kind, nxm,dz,dy, udx3m
  use local_arrays, only: vy,vx,vz
#endif
  use mpih
#ifdef USE_HYBRID
  use decomp_2d, only: xstart => xstart_gpu, xend => xend_gpu, xstart_cpu, xend_cpu, nrank
#else
  use decomp_2d, only: xstart,xend, nrank
#endif
  use nvtx
  implicit none
  real(fp_kind),intent(out) :: qmax
  integer :: ic,jc,kc
  real(fp_kind) :: dqcap
#ifdef USE_HYBRID
  ! Separate accumulators for the host part and the device part
  real(fp_kind) :: qmax_h
  real(fp_kind), device :: qmax_d
  qmax_h =-huge(real(0.0,fp_kind))
  qmax_d =-huge(real(0.0,fp_kind))
#endif
  ! Start from the most negative representable value so that any
  ! abs(dqcap), which is never negative, replaces it.
  qmax =-huge(real(0.0,fp_kind))

  ! Main loop nest. The neighbour indices are written inline (ic+1, jc+1,
  ! kc+1) so that no statement sits between the three do statements and
  ! the nest stays tightly nested for the kernel directive.
#ifdef USE_CUDA
!$cuf kernel do(3) <<<*,*>>>
#else
!$OMP PARALLEL DO &
!$OMP DEFAULT(none) &
!$OMP SHARED(xstart,xend,nxm,vz,vy,vx,dz,dy,udx3m) &
!$OMP PRIVATE(ic,jc,kc) &
!$OMP PRIVATE(dqcap) &
!$OMP REDUCTION(max:qmax)
#endif
  do ic=xstart(3),xend(3)
    do jc=xstart(2),xend(2)
      do kc=1,nxm
        dqcap= (vz(kc,jc,ic+1)-vz(kc,jc,ic))*dz &
              +(vy(kc,jc+1,ic)-vy(kc,jc,ic))*dy &
              +(vx(kc+1,jc,ic)-vx(kc,jc,ic))*udx3m(kc)
#ifdef USE_HYBRID
        qmax_d = max(abs(dqcap),qmax_d)
#else
        qmax = max(abs(dqcap),qmax)
#endif
      enddo
    enddo
  enddo
#ifndef USE_CUDA
!$OMP END PARALLEL DO
#endif

#ifdef USE_HYBRID
  ! Host share of the hybrid build: same formula on the host arrays over
  ! the xstart_cpu:xend_cpu range, wrapped in an NVTX range.
  call nvtxStartRangeAsync("CPU", 4)
!$OMP PARALLEL DO &
!$OMP DEFAULT(none) &
!$OMP SHARED(xstart_cpu,xend_cpu,nxm,vz_h,vy_h,vx_h,dz,dy,udx3m_h) &
!$OMP PRIVATE(ic,jc,kc) &
!$OMP PRIVATE(dqcap) &
!$OMP REDUCTION(max:qmax_h)
  do ic=xstart_cpu(3),xend_cpu(3)
    do jc=xstart_cpu(2),xend_cpu(2)
      do kc=1,nxm
        dqcap= (vz_h(kc,jc,ic+1)-vz_h(kc,jc,ic))*dz &
              +(vy_h(kc,jc+1,ic)-vy_h(kc,jc,ic))*dy &
              +(vx_h(kc+1,jc,ic)-vx_h(kc,jc,ic))*udx3m_h(kc)
        qmax_h = max(abs(dqcap),qmax_h)
      enddo
    enddo
  enddo
!$OMP END PARALLEL DO
  call nvtxEndRangeAsync

  ! Reduce device and host divergence: copy the device scalar into qmax,
  ! then take the larger of it and the host result.
  qmax = qmax_d
  qmax = max(qmax, qmax_h)
#endif

  call MpiMaxRealScalar(qmax)
end subroutine CheckDivergence