▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在旗下的表现
● 代码,文件较多,放在一起了
1 ! main.f90 2 PROGRAM main 3 USE m_config, ONLY: nstop 4 USE m_physics, ONLY: physics 5 USE m_io, ONLY: write_output 6 USE m_setup, ONLY: initialize, cleanup 7 USE m_timing, ONLY: start_timer, end_timer, print_timers 8 9 IMPLICIT NONE 10 11 INTEGER :: ntstep 12 INTEGER, parameter :: itimloop = 5 13 14 CALL initialize() ! 初始化计时器和设备 15 16 WRITE(*,"(A)") "Start of time loop" 17 CALL start_timer(itimloop, "Time loop") 18 19 DO ntstep = 1, nstop ! 计算 20 CALL physics() 21 CALL write_output( ntstep ) 22 END DO 23 24 CALL end_timer( itimloop ) 25 WRITE(*,"(A)") "End of time loop" 26 27 CALL print_timers() 28 CALL cleanup() 29 30 END PROGRAM main 31 32 ! m_config.f90,运行参数 33 MODULE m_config 34 INTEGER, parameter :: nx = 128 ! 经度网格数 35 INTEGER, parameter :: ny = 128 ! 纬度网格数 36 INTEGER, parameter :: nz = 60 ! 海拔网格数 37 INTEGER, parameter :: nstop = 100 ! 时间步数 38 INTEGER, parameter :: nout = 20 ! 输出间隔 39 40 END MODULE m_config 41 42 ! m_fields.f90,场参数 43 MODULE m_fields 44 REAL*8, ALLOCATABLE :: qv(:,:,:) ! 水蒸汽含量 45 REAL*8, ALLOCATABLE :: t(:,:,:) ! 温度 46 47 END MODULE m_fields 48 49 ! m_io.f90,输入输出函数 50 MODULE m_io 51 USE m_config, ONLY: nout, nx, ny, nz 52 USE m_fields, ONLY: qv 53 54 IMPLICIT NONE 55 56 CONTAINS 57 SUBROUTINE write_output(ntstep) 58 IMPLICIT NONE 59 60 INTEGER, INTENT(IN) :: ntstep ! 当前时间片 61 INTEGER :: i, j, k 62 REAL*8 :: qv_mean ! 水蒸汽含量平均值(标量) 63 64 IF (MOD(ntstep, nout) /= 0) RETURN ! 当前时间片不作输出 65 66 qv_mean = 0.0D0 ! 计算均值并输出 67 DO k = 1, nz 68 DO j = 1, ny 69 DO i = 1, nx 70 qv_mean = qv_mean + qv(i,j,k) 71 END DO 72 END DO 73 END DO 74 qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) 75 76 WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean 77 END SUBROUTINE write_output 78 79 END MODULE m_io 80 81 ! m_parametrizations.f90,参数化方案 82 MODULE m_parametrizations 83 IMPLICIT NONE 84 85 REAL*8, parameter :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0 86 REAL*8, parameter :: cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0 87 88 CONTAINS 89 SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! 参数化方案一 90 IMPLICIT NONE 91 92 INTEGER, INTENT(IN) :: npx, npy, nlev ! 输入维度 93 REAL*8, INTENT(IN) :: t(:,:,:) ! 温度 94 REAL*8, INTENT(OUT) :: qc(:,:,:) ! 云水含量 95 REAL*8, INTENT(INOUT) :: qv(:,:,:) ! 水蒸汽含量 96 INTEGER :: i, j, k 97 98 DO k = 1, nlev 99 DO j = 1, npy 100 DO i = 1, npx 101 qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) 102 qc(i,j,k) = cs4 * qv(i,j,k) 103 END DO 104 END DO 105 END DO 106 END SUBROUTINE saturation_adjustment 107 108 SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) ! 参数化方案二 109 IMPLICIT NONE 110 111 INTEGER, INTENT(IN) :: npx, npy, nlev 112 REAL*8, INTENT(INOUT) :: t(:,:,:) 113 REAL*8, INTENT(IN) :: qc(:,:,:) 114 REAL*8, INTENT(INOUT) :: qv(:,:,:) 115 INTEGER :: i, j, k 116 117 DO k = 2, nlev 118 DO j = 1, npy 119 DO i = 1, npx 120 qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3 121 t(i, j, k) = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) 122 END DO 123 END DO 124 END DO 125 END SUBROUTINE microphysics 126 127 END MODULE m_parametrizations 128 129 ! m_physics.f90,参数化方案的执行 130 MODULE m_physics 131 USE m_config, ONLY: nx, ny, nz 132 USE m_fields, ONLY: qv, t 133 USE m_parametrizations, ONLY: saturation_adjustment, microphysics 134 135 IMPLICIT NONE 136 137 CONTAINS 138 SUBROUTINE physics() 139 IMPLICIT NONE 140 REAL*8 :: qc(nx,ny,nz) ! 云水含量临时变量 141 CALL saturation_adjustment(nx, ny, nz, t, qc, qv) ! 第一物理参数化 142 CALL microphysics(nx, ny, nz, t, qc, qv) ! 第二物理参数化 143 END SUBROUTINE physics 144 145 END MODULE m_physics 146 147 ! m_timming.f90,计时器 148 MODULE m_timing 149 IMPLICIT NONE 150 151 INTEGER, PARAMETER :: ntimer=10 ! 计时器数量 152 REAL*8 :: rtimer(ntimer) ! 计时器 153 CHARACTER(32) :: timertag(ntimer) ! 计时器标签 154 INTEGER :: icountold(ntimer), & ! tick (start of timer section) 155 icountrate, & ! countrate of SYSTEM_CLOCK() 156 icountmax ! maximum counter value of SYSTEM_CLOCK() 157 158 CONTAINS 159 SUBROUTINE init_timers() ! 初始化计时器 160 IMPLICIT NONE 161 162 rtimer(:) = 0.0D0 163 timertag(:) = "" 164 icountold(:) = 0 165 166 CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax ) 167 END SUBROUTINE init_timers 168 169 SUBROUTINE start_timer(id, tag) ! 开始计时 170 IMPLICIT NONE 171 172 INTEGER, INTENT(IN) :: id 173 CHARACTER(*), INTENT(IN) :: tag 174 175 IF (id < 1 .OR. id > ntimer) THEN ! 检查计时器编号范围 176 WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer 177 STOP 178 END IF 179 180 181 IF (LEN_TRIM(timertag(id)) /= 0) THEN ! 检查计时器是否已经开始运行 182 WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id 183 STOP 184 END IF 185 186 IF (LEN_TRIM(tag) == 0) THEN ! 检查计时器标签是否非空 187 WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id 188 STOP 189 END IF 190 191 timertag(id) = TRIM(tag) ! 保存标签 192 !$acc wait 193 194 CALL SYSTEM_CLOCK( COUNT=icountold(id) ) ! 开始计时 195 END SUBROUTINE start_timer 196 197 SUBROUTINE end_timer(id) ! 结束计时 198 IMPLICIT NONE 199 200 INTEGER, INTENT(IN) :: id 201 INTEGER :: icountnew 202 203 IF (id < 1 .OR. id > ntimer) THEN ! 检查计时器编号范围 204 WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer 205 STOP 206 END IF 207 208 IF (LEN_TRIM(timertag(id)) == 0) THEN ! 检查计时器是否已经开始运行 209 WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id 210 STOP 211 END IF 212 !$acc wait 213 214 CALL SYSTEM_CLOCK( COUNT=icountnew ) ! 获取当前时间,计算耗时 215 rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id))) 216 END SUBROUTINE end_timer 217 218 SUBROUTINE print_timers() ! 打印计时 219 IMPLICIT NONE 220 221 INTEGER :: id 222 223 WRITE(*,"(A)") "----------------------------" 224 WRITE(*,"(A)") "Timers:" 225 WRITE(*,"(A)") "----------------------------" 226 DO id = 1, ntimer 227 IF ( rtimer(id) > 0.0D0 ) THEN 228 WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*1.0D3, " ms" 229 END IF 230 END DO 231 WRITE(*,"(A)") "----------------------------" 232 END SUBROUTINE print_timers 233 234 END MODULE m_timing 235 236 ! m_setup.f90,初始化和清理 237 MODULE m_setup 238 USE m_config, ONLY: nstop, nout, nx, ny, nz 239 USE m_fields, ONLY: t,qv 240 USE m_timing, ONLY: init_timers, start_timer, end_timer 241 242 IMPLICIT NONE 243 244 CONTAINS 245 SUBROUTINE initialize() ! 初始化计时器和设备 246 IMPLICIT NONE 247 248 INTEGER, PARAMETER :: itiminit = 1 ! 计时器编号 249 INTEGER :: i, j, k 250 251 #ifdef _OPENACC 252 WRITE(*,"(A)") "Running with OpenACC" 253 #else 254 WRITE(*,"(A)") "Running without OpenACC" 255 #endif 256 257 WRITE(*,"(A)") "Initialize" 258 259 CALL init_timers() 260 CALL start_timer( itiminit, "Initialization" ) 261 ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) 262 263 DO k =1, nz 264 DO j = 1, ny 265 DO i = 1, nx 266 t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) 267 qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) 268 END DO 269 END DO 270 END DO 271 272 #ifdef _OPENACC 273 CALL initialize_gpu() 274 #endif 275 276 CALL end_timer( itiminit ) 277 END SUBROUTINE initialize 278 279 SUBROUTINE initialize_gpu()! 让 GPU 跑一个小内核来初始化 280 IMPLICIT NONE 281 282 INTEGER :: temp(16) 283 INTEGER :: i 284 285 !$acc parallel loop 286 DO i = 1, 16 287 temp(i) = 1 288 END DO 289 290 IF (SUM(temp) == 16) THEN 291 WRITE(*,"(A)") "GPU initialized" 292 ELSE 293 WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU" 294 STOP 295 END IF 296 END SUBROUTINE initialize_gpu 297 298 SUBROUTINE cleanup()! 清扫 t 和 qv 的内存 299 IMPLICIT NONE 300 301 DEALLOCATE( t, qv ) 302 END SUBROUTINE cleanup 303 304 END MODULE m_setup
● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90
1 ! m_io.f90 2 MODULE m_io 3 USE m_config, ONLY: nout, nx, ny, nz 4 USE m_fields, ONLY: qv 5 6 IMPLICIT NONE 7 8 CONTAINS 9 SUBROUTINE write_output(ntstep) 10 IMPLICIT NONE 11 12 INTEGER, INTENT(IN) :: ntstep 13 INTEGER :: i, j, k 14 REAL*8 :: qv_mean 15 16 IF (MOD(ntstep, nout) /= 0) RETURN 17 18 qv_mean = 0.0D0 19 DO k = 1, nz 20 !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean) 21 DO j = 1, ny 22 DO i = 1, nx 23 qv_mean = qv_mean + qv(i,j,k) 24 END DO 25 END DO 26 END DO 27 qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) 28 29 WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean 30 END SUBROUTINE write_output 31 32 END MODULE m_io 33 34 ! m_parametrizations.f90 35 MODULE m_parametrizations 36 IMPLICIT NONE 37 38 REAL*8, parameter :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0 39 REAL*8, parameter :: cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0 40 41 CONTAINS 42 SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) 43 IMPLICIT NONE 44 45 INTEGER, INTENT(IN) :: npx, npy, nlev 46 REAL*8, INTENT(IN) :: t(:,:,:) 47 REAL*8, INTENT(OUT) :: qc(:,:,:) 48 REAL*8, INTENT(INOUT) :: qv(:,:,:) 49 INTEGER :: i, j, k 50 51 !$OMP PARALLEL 52 DO k = 1, nlev 53 !$OMP DO PRIVATE(i,j) 54 DO j = 1, npy 55 DO i = 1, npx 56 qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) 57 qc(i,j,k) = cs4 * qv(i,j,k) 58 END DO 59 END DO 60 END DO 61 !$OMP END PARALLEL 62 END SUBROUTINE saturation_adjustment 63 64 SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) 65 IMPLICIT NONE 66 67 INTEGER, INTENT(IN) :: npx, npy, nlev 68 REAL*8, INTENT(INOUT) :: t(:,:,:) 69 REAL*8, INTENT(IN) :: qc(:,:,:) 70 REAL*8, INTENT(INOUT) :: qv(:,:,:) 71 INTEGER :: i, j, k 72 73 !$OMP PARALLEL 74 DO k = 2, nlev 75 !$OMP DO PRIVATE(i,j) 76 DO j = 1, npy 77 DO i = 1, npx 78 qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3 79 t(i, j, k) = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) 80 END DO 81 END DO 82 END DO 83 !$OMP END PARALLEL 84 END SUBROUTINE microphysics 85 86 END MODULE m_parametrizations 87 88 ! m_setup.f90 89 MODULE m_setup 90 USE m_config, ONLY: nstop, nout, nx, ny, nz 91 USE m_fields, ONLY: t,qv 92 USE m_timing, ONLY: init_timers, start_timer, end_timer 93 94 IMPLICIT NONE 95 96 CONTAINS 97 SUBROUTINE initialize() ! 初始化计时器和设备 98 IMPLICIT NONE 99 100 INTEGER, PARAMETER :: itiminit = 1 ! timer ID 101 INTEGER :: i, j, k ! loop indices 102 INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM 103 104 #ifdef _OPENACC 105 WRITE(*,"(A)") "Running with OpenACC" 106 #else 107 WRITE(*,"(A)") "Running without OpenACC" 108 #ifdef _OPENMP 109 !$OMP PARALLEL 110 IF (OMP_GET_THREAD_NUM()==0) THEN 111 WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads" 112 END IF 113 !$OMP END PARALLEL 114 #endif 115 #endif 116 WRITE(*,"(A)") "Initialize" 117 118 CALL init_timers() 119 CALL start_timer( itiminit, "Initialization" ) 120 ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) 121 122 DO k =1, nz 123 DO j = 1, ny 124 DO i = 1, nx 125 t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) 126 qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) 127 END DO 128 END DO 129 END DO 130 131 #ifdef _OPENACC 132 CALL initialize_gpu() 133 #endif 134 135 CALL end_timer( itiminit ) 136 END SUBROUTINE initialize 137 138 SUBROUTINE initialize_gpu() 139 IMPLICIT NONE 140 141 INTEGER :: temp(16) 142 INTEGER :: i 143 144 !$acc parallel loop 145 DO i = 1, 16 146 temp(i) = 1 147 END DO 148 149 IF (SUM(temp) == 16) THEN 150 WRITE(*,"(A)") "GPU initialized" 151 ELSE 152 WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU" 153 STOP 154 END IF 155 END SUBROUTINE initialize_gpu 156 157 SUBROUTINE cleanup() 158 IMPLICIT NONE 159 160 DEALLOCATE( t, qv ) 161 END SUBROUTINE cleanup 162 163 END MODULE m_setup
● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。树上的优化 04 设计算法改动,没有参与比较
1 ! m_io.f90 2 MODULE m_io 3 USE m_config, ONLY: nout, nx, ny, nz 4 USE m_fields, ONLY: qv 5 6 IMPLICIT NONE 7 8 CONTAINS 9 SUBROUTINE write_output(ntstep) 10 IMPLICIT NONE 11 12 INTEGER, INTENT(IN) :: ntstep 13 INTEGER :: i, j, k 14 REAL*8 :: qv_mean 15 16 IF (MOD(ntstep, nout) /= 0) RETURN 17 18 !$acc data present(qv) 19 qv_mean = 0.0D0 20 !$acc parallel 21 !$acc loop gang vector collapse(3) reduction(+:qv_mean) 22 DO k = 1, nz 23 DO j = 1, ny 24 DO i = 1, nx 25 qv_mean = qv_mean + qv(i,j,k) 26 END DO 27 END DO 28 END DO 29 !$acc end parallel 30 !$acc end data 31 qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean)) 32 33 WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean 34 END SUBROUTINE write_output 35 36 END MODULE m_io 37 38 ! m_parametrizations.f90 39 MODULE m_parametrizations 40 IMPLICIT NONE 41 42 REAL*8, parameter :: cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0 43 REAL*8, parameter :: cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0 44 45 CONTAINS 46 SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) 47 IMPLICIT NONE 48 49 INTEGER, INTENT(IN) :: npx, npy, nlev 50 REAL*8, INTENT(IN) :: t(:,:,:) 51 REAL*8, INTENT(OUT) :: qc(:,:,:) 52 REAL*8, INTENT(INOUT) :: qv(:,:,:) 53 INTEGER :: i, j, k 54 55 !$acc data present(t,qv,qc) 56 !$acc parallel 57 !$acc loop gang vector collapse(3) 58 DO k = 1, nlev 59 DO j = 1, npy 60 DO i = 1, npx 61 qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) ) 62 qc(i,j,k) = cs4 * qv(i,j,k) 63 END DO 64 END DO 65 END DO 66 !$acc end parallel 67 !$acc end data 68 END SUBROUTINE saturation_adjustment 69 70 SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv) 71 IMPLICIT NONE 72 73 INTEGER, INTENT(IN) :: npx, npy, nlev 74 REAL*8, INTENT(INOUT) :: t(:,:,:) 75 REAL*8, INTENT(IN) :: qc(:,:,:) 76 REAL*8, INTENT(INOUT) :: qv(:,:,:) 77 INTEGER :: i, j, k 78 !$acc data present(t,qv,qc) 79 !$acc parallel 80 !$acc loop seq 81 DO k = 2, nlev 82 !$acc loop gang 83 DO j = 1, npy 84 !$acc loop vector 85 DO i = 1, npx 86 qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3 87 t(i, j, k) = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) ) 88 END DO 89 END DO 90 END DO 91 !$acc end parallel 92 !$acc end data 93 END SUBROUTINE microphysics 94 95 END MODULE m_parametrizations 96 97 ! m_physics.f90 98 MODULE m_physics 99 USE m_config, ONLY: nx, ny, nz 100 USE m_fields, ONLY: qv, t 101 USE m_parametrizations, ONLY: saturation_adjustment, microphysics 102 103 IMPLICIT NONE 104 105 REAL*8, ALLOCATABLE :: qc(:,:,:) ! 提前声明,由 init_physics 和 finalize_physics 来申请和释放 106 107 CONTAINS 108 SUBROUTINE physics() 109 IMPLICIT NONE 110 111 CALL saturation_adjustment(nx, ny, nz, t, qc, qv) 112 CALL microphysics(nx, ny, nz, t, qc, qv) 113 END SUBROUTINE physics 114 115 SUBROUTINE init_physics() 116 IMPLICIT NONE 117 118 ALLOCATE( qc(nx,ny,nz) ) 119 !$acc enter data create(qc) 120 END SUBROUTINE init_physics 121 122 SUBROUTINE finalize_physics() 123 IMPLICIT NONE 124 1 125 !$acc exit data delete(qc) 126 DEALLOCATE(qc) 127 END SUBROUTINE finalize_physics 128 129 END MODULE m_physics 130 131 ! m_setup.f90 132 MODULE m_setup 133 USE m_config, ONLY: nstop, nout, nx, ny, nz 134 USE m_fields, ONLY: t,qv 135 USE m_timing, ONLY: init_timers, start_timer, end_timer 136 USE m_physics, ONLY: init_physics, finalize_physics 137 IMPLICIT NONE 138 139 CONTAINS 140 SUBROUTINE initialize() ! 初始化计时器和设备 141 IMPLICIT NONE 142 143 INTEGER, PARAMETER :: itiminit = 1 144 INTEGER :: i, j, k 145 146 #ifdef _OPENACC 147 WRITE(*,"(A)") "Running with OpenACC" 148 #else 149 WRITE(*,"(A)") "Running without OpenACC" 150 #endif 151 152 WRITE(*,"(A)") "Initialize" 153 154 CALL init_timers() 155 CALL start_timer( itiminit, "Initialization" ) 156 ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) ) 157 158 !$acc enter data create(t,qv) 159 DO k =1, nz 160 DO j = 1, ny 161 DO i = 1, nx 162 t(i,j,k) = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz))) 163 qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz))) 164 END DO 165 END DO 166 END DO 167 !$acc update device(t,qv) 168 169 #ifdef _OPENACC 170 CALL initialize_gpu() 171 #endif 172 CALL init_physics() 173 CALL end_timer( itiminit ) 174 END SUBROUTINE initialize 175 176 SUBROUTINE initialize_gpu() 177 IMPLICIT NONE 178 179 INTEGER :: temp(16) 180 INTEGER :: i 181 182 !$acc parallel loop 183 DO i = 1, 16 184 temp(i) = 1 185 END DO 186 187 IF (SUM(temp) == 16) THEN 188 WRITE(*,"(A)") "GPU initialized" 189 ELSE 190 WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU" 191 STOP 192 END IF 193 END SUBROUTINE initialize_gpu 194 195 SUBROUTINE cleanup() 196 IMPLICIT NONE 197 198 !$acc exit data delete(t,qv) 199 DEALLOCATE( t, qv ) 200 CALL finalize_physics() 201 END SUBROUTINE cleanup 202 203 END MODULE m_setup
● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是将预编译器作用到 fortran 文件中,否则 m_setup.f90 中的 # 预编译命令会被当成错误
cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4 make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial' compiling m_config.f90 compiling m_fields.f90 compiling m_io.f90 compiling m_parametrizations.f90 compiling m_physics.f90 compiling m_timing.f90 compiling m_setup.f90 compiling main.f90 make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial' make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp' ... ! 类似上面的过程 make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4' cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial Running without OpenACC Initialize Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 17.28 ms Time loop : 978.08 ms ---------------------------- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp Running without OpenACC Running with OpenMP with 1 threads Initialize Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 17.96 ms Time loop : 898.92 ms ---------------------------- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1 Running with OpenACC Initialize GPU initialized Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 191.11 ms Time loop : 1044.35 ms ---------------------------- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2 Running with OpenACC Initialize GPU initialized Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 176.72 ms Time loop : 142.11 ms ---------------------------- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3 Running with OpenACC Initialize GPU initialized Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 162.15 ms Time loop : 121.77 ms ---------------------------- cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4 Running with OpenACC Initialize GPU initialized Start of time loop Step: 20, mean(qv) = 1.14302104E-04 Step: 40, mean(qv) = 1.34041461E-04 Step: 60, mean(qv) = 1.53710207E-04 Step: 80, mean(qv) = 1.73309068E-04 Step: 100, mean(qv) = 1.92838848E-04 End of time loop ---------------------------- Timers: ---------------------------- Initialization : 152.47 ms Time loop : 166.53 ms ----------------------------
● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”