zoukankan      html  css  js  c++  java
  • OpenACC 云水参数化方案

    ▶ 书上第十三章,用一系列步骤优化一个云水参数化方案。用于熟悉 Fortran 以及 OpenACC 在旗下的表现

    ● 代码,文件较多,放在一起了

      1 ! main.f90
      2 PROGRAM main
      3     USE m_config,  ONLY: nstop
      4     USE m_physics, ONLY: physics
      5     USE m_io,      ONLY: write_output
      6     USE m_setup,   ONLY: initialize, cleanup
      7     USE m_timing,  ONLY: start_timer, end_timer, print_timers
      8 
      9     IMPLICIT NONE
     10 
     11     INTEGER :: ntstep
     12     INTEGER, parameter :: itimloop = 5
     13 
     14     CALL initialize()       ! 初始化计时器和设备
     15     
     16     WRITE(*,"(A)") "Start of time loop"
     17     CALL start_timer(itimloop, "Time loop")
     18 
     19     DO ntstep = 1, nstop    ! 计算
     20         CALL physics()
     21         CALL write_output( ntstep )
     22     END DO
     23 
     24     CALL end_timer( itimloop )
     25     WRITE(*,"(A)") "End of time loop"
     26 
     27     CALL print_timers()
     28     CALL cleanup()
     29 
     30 END PROGRAM main
     31 
     32 ! m_config.f90,运行参数
     33 MODULE m_config
     34     INTEGER, parameter :: nx    = 128  ! 经度网格数 
     35     INTEGER, parameter :: ny    = 128  ! 纬度网格数
     36     INTEGER, parameter :: nz    = 60   ! 海拔网格数
     37     INTEGER, parameter :: nstop = 100  ! 时间步数
     38     INTEGER, parameter :: nout  = 20   ! 输出间隔
     39     
     40 END MODULE m_config
     41 
     42 ! m_fields.f90,场参数
     43 MODULE m_fields
     44     REAL*8, ALLOCATABLE :: qv(:,:,:)  ! 水蒸汽含量
     45     REAL*8, ALLOCATABLE :: t(:,:,:)   ! 温度
     46   
     47 END MODULE m_fields
     48 
     49 ! m_io.f90,输入输出函数
     50 MODULE m_io
     51     USE m_config,  ONLY: nout, nx, ny, nz
     52     USE m_fields,  ONLY: qv
     53 
     54     IMPLICIT NONE
     55 
     56 CONTAINS
     57     SUBROUTINE write_output(ntstep)
     58         IMPLICIT NONE
     59 
     60         INTEGER, INTENT(IN) :: ntstep       ! 当前时间片
     61         INTEGER :: i, j, k
     62         REAL*8  :: qv_mean                  ! 水蒸汽含量平均值(标量)
     63         
     64         IF (MOD(ntstep, nout) /= 0) RETURN  ! 当前时间片不作输出            
     65 
     66         qv_mean = 0.0D0                     ! 计算均值并输出
     67         DO k = 1, nz
     68             DO j = 1, ny
     69                 DO i = 1, nx
     70                     qv_mean = qv_mean + qv(i,j,k)
     71                 END DO
     72             END DO
     73         END DO
     74         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
     75 
     76         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
     77     END SUBROUTINE write_output
     78 
     79 END MODULE m_io
     80 
     81 ! m_parametrizations.f90,参数化方案
     82 MODULE m_parametrizations
     83     IMPLICIT NONE
     84 
     85     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
     86     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
     87 
     88 CONTAINS
     89     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv) ! 参数化方案一
     90         IMPLICIT NONE
     91     
     92         INTEGER, INTENT(IN)    :: npx, npy, nlev  ! 输入维度
     93         REAL*8,  INTENT(IN)    :: t(:,:,:)        ! 温度
     94         REAL*8,  INTENT(OUT)   :: qc(:,:,:)       ! 云水含量
     95         REAL*8,  INTENT(INOUT) :: qv(:,:,:)       ! 水蒸汽含量
     96         INTEGER :: i, j, k
     97 
     98         DO k = 1, nlev
     99             DO j = 1, npy
    100                 DO i = 1, npx
    101                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
    102                     qc(i,j,k) = cs4 * qv(i,j,k)
    103                 END DO
    104             END DO
    105         END DO
    106     END SUBROUTINE saturation_adjustment
    107 
    108     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)  ! 参数化方案二
    109         IMPLICIT NONE
    110 
    111         INTEGER, INTENT(IN)   :: npx, npy, nlev
    112         REAL*8, INTENT(INOUT) :: t(:,:,:)      
    113         REAL*8, INTENT(IN)    :: qc(:,:,:)     
    114         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
    115         INTEGER :: i, j, k
    116 
    117         DO k = 2, nlev
    118             DO j = 1, npy
    119                 DO i = 1, npx
    120                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
    121                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
    122                 END DO
    123             END DO
    124         END DO
    125     END SUBROUTINE microphysics
    126 
    127 END MODULE m_parametrizations
    128 
    129 ! m_physics.f90,参数化方案的执行
    130 MODULE m_physics
    131     USE m_config,           ONLY: nx, ny, nz
    132     USE m_fields,           ONLY: qv, t
    133     USE m_parametrizations, ONLY: saturation_adjustment, microphysics
    134 
    135     IMPLICIT NONE
    136 
    137 CONTAINS
    138     SUBROUTINE physics()
    139         IMPLICIT NONE
    140         REAL*8 :: qc(nx,ny,nz)                              ! 云水含量临时变量
    141         CALL saturation_adjustment(nx, ny, nz, t, qc, qv)   ! 第一物理参数化  
    142         CALL microphysics(nx, ny, nz, t, qc, qv)            ! 第二物理参数化
    143     END SUBROUTINE physics
    144     
    145 END MODULE m_physics
    146 
    147 ! m_timming.f90,计时器
    148 MODULE m_timing
    149     IMPLICIT NONE
    150 
    151     INTEGER, PARAMETER :: ntimer=10             ! 计时器数量
    152     REAL*8             :: rtimer(ntimer)        ! 计时器
    153     CHARACTER(32)      :: timertag(ntimer)      ! 计时器标签
    154     INTEGER            :: icountold(ntimer), &  ! tick (start of timer section)
    155                         icountrate,          &  ! countrate of SYSTEM_CLOCK()
    156                         icountmax               ! maximum counter value of SYSTEM_CLOCK()
    157 
    158 CONTAINS
    159     SUBROUTINE init_timers()        ! 初始化计时器
    160         IMPLICIT NONE
    161 
    162         rtimer(:)   = 0.0D0
    163         timertag(:) = ""
    164         icountold(:) = 0
    165 
    166         CALL SYSTEM_CLOCK( COUNT_RATE=icountrate, COUNT_MAX=icountmax )
    167     END SUBROUTINE init_timers
    168 
    169     SUBROUTINE start_timer(id, tag) ! 开始计时
    170         IMPLICIT NONE
    171 
    172         INTEGER, INTENT(IN)       :: id
    173         CHARACTER(*), INTENT(IN) :: tag
    174 
    175         IF (id < 1 .OR. id > ntimer) THEN           ! 检查计时器编号范围
    176           WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceeds maximum timer number", ntimer
    177           STOP
    178         END IF
    179 
    180 
    181         IF (LEN_TRIM(timertag(id)) /= 0) THEN       ! 检查计时器是否已经开始运行
    182           WRITE(*,"(A,I4)") "Error: timer already started previously, id:", id
    183           STOP
    184         END IF
    185 
    186         IF (LEN_TRIM(tag) == 0) THEN                ! 检查计时器标签是否非空
    187           WRITE(*,"(A,I4)") "Error: empty tag provided, id:", id
    188           STOP
    189         END IF
    190 
    191         timertag(id) = TRIM(tag)                    ! 保存标签
    192         !$acc wait
    193         
    194         CALL SYSTEM_CLOCK( COUNT=icountold(id) )    ! 开始计时
    195     END SUBROUTINE start_timer
    196 
    197     SUBROUTINE end_timer(id) ! 结束计时
    198         IMPLICIT NONE
    199 
    200         INTEGER, INTENT(IN) :: id
    201         INTEGER             :: icountnew
    202 
    203         IF (id < 1 .OR. id > ntimer) THEN       ! 检查计时器编号范围
    204           WRITE(*,"(A,I4,A,I4)") "Error: timer id=", id, "exceed max timer number", ntimer
    205           STOP
    206         END IF
    207 
    208         IF (LEN_TRIM(timertag(id)) == 0) THEN   ! 检查计时器是否已经开始运行
    209           WRITE(*,"(A,I4)") "Error: Need to call start_timer before end_timing, id:", id
    210           STOP
    211         END IF
    212         !$acc wait
    213         
    214         CALL SYSTEM_CLOCK( COUNT=icountnew )    ! 获取当前时间,计算耗时
    215         rtimer(id) = ( REAL(icountnew - icountold(id), KIND(rtimer(id))) ) / REAL(icountrate, KIND(rtimer(id)))
    216     END SUBROUTINE end_timer
    217 
    218     SUBROUTINE print_timers()   ! 打印计时
    219         IMPLICIT NONE
    220 
    221         INTEGER :: id
    222 
    223         WRITE(*,"(A)") "----------------------------"
    224         WRITE(*,"(A)") "Timers:"
    225         WRITE(*,"(A)") "----------------------------"
    226         DO id = 1, ntimer
    227             IF ( rtimer(id) > 0.0D0 ) THEN
    228                 WRITE(*,"(A15,A2,F8.2,A)") timertag(id), ": ", rtimer(id)*1.0D3, " ms"
    229             END IF
    230         END DO    
    231         WRITE(*,"(A)") "----------------------------"
    232     END SUBROUTINE print_timers
    233   
    234 END MODULE m_timing
    235 
    236 ! m_setup.f90,初始化和清理
    237 MODULE m_setup
    238     USE m_config,  ONLY: nstop, nout, nx, ny, nz
    239     USE m_fields,  ONLY: t,qv
    240     USE m_timing,  ONLY: init_timers, start_timer, end_timer
    241 
    242     IMPLICIT NONE
    243 
    244 CONTAINS
    245     SUBROUTINE initialize() ! 初始化计时器和设备
    246         IMPLICIT NONE
    247         
    248         INTEGER, PARAMETER :: itiminit = 1  ! 计时器编号
    249         INTEGER :: i, j, k                  
    250 
    251 #ifdef _OPENACC
    252         WRITE(*,"(A)") "Running with OpenACC"       
    253 #else
    254         WRITE(*,"(A)") "Running without OpenACC"   
    255 #endif
    256 
    257         WRITE(*,"(A)") "Initialize"
    258 
    259         CALL init_timers()
    260         CALL start_timer( itiminit, "Initialization" )
    261         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
    262 
    263         DO k =1, nz
    264             DO j = 1, ny
    265                 DO i = 1, nx
    266                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
    267                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
    268                 END DO
    269             END DO
    270         END DO
    271 
    272 #ifdef _OPENACC
    273         CALL initialize_gpu()
    274 #endif
    275 
    276         CALL end_timer( itiminit )
    277     END SUBROUTINE initialize
    278 
    279     SUBROUTINE initialize_gpu()! 让 GPU 跑一个小内核来初始化
    280         IMPLICIT NONE
    281 
    282         INTEGER :: temp(16)
    283         INTEGER :: i
    284 
    285         !$acc parallel loop
    286         DO i = 1, 16
    287             temp(i) = 1
    288         END DO
    289 
    290         IF (SUM(temp) == 16) THEN
    291             WRITE(*,"(A)") "GPU initialized"
    292         ELSE
    293             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
    294             STOP
    295         END IF
    296     END SUBROUTINE initialize_gpu
    297 
    298     SUBROUTINE cleanup()! 清扫 t 和 qv 的内存
    299         IMPLICIT NONE
    300 
    301         DEALLOCATE( t, qv )
    302     END SUBROUTINE cleanup
    303 
    304 END MODULE m_setup

    ● OpenMP 优化,改了 m_io.f90,m_parametrizations.f90,m_setup.f90

      1 ! m_io.f90
      2 MODULE m_io
      3     USE m_config,  ONLY: nout, nx, ny, nz
      4     USE m_fields,  ONLY: qv
      5 
      6     IMPLICIT NONE
      7 
      8 CONTAINS
      9     SUBROUTINE write_output(ntstep)
     10         IMPLICIT NONE
     11 
     12         INTEGER, INTENT(IN) :: ntstep
     13         INTEGER :: i, j, k
     14         REAL*8  :: qv_mean           
     15         
     16         IF (MOD(ntstep, nout) /= 0) RETURN
     17 
     18         qv_mean = 0.0D0                   
     19         DO k = 1, nz
     20             !$OMP PARALLEL DO PRIVATE(i,j) SHARED(k,qv) REDUCTION(+:qv_mean) 
     21             DO j = 1, ny
     22                 DO i = 1, nx
     23                     qv_mean = qv_mean + qv(i,j,k)
     24                 END DO
     25             END DO
     26         END DO
     27         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
     28 
     29         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
     30     END SUBROUTINE write_output
     31 
     32 END MODULE m_io
     33 
     34 ! m_parametrizations.f90
     35 MODULE m_parametrizations
     36     IMPLICIT NONE
     37 
     38     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
     39     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
     40 
     41 CONTAINS
     42     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
     43         IMPLICIT NONE
     44     
     45         INTEGER, INTENT(IN)    :: npx, npy, nlev
     46         REAL*8,  INTENT(IN)    :: t(:,:,:)      
     47         REAL*8,  INTENT(OUT)   :: qc(:,:,:)     
     48         REAL*8,  INTENT(INOUT) :: qv(:,:,:)     
     49         INTEGER :: i, j, k
     50 
     51         !$OMP PARALLEL
     52         DO k = 1, nlev
     53             !$OMP DO PRIVATE(i,j)
     54             DO j = 1, npy
     55                 DO i = 1, npx
     56                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
     57                     qc(i,j,k) = cs4 * qv(i,j,k)
     58                 END DO
     59             END DO
     60         END DO
     61         !$OMP END PARALLEL
     62     END SUBROUTINE saturation_adjustment
     63 
     64     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
     65         IMPLICIT NONE
     66 
     67         INTEGER, INTENT(IN)   :: npx, npy, nlev
     68         REAL*8, INTENT(INOUT) :: t(:,:,:)      
     69         REAL*8, INTENT(IN)    :: qc(:,:,:)     
     70         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
     71         INTEGER :: i, j, k
     72 
     73         !$OMP PARALLEL
     74         DO k = 2, nlev
     75             !$OMP DO PRIVATE(i,j)
     76             DO j = 1, npy
     77                 DO i = 1, npx
     78                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
     79                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
     80                 END DO
     81             END DO
     82         END DO
     83         !$OMP END PARALLEL
     84     END SUBROUTINE microphysics
     85 
     86 END MODULE m_parametrizations
     87 
     88 ! m_setup.f90
     89 MODULE m_setup
     90     USE m_config,  ONLY: nstop, nout, nx, ny, nz
     91     USE m_fields,  ONLY: t,qv
     92     USE m_timing,  ONLY: init_timers, start_timer, end_timer
     93 
     94     IMPLICIT NONE
     95 
     96 CONTAINS
     97     SUBROUTINE initialize() ! 初始化计时器和设备
     98         IMPLICIT NONE
     99 
    100         INTEGER, PARAMETER :: itiminit = 1  ! timer ID
    101         INTEGER :: i, j, k                  ! loop indices
    102         INTEGER :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM
    103         
    104 #ifdef _OPENACC
    105         WRITE(*,"(A)") "Running with OpenACC"       
    106 #else
    107         WRITE(*,"(A)") "Running without OpenACC"
    108 #ifdef _OPENMP
    109         !$OMP PARALLEL 
    110         IF (OMP_GET_THREAD_NUM()==0) THEN
    111             WRITE(*,"(A,I4,A)") "Running with OpenMP with ", OMP_GET_NUM_THREADS(), " threads"
    112         END IF
    113         !$OMP END PARALLEL
    114 #endif
    115 #endif
    116         WRITE(*,"(A)") "Initialize"
    117 
    118         CALL init_timers()
    119         CALL start_timer( itiminit, "Initialization" )
    120         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
    121 
    122         DO k =1, nz
    123             DO j = 1, ny
    124                 DO i = 1, nx
    125                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
    126                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
    127                 END DO
    128             END DO
    129         END DO
    130 
    131 #ifdef _OPENACC
    132         CALL initialize_gpu()
    133 #endif
    134 
    135         CALL end_timer( itiminit )
    136     END SUBROUTINE initialize
    137 
    138     SUBROUTINE initialize_gpu()
    139         IMPLICIT NONE
    140 
    141         INTEGER :: temp(16)
    142         INTEGER :: i
    143 
    144         !$acc parallel loop
    145         DO i = 1, 16
    146             temp(i) = 1
    147         END DO
    148 
    149         IF (SUM(temp) == 16) THEN
    150             WRITE(*,"(A)") "GPU initialized"
    151         ELSE
    152             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
    153             STOP
    154         END IF
    155     END SUBROUTINE initialize_gpu
    156 
    157     SUBROUTINE cleanup()
    158         IMPLICIT NONE
    159 
    160         DEALLOCATE( t, qv )
    161     END SUBROUTINE cleanup
    162 
    163 END MODULE m_setup

    ● OpenACC 优化,改了 m_io.f90,m_parametrizations.f90,m_physics.f90,m_setup.f90。树上的优化 04 设计算法改动,没有参与比较

      1 ! m_io.f90
      2 MODULE m_io
      3     USE m_config,  ONLY: nout, nx, ny, nz
      4     USE m_fields,  ONLY: qv
      5 
      6     IMPLICIT NONE
      7 
      8 CONTAINS
      9     SUBROUTINE write_output(ntstep)
     10         IMPLICIT NONE
     11 
     12         INTEGER, INTENT(IN) :: ntstep    
     13         INTEGER :: i, j, k
     14         REAL*8  :: qv_mean               
     15         
     16         IF (MOD(ntstep, nout) /= 0) RETURN
     17     
     18         !$acc data present(qv)
     19         qv_mean = 0.0D0
     20         !$acc parallel 
     21         !$acc loop gang vector collapse(3) reduction(+:qv_mean)
     22         DO k = 1, nz
     23             DO j = 1, ny
     24                 DO i = 1, nx
     25                     qv_mean = qv_mean + qv(i,j,k)
     26                 END DO
     27             END DO
     28         END DO
     29         !$acc end parallel
     30         !$acc end data
     31         qv_mean = qv_mean / REAL(nx * ny * nz, KIND(qv_mean))
     32 
     33         WRITE(*,"(A,I6,A,ES18.8)") "Step: ", ntstep, ", mean(qv) =", qv_mean
     34     END SUBROUTINE write_output
     35 
     36 END MODULE m_io
     37 
     38 ! m_parametrizations.f90
     39 MODULE m_parametrizations
     40     IMPLICIT NONE
     41 
     42     REAL*8, parameter ::  cs1 = 1.0D-6, cs2 = 0.02D0, cs3 = 7.2D0, cs4=0.1D0, t0=273.0D0
     43     REAL*8, parameter ::  cm1 = 1.0D-6, cm2=25.0D0, cm3=0.2D0, cm4=100.0D0
     44 
     45 CONTAINS
     46     SUBROUTINE saturation_adjustment(npx, npy, nlev, t, qc, qv)
     47         IMPLICIT NONE
     48     
     49         INTEGER, INTENT(IN)    :: npx, npy, nlev
     50         REAL*8,  INTENT(IN)    :: t(:,:,:)      
     51         REAL*8,  INTENT(OUT)   :: qc(:,:,:)     
     52         REAL*8,  INTENT(INOUT) :: qv(:,:,:)     
     53         INTEGER :: i, j, k
     54 
     55         !$acc data present(t,qv,qc)
     56         !$acc parallel
     57         !$acc loop gang vector collapse(3) 
     58         DO k = 1, nlev
     59             DO j = 1, npy
     60                 DO i = 1, npx
     61                     qv(i,j,k) = qv(i,j,k) + cs1*EXP(cs2*( t(i,j,k) - t0 )/( t(i,j,k) - cs3) )
     62                     qc(i,j,k) = cs4 * qv(i,j,k)
     63                 END DO
     64             END DO
     65         END DO
     66         !$acc end parallel
     67         !$acc end data
     68     END SUBROUTINE saturation_adjustment
     69 
     70     SUBROUTINE microphysics(npx, npy, nlev, t, qc, qv)
     71         IMPLICIT NONE
     72 
     73         INTEGER, INTENT(IN)   :: npx, npy, nlev
     74         REAL*8, INTENT(INOUT) :: t(:,:,:)      
     75         REAL*8, INTENT(IN)    :: qc(:,:,:)     
     76         REAL*8, INTENT(INOUT) :: qv(:,:,:)     
     77         INTEGER :: i, j, k
     78         !$acc data present(t,qv,qc)    
     79         !$acc parallel
     80         !$acc loop seq
     81         DO k = 2, nlev
     82             !$acc loop gang 
     83             DO j = 1, npy
     84                 !$acc loop vector
     85                 DO i = 1, npx
     86                     qv(i, j, k) = qv(i,j,k-1) + cm1*(t(i,j,k)-cm2)**cm3
     87                     t(i, j, k)  = t(i, j, k)*( 1.0D0 - cm4*qc(i,j,k)+qv(i,j,k) )
     88                 END DO
     89             END DO
     90         END DO
     91         !$acc end parallel
     92         !$acc end data
     93     END SUBROUTINE microphysics
     94 
     95 END MODULE m_parametrizations
     96 
     97 ! m_physics.f90
     98 MODULE m_physics
     99     USE m_config,           ONLY: nx, ny, nz
    100     USE m_fields,           ONLY: qv, t
    101     USE m_parametrizations, ONLY: saturation_adjustment, microphysics
    102 
    103     IMPLICIT NONE
    104 
    105     REAL*8, ALLOCATABLE :: qc(:,:,:)    ! 提前声明,由 init_physics 和 finalize_physics 来申请和释放
    106 
    107 CONTAINS
    108     SUBROUTINE physics()
    109         IMPLICIT NONE        
    110         
    111         CALL saturation_adjustment(nx, ny, nz, t, qc, qv)
    112         CALL microphysics(nx, ny, nz, t, qc, qv)
    113     END SUBROUTINE physics
    114 
    115     SUBROUTINE init_physics()
    116         IMPLICIT NONE        
    117         
    118         ALLOCATE( qc(nx,ny,nz) )
    119         !$acc enter data create(qc)
    120     END SUBROUTINE init_physics
    121 
    122     SUBROUTINE finalize_physics()
    123         IMPLICIT NONE
    124         1
    125         !$acc exit data delete(qc)
    126         DEALLOCATE(qc)
    127     END SUBROUTINE finalize_physics
    128 
    129 END MODULE m_physics
    130 
    131 ! m_setup.f90
    132 MODULE m_setup
    133     USE m_config,  ONLY: nstop, nout, nx, ny, nz
    134     USE m_fields,  ONLY: t,qv
    135     USE m_timing,  ONLY: init_timers, start_timer, end_timer
    136     USE m_physics, ONLY: init_physics, finalize_physics
    137     IMPLICIT NONE
    138 
    139 CONTAINS
    140     SUBROUTINE initialize() ! 初始化计时器和设备
    141         IMPLICIT NONE
    142         
    143         INTEGER, PARAMETER :: itiminit = 1
    144         INTEGER :: i, j, k                  
    145 
    146 #ifdef _OPENACC
    147         WRITE(*,"(A)") "Running with OpenACC"       
    148 #else
    149         WRITE(*,"(A)") "Running without OpenACC"   
    150 #endif
    151 
    152         WRITE(*,"(A)") "Initialize"
    153 
    154         CALL init_timers()
    155         CALL start_timer( itiminit, "Initialization" )
    156         ALLOCATE( t(nx,ny,nz), qv(nx,ny,nz) )
    157 
    158         !$acc enter data create(t,qv)
    159         DO k =1, nz
    160             DO j = 1, ny
    161                 DO i = 1, nx
    162                     t(i,j,k)  = 293.0D0 * (1.2D0 + 0.07D0 * COS(6.2D0 * REAL(i+j+k) / REAL(nx+ny+nz)))
    163                     qv(i,j,k) = 1.0D-6 * (1.1D0 + 0.13D0 * COS(5.3D0 * REAL(i+j+k) / REAL(nx*ny*nz)))
    164                 END DO
    165             END DO
    166         END DO
    167         !$acc update device(t,qv)
    168 
    169 #ifdef _OPENACC
    170         CALL initialize_gpu()
    171 #endif
    172         CALL init_physics()
    173         CALL end_timer( itiminit )
    174     END SUBROUTINE initialize
    175 
    176     SUBROUTINE initialize_gpu()
    177         IMPLICIT NONE
    178 
    179         INTEGER :: temp(16)
    180         INTEGER :: i
    181 
    182         !$acc parallel loop
    183         DO i = 1, 16
    184             temp(i) = 1
    185         END DO
    186 
    187         IF (SUM(temp) == 16) THEN
    188             WRITE(*,"(A)") "GPU initialized"
    189         ELSE
    190             WRITE(*,"(A,I4)") "Error: Problem encountered initializing the GPU"
    191             STOP
    192         END IF
    193     END SUBROUTINE initialize_gpu
    194 
    195     SUBROUTINE cleanup()
    196         IMPLICIT NONE
    197     
    198         !$acc exit data delete(t,qv)
    199         DEALLOCATE( t, qv )
    200         CALL finalize_physics()
    201   END SUBROUTINE cleanup
    202 
    203 END MODULE m_setup

    ● 所有的输出结果。单独编译一个模式(而不使用默认的 makefile)时,在命令 pgf90 中要使用参数 -Mpreprocess,意思是将预编译器作用到 fortran 文件中,否则 m_setup.f90 中的 # 预编译命令会被当成错误

    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ make example_serial example_openmp example_openacc1 example_openacc2 example_openacc3 example_openacc4
    make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
    compiling m_config.f90
    compiling m_fields.f90
    compiling m_io.f90
    compiling m_parametrizations.f90
    compiling m_physics.f90
    compiling m_timing.f90
    compiling m_setup.f90
    compiling main.f90
    make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_serial'
    make[1]: Entering directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openmp'
    
    ... ! 类似上面的过程
    
    make[1]: Leaving directory '/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13/example_openacc4'
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_serial/example_serial
    Running without OpenACC
    Initialize
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :    17.28 ms
    Time loop      :   978.08 ms
    ----------------------------
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openmp/example_openmp
    Running without OpenACC
    Running with OpenMP with    1 threads
    Initialize
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :    17.96 ms
    Time loop      :   898.92 ms
    ----------------------------
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc1/example_openacc1
    Running with OpenACC
    Initialize
    GPU initialized
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :   191.11 ms
    Time loop      :  1044.35 ms
    ----------------------------
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc2/example_openacc2
    Running with OpenACC
    Initialize
    GPU initialized
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :   176.72 ms
    Time loop      :   142.11 ms
    ----------------------------
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc3/example_openacc3
    Running with OpenACC
    Initialize
    GPU initialized
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :   162.15 ms
    Time loop      :   121.77 ms
    ----------------------------
    cuan@CUAN:/media/cuan/02FCDA52FCDA4019/Code/ParallelProgrammingWithOpenACC-master/Chapter13$ example_openacc4/example_openacc4
    Running with OpenACC
    Initialize
    GPU initialized
    Start of time loop
    Step:     20, mean(qv) =    1.14302104E-04
    Step:     40, mean(qv) =    1.34041461E-04
    Step:     60, mean(qv) =    1.53710207E-04
    Step:     80, mean(qv) =    1.73309068E-04
    Step:    100, mean(qv) =    1.92838848E-04
    End of time loop
    ----------------------------
    Timers:
    ----------------------------
    Initialization :   152.47 ms
    Time loop      :   166.53 ms
    ----------------------------

    ● 所有的结果在 nvprof 中的图形。三张图分别为 “仅计算优化无数据优化”,“计算优化与数据优化”,“手工优化变量”

  • 相关阅读:
    HTML5 完美解决javascript中iphone手机和android手机复制文本到剪切板问题
    去除移动端alert/confirm的网址(url)
    项目通过tomcat部署到服务器,请求数据中文乱码问题
    JEECG中表单提交的中断
    mysql数据库1129错误
    java中比较两个double类型值的大小
    spring配置上传文件大小
    GET方式,获取服务器文件
    java中多个数字运算后值不对(失真)处理方法
    V-rep学习笔记:关节力矩控制
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9494656.html
Copyright © 2011-2022 走看看