#include <misc.h>
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: dynpkg --- Driver for the NASA finite-volume dynamical core
!
! !INTERFACE:
subroutine dynpkg( im, jm, km, u, v, &
ps, delp, pe, pk, pkz, &
peln, ptop, jfirst, jlast, &
kfirst, klast, klastp, phis, &
omga, cp, nq, q3, ndt, &
om, rair, ae, ns0, nc, &
pt, convt, iord, jord, &
kord, te0, uxy, vxy, psxy, &
delpxy, pexy, pkxy, pkzxy, pelnxy, &
phisxy, omgaxy, q3xy, ptxy, &
ifirstxy,ilastxy, jfirstxy,jlastxy )
! !USES:
use shr_kind_mod, only: r8 => shr_kind_r8
use dynamics_vars, only : coslon, sinlon, cosl5, sinl5, acosp, &
acap, rcap, sine, cosp, sinp, cose, &
ns, icd, jcd, ng_c, ng_d, ng_s, q3t, yzt, yzt2
use pmgrid, only: twod_decomp, myid_z, npr_z, npr_y, iam
use physconst, only: cappa, gravit
#if defined( SPMD )
use spmd_dyn, only : q_to_qxy, &
ijk_yz_to_xy, ijk_xy_to_yz, pexy_to_pe, &
m_ttrans, q_ttrans, r_ttrans, r_to_rxy
use mod_comm, only : mp_send3d, mp_recv3d, mp_sendirr, mp_recvirr
#endif
implicit none
! !INPUT PARAMETERS:
integer, intent(in):: im ! dimension in east-west
integer, intent(in):: jm ! dimension in north-south
integer, intent(in):: km ! number of Lagrangian layers
integer, intent(in):: jfirst ! starting latitude index for MPI
integer, intent(in):: jlast ! ending latitude index for MPI
integer, intent(in):: kfirst ! starting vertical index for MPI
integer, intent(in):: klast ! ending vertical index for MPI
integer, intent(in):: klastp ! klast, except km+1 when klast=km
integer, intent(in):: nq ! total # of tracers to be advected
integer, intent(in):: nc ! declared dimension of q3
integer, intent(in):: ndt ! the large time step in seconds
! Also the mapping time step in this setup
integer, intent(in):: iord ! parameter controlling monotonicity in E-W
! recommendation: iord=4
integer, intent(in):: jord ! parameter controlling monotonicity in N-S
! recommendation: jord=4
integer, intent(in):: kord ! parameter controlling monotonicity in mapping
! recommendation: kord=4
integer, intent(in):: ifirstxy, ilastxy, jfirstxy, jlastxy ! xy decomposition
! index ranges
real(r8), intent(in):: phis(im,jfirst:jlast) ! surface geopotential (grav*zs)
real(r8), intent(in):: om ! angular velocity of earth's rotation
real(r8), intent(in):: cp ! heat capacity of air at constant pressure
real(r8), intent(in):: ae ! radius of the earth (m)
real(r8), intent(in):: rair ! Gas constant of the air
real(r8), intent(in):: ptop ! Pressure at model top (interface pres)
logical, intent(in):: convt ! Flag to control pt output
! If true: output pt, the virtual temperature
! If false: pt is updated
real(r8), intent(in):: phisxy(ifirstxy:ilastxy,jfirstxy:jlastxy) ! xy-decomposed version of phis
! !INPUT/OUTPUT PARAMETERS:
integer, intent(inout):: ns0 ! total number of splits for Lagrangian
! dynamics; a suitable value will be automatically
! determined if ns0 = 0 (i.e., if you don't know what
! value to use, try ns0 = 0)
real(r8), intent(inout):: pt(im,jfirst-ng_d:jlast+ng_d,kfirst:klast)
! scaled (virtual) potential temperature
real(r8), intent(inout):: ps(im,jfirst:jlast) ! Surface pressure (pa)
real(r8), intent(inout):: delp(im,jfirst:jlast,kfirst:klast) ! Pressure thickness
real(r8), intent(inout):: u(im,jfirst-ng_d:jlast+ng_s,kfirst:klast) ! u wind velocities, staggered grid
real(r8), intent(inout):: v(im,jfirst-ng_s:jlast+ng_d,kfirst:klast) ! v wind velocities, staggered grid
real(r8), intent(inout):: q3(im,jfirst-ng_d:jlast+ng_d,kfirst:klast,nc) ! Tracers
!--------------------------------------------------------------------------------------
! The following three arrays must be pre-computed as input to benergy(). They are NOT
! needed if consv=.false.; they are updated on output (to be used by physdrv).
! Please refer to routine pkez for the algorithm used to compute pkz
! from pe and pk.
!--------------------------------------------------------------------------------------
real(r8), intent(inout):: pe(im,kfirst:klast+1,jfirst:jlast) ! Pres at layer edges
real(r8), intent(inout):: pk(im,jfirst:jlast,kfirst:klast+1) ! pe**cappa
real(r8), intent(inout):: pkz(im,jfirst:jlast,kfirst:klast) ! finite-volume mean of pk
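! (For orientation, not authoritative: pkez forms the layer-mean pkz from the
! interface quantities, roughly as
!   pkz(i,j,k) = ( pk(i,j,k+1) - pk(i,j,k) )
!              / ( cappa*( peln(i,k+1,j) - peln(i,k,j) ) )
! i.e., the finite-volume mean of pe**cappa over the layer; see pkez for the
! exact form.)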
!--------------------------------------------------------------------------------------
! !OUTPUT (input values are not used):
!--------------------------------------------------------------------------------------
real(r8), intent(out):: te0 ! Total energy before dynamics
real(r8), intent(out):: peln(im,kfirst:klast+1,jfirst:jlast) ! log pressure (pe) at layer edges
real(r8), intent(out):: omga(im,kfirst:klast,jfirst:jlast) ! vertical pressure velocity (pa/sec)
! The following variables are xy-decomposed instantiations:
real(r8), intent(out):: uxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km) ! now unghosted, was N1
real(r8), intent(out):: vxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km)
real(r8), intent(out):: psxy(ifirstxy:ilastxy,jfirstxy:jlastxy)
real(r8), intent(out):: delpxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km)
real(r8), intent(out):: pexy(ifirstxy:ilastxy,km+1,jfirstxy:jlastxy)
real(r8), intent(out):: pkxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km+1)
real(r8), intent(out):: pkzxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km)
real(r8), intent(out):: pelnxy(ifirstxy:ilastxy,km+1,jfirstxy:jlastxy)
real(r8), intent(out):: omgaxy(ifirstxy:ilastxy,km,jfirstxy:jlastxy)
real(r8), intent(out):: q3xy(ifirstxy:ilastxy,jfirstxy:jlastxy,km,nc)
real(r8), intent(out):: ptxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km)
! !DESCRIPTION:
!
! Developer: Shian-Jiann Lin, NASA/GSFC; email: lin@dao.gsfc.nasa.gov
!
! Top view of D-grid prognostic variables: u, v, and delp (and other scalars)
!
! u(i,j+1)
! |
! v(i,j)---delp(i,j)---v(i+1,j)
! |
! u(i,j)
!
! External routine required: the user needs to supply a subroutine to set up
! the "Eulerian vertical coordinate" for remapping purposes.
! Currently this routine is named set_eta().
! In principle any terrain-following vertical
! coordinate can be used. The input to fvcore
! need not be on the same vertical coordinate
! as the output.
! If SPMD is defined, the PILGRIM communication
! library developed by Will Sawyer will be needed.
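!
! For orientation, a set_eta interface of roughly the following shape is
! assumed here (argument names are illustrative, not authoritative; consult
! the actual set_eta routine for its exact signature):
!
!   subroutine set_eta( km, ks, ptop, pint, ak, bk )
!     integer,  intent(in)  :: km                  ! number of Eulerian layers
!     integer,  intent(out) :: ks                  ! number of pure-pressure layers
!     real(r8), intent(out) :: ptop, pint          ! model-top / transition pressure
!     real(r8), intent(out) :: ak(km+1), bk(km+1)  ! hybrid coefficients, with
!                                                  ! pe(k) = ak(k) + bk(k)*ps
!   end subroutine set_eta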
!
! Remarks: values at poles for both u and v need not be defined; but values for
! all other scalars need to be defined at both poles (as polar cap mean
! quantities). Tracer advection is done "off-line" using the
! large time step. Consistency is maintained by using the time-accumulated
! Courant numbers and horizontal mass fluxes for the FFSL algorithm.
! The input "pt" can be either dry potential temperature
! defined as T/pkz (adiabatic case) or virtual potential temperature
! defined as T*/pkz (full phys case). If convt is true, pt is not updated.
! Instead, virtual temperature is output.
! pt is updated if convt is false.
! The user may set the value of nx to optimize the SMP performance.
! The optimal value of nx depends on the total number of available
! shared memory CPUs per node (NS). Assuming the maximum MPI
! decomposition is used in the y-direction, set nx=1 if
! NS <= 4; nx=4 if NS = 16.
!
! !REVISION HISTORY:
! SJL 99.04.13: Initial SMP version delivered to Will Sawyer
! WS 99.10.03: 1D MPI completed and tested;
! WS 99.10.11: Additional documentation
! WS 99.10.28: benergy and te_map added; arrays pruned
! SJL 00.01.01: SMP and MPI enhancements; documentation
! WS 00.07.13: Changed PILGRIM API
! WS 00.08.28: SPMD instead of MPI_ON
! AAM 00.08.10: Add kfirst:klast
! WS 00.12.19: phis now distr., LLNL2DModule initialized here
! WS 01.02.02: bug fix: parsplit only called for FIRST time
! WS 01.04.09: Added initialization of ghost regions
! WS 01.06.10: Removed if(first) section; use module
! AAM 01.06.27: Extract te_map call into separate routine
! AAM 01.07.13: Get rid of dynpkg2; recombine te_map;
! perform forward transposes for 2D decomposition
! WS 01.12.10: Ghosted PT (changes benergy, cd_core, te_map, hswf)
! WS 03.08.05: removed vars dcaf, rayf, ideal, call to hswf
! (idealized physics is now in physics package)
! WS 03.08.13: Removed ghost region from UXY
!
!EOP
!-----------------------------------------------------------------------
!BOC
! Local variables
integer i, j, k, iq ! Loop indices
real(r8) umax ! Maximum winds, m/s
parameter (umax = 300.0)
logical consv ! Flag to force conservation of total energy
parameter (consv = .false.)
integer nx ! # of split pieces in x-direction; for performance, the
parameter (nx = 4) ! user may set nx=1 if there is NO shared memory multitasking
integer ipe, it
integer nsplit, n, n2
integer incount, outcount
integer iqa, iqb, iqc, iqd, mq ! used for tracer transpose grouping
! Geometric arrays
! Move the following 3D arrays to an initialization routine?
real(r8), allocatable :: worka(:,:,:),dp0(:,:,:),cx(:,:,:),cy(:,:,:)
real(r8), allocatable :: mfx(:,:,:), mfy(:,:,:)
real(r8), allocatable :: delpf(:,:,:), uc(:,:,:), vc(:,:,:)
real(r8), allocatable :: dwz(:,:,:), pkc(:,:,:), wz(:,:,:)
real(r8), allocatable :: dpt(:,:,:)
real(r8), allocatable :: pkcc(:,:,:), wzc(:,:,:)
! The following variables are work arrays for xy=>yz transpose
real(r8), allocatable :: pkkp(:,:,:), wzkp(:,:,:), pekp(:,:,:)
! The following variables are xy instantiations
real(r8), allocatable :: mfxxy(:,:,:), dp0xy(:,:,:), wzxy(:,:,:)
! ps3 and psxy3 are dummy 3d variants of ps and psxy, resp.
real(r8), allocatable :: ps3(:,:,:), psxy3(:,:,:)
double precision zamda, zam5
logical first, fill
integer imh
real(r8) dt
real(r8) bdt
data first /.true./
data fill /.true./ ! perform a simple filling algorithm
! in case negatives were found
! Allocate temporary work arrays
! Change later to use pointers for SMP performance???
! (prime candidates: uc, vc, delpf)
allocate( worka(im,jfirst: jlast, kfirst:klast) )
allocate( dp0(im,jfirst: jlast, kfirst:klast) )
allocate( mfx(im,jfirst: jlast, kfirst:klast) )
allocate( mfy(im,jfirst: jlast+1, kfirst:klast) )
allocate( cx(im,jfirst-ng_d:jlast+ng_d,kfirst:klast) )
allocate( cy(im,jfirst: jlast+1, kfirst:klast) )
allocate( delpf(im,jfirst-ng_d:jlast+ng_d,kfirst:klast) )
allocate( uc(im,jfirst-ng_d:jlast+ng_d,kfirst:klast) )
allocate( vc(im,jfirst-2: jlast+2, kfirst:klast) )
allocate( dpt(im,jfirst-1: jlast+1, kfirst:klast) )
allocate( dwz(im,jfirst-1: jlast, kfirst:klast+1) )
allocate( pkc(im,jfirst-1: jlast+1, kfirst:klast+1) )
allocate( wz(im,jfirst-1: jlast+1, kfirst:klast+1) )
allocate( pkcc(im,jfirst : jlast , kfirst:klast+1) )
allocate( wzc(im,jfirst : jlast , kfirst:klast+1) )
allocate(pkkp(im,jfirst:jlast,kfirst:klast+1))
allocate(wzkp(im,jfirst:jlast,kfirst:klast+1))
allocate(pekp(im,kfirst:klastp,jfirst:jlast))
allocate(wzxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km+1))
allocate(mfxxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km))
allocate(dp0xy(ifirstxy:ilastxy,jfirstxy:jlastxy,km))
allocate(ps3(im,jfirst:jlast,kfirst:klast))
allocate(psxy3(ifirstxy:ilastxy,jfirstxy:jlastxy,km))
! First touch pkc and wz??? (bufferpack is multitasked in the vertical, but geop
! computations are parallel in the j-loop)
if ( km > 1 ) then ! not shallow water equations
if( consv ) then
! Compute globally integrated Total Energy (te0)
call t_startf('benergy')
call benergy(im, jm, km, u, v, &
pt, ng_d, ng_s, delp, pe, &
pk, pkz, phis, cp, te0, &
mfx, dp0, jfirst, jlast, kfirst, &
klast, klastp)
call t_stopf('benergy')
endif
endif
! Determine splitting
bdt = ndt
! Second level splitting
n2 = max ( 1, ns/4 )
nsplit = (ns+n2-1) / n2
dt = bdt / float(nsplit*n2)
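! For orientation, a worked example of the two-level splitting: with ns=8,
! n2 = max(1,8/4) = 2 outer (tracer) iterations and nsplit = (8+2-1)/2 = 4
! small dynamics steps per iteration, so dt = ndt/8. Tracers are advected
! once per outer iteration, using the CFL numbers and mass fluxes
! accumulated over its nsplit small steps.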
do 2000 n=1, n2
if( nq > 0 ) then
!$omp parallel do private(i, j, k)
do k=kfirst,klast
do j=jfirst,jlast
do i=1,im
! Save initial delp field before the small-time-step
! Initialize the CFL number accumulators: (cx, cy)
! Initialize total mass fluxes: (mfx, mfy)
dp0(i,j,k) = delp(i,j,k)
cx(i,j,k) = 0.
cy(i,j,k) = 0.
mfx(i,j,k) = 0.
mfy(i,j,k) = 0.
enddo
enddo
enddo
endif
do it=1, nsplit
if(it == nsplit .and. n == n2) then
ipe = 1 ! end of fvcore; output pe for te_map
elseif(it == 1 .and. n == 1) then
ipe = -1 ! start of cd_core
else
ipe = 0
endif
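! (With the example above, n2=2 and nsplit=4: ipe=-1 on the very first
! small step (n=1,it=1), ipe=1 on the very last (n=2,it=4), 0 otherwise.)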
! Call the Lagrangian dynamical core using the small time step
call t_startf('cd_core')
call cd_core(im, jm, km, nq, nx, &
jfirst, jlast, kfirst, klast, &
klastp, u, v, pt, delp, &
pe, pk, dt, ptop, umax, &
ae, rcap, cp, cappa, icd, &
jcd, iord, jord, ng_c, ng_d, &
ng_s, ipe, om, phis, &
cx, cy, mfx, mfy, delpf, &
uc, vc, pkz, dpt, worka, &
dwz, pkc, wz, phisxy, ptxy, pkxy, &
pexy, pkcc, wzc, wzxy, delpxy, pkkp, wzkp, &
pekp, ifirstxy, ilastxy, jfirstxy, jlastxy)
call t_stopf('cd_core')
enddo
if( nq .ne. 0 ) then
! Perform large-time-step scalar transport using the accumulated CFL
! numbers and mass fluxes
call t_startf('trac2d')
call trac2d( dp0, q3, nc, nq, cx, &
cy, mfx, mfy, iord, jord, &
ng_d, fill, im, jm, km, &
jfirst, jlast, kfirst, klast, pkz, &
worka )
call t_stopf('trac2d')
endif
2000 continue
if (twod_decomp .eq. 1) then
!
! Transpose ps, u, v, and q3 from yz to xy decomposition
!
! Note: pt, pe and pk will have already been transposed through the
! call to geopk in cd_core. geopk does not actually require a
! secondary xy decomposition; the direct 16-byte technique works just
! as well, perhaps better. However, the transpose method is used on the
! last call to avoid having to compute these three transposes now.
!
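! The forward transposes below overlap communication with local copying:
! each mp_sendirr posts a transpose, the next field is copied into its
! staging buffer (ps into the mfx scratch array, u and v into yzt2/yzt)
! while that transpose is presumably in flight, and the matching
! mp_recvirr then completes it.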
#if defined (SPMD)
call t_startf('transpose_total')
call t_startf('transpose_fwd')
! Transpose ps
! Embed in 3D array since transpose machinery cannot handle 2D arrays
!$omp parallel do private(i,j,k)
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
mfx(i,j,k) = ps(i,j)
enddo
enddo
enddo
call mp_sendirr(mfx, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, mfxxy)
!$omp parallel do private(i,j,k)
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
yzt2(i,j,k) = u(i,j,k)
enddo
enddo
enddo
call mp_recvirr(mfxxy, ijk_yz_to_xy%RecvDesc )
! Transpose u
call mp_sendirr(yzt2, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, uxy )
!$omp parallel do private(i,j)
do j = jfirstxy,jlastxy
do i = ifirstxy,ilastxy
psxy(i,j) = mfxxy(i,j,1)
enddo
enddo
!$omp parallel do private(i,j,k)
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
yzt(i,j,k) = v(i,j,k)
enddo
enddo
enddo
call mp_recvirr(uxy, ijk_yz_to_xy%RecvDesc )
! Transpose v
call mp_sendirr( yzt, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, vxy )
call mp_recvirr( vxy, ijk_yz_to_xy%RecvDesc )
call t_stopf('transpose_fwd')
call t_startf('transpose_qfwd')
! Transpose q3
iqa = 1
if (q_ttrans .ne. 0) then
iqb = m_ttrans
else
iqb = r_ttrans
endif
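! (A reading of the grouping logic, for orientation: the tracers are sent
! in q_ttrans groups of m_ttrans each, plus a remainder group of r_ttrans;
! e.g., 10 tracers with m_ttrans=4 would give q_ttrans=2 full groups and
! r_ttrans=2 leftovers. Each group's copy into q3t is overlapped with the
! transpose of the previous group.)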
!$omp parallel do private(i,j,k,iq)
do iq = iqa,iqb
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
q3t(i,j,k,iq) = q3(i,j,k,iq)
enddo
enddo
enddo
enddo
do mq = 1, q_ttrans
iqb = iqa + m_ttrans - 1
call mp_sendirr(q3t(:,:,:,iqa:iqb), q_to_qxy%SendDesc, &
q_to_qxy%RecvDesc, q3xy(:,:,:,iqa:iqb))
if (mq .lt. q_ttrans) then
iqc = iqa + m_ttrans
iqd = iqc + m_ttrans - 1
!$omp parallel do private(i,j,k,iq)
do iq = iqc, iqd
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
q3t(i,j,k,iq) = q3(i,j,k,iq)
enddo
enddo
enddo
enddo
elseif (r_ttrans .ne. 0) then
iqc = iqa + m_ttrans
iqd = iqc + r_ttrans - 1
!$omp parallel do private(i,j,k,iq)
do iq = iqc, iqd
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
q3t(i,j,k,iq) = q3(i,j,k,iq)
enddo
enddo
enddo
enddo
endif
call mp_recvirr( q3xy(:,:,:,iqa:iqb), q_to_qxy%RecvDesc )
iqa = iqa + m_ttrans
enddo
if (r_ttrans .ne. 0) then
iqb = iqa + r_ttrans - 1
call mp_sendirr(q3t(:,:,:,iqa:iqb), r_to_rxy%SendDesc, &
r_to_rxy%RecvDesc, q3xy(:,:,:,iqa:iqb))
call mp_recvirr(q3xy(:,:,:,iqa:iqb), r_to_rxy%RecvDesc )
endif
call t_stopf('transpose_qfwd')
call t_stopf('transpose_total')
#endif
else
call t_startf('transpose_total')
call t_startf('transpose_fwd')
do j = jfirst,jlast
do i = 1,im
psxy(i,j) = ps(i,j)
enddo
enddo
!$omp parallel do private(i,j,k)
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
uxy(i,j,k) = u(i,j,k)
vxy(i,j,k) = v(i,j,k)
enddo
enddo
enddo
call t_stopf('transpose_fwd')
call t_startf('transpose_qfwd')
!$omp parallel do private(i,j,k,iq)
do iq = 1,nc
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
q3xy(i,j,k,iq) = q3(i,j,k,iq)
enddo
enddo
enddo
enddo
call t_stopf('transpose_qfwd')
call t_stopf('transpose_total')
endif
if ( km > 1 ) then ! not shallow water equations
! Perform vertical remapping from the Lagrangian control-volume coordinate to
! the Eulerian coordinate as specified by the routine set_eta.
! Note that this finite-volume dycore is otherwise independent of the vertical
! Eulerian coordinate.
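! (For orientation: te_map is the total-energy-conserving remap, hence the
! name; when consv=.true., the te0 computed by benergy before the dynamics
! is passed in, presumably so total energy can be restored/conserved
! through the remap.)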
call t_startf('te_map')
!
! te_map requires uxy, vxy, psxy, pexy, pkxy, phisxy, q3xy, and ptxy
!
call te_map(consv, convt, psxy, omgaxy, pexy, &
delpxy, pkzxy, pkxy, ndt, im, &
jm, km, nx, jfirstxy, jlastxy, &
0, 0, 0, 0, 0, &
ifirstxy, ilastxy, &
nq, uxy, vxy, ptxy, q3xy, &
phisxy, cp, cappa, kord, pelnxy, &
te0, mfxxy, dp0xy, nc )
!
! te_map computes uxy, vxy, psxy, delpxy, pexy, pkxy, pkzxy,
! pelnxy, omgaxy, q3xy and ptxy.
!
call t_stopf('te_map')
endif
call t_startf('transpose_total')
call t_startf('transpose_bck1')
if ( .not. convt ) then
if (twod_decomp .eq. 1) then
#if defined( SPMD )
!
! Transpose delpxy to delp, pexy to pe, psxy to ps for simplified physics
! (for full_phys, these quantities are recomputed after physics advance)
!
call mp_sendirr(delpxy, ijk_xy_to_yz%SendDesc, &
ijk_xy_to_yz%RecvDesc, delp)
call mp_recvirr(delp, ijk_xy_to_yz%RecvDesc )
call mp_sendirr( pexy, pexy_to_pe%SendDesc, pexy_to_pe%RecvDesc, pe )
!$omp parallel do private(i,j,k)
do k=1,km
do j=jfirstxy,jlastxy
do i=ifirstxy,ilastxy
psxy3(i,j,k) = psxy(i,j)
enddo
enddo
enddo
call mp_recvirr( pe, pexy_to_pe%RecvDesc )
! Embed psxy in 3D array since transpose machinery cannot handle 2D arrays
call mp_sendirr(psxy3, ijk_xy_to_yz%SendDesc, &
ijk_xy_to_yz%RecvDesc, ps3)
call mp_recvirr(ps3, ijk_xy_to_yz%RecvDesc)
do j=jfirst,jlast
do i=1,im
ps(i,j) = ps3(i,j,kfirst)
enddo
enddo
#endif
else
do j = jfirst,jlast
do i = 1,im
ps(i,j) = psxy(i,j)
enddo
enddo
!$omp parallel do private(i,j,k)
do k = kfirst,klast
do j = jfirst,jlast
do i = 1,im
delp(i,j,k) = delpxy(i,j,k)
enddo
enddo
enddo
!$omp parallel do private(i,j,k)
do j = jfirst,jlast
do k = kfirst,klast+1
do i = 1,im
pe(i,k,j) = pexy(i,k,j)
enddo
enddo
enddo
endif
endif
call t_stopf('transpose_bck1')
call t_stopf('transpose_total')
deallocate( mfy )
deallocate( mfx )
deallocate( cy )
deallocate( cx )
deallocate( dp0 )
deallocate( delpf )
deallocate( uc )
deallocate( vc )
deallocate( dpt )
deallocate( pkc )
deallocate( dwz )
deallocate( wz )
deallocate( worka )
deallocate( pkcc )
deallocate( wzc )
deallocate( pkkp )
deallocate( wzkp )
deallocate( pekp )
deallocate( wzxy )
deallocate( mfxxy )
deallocate( dp0xy )
deallocate( ps3 )
deallocate( psxy3 )
!----------------------------------------------------------
! WS 03.08.05: removed idealized physics (Held-Suarez)
! from here (is now in Physics package).
!----------------------------------------------------------
!EOC
end subroutine dynpkg
!-----------------------------------------------------------------------