#include <misc.h>
!-----------------------------------------------------------------------
!BOP
! !ROUTINE: cd_core --- Dynamical core for both C- and D-grid Lagrangian
! dynamics
!
! !INTERFACE:
subroutine cd_core(im, jm, km, nq, nx, &
jfirst, jlast , kfirst, klast, klastp, u, &
v, pt, delp, pe, pk, &
dt, ptopin, umax, ae, rcap, &
cp, akap, iord_c, jord_c, iord_d, jord_d, &
ng_c, ng_d, ng_s, ipe, om, hs, &
cx3 , cy3, mfx, mfy, &
delpf, uc, vc, ptc, dpt, ptk, &
wz3, pkc, wz, hsxy, ptxy, pkxy, &
pexy, pkcc, wzc, wzxy, delpxy, pkkp, wzkp, &
pekp, ifirstxy, ilastxy, jfirstxy, jlastxy )
! !USES:
use shr_kind_mod, only: r8 => shr_kind_r8
use sw_core, only: c_sw, d_sw
use pft_module, only: fftfax, pft2d, pft_cf
use dynamics_vars, only: sinp, cosp, cose, acosp, &
sinlon, coslon, cosl5, sinl5, &
ak, bk, ptop, ns, yzt
use pmgrid, only: twod_decomp, myid_y, myid_z, npr_y, npr_z, iam
use pmgrid, only: ompouter
#if defined( SPMD )
use spmd_dyn
, only: pkxy_to_pkc, ijk_yz_to_xy, geopk16byte
use mod_comm, only: mp_send4d_ns, mp_recv4d_ns, &
mp_send2_ns, mp_recv2_ns, &
mp_send3d_2, mp_recv3d_2, &
mp_send3d, mp_recv3d, &
mp_sendirr, mp_recvirr
#define CPP_PRT_PREFIX if(iam==0)
#else
#define CPP_PRT_PREFIX
#endif
implicit none
! !INPUT PARAMETERS:
! Input parameters:
integer im, jm, km
integer nx ! # of split pieces in longitude direction
integer jfirst
integer jlast
integer kfirst
integer klast
integer klastp ! klast, except km+1 when klast=km
integer ifirstxy,ilastxy ! xy-decomp. longitude ranges
integer jfirstxy,jlastxy ! xy-decomp. latitude ranges
integer ipe ! ipe=1:  last small timestep (end of cd_core sequence)
! ipe=-1: first small timestep (start of cd_core sequence)
! ipe=0:  intermediate small timesteps
integer nq ! # of tracers to be advected by trac2d
integer iord_c, jord_c ! scheme order on C grid in X and Y dir.
integer iord_d, jord_d ! scheme order on D grid in X and Y dir.
integer ng_c ! ghost latitudes on C grid
integer ng_d ! ghost lats on D (Max NS dependencies, ng_d >= ng_c)
integer ng_s ! max(ng_c+1,ng_d) significant if ng_c = ng_d
real(r8) ae ! Radius of the Earth (m)
real(r8) om ! rotation rate
real(r8) ptopin
real(r8) umax
real(r8) dt !small time step in seconds
real(r8) rcap
real(r8) cp
real(r8) akap
! Input time independent arrays:
real(r8) hs(im,jfirst:jlast) !surface geopotential
real(r8) hsxy(ifirstxy:ilastxy,jfirstxy:jlastxy) !surface geopotential XY-decomp.
! !INPUT/OUTPUT PARAMETERS:
! Prognostic variables:
real(r8) u(im,jfirst-ng_d:jlast+ng_s,kfirst:klast) ! u-Wind (m/s)
real(r8) v(im,jfirst-ng_s:jlast+ng_d,kfirst:klast) ! v-Wind (m/s)
real(r8) delp(im,jfirst:jlast,kfirst:klast) ! Delta pressure (pascal)
real(r8) pt(im,jfirst-ng_d:jlast+ng_d,kfirst:klast)! Scaled-Potential temperature
! Input/output: accumulated winds & mass fluxes on c-grid for large-
! time-step transport
real(r8) cx3(im,jfirst-ng_d:jlast+ng_d,kfirst:klast)! Accumulated Courant number in X
real(r8) cy3(im,jfirst:jlast+1,kfirst:klast) ! Accumulated Courant number in Y
real(r8) mfx(im,jfirst:jlast,kfirst:klast) ! Mass flux in X (unghosted)
real(r8) mfy(im,jfirst:jlast+1,kfirst:klast) ! Mass flux in Y
! !OUTPUT PARAMETERS:
real(r8) pe(im,kfirst:klast+1,jfirst:jlast) ! Edge pressure (pascal)
real(r8) pk(im,jfirst:jlast,kfirst:klast+1) ! Pressure to the kappa
real(r8) ptxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km) ! Potential temperature XY decomp
real(r8) pkxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km+1) ! P-to-the-kappa XY decomp
real(r8) pexy(ifirstxy:ilastxy,km+1,jfirstxy:jlastxy) ! Edge pressure XY decomp
! Input work arrays:
real(r8) delpf(im,jfirst-ng_d:jlast+ng_d,kfirst:klast) ! filtered delp
real(r8) uc(im,jfirst-ng_d:jlast+ng_d,kfirst:klast) ! u-Winds on C-grid
real(r8) vc(im,jfirst-2: jlast+2, kfirst:klast) ! v-Winds on C-grid
real(r8) ptc(im,jfirst:jlast,kfirst:klast)
real(r8) ptk(im,jfirst:jlast,kfirst:klast)
real(r8) dpt(im,jfirst-1:jlast+1,kfirst:klast)
real(r8) wz3(im,jfirst-1:jlast ,kfirst:klast+1)
real(r8) pkc(im,jfirst-1:jlast+1,kfirst:klast+1)
real(r8) wz(im,jfirst-1:jlast+1,kfirst:klast+1)
real(r8) pkcc(im,jfirst:jlast,kfirst:klast+1)
real(r8) wzc(im,jfirst:jlast,kfirst:klast+1)
real(r8) wzxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km+1)
real(r8) delpxy(ifirstxy:ilastxy,jfirstxy:jlastxy,km)
real(r8) pkkp(im,jfirst:jlast,kfirst:klast+1)
real(r8) wzkp(im,jfirst:jlast,kfirst:klast+1)
real(r8) pekp(im,kfirst:klastp,jfirst:jlast)
! !DESCRIPTION:
! Perform a dynamical update for one small time step; the small
! time step is limited by the fastest wave within the Lagrangian control-
! volume
!
! !REVISION HISTORY:
! SJL 99.01.01: Original SMP version
! WS 99.04.13: Added jfirst:jlast concept
! SJL 99.07.15: Merged c_core and d_core to this routine
! WS 99.09.07: Restructuring, cleaning, documentation
! WS 99.10.18: Walkthrough corrections; frozen for 1.0.7
! WS 99.11.23: Pruning of some 2-D arrays
! SJL 99.12.23: More comments; general optimization; reduction
! of redundant computation & communication
! WS 00.05.14: Modified ghost indices per Kevin's definition
! WS 00.07.13: Changed PILGRIM API
! WS 00.08.28: Cosmetic changes: removed old loop limit comments
! AAM 00.08.30: Introduced kfirst,klast
! WS 00.12.01: Replaced MPI_ON with SPMD; hs now distributed
! WS 01.04.11: PILGRIM optimizations for begin/endtransfer
! WS 01.05.08: Optimizations in the call of c_sw and d_sw
! AAM 01.06.27: Reinstituted 2D decomposition for use in ccm
! WS 01.12.10: Ghosted PT, code now uses mod_comm primitives
! WS 01.12.31: Removed vorticity damping, ghosted U,V,PT
! WS 02.01.15: Completed transition to mod_comm
! WS 02.07.04: Fixed 2D decomposition bug dest/src for mp_send3d
! WS 02.09.04: Integrated fvgcm-1_3_71 zero diff. changes by Lin
! WS 03.07.22: Removed HIGH_P option; this is outdated
! WS 03.10.15: Fixed hack of 00.04.13 for JORD>1 JCD=1, in clean way
!
!EOP
!---------------------------------------------------------------------
!BOC
! Local 2D arrays:
real(r8) wk(im,jfirst: jlast+2)
real(r8) wk1(im,jfirst-1:jlast+1)
real(r8) wk2(im,jfirst-ng_d:jlast+ng_d)
real(r8) wk3(im,jfirst-1:jlast+1)
real(r8) p1d(im)
! Local scalars
real(r8) ptmp, pint, press
real(r8) rat, ycrit
real(r8) dt0, dt5
real(r8) dl, dp
integer i, j, k
integer ks
integer js1g1, js2g0, js2g1, js2gc
integer jn2g0, jn1g1, jn1gc
integer iord , jord
integer ktot, ktotp
real(r8) pi
real(r8) zt_c
real(r8) zt_d
real(r8) tau, fac, pk4
real(r8) tiny
parameter (tiny = 1.e-10)
! Declare permanent local arrays
integer ifax(13) !ECMWF fft
real(r8), allocatable, save :: trigs(:)
real(r8), allocatable, save :: fc(:), f0(:)
real(r8), allocatable, save :: dc(:,:), de(:,:), sc(:), se(:)
real(r8), allocatable, save :: cdx(:,:), cdy(:,:)
real(r8), allocatable, save :: dtdx(:), dtdxe(:), txe5(:), dtxe5(:)
real(r8), allocatable, save :: dyce(:), dx(:) , rdx(:), cy(:)
real(r8), allocatable, save :: dtdx2(:), dtdx4(:), dxdt(:), dxe(:)
real(r8), allocatable, save :: cye(:), dycp(:), rdxe(:)
real(r8) rdy, dtdy, dydt, dtdy5, tdy5
data dt0 / 0./
save ifax
save dtdy, dydt, dtdy5, tdy5, rdy
save dl, dp
save zt_c, zt_d
#if defined( SPMD )
integer dest, src
#endif
!******************************************************************
!******************************************************************
!
! IMPORTANT CODE OPTIONS - SEE BELOW
!
!******************************************************************
!******************************************************************
! Option for which version of geopk to use with yz decomposition.
! If geopk16byte is true, version geopk16 is used. It computes local partial sums in z
! and then communicates those sums to combine them. It can use 16-byte arithmetic
! to preserve bit-for-bit accuracy (DSIZE=16) or 8-byte arithmetic for speed (DSIZE=8).
! The communication is semi-global in z.
! If geopk16byte=false, variables are transposed to/from xy decomposition
! for use in geopk.
! On last small timestep (ipe=1) for D-grid, the version of geopk that uses transposes
! is called regardless, as some transposed quantities are required for
! the te_map phase.
! For non-SPMD mode, geopk[cd]16 are set to false.
logical geopkc16, geopkd16
geopkc16 = .false.
geopkd16 = .false.
#if defined( SPMD )
if (geopk16byte) then
geopkc16 = .true.
if (ipe /= 1) geopkd16 = .true.
endif
#endif
!******************************************************************
ktot = klast - kfirst + 1
ktotp = ktot + 1
#if defined( SPMD )
call t_startf('send_uv')
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_s, ng_d, u )
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_s, v )
call t_stopf('send_uv')
#endif
! Set general loop limits
! jfirst >= 1; jlast <= jm
js1g1 = max(1,jfirst-1)
js2g0 = max(2,jfirst)
js2g1 = max(2,jfirst-1)
jn2g0 = min(jm-1,jlast)
jn1g1 = min(jm,jlast+1)
! Construct C-grid dependent loop limits
js2gc = max(2,jfirst-ng_c) ! NG latitudes on S (starting at 2)
jn1gc = min(jm,jlast+ng_c) ! ng_c latitudes on N (ending at jm)
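! Naming convention for the limits above (inferred from their definitions,
! not stated in any header): in jsMgN / jnMgN, M is the global bound (row M
! counted from the south or north pole) and N the number of ghost rows
! allowed past jfirst/jlast ("gc" = ng_c ghost rows). E.g. js2g1 =
! max(2,jfirst-1) starts at global row 2 but may reach one ghost row south
! of this task's domain; jn1g1 = min(jm,jlast+1) may extend one row north,
! up to the pole row jm.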
if( abs(dt0-dt) > 0.1 ) then
if ( .not. allocated( dtdx ) ) then
allocate(dtdx(jm),dtdx2(jm), dtdx4(jm), dtdxe(jm), dxdt(jm), &
dxe(jm), cye(jm),dycp(jm),rdxe(jm), &
txe5(jm), dtxe5(jm),dyce(jm), &
dx(jm),rdx(jm),cy(jm) )
allocate( trigs(3*im/2+1) )
allocate( sc(js2g0:jn2g0), se(js2g0:jn1g1) )
allocate( dc(im,js2g0:jn2g0), de(im,js2g0:jn1g1) )
call fftfax(im, ifax, trigs)
! Determine ycrit such that effective DX >= DY
pi = 4.d0 * datan(1.d0)
rat = float(im)/float(2*(jm-1))
ycrit = acos( min(0.81, rat) ) * (180./pi)
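! A worked example of the ycrit formula (illustrative resolution only):
! for im=144, jm=91, rat = 144/180 = 0.8, so
!   ycrit = acos(0.8)*(180./pi) ~ 36.9 degrees,
! i.e. the polar filter acts poleward of ~37 deg, where the zonal grid
! length ae*dl*cos(lat) first falls below the meridional length ae*dp.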
call pft_cf(im, jm, js2g0, jn2g0, jn1g1, sc, se, dc, de, &
cosp, cose, ycrit)
allocate( cdx(js2g0:jn1g1,kfirst:klast) )
allocate( cdy(js2g0:jn1g1,kfirst:klast) )
allocate( f0(jfirst-ng_s-1:jlast+ng_d) ) ! 000304 bug fix: ng_s not ng_d
allocate( fc(js2gc:jn1gc) )
do j=max(1,jfirst-ng_s-1),min(jm,jlast+ng_d) ! 000304 bug fix
f0(j) = (om+om)*sinp(j)
enddo
! Compute coriolis parameter at cell corners.
do j=js2gc, jn1gc ! Not an issue when ng_c = ng_d
fc(j) = 0.5*(f0(j) + f0(j-1))
enddo
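! f0 holds the Coriolis parameter 2*om*sin(lat) at cell centers (ghosted
! as far as u/v), and fc averages adjacent rows to place it at the cell
! corners, which is where the c_sw call below expects it.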
endif
dt0 = dt
dt5 = 0.5*dt
pi = 4.d0 * datan(1.d0)
dl = (pi+pi)/im
dp = pi/(jm-1)
rdy = 1./(ae*dp)
dtdy = dt *rdy
dtdy5 = dt5*rdy
dydt = (ae*dp) / dt
tdy5 = 0.5/dtdy
do j=2,jm-1
dx(j) = dl*ae*cosp(j)
rdx(j) = 1. / dx(j)
dtdx(j) = dt / dx(j)
dxdt(j) = dx(j) / dt
dtdx2(j) = 0.5*dtdx(j)
dtdx4(j) = 0.5*dtdx2(j)
dycp(j) = ae*dp/cosp(j)
cy(j) = rdy * acosp(j)
enddo
do j=2,jm
dxe(j) = ae*dl*cose(j)
rdxe(j) = 1. / dxe(j)
dtdxe(j) = dt / dxe(j)
dtxe5(j) = 0.5*dtdxe(j)
txe5(j) = 0.5/dtdxe(j)
cye(j) = 1. / (ae*cose(j)*dp)
dyce(j) = ae*dp/cose(j)
enddo
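! In spherical geometry the arrays above reduce to (dl = 2*pi/im,
! dp = pi/(jm-1); lat/late are cell-center/cell-edge latitudes):
!   dx(j)  = ae*cos(lat(j))*dl    zonal width at cell centers
!   dxe(j) = ae*cos(late(j))*dl   zonal width at cell edges
!   dy     = ae*dp                meridional width (uniform)
! The dt-bearing factors (dtdx*, dtdxe, dtxe5, txe5, ...) are dt, dt/2,
! or their inverses divided by these lengths, and the cosine-weighted
! variants (cy, cye, dycp, dyce) feed the sw_core operators; all are
! precomputed once per change of dt.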
! C-grid
zt_c = abs(umax*dt5) / (dl*ae)
CPP_PRT_PREFIX write(6,*) 'c-core: ', (180./pi)*acos(zt_c)
! D-grid
zt_d = abs(umax*dt) / (dl*ae)
CPP_PRT_PREFIX write(6,*) 'd-core: ', (180./pi)*acos(zt_d)
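! zt_c and zt_d are cos(latitude) thresholds derived from umax: poleward
! of acos(zt), a zonal wind of magnitude umax would give a Courant number
! |umax*dt|/(ae*cos(lat)*dl) above one for the half (C-grid) and full
! (D-grid) small step; the writes above report those latitudes in
! degrees. Both thresholds are passed down to c_sw/d_sw.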
if ( ptopin /= ptop) then
write(6,*) 'PTOP as input to cd_core != ptop from dynamics_vars'
stop
endif
!-----------------------------------------
! Divergence damping coeff. dx(2)*dy/(TAU)
!-----------------------------------------
do k=kfirst,klast
press = 0.5 * ( ak(k)+ak(k+1) + (bk(k)+bk(k+1))*1.E5 )
tau = 8. * (1.+ tanh(1.0*log(ptop/press)) )
tau = max(1., tau) / (128.*abs(dt))
do j=js2g0,jn1g1
fac = tau * ae / cose(j)
cdx(j,k) = fac*dp
cdy(j,k) = fac*dl
enddo
enddo
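! Reading off the code above (with p(k) the hybrid mid-layer pressure,
! reference surface pressure 1.e5 Pa):
!   tau(k)   = max( 1., 8.*(1.+tanh(log(ptop/p(k)))) ) / (128.*|dt|)
!   cdx(j,k) = tau(k)*ae*dp/cose(j),  cdy(j,k) = tau(k)*ae*dl/cose(j)
! so the damping is roughly uniform through the column and strengthens
! by up to a factor of 8 as p(k) approaches ptop, acting as a sponge
! near the model top.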
endif
if ( ipe == -1 .or. ns == 1 ) then ! starting cd_core
!$omp parallel do private(i, j, k, wk, wk2)
do k=kfirst,klast
do j=jfirst,jlast
do i=1,im
delpf(i,j,k) = delp(i,j,k)
enddo
enddo
call pft2d( delpf(1,js2g0,k), sc(js2g0), dc(1,js2g0), &
im, jn2g0-js2g0+1, ifax, trigs, wk, wk2 )
enddo
endif
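! delpf now holds a polar-filtered copy of delp, refreshed only when a
! c-d sequence starts (ipe == -1, or every step when ns == 1); on
! intermediate small steps the copy made at the end of the previous call
! (the ipe /= 1 branch near the bottom) is reused.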
#if defined( SPMD )
call t_startf('recv_uv')
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_s, ng_d, u )
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_s, v )
call t_stopf('recv_uv')
call t_startf('ghost_pt_delpf')
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, pt )
if ( ipe == -1 .or. ns == 1 ) then ! starting cd_core
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, delpf )
endif ! end if ipe = -1 check
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, pt )
if ( ipe == -1 .or. ns == 1 ) then ! starting cd_core
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, delpf )
endif ! end if ipe = -1 check
call t_stopf('ghost_pt_delpf')
#endif
call t_startf('c_core')
#if defined( NESTED_PAR )
!$omp parallel do private(i, j, k, iord, jord), num_threads(ompouter)
#else
!$omp parallel do private(i, j, k, iord, jord)
#endif
do k=kfirst,klast ! This is the main parallel loop.
if ( k <= km/8 ) then
iord = 1
jord = 1
else
iord = iord_c
jord = jord_c
endif
!-----------------------------------------------------------------
! Call the vertical independent part of the dynamics on the C-grid
!-----------------------------------------------------------------
call c_sw( u(1,jfirst-ng_d,k), v(1,jfirst-ng_s,k), &
pt(1,jfirst-ng_d,k), delp(1,jfirst,k), &
uc(1,jfirst-ng_d,k), vc(1,jfirst-2,k), &
ptc(1,jfirst,k), delpf(1,jfirst-ng_d,k), &
ptk(1,jfirst,k), &
cosp, acosp, cose, coslon, sinlon, &
dxdt, dxe, dtdx2, dtdx4, dtxe5, rdxe, &
dycp, dydt, dtdy5, cye, fc, &
ifax, trigs, dc(1,js2g0), sc, &
zt_c, tiny, rcap, im, jm, &
jfirst, jlast, ng_c, ng_d, ng_s, &
js2g0, jn2g0, js2gc, jn1gc, &
iord, jord, cosl5, sinl5 )
enddo
call t_stopf('c_core')
! MPI note: uc, vc, ptk, and ptc computed within the above k-loop from jfirst to jlast
! Needed by D-core: uc(jfirst-ng_d:jlast+ng_d), vc(jfirst:jlast+1)
call t_startf('geopk_cd')
call t_startf('c_geop')
if (geopkc16) then
!
! Stay in yz space and use semi-global z communications and 16-byte reals
!
call geopk16(ptop, pe, ptk, pkcc, wzc, hs, ptc, 0, im, jm, km, &
jfirst, jlast, 1, im, cp, akap, kfirst, klast)
!
! Geopk does not need j ghost zones of pkc and wz
!
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkcc(i,j,k)
wz(i,j,k) = wzc(i,j,k)
enddo
enddo
enddo
else
if (twod_decomp == 1) then
!
! Transpose to xy decomposition
!
#if defined( SPMD )
call mp_sendirr( ptk, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, delpxy )
call mp_recvirr ( delpxy, ijk_yz_to_xy%RecvDesc )
call mp_sendirr( ptc, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, ptxy )
call mp_recvirr ( ptxy, ijk_yz_to_xy%RecvDesc )
#endif
else
!$omp parallel do private(i, j, k)
do k = kfirst, klast
do j = jfirst, jlast
do i = 1, im
delpxy(i,j,k) = ptk(i,j,k)
ptxy(i,j,k) = ptc(i,j,k)
enddo
enddo
enddo
endif
call geopk(ptop, pexy, delpxy, pkxy, wzxy, hsxy, ptxy, 0, im, jm, km, &
jfirstxy, jlastxy, ifirstxy, ilastxy, &
cp, akap, nx, 0)
if (twod_decomp == 1) then
!
! Transpose back to yz decomposition.
! pexy is not an output quantity on this call.
! pkkp and wzkp are holding arrays whose specific z-dimensions
! are required by Pilgrim.
! Z edge ghost points (klast+1) are automatically filled in
!
#if defined( SPMD )
call mp_sendirr(pkxy, pkxy_to_pkc%SendDesc, &
pkxy_to_pkc%RecvDesc, pkkp)
call mp_recvirr(pkkp, pkxy_to_pkc%RecvDesc )
call mp_sendirr(wzxy, pkxy_to_pkc%SendDesc, &
pkxy_to_pkc%RecvDesc, wzkp)
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkkp(i,j,k)
enddo
enddo
enddo
call mp_recvirr(wzkp, pkxy_to_pkc%RecvDesc )
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
wz(i,j,k) = wzkp(i,j,k)
enddo
enddo
enddo
#endif
else
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkxy(i,j,k)
wz(i,j,k) = wzxy(i,j,k)
enddo
enddo
enddo
endif
endif ! geopkc16
call t_stopf('c_geop')
call t_stopf('geopk_cd')
! Upon exit from geopk, the quantities pe, pkc and wz will have been
! updated at klast+1
#if defined( SPMD )
call t_startf('send_pkc_wz')
!
! pkc & wz need to be ghosted only at jfirst-1
!
dest = iam+1
src = iam-1
if ( mod(iam+1,npr_y) == 0 ) dest = -1
if ( mod(iam,npr_y) == 0 ) src = -1
call mp_send3d_2( dest, src, im, jm, km+1, &
1, im, jfirst-1, jlast+1, kfirst, klast+1, &
1, im, jlast, jlast, kfirst, klast+1, pkc, wz)
call t_stopf('send_pkc_wz')
#endif
call t_startf('c_u_loop')
! Beware k+1 references directly below (AAM)
!
!$omp parallel do private(i, j, k, p1d, wk, wk2)
do k=kfirst,klast
do j=js2g0,jn2g0
do i=1,im
p1d(i) = pkc(i,j,k+1) - pkc(i,j,k)
enddo
uc(1,j,k) = uc(1,j,k) + dtdx2(j) * ( &
(wz(im,j,k+1)-wz(1,j,k))*(pkc(1,j,k+1)-pkc(im,j,k)) &
+ (wz(im,j,k)-wz(1,j,k+1))*(pkc(im,j,k+1)-pkc(1,j,k))) &
/ (p1d(1)+p1d(im))
do i=2,im
uc(i,j,k) = uc(i,j,k) + dtdx2(j) * ( &
(wz(i-1,j,k+1)-wz(i,j,k))*(pkc(i,j,k+1)-pkc(i-1,j,k)) &
+ (wz(i-1,j,k)-wz(i,j,k+1))*(pkc(i-1,j,k+1)-pkc(i,j,k))) &
/ (p1d(i)+p1d(i-1))
enddo
enddo
call pft2d(uc(1,js2g0,k), sc(js2g0), dc(1,js2g0), im, &
jn2g0-js2g0+1, ifax, trigs, wk, wk2 )
enddo
call t_stopf('c_u_loop')
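! The uc increment in the loop above is the zonal pressure-gradient
! force in what appears to be the finite-volume contour-integral form
! of Lin (1997): cross products of geopotential (wz) and p**kappa (pkc)
! differences around each cell edge in the (x,pk) plane, divided by the
! local pkc thickness p1d and scaled by dtdx2(j) = dt/(2*dx(j)); the
! i=1 column wraps around to i=im. pft2d then polar-filters the result.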
#if defined( SPMD )
call t_startf('recv_pkc_wz')
call mp_recv3d_2( src, im, jm, km+1, &
1, im, jfirst-1, jlast+1, kfirst, klast+1, &
1, im, jfirst-1, jfirst-1, kfirst, klast+1, pkc, wz)
call t_stopf('recv_pkc_wz')
call t_startf('send_uc')
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, uc )
call t_stopf('send_uc')
#endif
call t_startf('c_v_pgrad')
!
! Beware k+1 references directly below (AAM)
!
!$omp parallel do private(i, j, k, wk, wk1 )
! pkc and wz need only be ghosted at jfirst-1
do k=kfirst,klast
do j=js1g1,jlast
do i=1,im
wk1(i,j) = pkc(i,j,k+1) - pkc(i,j,k)
enddo
enddo
do j=js2g0,jlast
do i=1,im
vc(i,j,k) = vc(i,j,k) + dtdy5/(wk1(i,j)+wk1(i,j-1)) * &
( (wz(i,j-1,k+1)-wz(i,j,k))*(pkc(i,j,k+1)-pkc(i,j-1,k)) &
+ (wz(i,j-1,k)-wz(i,j,k+1))*(pkc(i,j-1,k+1)-pkc(i,j,k)) )
enddo
enddo
call pft2d(vc(1,js2g0,k), se(js2g0), de(1,js2g0), im, &
jlast-js2g0+1, ifax, trigs, wk, wk1 )
enddo
call t_stopf('c_v_pgrad')
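! Same contour-integral pressure gradient applied meridionally to vc,
! scaled by dtdy5 = dt/(2*dy); the j-1 references are why pkc and wz
! had to be received at jfirst-1 before this loop.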
#if defined( SPMD )
call t_startf('recv_uc')
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, uc )
call t_stopf('recv_uc')
call t_startf('ghost_vc')
! vc only needs to be ghosted at jlast+1
dest = iam-1
src = iam+1
if ( mod(iam,npr_y) == 0 ) dest = -1
if ( mod(iam+1,npr_y) == 0 ) src = -1
call mp_send3d( dest, src, im, jm, km, &
1, im, jfirst-2, jlast+2, kfirst, klast, &
1, im, jfirst, jfirst, kfirst, klast, vc )
call mp_recv3d( src, im, jm, km, &
1, im, jfirst-2, jlast+2, kfirst, klast, &
1, im, jlast+1, jlast+1, kfirst, klast, vc )
call t_stopf('ghost_vc')
#endif
call t_startf('d_core')
#if defined( NESTED_PAR )
!$omp parallel do private(i, j, k, iord, jord), num_threads(ompouter)
#else
!$omp parallel do private(i, j, k, iord, jord)
#endif
do k=kfirst,klast
if( k <= km/8 ) then
if( k == 1 ) then
iord = 1
jord = 1
else
iord = min(2, iord_d)
jord = min(2, jord_d)
endif
else
iord = iord_d
jord = jord_d
endif
!-----------------------------------------------------------------
! Call the vertical independent part of the dynamics on the D-grid
!-----------------------------------------------------------------
call d_sw( u(1,jfirst-ng_d,k), v(1,jfirst-ng_s,k), &
uc(1,jfirst-ng_d,k), vc(1,jfirst-2,k), &
pt(1,jfirst-ng_d,k), delp(1,jfirst,k), &
delpf(1,jfirst-ng_d,k), cx3(1,jfirst-ng_d,k), &
cy3(1,jfirst,k), mfx(1,jfirst,k), &
mfy(1,jfirst,k), cdx(js2g0,k), cdy(js2g0,k), &
dtdx, dtdxe, dtxe5, txe5, dyce, rdx, cy, &
dx, f0(jfirst-ng_d), js2g0, jn1g1, im, jm, &
jfirst, jlast, ng_d, ng_s, nq, iord, &
jord, zt_d, rcap, tiny, dtdy, &
dtdy5, tdy5, rdy, cosp, acosp, cose, &
coslon, sinlon, cosl5, sinl5 )
enddo
call t_stopf('d_core')
call t_startf('geopk_cd')
call t_startf('d_geop')
if (geopkd16) then
!
! Stay in yz space and use semi-global z communications and 16-byte reals
call geopk16(ptop, pe, delp, pkcc, wzc, hs, pt, ng_d, im, jm, km, &
jfirst, jlast, 1, im, cp, akap, kfirst, klast)
!
! Geopk does not need j ghost zones of pkc and wz
!
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkcc(i,j,k)
wz(i,j,k) = wzc(i,j,k)
enddo
enddo
enddo
else
if (twod_decomp == 1) then
!
! Transpose to xy decomposition
!
#if defined( SPMD )
call mp_sendirr( delp, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, delpxy )
!$omp parallel do private(i,j,k)
do k=kfirst,klast
do j=jfirst,jlast
do i=1,im
yzt(i,j,k) = pt(i,j,k)
enddo
enddo
enddo
call mp_recvirr( delpxy, ijk_yz_to_xy%RecvDesc )
call mp_sendirr( yzt, ijk_yz_to_xy%SendDesc, &
ijk_yz_to_xy%RecvDesc, ptxy )
call mp_recvirr( ptxy, ijk_yz_to_xy%RecvDesc )
#endif
else
!$omp parallel do private(i,j,k)
do k=kfirst,klast
do j=jfirst,jlast
do i=1,im
delpxy(i,j,k) = delp(i,j,k)
ptxy(i,j,k) = pt(i,j,k)
enddo
enddo
enddo
endif
call geopk(ptop, pexy, delpxy, pkxy, wzxy, hsxy, ptxy, 0, im, jm, km, &
jfirstxy, jlastxy, ifirstxy, ilastxy, &
cp, akap, nx, ipe)
if (twod_decomp == 1) then
!
! Transpose back to yz decomposition
! Z edge ghost points (klast+1) are automatically filled in
! pexy is an output quantity on the last small timestep
!
#if defined( SPMD )
call mp_sendirr(pkxy, pkxy_to_pkc%SendDesc, &
pkxy_to_pkc%RecvDesc, pkkp)
call mp_recvirr(pkkp, pkxy_to_pkc%RecvDesc )
call mp_sendirr(wzxy, pkxy_to_pkc%SendDesc, &
pkxy_to_pkc%RecvDesc, wzkp)
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkkp(i,j,k)
enddo
enddo
enddo
call mp_recvirr(wzkp, pkxy_to_pkc%RecvDesc )
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
wz(i,j,k) = wzkp(i,j,k)
enddo
enddo
enddo
#endif
else
!$omp parallel do private(i, j, k)
do k = kfirst, klast+1
do j = jfirst, jlast
do i = 1, im
pkc(i,j,k) = pkxy(i,j,k)
wz(i,j,k) = wzxy(i,j,k)
enddo
enddo
enddo
endif
endif ! geopkd16
!
! Upon exit from geopk, the quantities pe, pkc and wz will have been
! updated at klast+1
call t_stopf('d_geop')
call t_stopf('geopk_cd')
#if defined( SPMD )
! Exchange boundary regions on north and south for pkc and wz
call t_startf('send_pkc_wz')
call mp_send2_ns( im, jm, km+1, jfirst, jlast, &
kfirst, klast+1, 1, pkc, wz)
call t_stopf('send_pkc_wz')
#endif
if ( ipe /= 1 ) then ! not the last call
!
! Perform some work while the data sent above is in transit
!
!$omp parallel do private(i, j, k, wk, wk2)
do k=kfirst,klast
do j=jfirst,jlast
do i=1,im
delpf(i,j,k) = delp(i,j,k)
enddo
enddo
call pft2d( delpf(1,js2g0,k), sc(js2g0), dc(1,js2g0), &
im, jn2g0-js2g0+1, ifax, trigs, wk, wk2 )
enddo
else
! Last call
!$omp parallel do private(i, j, k)
do k=kfirst,klast+1
do j=jfirst,jlast
do i=1,im
pk(i,j,k) = pkc(i,j,k)
enddo
enddo
enddo
endif
#if defined( SPMD )
call t_startf('recv_pkc_wz')
call mp_recv2_ns( im, jm, km+1, jfirst, jlast, &
kfirst, klast+1, 1, pkc, wz)
call t_stopf('recv_pkc_wz')
if ( ipe /= 1 ) then ! not the last call
call t_startf('send_delpf')
call mp_send4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, delpf )
call t_stopf('send_delpf')
endif
#endif
!
! Beware k+1 references directly below (AAM)
!
!$omp parallel do private(i, j, k)
do k=kfirst,klast
do j=js1g1,jn1g1 ! dpt needed NS
do i=1,im ! wz, pkc ghosted NS
dpt(i,j,k)=(wz(i,j,k+1)+wz(i,j,k))*(pkc(i,j,k+1)-pkc(i,j,k))
enddo
enddo
enddo
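! dpt caches the per-layer product (wz(k+1)+wz(k))*(pkc(k+1)-pkc(k)),
! reused by loop 6000 below; it is filled one row past the domain
! (js1g1:jn1g1), so the later i/j differencing needs no further
! communication.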
! GHOSTING: wz (input) NS ; pkc (input) NS
call t_startf('d-pgrad')
!$omp parallel do private(i, j, k, wk3, wk1)
do 4500 k=kfirst,klast+1
if (k == 1) then
do j=js2g0,jlast
do i=1,im
wz3(i,j,1) = 0.
wz(i,j,1) = 0.
enddo
enddo
pk4 = 4.*ptop**akap
do j=js2g0,jn1g1
do i=1,im
pkc(i,j,1) = pk4
enddo
enddo
go to 4500
endif
do j=js2g1,jn2g0 ! wk3 needed S
wk3(1,j) = (wz(1,j,k)+wz(im,j,k)) * &
(pkc(1,j,k)-pkc(im,j,k))
do i=2,im
wk3(i,j) = (wz(i,j,k)+wz(i-1,j,k)) * &
(pkc(i,j,k)-pkc(i-1,j,k))
enddo
enddo
do j=js2g1,jn2g0
do i=1,im-1
wk1(i,j) = wk3(i,j) + wk3(i+1,j)
enddo
wk1(im,j) = wk3(im,j) + wk3(1,j) ! wk3 ghosted S
enddo
if ( jfirst == 1 ) then
do i=1,im
wk1(i, 1) = 0.
enddo
endif
if ( jlast == jm ) then
do i=1,im
wk1(i,jm) = 0.
enddo
endif
do j=js2g0,jlast ! wk1 ghosted S
do i=1,im
wz3(i,j,k) = wk1(i,j) + wk1(i,j-1)
enddo
enddo
! N-S walls
do j=js2g0,jn1g1 ! wk1 needed N
do i=1,im ! wz, pkc ghosted NS
wk1(i,j) = (wz(i,j,k)+wz(i,j-1,k))*(pkc(i,j,k)-pkc(i,j-1,k))
enddo
enddo
do j=js2g0,jn1g1 ! wk3 needed N
wk3(1,j) = wk1(1,j) + wk1(im,j) ! wk1 ghosted N
do i=2,im
wk3(i,j) = wk1(i,j) + wk1(i-1,j) ! wk1 ghosted N
enddo
enddo
do j=js2g0,jn2g0
do i=1,im
wz(i,j,k) = wk3(i,j) + wk3(i,j+1) ! wk3 ghosted N
enddo
enddo
do j=js1g1,jn1g1
wk1(1,j) = pkc(1,j,k) + pkc(im,j,k)
do i=2,im
wk1(i,j) = pkc(i,j,k) + pkc(i-1,j,k)
enddo
enddo
do j=js2g0,jn1g1
do i=1,im
pkc(i,j,k) = wk1(i,j) + wk1(i,j-1)
enddo
enddo
4500 continue
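! On exit from loop 4500 (a reading of the code, not an official note):
! wz3 holds the east-west "wall" contributions, wz the north-south ones,
! and pkc the 4-point corner sum of p**kappa used as a thickness weight
! in loop 6000. k=1 is special-cased because the model top is the
! constant-pressure surface p = ptop (so pkc = 4*ptop**akap and the wall
! terms vanish there).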
! GHOSTING: dpt (loop above) NS ; pkc (loop 4500) N
!
! Beware k+1 references directly below (AAM)
!
!$omp parallel do private(i, j, k, wk, wk1, wk2, wk3)
do 6000 k=kfirst,klast
do j=js1g1,jn1g1
wk1(1,j) = dpt(1,j,k) + dpt(im,j,k)
do i=2,im
wk1(i,j) = dpt(i,j,k) + dpt(i-1,j,k)
enddo
enddo
do j=js2g0,jn1g1
do i=1,im
wk2(i,j) = wk1(i,j) + wk1(i,j-1)
wk(i,j) = pkc(i,j,k+1) - pkc(i,j,k)
enddo
enddo
do j=js2g0,jlast
do i=1,im-1
wk3(i,j) = uc(i,j,k) + dtdxe(j)/(wk(i,j) + wk(i+1,j)) &
* (wk2(i,j)-wk2(i+1,j)+wz3(i,j,k+1)-wz3(i,j,k))
enddo
wk3(im,j) = uc(im,j,k) + dtdxe(j)/(wk(im,j) + wk(1,j)) &
* (wk2(im,j)-wk2(1,j)+wz3(im,j,k+1)-wz3(im,j,k))
enddo
do j=js2g0,jn2g0 ! Assumes wk2 ghosted on N
do i=1,im
wk1(i,j) = vc(i,j,k) + dtdy/(wk(i,j)+wk(i,j+1)) * &
(wk2(i,j)-wk2(i,j+1)+wz(i,j,k+1)-wz(i,j,k))
enddo
enddo
#if ( !defined ALT_PFT )
call pft2d( wk3(1,js2g0), se(js2g0), de(1,js2g0), im, &
jlast-js2g0+1, ifax, trigs, wk, wk2 )
call pft2d( wk1(1,js2g0), sc(js2g0), dc(1,js2g0), im, &
jn2g0-js2g0+1, ifax, trigs, wk, wk2 )
#endif
do j=js2g0,jn2g0
do i=1,im
v(i,j,k) = v(i,j,k) + wk1(i,j)
u(i,j,k) = u(i,j,k) + wk3(i,j)
enddo
enddo
if ( jlast == jm ) then
do i=1,im
u(i,jlast,k) = u(i,jlast,k) + wk3(i,jlast)
enddo
endif
#if defined ( ALT_PFT )
call pft2d( u(1,js2g0,k), se(js2g0), de(1,js2g0), im, &
jlast-js2g0+1, ifax, trigs, wk, wk2 )
call pft2d( v(1,js2g0,k), sc(js2g0), dc(1,js2g0), im, &
jn2g0-js2g0+1, ifax, trigs, wk, wk2 )
#endif
6000 continue
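! Loop 6000 assembles the final D-grid update: wk2 is the 4-point sum of
! dpt, wk the local p**kappa thickness, and wk3/wk1 the zonal and
! meridional pressure-gradient increments added to u and v. By default
! the polar filter is applied to the increments before the update; with
! ALT_PFT it is applied to the fully updated winds instead.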
call t_stopf('d-pgrad')
#if defined( SPMD )
if ( ipe /= 1 ) then
call t_startf('recv_delpf')
call mp_recv4d_ns( im, jm, km, 1, jfirst, jlast, &
kfirst, klast, ng_d, ng_d, delpf )
call t_stopf('recv_delpf')
endif
#endif
return
!EOC
end
!-----------------------------------------------------------------------