#include <misc.h>
module dp_coupling
!BOP
!
! !MODULE: dp_coupling --- dynamics-physics coupling module
!
   use shr_kind_mod, only: r8 => shr_kind_r8
   use rgrid,         only: nlon
   use pmgrid,        only: plon, plat, plev, plevp, twod_decomp, iam, &
                            beglat, endlat, beglev, endlev, &
                            beglatxy, endlatxy, beglonxy, endlonxy
   use ppgrid,        only: pcols, pver
   use phys_grid
   use phys_buffer,   only: pbuf_fld, pbuf_size_max
   use physics_types, only: physics_state, physics_tend
   use constituents,  only: ppcnst, qmin
   use physconst,     only: cpair, gravit, rair, zvir
   use geopotential,  only: geopotential_t
   use check_energy,  only: check_energy_timestep_init
!+hi-waccm
#ifdef WACCM_TIPHYS
    use constituents,  only: cnst_get_ind, cnst_mw
    use shr_const_mod, only: shr_const_rgas
#endif
!-hi-waccm
!
! !PUBLIC MEMBER FUNCTIONS:
      PUBLIC d_p_coupling, p_d_coupling

!
! !DESCRIPTION:
!
!      This module provides 
!
!      \begin{tabular}{|l|l|} \hline \hline
!        d\_p\_coupling    &  dynamics output to physics input \\ \hline
!        p\_d\_coupling    &  physics output to dynamics input \\ \hline 
!                                \hline
!      \end{tabular}
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.10.01   Lin        Various revisions
!   01.03.26   Sawyer     Added ProTeX documentation
!   01.06.27   Mirin      Separate noncoupling coding into new routines
!   01.07.13   Mirin      Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   03.03.28   Boville    set all physics_state elements, add check_energy_timestep_init
!   03.08.13   Sawyer     Removed ghost N1 region in u3sxy
!
!EOP
!-----------------------------------------------------------------------

CONTAINS

!-----------------------------------------------------------------------
!BOP
! !IROUTINE: d_p_coupling --- convert dynamics output to physics input
!
! !INTERFACE: 
  subroutine d_p_coupling(ps,   u3s,   v3s,  pt,    coslon,  sinlon,     &
                          q3,   omga, phis,  pe,    peln,    pk,         &
                          pkz,  phys_state,  phys_tend, pbuf, full_phys, &
                          psxy,  u3sxy,       v3sxy,   ptxy,             &
                          q3xy, omgaxy,phisxy,      pexy,  pelnxy,       &
                          pkxy, pkzxy )

! !USES:
    use dynamics_vars, only: ng_d, ng_s
    use constituents,  only: cnst_get_type_byind
    use physics_types, only: set_state_pdry, set_wet_to_dry
    use abortutils,    only: endrun

#if defined (SPMD)
    use mpishorthand, only : mpicom
    use spmd_dyn,     only : ijk_xy_to_yz, local_dp_map
    use spmd_dyn,     only : block_buf_nrecs, chunk_buf_nrecs
    use parutilitiesmodule, only: sumop, parcollective
    use mod_comm,           only: mp_sendirr, mp_recvirr
#endif
#if ( defined WACCM_GHG || defined WACCM_MOZART )
    use ctem, only: ctem_driver, ctem_output
#endif
!-----------------------------------------------------------------------
    implicit none
!-----------------------------------------------------------------------
! !INPUT PARAMETERS:
!
    real(r8), intent(in) :: ps (plon, beglat:endlat)                      ! surface pressure
    real(r8), intent(inout) :: u3s(plon, beglat-ng_d:endlat+ng_s, beglev:endlev)    ! u-wind on d-grid
    real(r8), intent(in) :: v3s(plon, beglat-ng_s:endlat+ng_d, beglev:endlev)       ! v-wind on d-grid
    real(r8), intent(in) :: pt (plon, beglat-ng_d:endlat+ng_d, beglev:endlev)       ! Virtual pot temp
    real(r8), intent(in) :: q3 (plon, beglat-ng_d:endlat+ng_d, beglev:endlev, ppcnst) ! constituents
    real(r8), intent(in) :: omga(plon, beglev:endlev, beglat:endlat)      ! vertical velocity
    real(r8), intent(in) :: phis(plon, beglat:endlat)                     ! surface geopotential
    real(r8), intent(in) :: pe  (plon, beglev:endlev+1, beglat:endlat)    ! this fv's pint
    real(r8), intent(in) :: peln(plon, beglev:endlev+1, beglat:endlat)    ! log(pe)
    real(r8), intent(in) :: pk  (plon, beglat:endlat, beglev:endlev+1)    ! pe**cappa
    real(r8), intent(in) :: pkz (plon, beglat:endlat, beglev:endlev)      ! f-v mean of pk
    real(r8), intent(in) :: coslon(plon)                                  ! cosine of longitude
    real(r8), intent(in) :: sinlon(plon)                                  ! sin of longitudes
    logical,  intent(in) :: full_phys

! xy-decomposed instanciations below:
    real(r8), intent(in) :: psxy (beglonxy:endlonxy, beglatxy:endlatxy)                      ! surface pressure
    real(r8), intent(in) :: u3sxy(beglonxy:endlonxy, beglatxy:endlatxy, plev)       ! u-wind on d-grid
    real(r8), intent(in) :: v3sxy(beglonxy:endlonxy, beglatxy:endlatxy, plev)       ! v-wind on d-grid
    real(r8), intent(in) :: ptxy (beglonxy:endlonxy, beglatxy:endlatxy, plev)       ! Virtual pot temp
    real(r8), target, intent(in) :: q3xy (beglonxy:endlonxy, beglatxy:endlatxy, plev, ppcnst) ! constituents
    real(r8), intent(in) :: omgaxy(beglonxy:endlonxy, plev, beglatxy:endlatxy)      ! vertical velocity
    real(r8), intent(in) :: phisxy(beglonxy:endlonxy, beglatxy:endlatxy)            ! surface geopotential
    real(r8), intent(in) :: pexy  (beglonxy:endlonxy, plevp, beglatxy:endlatxy)     ! this fv's pint
    real(r8), intent(in) :: pelnxy(beglonxy:endlonxy, plevp, beglatxy:endlatxy)     ! log(pe)
    real(r8), intent(in) :: pkxy  (beglonxy:endlonxy, beglatxy:endlatxy, plevp)     ! pe**cappa
    real(r8), intent(in) :: pkzxy (beglonxy:endlonxy, beglatxy:endlatxy, plev)      ! f-v mean of pk

! !OUTPUT PARAMETERS:

    type(physics_state), intent(out), dimension(begchunk:endchunk) :: phys_state
    type(physics_tend ), intent(out), dimension(begchunk:endchunk) :: phys_tend
    type(pbuf_fld),    intent(inout), dimension(pbuf_size_max)     :: pbuf

! !DESCRIPTION:
!
!   Coupler for converting dynamics output variables into physics 
!   input variables
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.07.13   AAM        Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   02.05.02   Sawyer     u3s made inout due to ghosting in d2a3dikj
!   03.08.05   Sawyer     Removed pe11k, pe11kln (for defunct Rayl fric)
!   04.08.29   Eaton      Added lat, lon coords to physics_state type
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: i,ib,j,k,m,lchnk      ! indices
    integer :: ic, jc
    integer :: boff
    integer :: ncol                  ! number of columns in current chunk
    integer :: lats(pcols)           ! array of latitude indices
    integer :: lons(pcols)           ! array of longitude indices
    integer :: blksiz                ! number of columns in 2D block
    integer :: tsize                 ! amount of data per grid point passed to physics
    integer :: astat
#if ( defined WACCM_GHG || defined WACCM_MOZART )
    integer :: ip_gm1(beglonxy:endlonxy,beglatxy:endlatxy)
    integer :: ip_gm1_chnk(pcols)
#endif
#ifdef WACCM_TIPHYS
!+hi-waccm
    integer :: ixo, ixo2, ixh, ixh2, ixn
    real(r8):: mmro, mmro2, mmrh, mmrn2
    real(r8):: mbarvi, tint, cpairvi
!-hi-waccm
#endif
    integer, allocatable :: bpter(:,:) ! offsets into block buffer for packing data
    integer :: cpter(pcols,0:pver)     ! offsets into chunk buffer for unpacking data

    real(r8) :: rlat(pcols)          ! array of latitudes (radians)
    real(r8) :: rlon(pcols)          ! array of longitudes (radians)
    real(r8) :: qmavl                ! available q at level pver-1
    real(r8) :: dqreq                ! q change at pver-1 required to remove q<qmin at pver
    real(r8) :: qbot                 ! bottom level q before change
    real(r8) :: qbotm1               ! bottom-1 level q before change
    real(r8) :: pic(pcols)           ! ps**cappa
#if ( defined WACCM_GHG || defined WACCM_MOZART )
    real(r8) :: vth(plevp,beglatxy:endlatxy)
    real(r8) :: wth(plevp,beglatxy:endlatxy)
    real(r8) :: uv(plevp,beglatxy:endlatxy)
    real(r8) :: uw(plevp,beglatxy:endlatxy)
    real(r8) :: uzm(plev,beglatxy:endlatxy)
    real(r8) :: u2d(plevp,beglatxy:endlatxy)
    real(r8) :: v2d(plevp,beglatxy:endlatxy)
    real(r8) :: th2d(plevp,beglatxy:endlatxy)
    real(r8) :: w2d(plevp,beglatxy:endlatxy)
    real(r8) :: thi(beglonxy:endlonxy,plevp,beglatxy:endlatxy)
    real(r8) :: vth_chnk(pcols,plevp)
    real(r8) :: wth_chnk(pcols,plevp)
    real(r8) :: uv_chnk(pcols,plevp)
    real(r8) :: uw_chnk(pcols,plevp)
    real(r8) :: u2d_chnk(pcols,plevp)
    real(r8) :: v2d_chnk(pcols,plevp)
    real(r8) :: th2d_chnk(pcols,plevp)
    real(r8) :: w2d_chnk(pcols,plevp)
    real(r8) :: thi_chnk(pcols,plevp)
#endif
    real(r8), allocatable :: u3(:, :, :)       ! u-wind on a-grid
    real(r8), allocatable :: v3(:, :, :)       ! v-wind on a-grid
    real(r8), allocatable :: bbuffer(:)        ! transpose buffer
    real(r8), allocatable :: cbuffer(:)        ! transpose buffer
#if (! defined SPMD)
    integer  :: block_buf_nrecs = 0
    integer  :: chunk_buf_nrecs = 0
    logical  :: local_dp_map=.true. 
#endif
!---------------------------End Local workspace-------------------------

!-----------------------------------------------------------------------
! Transform dynamics staggered winds to physics grid (D=>A)
!-----------------------------------------------------------------------
    allocate( u3(beglonxy:endlonxy,plev,beglatxy:endlatxy), &
              v3(beglonxy:endlonxy,plev,beglatxy:endlatxy),stat=astat )
    if( astat /= 0 ) then
       write(*,*) 'd_p_coupling: failed to allocate u3,v3; error = ',astat
       call endrun
    end if

    call d2a3dikj(u3sxy,  v3sxy,   u3,    v3,   plon,  plat, plev,    &
                  beglatxy, endlatxy,  0, 0, 0, 0, 0,                 &
                  beglonxy, endlonxy,    coslon,      sinlon)

#if ( defined WACCM_GHG || defined WACCM_MOZART )
    call t_startf('DP_CPLN: ctem')
    call ctem_driver( u3, v3, omgaxy, ptxy, q3xy(:,:,:,1), &
                      vth, wth, uv, uw, uzm, u2d, &
                      v2d, th2d, w2d, ip_gm1, thi, &
                      psxy, pexy, beglatxy, endlatxy, beglonxy, endlonxy )
    call t_stopf('DP_CPLN: ctem')
#endif
         
!-----------------------------------------------------------------------
! set chunk id, number of columns, and coordinates
!-----------------------------------------------------------------------
    do lchnk = begchunk,endchunk
       ncol = get_ncols_p(lchnk)
       call get_rlon_all_p(lchnk, ncol, rlon)
       call get_rlat_all_p(lchnk, ncol, rlat)
       phys_state(lchnk)%ncol  = ncol
       phys_state(lchnk)%lchnk = lchnk
       do i=1,ncol
          phys_state(lchnk)%lat(i) = rlat(i)
          phys_state(lchnk)%lon(i) = rlon(i)
       end do
    end do

!-----------------------------------------------------------------------
! Copy data from dynamics data structure to physics data structure
!-----------------------------------------------------------------------
has_local_map : &
    if (local_dp_map) then

#if ( defined WACCM_GHG || defined WACCM_MOZART )
!$omp parallel do private (lchnk, ncol, i, k, m, ic, jc, lons, lats, pic, ip_gm1_chnk, vth_chnk, wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, th2d_chnk, w2d_chnk, thi_chnk)
#else
!$omp parallel do private (lchnk, ncol, i, k, m, ic, jc, lons, lats, pic)
#endif
chnk_loop1 : &
       do lchnk = begchunk,endchunk
          ncol = phys_state(lchnk)%ncol
          call get_lon_all_p( lchnk, ncol, lons )
          call get_lat_all_p( lchnk, ncol, lats )

          do i=1,ncol
             ic = lons(i)
             jc = lats(i)
             phys_state(lchnk)%ps(i)   = psxy(ic,jc)
             phys_state(lchnk)%phis(i) = phisxy(ic,jc)
             pic(i)                    = pkxy(ic,jc,pver+1)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             ip_gm1_chnk(i)            = ip_gm1(ic,jc)
             vth_chnk(i,pver+1)        = vth(pver+1,jc)
             wth_chnk(i,pver+1)        = wth(pver+1,jc)
             uv_chnk(i,pver+1)         = uv(pver+1,jc)
             uw_chnk(i,pver+1)         = uw(pver+1,jc)
             u2d_chnk(i,pver+1)        = u2d(pver+1,jc)
             v2d_chnk(i,pver+1)        = v2d(pver+1,jc)
             th2d_chnk(i,pver+1)       = th2d(pver+1,jc)
             w2d_chnk(i,pver+1)        = w2d(pver+1,jc)
             thi_chnk(i,pver+1)        = thi(ic,pver+1,jc)
#endif
          enddo

          do k=1,plev
             do i=1,ncol
                ic = lons(i)
                jc = lats(i)
                phys_state(lchnk)%u    (i,k) = u3(ic,k,jc)
                phys_state(lchnk)%v    (i,k) = v3(ic,k,jc)
                phys_state(lchnk)%omega(i,k) = omgaxy(ic,k,jc)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                vth_chnk(i,k)                = vth(k,jc)
                wth_chnk(i,k)                = wth(k,jc)
                uv_chnk(i,k)                 = uv(k,jc)
                uw_chnk(i,k)                 = uw(k,jc)
                phys_state(lchnk)%uzm(i,k)   = uzm(k,jc)
                u2d_chnk(i,k)                = u2d(k,jc)
                v2d_chnk(i,k)                = v2d(k,jc)
                th2d_chnk(i,k)               = th2d(k,jc)
                w2d_chnk(i,k)                = w2d(k,jc)
                thi_chnk(i,k)                = thi(ic,k,jc)
#endif
                if (full_phys) then
                   phys_state(lchnk)%t    (i,k) = ptxy(ic,jc,k) / (1. + zvir*q3xy(ic,jc,k,1))
                   phys_state(lchnk)%exner(i,k) = pic(i) / pkzxy(ic,jc,k) 
                else
                   phys_state(lchnk)%t    (i,k) = ptxy(ic,jc,k) * pkzxy(ic,jc,k)
                end if
             end do
          end do

          do k=1,plevp
             do i=1,ncol
!
! edge-level pressure arrays: copy from the arrays computed by dynpkg
!
                phys_state(lchnk)%pint  (i,k) = pexy  (lons(i),k,lats(i))
                phys_state(lchnk)%lnpint(i,k) = pelnxy(lons(i),k,lats(i))
             end do
          end do

!
! Copy constituents
! Dry types converted from moist to dry m.r. at bottom of this routine
!
          do m=1,ppcnst
             do k=1,plev
                do i=1,ncol
                   phys_state(lchnk)%q(i,k,m) = q3xy(lons(i),lats(i),k,m)
                end do
             end do
          end do
          
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call ctem_output( lchnk, ncol, lons, ip_gm1_chnk, vth_chnk, &
                            wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, &
                            th2d_chnk, w2d_chnk, thi_chnk ) 
#endif
       end do chnk_loop1

    else has_local_map

#if ( defined WACCM_GHG || defined WACCM_MOZART )
       tsize = 17 + ppcnst
       boff  = 16
#else
       tsize = 7 + ppcnst
       boff  = 6
#endif
 
       blksiz = (endlatxy-beglatxy+1)*(endlonxy-beglonxy+1)
       allocate( bpter(blksiz,0:plev),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate bpter; error = ',astat
          call endrun
       end if
       allocate( bbuffer(tsize*block_buf_nrecs),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate bbuffer; error = ',astat
          call endrun
       end if
       allocate( cbuffer(tsize*chunk_buf_nrecs),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate cbuffer; error = ',astat
          call endrun
       end if

       call block_to_chunk_send_pters( iam+1, blksiz, plevp, tsize, bpter )

       ib = 0
       do j=beglatxy,endlatxy
          do i=beglonxy,endlonxy
             ib = ib + 1
             bbuffer(bpter(ib,0))   = pexy(i,plevp,j)
             bbuffer(bpter(ib,0)+1) = pelnxy(i,plevp,j)
             bbuffer(bpter(ib,0)+2) = psxy(i,j)
             bbuffer(bpter(ib,0)+3) = phisxy(i,j)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             bbuffer(bpter(ib,0)+4) = ip_gm1(i,j)
             bbuffer(bpter(ib,0)+7) = vth(plevp,j)
             bbuffer(bpter(ib,0)+8) = wth(plevp,j)
             bbuffer(bpter(ib,0)+9) = uv(plevp,j)
             bbuffer(bpter(ib,0)+10) = uw(plevp,j)
             bbuffer(bpter(ib,0)+11) = u2d(plevp,j)
             bbuffer(bpter(ib,0)+12) = v2d(plevp,j)
             bbuffer(bpter(ib,0)+13) = th2d(plevp,j)
             bbuffer(bpter(ib,0)+14) = w2d(plevp,j)
             bbuffer(bpter(ib,0)+15) = thi(i,plevp,j)
#endif

             do k=1,plev
                bbuffer(bpter(ib,k))   = pexy(i,k,j)
                bbuffer(bpter(ib,k)+1) = pelnxy(i,k,j)
                bbuffer(bpter(ib,k)+2) = u3    (i,k,j)
                bbuffer(bpter(ib,k)+3) = v3    (i,k,j)
                bbuffer(bpter(ib,k)+4) = omgaxy(i,k,j)
                if (full_phys) then
                   bbuffer(bpter(ib,k)+5) = ptxy(i,j,k) / (1. + zvir*q3xy(i,j,k,1))
                   bbuffer(bpter(ib,k)+6) = pkxy(i,j,pver+1) / pkzxy(i,j,k) 
                else
                   bbuffer(bpter(ib,k)+6) = ptxy(i,j,k) * pkzxy(i,j,k)
                end if
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                bbuffer(bpter(ib,k)+7)  = vth(k,j)
                bbuffer(bpter(ib,k)+8)  = wth(k,j)
                bbuffer(bpter(ib,k)+9)  = uv(k,j)
                bbuffer(bpter(ib,k)+10) = uw(k,j)
                bbuffer(bpter(ib,k)+11) = u2d(k,j)
                bbuffer(bpter(ib,k)+12) = v2d(k,j)
                bbuffer(bpter(ib,k)+13) = th2d(k,j)
                bbuffer(bpter(ib,k)+14) = w2d(k,j)
                bbuffer(bpter(ib,k)+15) = thi(i,k,j)
                bbuffer(bpter(ib,k)+16) = uzm(k,j)
#endif

                do m=1,ppcnst
                   bbuffer(bpter(ib,k)+boff+m) = q3xy(i,j,k,m)
                end do
             end do
          end do
       end do

       call transpose_block_to_chunk( tsize, bbuffer, cbuffer )

chnk_loop2 : &
       do lchnk = begchunk,endchunk
          ncol = phys_state(lchnk)%ncol
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call get_lon_all_p( lchnk, ncol, lons )
#endif

          call block_to_chunk_recv_pters( lchnk, pcols, pver+1, tsize, cpter )

          do i=1,ncol
             phys_state(lchnk)%pint  (i,pver+1) = cbuffer(cpter(i,0))
             phys_state(lchnk)%lnpint(i,pver+1) = cbuffer(cpter(i,0)+1)
             phys_state(lchnk)%ps(i)            = cbuffer(cpter(i,0)+2)
             phys_state(lchnk)%phis(i)          = cbuffer(cpter(i,0)+3)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             ip_gm1_chnk(i)                     = cbuffer(cpter(i,0)+4)
             vth_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+7)
             wth_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+8)
             uv_chnk(i,pver+1)                  = cbuffer(cpter(i,0)+9)
             uw_chnk(i,pver+1)                  = cbuffer(cpter(i,0)+10)
             u2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+11)
             v2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+12)
             th2d_chnk(i,pver+1)                = cbuffer(cpter(i,0)+13)
             w2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+14)
             thi_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+15)
#endif

             do k=1,plev
                phys_state(lchnk)%pint  (i,k) = cbuffer(cpter(i,k))
                phys_state(lchnk)%lnpint(i,k) = cbuffer(cpter(i,k)+1)
                phys_state(lchnk)%u     (i,k) = cbuffer(cpter(i,k)+2)
                phys_state(lchnk)%v     (i,k) = cbuffer(cpter(i,k)+3)
                phys_state(lchnk)%omega (i,k) = cbuffer(cpter(i,k)+4)
                if (full_phys) then
                   phys_state(lchnk)%t    (i,k) = cbuffer(cpter(i,k)+5)
                   phys_state(lchnk)%exner(i,k) = cbuffer(cpter(i,k)+6)
                else
                   phys_state(lchnk)%t    (i,k) = cbuffer(cpter(i,k)+6)
                end if
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                vth_chnk(i,k)                   = cbuffer(cpter(i,k)+7)
                wth_chnk(i,k)                   = cbuffer(cpter(i,k)+8)
                uv_chnk(i,k)                    = cbuffer(cpter(i,k)+9)
                uw_chnk(i,k)                    = cbuffer(cpter(i,k)+10)
                u2d_chnk(i,k)                   = cbuffer(cpter(i,k)+11)
                v2d_chnk(i,k)                   = cbuffer(cpter(i,k)+12)
                th2d_chnk(i,k)                  = cbuffer(cpter(i,k)+13)
                w2d_chnk(i,k)                   = cbuffer(cpter(i,k)+14)
                thi_chnk(i,k)                   = cbuffer(cpter(i,k)+15)
                phys_state(lchnk)%uzm(i,k)      = cbuffer(cpter(i,k)+16)
#endif
                
                ! dry type constituents converted from moist to dry at bottom of routine
                do m=1,ppcnst
                   phys_state(lchnk)%q(i,k,m) = cbuffer(cpter(i,k)+boff+m)
                end do

             end do
          end do
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call ctem_output( lchnk, ncol, lons, ip_gm1_chnk, vth_chnk, &
                            wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, &
                            th2d_chnk, w2d_chnk, thi_chnk ) 
#endif

       end do chnk_loop2

       deallocate(bpter)
       deallocate(bbuffer)
       deallocate(cbuffer)

    endif has_local_map

!
! Evaluate derived quantities
!
!+hi-waccm
#ifdef WACCM_TIPHYS
    call cnst_get_ind('O', ixo)
    call cnst_get_ind('O2', ixo2)
    call cnst_get_ind('H', ixh)
    call cnst_get_ind('H2', ixh2)
    call cnst_get_ind('N', ixn)
#endif
!-hi-waccm
    do lchnk = begchunk,endchunk
       ncol = phys_state(lchnk)%ncol
       do k=1,plev
          do i=1,ncol
             phys_state(lchnk)%pdel (i,k) = phys_state(lchnk)%pint(i,k+1) - phys_state(lchnk)%pint(i,k)
             phys_state(lchnk)%rpdel(i,k) = 1./phys_state(lchnk)%pdel(i,k)
             phys_state(lchnk)%pmid (i,k) = 0.5*(phys_state(lchnk)%pint(i,k) + phys_state(lchnk)%pint(i,k+1))
             phys_state(lchnk)%lnpmid(i,k) = log(phys_state(lchnk)%pmid(i,k))
          end do
       end do

! Attempt to remove negative constituents in bottom layer only by moving from next level
! This is a BAB kludge to avoid masses of warning messages for cloud water and ice, since
! the vertical remapping operator currently being used for cam is not strictly monotonic 
! at the endpoints.
       do m=1,ppcnst
          do i=1,ncol
             if (phys_state(lchnk)%q(i,pver,m) < qmin(m)) then
! available q in 2nd level
                qmavl = phys_state(lchnk)%q (i,pver-1,m) - qmin(m)
! required q change in bottom level rescaled to mass fraction in 2nd level
                dqreq = (qmin(m) - phys_state(lchnk)%q(i,pver,m))                         &
                      * phys_state(lchnk)%pdel(i,pver) / phys_state(lchnk)%pdel(i,pver-1)
                qbot   = phys_state(lchnk)%q(i,pver  ,m)
                qbotm1 = phys_state(lchnk)%q(i,pver-1,m)
                if (dqreq < qmavl) then
                   phys_state(lchnk)%q(i,pver  ,m) = qmin(m)
                   phys_state(lchnk)%q(i,pver-1,m) = phys_state(lchnk)%q(i,pver-1,m) - dqreq
                   if (dqreq>1.e-14 .and. iam .eq. 0) write (6,*) 'dpcoup dqreq', m, lchnk, i, qbot, qbotm1, dqreq
                else 
                   if (iam .eq. 0) write (6,*) 'dpcoup cant adjust', m, lchnk, i, qbot, qbotm1, dqreq
                end if
             end if
          end do
       end do
!+hi-waccm

#ifdef WACCM_TIPHYS

! Check for unusually large H, H2 values and issue message if found
!
       do k=1,pver
          do i=1,ncol
             if(phys_state(lchnk)%q(i,k,ixh) .gt. 0.01) then
                write(*,*)'Large H (dp_couple)',i,k,phys_state(lchnk)%q(i,k,ixh)
             endif
             if(phys_state(lchnk)%q(i,k,ixh2) .gt. 6.e-5) then
                write(*,*)'Large H2 (dp_couple)',i,k,phys_state(lchnk)%q(i,k,ixh2)
             endif
          end do
       end do
       
#endif

!-hi-waccm
!---------------------------------------------

!+hi-waccm
#ifdef WACCM_TIPHYS
! compute cpairv, rairv, mbarv, and cappav
       do k=1,pver
          do i=1,ncol
             mmro = phys_state(lchnk)%q(i,k, ixo)
             mmro2 = phys_state(lchnk)%q(i,k, ixo2)
             mmrh = phys_state(lchnk)%q(i,k, ixh)
             mmrn2 = 1.-mmro-mmro2-mmrh
             phys_state(lchnk)%mbarv(i,k) = 1./(mmro/cnst_mw(ixo) +            &
                                     mmro2/cnst_mw(ixo2) +        &
                                     mmrn2/cnst_mw(ixn)/2.+       &
                                     mmrh/cnst_mw(ixh))
             phys_state(lchnk)%rairv(i,k) = shr_const_rgas / phys_state(lchnk)%mbarv(i,k)
             phys_state(lchnk)%cpairv(i,k) = 0.5*shr_const_rgas*phys_state(lchnk)%mbarv(i,k)*         &
                                   (5.*mmro/cnst_mw(ixo)/cnst_mw(ixo)+     &
                                    7.*mmro2/cnst_mw(ixo2)/cnst_mw(ixo2)+  &
                                    7.*mmrn2/cnst_mw(ixn)/cnst_mw(ixn)/4.+ &
                                    5.*mmrh/cnst_mw(ixh)/cnst_mw(ixh))
             phys_state(lchnk)%cappav(i,k) = phys_state(lchnk)%rairv(i,k)/phys_state(lchnk)%cpairv(i,k)
          enddo
       enddo
       do k=2,pver
          do i=1,ncol
             mmro = .5*(phys_state(lchnk)%q(i,k-1, ixo)+phys_state(lchnk)%q(i,k,ixo))
             mmro2 = .5*(phys_state(lchnk)%q(i,k-1, ixo2)+phys_state(lchnk)%q(i,k,ixo2))
             mmrn2 = 1.-mmro-mmro2
             mbarvi = .5*(phys_state(lchnk)%mbarv(i,k-1)+phys_state(lchnk)%mbarv(i,k))
             tint = .5*(phys_state(lchnk)%t(i,k-1)+phys_state(lchnk)%t(i,k))
             phys_state(lchnk)%kmvis(i,k) = (4.03*mmro2/cnst_mw(ixo2)+                   &
                                 3.42*mmrn2/cnst_mw(ixn)/2.+                 &
                                 3.9*mmro/cnst_mw(ixo))*mbarvi*    &
                                 tint**0.69 * 1.e-7
             phys_state(lchnk)%kmcnd(i,k) = (56.*mmro2/cnst_mw(ixo2)+                   &
                                 56.*mmrn2/cnst_mw(ixn)/2.+                 &
                                 75.9*mmro/cnst_mw(ixo))*mbarvi*  &
                                tint**0.69 * 1.e-5
          enddo
       enddo
       do i=1,ncol
          phys_state(lchnk)%kmvis(i,1) = 1.5*phys_state(lchnk)%kmvis(i,2)-.5*phys_state(lchnk)%kmvis(i,3)
          phys_state(lchnk)%kmcnd(i,1) = 1.5*phys_state(lchnk)%kmcnd(i,2)-.5*phys_state(lchnk)%kmcnd(i,3)
          phys_state(lchnk)%kmvis(i,pver+1) = phys_state(lchnk)%kmvis(i,pver)
          phys_state(lchnk)%kmcnd(i,pver+1) = phys_state(lchnk)%kmcnd(i,pver)
       enddo
!
! Compute initial geopotential heights
!
       call geopotential_t (phys_state(lchnk)%lnpint, phys_state(lchnk)%lnpmid  , phys_state(lchnk)%pint  , &
                            phys_state(lchnk)%pmid  , phys_state(lchnk)%pdel    , phys_state(lchnk)%rpdel , &
                            phys_state(lchnk)%t     , phys_state(lchnk)%q(1,1,1), phys_state(lchnk)%rairv , &
                            gravit,  zvir    , &
                            phys_state(lchnk)%zi    , phys_state(lchnk)%zm      , ncol                )
#else
       call geopotential_t (phys_state(lchnk)%lnpint, phys_state(lchnk)%lnpmid  , phys_state(lchnk)%pint  , &
                            phys_state(lchnk)%pmid  , phys_state(lchnk)%pdel    , phys_state(lchnk)%rpdel , &
                            phys_state(lchnk)%t     , phys_state(lchnk)%q(1,1,1), rair,  gravit,  zvir    , &
                            phys_state(lchnk)%zi    , phys_state(lchnk)%zm      , ncol                )
#endif
!-hi-waccm

! Compute initial dry static energy, include surface geopotential
!+hi-waccm
#ifdef WACCM_TIPHYS
       do k = 1, pver
          do i=1,ncol
             phys_state(lchnk)%s(i,k) = phys_state(lchnk)%cpairv(i,k)*phys_state(lchnk)%t(i,k) &
                                      + gravit*phys_state(lchnk)%zm(i,k) + phys_state(lchnk)%phis(i)
          end do
       end do
#else
       do k = 1, pver
          do i=1,ncol
             phys_state(lchnk)%s(i,k) = cpair*phys_state(lchnk)%t(i,k) &
                                      + gravit*phys_state(lchnk)%zm(i,k) + phys_state(lchnk)%phis(i)
          end do
       end do
#endif
!-hi-waccm

!
! Convert dry type constituents from moist to dry mixing ratio
!
       call set_state_pdry(phys_state(lchnk))    ! First get dry pressure to use for this timestep
       call set_wet_to_dry(phys_state(lchnk))    ! Dynamics had moist, physics wants dry.


! Compute energy and water integrals of input state
       call check_energy_timestep_init(phys_state(lchnk), phys_tend(lchnk), pbuf)

    end do

    deallocate (u3)
    deallocate (v3)

!EOC
  end subroutine d_p_coupling
!-----------------------------------------------------------------------

!-----------------------------------------------------------------------
!BOP
! !IROUTINE: p_d_coupling --- convert physics output to dynamics input
!
! !INTERFACE: 
  subroutine p_d_coupling(phys_state, phys_tend, full_phys,        &
                          adiabatic,  t3,                          &
                          q3, pt,   dudt,      dvdt,    pkz,     &
                          q3xy, ptxy, dudtxy,    dvdtxy,  pkzxy,   &
                          dtime, u3s, v3s,       u3sxy,   v3sxy,   &
                          zvir,       cappa,     ptop,     pk,     &
                          peln,       ps,                          &
                          pe,         pexy,      delp,     delpxy )

! !USES:
    use dynamics_vars, only : ng_d, ng_s, yzt, q3t, yzt2
#if defined ( SPMD )
    use spmd_dyn, only : ikj_xy_to_yz, qxy_to_q,     &
                         m_ttrans, q_ttrans, r_ttrans,    &
                         ijk_xy_to_yz, rxy_to_r, local_dp_map, &
                         block_buf_nrecs, chunk_buf_nrecs
    use mod_comm, only: mp_sendirr, mp_recvirr
#endif
#if ( defined OFFLINE_DYN )
   use metdata,     only: get_met_fields
#endif
!+hi-waccm
#ifdef WACCM_TIPHYS
    use prognostics,   only: cpair3v, rair3v, cappa3v
#endif
!-hi-waccm
!-----------------------------------------------------------------------
    implicit none

! Variables ending in xy are xy-decomposition instanciations.

! !INPUT PARAMETERS:
    type(physics_state), intent(inout), dimension(begchunk:endchunk) :: phys_state
    type(physics_tend),  intent(inout), dimension(begchunk:endchunk) :: phys_tend
    logical,  intent(in) :: full_phys
    logical,  intent(in) :: adiabatic
    real(r8), intent(in) :: dtime
    real(r8), intent(in) :: zvir
    real(r8), intent(in) :: cappa
    real(r8), intent(in) :: ptop
! xy-decomposition instanciation immediately below
    real(r8), intent(in) :: pkzxy(beglonxy:endlonxy,beglatxy:endlatxy,plev)
    real(r8), intent(in) :: u3sxy(beglonxy:endlonxy,beglatxy:endlatxy,plev)
    real(r8), intent(in) :: v3sxy(beglonxy:endlonxy,beglatxy:endlatxy,plev)

! !INPUT/OUTPUT PARAMETERS:
    real(r8), intent(inout) :: u3s(plon,beglat-ng_d:endlat+ng_s,beglev:endlev)
    real(r8), intent(inout) :: v3s(plon,beglat-ng_s:endlat+ng_d,beglev:endlev)
    real(r8), intent(inout) :: pkz(plon,beglat:endlat,beglev:endlev)
    real(r8), intent(inout) :: pe(plon,beglev:endlev+1,beglat:endlat)
! xy-decomposition instantiation immediately below
    real(r8), intent(inout) :: pexy(beglonxy:endlonxy,plev+1,beglatxy:endlatxy) ! work variable

! !OUTPUT PARAMETERS:
    real(r8), intent(out) :: pt(plon,beglat-ng_d:endlat+ng_d,beglev:endlev)
    real(r8), intent(out) :: t3(plon,beglev:endlev,beglat:endlat) !  Temperature
    real(r8), intent(out) :: q3(plon,beglat-ng_d:endlat+ng_d,beglev:endlev,ppcnst) ! constituents
! xy-decomposition instantiation immediately below
    real(r8), intent(out) :: ptxy(beglonxy:endlonxy,beglatxy:endlatxy,plev)
    real(r8), intent(out) :: q3xy(beglonxy:endlonxy,beglatxy:endlatxy,plev,ppcnst)

    real(r8), intent(out) :: dudt(plon,beglev:endlev,beglat:endlat) ! U-velocity tendency
    real(r8), intent(out) :: dvdt(plon,beglev:endlev,beglat:endlat) ! V-velocity tendency
! xy-decomposition instantiation immediately below
    real(r8), intent(out) :: dudtxy(beglonxy:endlonxy,plev,beglatxy:endlatxy)
    real(r8), intent(out) :: dvdtxy(beglonxy:endlonxy,plev,beglatxy:endlatxy)

    real(r8), intent(out) :: pk(plon,beglat:endlat,beglev:endlev+1)
    real(r8), intent(out) :: peln(plon,beglev:endlev+1,beglat:endlat)
    real(r8), intent(out) :: delp(plon,beglat:endlat,beglev:endlev)
! xy-decomposition instanciation immediately below
    real(r8), intent(out) :: delpxy(beglonxy:endlonxy,beglatxy:endlatxy,plev) ! work variable

    real(r8), intent(out) :: ps(plon,beglat:endlat)

! !DESCRIPTION:
!
!   Coupler for converting physics output variables into dynamics input variables
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.06.08   AAM        Compactified
!   01.07.13   AAM        Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   02.08.06   Sawyer     T3 added -- updated to current temperature
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:
    integer :: i, ib, k, m, j, lchnk  ! indices
    integer :: ncol                   ! number of columns in current chunk
    integer :: lats(pcols)            ! array of latitude indices
    integer :: lons(pcols)            ! array of longitude indices
    integer :: blksiz                 ! number of columns in 2D block
    integer :: tsize                  ! amount of data per grid point passed to physics
    integer, allocatable, dimension(:,:) :: bpter
                                     ! offsets into block buffer for unpacking data
    integer :: cpter(pcols,0:pver)   ! offsets into chunk buffer for packing data
    integer :: iqa, iqb, iqc, iqd, mq     ! used for tracer transpose grouping
#ifdef WACCM_TIPHYS
!+hi-waccm
    integer :: ixo, ixo2, ixh, ixn
    real(r8):: mmro, mmro2, mmrh, mmrn2
    real(r8):: mwbar
!-hi-waccm
#endif

    real(r8) :: dt5
    real(r8), allocatable, dimension(:) :: &
       bbuffer, cbuffer               ! transpose buffers
#if (! defined SPMD)
    integer  :: block_buf_nrecs = 0
    integer  :: chunk_buf_nrecs = 0
    logical  :: local_dp_map=.true. 
#endif
!---------------------------End Local workspace-------------------------

#if ( defined OFFLINE_DYN )
!
! set the dyn flds to offline meteorological data
!
      call get_met_fields( phys_state, phys_tend, dtime )
#endif
! -------------------------------------------------------------------------
! Copy temperature, tendencies and constituents to dynamics data structures
! For adiabatic case, compute transposes only (2-D decomposition)
! -------------------------------------------------------------------------

! -------------------------------------------------------------------------
! Copy onto xy decomposition, then transpose to yz decomposition
! -------------------------------------------------------------------------

    if (.not. adiabatic) then

       if (local_dp_map) then

!$omp parallel do private(lchnk, i, k, ncol, m, lons, lats)

          do lchnk = begchunk,endchunk
             ncol = get_ncols_p(lchnk)
             call get_lon_all_p(lchnk, ncol, lons)
             call get_lat_all_p(lchnk, ncol, lats)

             do k = 1, plev
                do i = 1, ncol
                   dvdtxy(lons(i),k,lats(i)) = phys_tend(lchnk)%dvdt(i,k)
                   dudtxy(lons(i),k,lats(i)) = phys_tend(lchnk)%dudt(i,k)
                   ptxy  (lons(i),lats(i),k) = phys_state(lchnk)%t(i,k)
                   delpxy(lons(i),lats(i),k) = phys_state(lchnk)%pdel(i,k)
                enddo
             enddo

             do m=1,ppcnst
                do k=1,plev
                   do i=1,ncol
                      q3xy(lons(i),lats(i),k,m) = phys_state(lchnk)%q(i,k,m)
                   end do
                end do
             end do

          enddo

       else

          tsize = 4 + ppcnst

          blksiz = (endlatxy-beglatxy+1)*(endlonxy-beglonxy+1)
          allocate(bpter(blksiz,0:plev))
          allocate(bbuffer(tsize*block_buf_nrecs))
          allocate(cbuffer(tsize*chunk_buf_nrecs))

          do lchnk = begchunk,endchunk
             ncol = get_ncols_p(lchnk)

             call chunk_to_block_send_pters(lchnk,pcols,plev+1,tsize,cpter)

             do i=1,ncol

                do k=1,plev

                   cbuffer(cpter(i,k))   = phys_tend(lchnk)%dvdt(i,k)
                   cbuffer(cpter(i,k)+1) = phys_tend(lchnk)%dudt(i,k)
                   cbuffer(cpter(i,k)+2) = phys_state(lchnk)%t(i,k)
                   cbuffer(cpter(i,k)+3) = phys_state(lchnk)%pdel(i,k)

                   do m=1,ppcnst
                      cbuffer(cpter(i,k)+3+m) = phys_state(lchnk)%q(i,k,m)
                   end do

                end do
  
             end do

          end do

          call transpose_chunk_to_block(tsize, cbuffer, bbuffer)

          call chunk_to_block_recv_pters(iam+1,blksiz,plev+1,tsize,bpter)

          ib = 0
          do j=beglatxy,endlatxy
             do i=beglonxy,endlonxy
                ib = ib + 1

                do k=1,plev
                   dvdtxy(i,k,j) = bbuffer(bpter(ib,k))
                   dudtxy(i,k,j) = bbuffer(bpter(ib,k)+1)
                   ptxy  (i,j,k) = bbuffer(bpter(ib,k)+2)
                   delpxy(i,j,k) = bbuffer(bpter(ib,k)+3)

                   do m=1,ppcnst
                      q3xy(i,j,k,m) = bbuffer(bpter(ib,k)+3+m)
                   end do

                enddo
             enddo
          enddo

          deallocate(bpter)
          deallocate(bbuffer)
          deallocate(cbuffer)

       endif
    endif

    if (twod_decomp .eq. 1) then

#if defined (SPMD)
! Transpose from xy to yz decomposition
       call t_startf('transpose_total')
       call t_startf('transpose_bck2')

       if (.not. adiabatic) then
! Transpose dudt and dvdt
          call mp_sendirr( dudtxy, ikj_xy_to_yz%SendDesc,        &
                           ikj_xy_to_yz%RecvDesc, dudt )
          call mp_recvirr( dudt, ikj_xy_to_yz%RecvDesc )
          call mp_sendirr( dvdtxy, ikj_xy_to_yz%SendDesc,        &
                           ikj_xy_to_yz%RecvDesc, dvdt )
          call mp_recvirr( dvdt, ikj_xy_to_yz%RecvDesc )
          if (.not. full_phys) then
! Transpose pkz
! WS 02.08.06 : pkz needed to determine potential temperature,
!               pt contains temperature needed for t3
             call mp_sendirr( pkzxy, ijk_xy_to_yz%SendDesc,      &
                              ijk_xy_to_yz%RecvDesc, pkz )
             call mp_recvirr( pkz, ijk_xy_to_yz%RecvDesc )
          endif
          call mp_sendirr( delpxy, ijk_xy_to_yz%SendDesc,      &
                           ijk_xy_to_yz%RecvDesc, delp )
          call mp_recvirr( delp, ijk_xy_to_yz%RecvDesc )
       endif

! Transpose pt
       call mp_sendirr( ptxy, ijk_xy_to_yz%SendDesc,             &
                        ijk_xy_to_yz%RecvDesc, yzt )

       call mp_recvirr( yzt, ijk_xy_to_yz%RecvDesc )
! Transpose u3s
       call mp_sendirr( u3sxy, ijk_xy_to_yz%SendDesc,              &
                        ijk_xy_to_yz%RecvDesc, yzt2 )

!$omp parallel do private(i,j,k)
       do k=beglev,endlev
          do j = beglat,endlat
             do i=1,plon
                pt(i,j,k) = yzt(i,j,k)
             enddo
          enddo
       enddo

       call mp_recvirr( yzt2, ijk_xy_to_yz%RecvDesc )
! Transpose v3s
       call mp_sendirr( v3sxy, ijk_xy_to_yz%SendDesc,            &
                        ijk_xy_to_yz%RecvDesc, yzt )

!$omp parallel do private(i,j,k)
       do k=beglev,endlev
          do j = beglat,endlat
             do i=1,plon
                u3s(i,j,k) = yzt2(i,j,k)
             enddo
          enddo
       enddo

       call mp_recvirr( yzt, ijk_xy_to_yz%RecvDesc )
!$omp parallel do private(i,j,k)
       do k=beglev,endlev
          do j = beglat,endlat
             do i=1,plon
                v3s(i,j,k) = yzt(i,j,k)
             enddo
          enddo
       enddo

       call t_stopf('transpose_bck2')

       call t_startf('transpose_qbck')

! Transpose q3
       iqa = 1
       do mq = 1, q_ttrans
          iqb = iqa + m_ttrans - 1
          call mp_sendirr( q3xy(:,:,:,iqa:iqb), qxy_to_q%SendDesc,    &
                           qxy_to_q%RecvDesc, q3t(:,:,:,iqa:iqb) )
          if (mq .gt. 1) then
            iqc = iqa - m_ttrans
            iqd = iqc + m_ttrans - 1
!$omp parallel do private(i,j,k,m)
            do m=iqc,iqd
               do k=beglev,endlev
                  do j = beglat,endlat
                     do i=1,plon
                        q3(i,j,k,m) = q3t(i,j,k,m)
                     enddo
                  enddo
               enddo
            enddo
          endif
          call mp_recvirr( q3t(:,:,:,iqa:iqb), qxy_to_q%RecvDesc )
          iqa = iqa + m_ttrans
       enddo
       if (r_ttrans .ne. 0) then
          iqb = iqa + r_ttrans - 1
          call mp_sendirr( q3xy(:,:,:,iqa:iqb), rxy_to_r%SendDesc,     &
                           rxy_to_r%RecvDesc, q3t(:,:,:,iqa:iqb) )
       endif
       if (q_ttrans .ne. 0) then
            iqc = iqa - m_ttrans
            iqd = iqc + m_ttrans - 1
!$omp parallel do private(i,j,k,m)
          do m=iqc,iqd
             do k=beglev,endlev
                do j = beglat,endlat
                   do i=1,plon
                      q3(i,j,k,m) = q3t(i,j,k,m)
                   enddo
                enddo
             enddo
          enddo
       endif
       if (r_ttrans .ne. 0) then
          call mp_recvirr  ( q3t(:,:,:,iqa:iqb), rxy_to_r%RecvDesc )
!$omp parallel do private(i,j,k,m)
          do m=iqa,iqb
             do k=beglev,endlev
                do j = beglat,endlat
                   do i=1,plon
                      q3(i,j,k,m) = q3t(i,j,k,m)
                   enddo
                enddo
             enddo
          enddo
       endif

       call t_stopf('transpose_qbck')
       call t_stopf('transpose_total')
#endif

    else

       call t_startf('transpose_total')
       call t_startf('transpose_bck2')
       if (.not. adiabatic) then
!$omp parallel do private(i,j,k)
          do j = beglat,endlat
             do k=beglev,endlev
                do i=1,plon
                   dudt(i,k,j) = dudtxy(i,k,j)
                   dvdt(i,k,j) = dvdtxy(i,k,j)
                enddo
             enddo
          enddo
          if (.not. full_phys) then
!$omp parallel do private(i,j,k)
             do k=beglev,endlev
                do j = beglat,endlat
                   do i=1,plon
                      pkz(i,j,k) = pkzxy(i,j,k)
                   enddo
                enddo
             enddo
          endif
!$omp parallel do private(i,j,k)
          do k=beglev,endlev
             do j = beglat,endlat
                do i=1,plon
                   delp(i,j,k) = delpxy(i,j,k)
                enddo
             enddo
          enddo
       endif

!$omp parallel do private(i,j,k)
       do k=beglev,endlev
          do j = beglat,endlat
             do i=1,plon
                pt(i,j,k) = ptxy(i,j,k)
                u3s(i,j,k) = u3sxy(i,j,k)
                v3s(i,j,k) = v3sxy(i,j,k)
             enddo
          enddo
       enddo
       call t_stopf('transpose_bck2')

       call t_startf('transpose_qbck')
!$omp parallel do private(i,j,k,m)
       do m=1,ppcnst
          do k=beglev,endlev
             do j = beglat,endlat
                do i=1,plon
                   q3(i,j,k,m) = q3xy(i,j,k,m)
                enddo
             enddo
          enddo
       enddo
       call t_stopf('transpose_qbck')
       call t_stopf('transpose_total')

    endif

    if (.not. adiabatic) then
! WS: 02.08.06: Update t3 to temperature; is this necessary for adiabatic??
!$omp parallel do private(i,j,k)
       do k=beglev,endlev
          do j = beglat,endlat
             do i=1,plon
                t3(i,k,j) = pt(i,j,k)
             enddo
          enddo
       enddo

       if (.not. full_phys) then
!$omp parallel do private(i, j, k)
          do k=1,plev
             do j=beglat,endlat
                do i=1,plon
                   pt(i,j,k) = pt(i,j,k) / pkz(i,j,k)
                enddo
             enddo
          enddo
       endif

! -------------------------------------------------------------------------
! Update u3s and v3s from tendencies dudt and dvdt.
! -------------------------------------------------------------------------
       dt5 = .5*dtime
       call uv3s_update(dudt, u3s, dvdt, v3s, dt5,                     &
                        plon, plat, plev, beglat, endlat,              &
                        ng_d, ng_s, ng_s, ng_d, beglev, endlev)

    endif

!+hi-waccm
#ifdef WACCM_TIPHYS
! compute cpair3v, rair3v, and cappa3v from q3
    call cnst_get_ind('O', ixo)
    call cnst_get_ind('O2', ixo2)
    call cnst_get_ind('H', ixh)
    call cnst_get_ind('N', ixn)
    do k=beglev,endlev
       do j = beglat,endlat
          do i=1,plon
             mmro = q3(i,j,k, ixo)
             mmro2 =q3(i,j,k, ixo2)
             mmrh = q3(i,j,k, ixh)
             mmrn2 = 1.-mmro-mmro2-mmrh

             mwbar = 1./(mmro/cnst_mw(ixo) +            &
                         mmro2/cnst_mw(ixo2) +        &
                         mmrn2/cnst_mw(ixn)/2.+       &
                         mmrh/cnst_mw(ixh))
             cpair3v(i,j,k) = 0.5*shr_const_rgas*mwbar*                 &
                                (5.*mmro/cnst_mw(ixo)/cnst_mw(ixo)+     &
                                 7.*mmro2/cnst_mw(ixo2)/cnst_mw(ixo2)+  &
                                 7.*mmrn2/cnst_mw(ixn)/cnst_mw(ixn)/4.+ &
                                    5.*mmrh/cnst_mw(ixh)/cnst_mw(ixh))
             rair3v(i,j,k)  = shr_const_rgas / mwbar
             cappa3v(i,j,k) = rair3v(i,j,k) / cpair3v(i,j,k)
          enddo
       enddo
    enddo
#endif
!-hi-waccm

! -------------------------------------------------------------------------
! Compute pt, q3, pe, delp, ps, peln, pkz and pk.
! For 2-D decomposition, delp is transposed to delpxy, pexy is computed
!  from delpxy (and ptop), and pexy is transposed back to pe.
! Note that pt, q3, delp and pe are input parameters as well.
! For ideal or adiabatic physics, fewer quantities are updated.
! -------------------------------------------------------------------------
    call p_d_adjust(pe,   pt,   q3,   delp,   ps,              &
                    peln, pk,   pkz,  zvir,    cappa,  delpxy, &
                    pexy, ptop, full_phys  )
 
!EOC
  end subroutine p_d_coupling
!-----------------------------------------------------------------------
end module dp_coupling
