module dp_coupling
!BOP
!
! !MODULE: dp_coupling --- dynamics-physics coupling module
!
   use shr_kind_mod, only: r8 => shr_kind_r8
   use rgrid,         only: nlon
   use ppgrid,        only: pcols, pver
   use phys_grid
   use phys_buffer,   only: pbuf_fld, pbuf_size_max
   use physics_types, only: physics_state, physics_tend
   use constituents,  only: pcnst, qmin
   use physconst,     only: cpair, gravit, rair, zvir
   use geopotential,  only: geopotential_t
   use check_energy,  only: check_energy_timestep_init
   use dynamics_vars, only: T_FVDYCORE_GRID, T_TRACERS
   use dyn_comp,      only: dyn_import_t, dyn_export_t
   use abortutils,    only: endrun
#if defined ( SPMD )
   use mpishorthand, only : mpicom
   use spmd_dyn,     only : local_dp_map, block_buf_nrecs, chunk_buf_nrecs
#endif
   use perf_mod

!
! !PUBLIC MEMBER FUNCTIONS:
      PUBLIC d_p_coupling, p_d_coupling

!
! !DESCRIPTION:
!
!      This module provides 
!
!      \begin{tabular}{|l|l|} \hline \hline
!        d\_p\_coupling    &  dynamics output to physics input \\ \hline
!        p\_d\_coupling    &  physics output to dynamics input \\ \hline 
!                                \hline
!      \end{tabular}
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.10.01   Lin        Various revisions
!   01.03.26   Sawyer     Added ProTeX documentation
!   01.06.27   Mirin      Separate noncoupling coding into new routines
!   01.07.13   Mirin      Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   03.03.28   Boville    set all physics_state elements, add check_energy_timestep_init
!   03.08.13   Sawyer     Removed ghost N1 region in u3sxy
!   05.06.28   Sawyer     Simplified interfaces -- only XY decomposition 
!   05.10.25   Sawyer     Extensive refactoring, dyn_interface
!   05.11.10   Sawyer     Now using dyn_import/export_t containers
!   06.07.01   Sawyer     Transitioned constituents to T_TRACERS
!
!EOP
!-----------------------------------------------------------------------

   private 
   real(r8), parameter ::  D0_5                    =  0.5_r8
   real(r8), parameter ::  D1_0                    =  1.0_r8

CONTAINS

!-----------------------------------------------------------------------
!BOP
! !IROUTINE: d_p_coupling --- convert dynamics output to physics input
!
! !INTERFACE: 
  subroutine d_p_coupling(grid,  phys_state, phys_tend, pbuf, full_phys,&
                          dyn_out )

! !USES:
    use constituents,  only: cnst_get_type_byind
    use physics_types, only: set_state_pdry, set_wet_to_dry

#if ( defined WACCM_GHG || defined WACCM_MOZART )
    use pmgrid, only : plev, plevp
    use ctem, only   : ctem_driver, ctem_output
#endif
!-----------------------------------------------------------------------
    implicit none
!-----------------------------------------------------------------------
! !INPUT PARAMETERS:
!
    type(T_FVDYCORE_GRID), intent(in) :: grid ! FV Dynamics grid

    logical,  intent(in) :: full_phys

    type(dyn_export_t), intent(in)    :: dyn_out  ! dynamics export 

! !OUTPUT PARAMETERS:

    type(physics_state), intent(inout), dimension(begchunk:endchunk) :: phys_state
    type(physics_tend ),   intent(out), dimension(begchunk:endchunk) :: phys_tend
    type(pbuf_fld),      intent(inout), dimension(pbuf_size_max)     :: pbuf

! !DESCRIPTION:
!
!   Coupler for converting dynamics output variables into physics 
!   input variables
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.07.13   AAM        Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   02.05.02   Sawyer     u3s made inout due to ghosting in d2a3dikj
!   03.08.05   Sawyer     Removed pe11k, pe11kln (for defunct Rayl fric)
!   04.08.29   Eaton      Added lat, lon coords to physics_state type
!   05.06.28   Sawyer     Simplified interface -- on XY decomp vars.
!   05.07.06   Sawyer     Added dyn_state as argument
!   05.10.31   Sawyer     Refactoring, replaced dyn_state by dyn_interface
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:

! Variables from dynamics export container
    real(r8), pointer :: phisxy(:,:)              ! surface geopotential
    real(r8), pointer :: psxy (:,:)               ! surface pressure
    real(r8), pointer :: u3sxy(:,:,:)             ! u-wind on d-grid
    real(r8), pointer :: v3sxy(:,:,:)             ! v-wind on d-grid
    real(r8), pointer :: ptxy (:,:,:)             ! Virtual pot temp
    type (T_TRACERS), pointer :: tracer(:)        ! constituents
    real(r8), pointer :: omgaxy(:,:,:)            ! vertical velocity
    real(r8), pointer :: pexy  (:,:,:)            ! edge pressure
    real(r8), pointer :: pelnxy(:,:,:)            ! log(pe)
    real(r8), pointer :: pkxy  (:,:,:)            ! pe**cappa
    real(r8), pointer :: pkzxy (:,:,:)            ! f-v mean of pk

    integer :: i,ib,j,k,m,lchnk      ! indices
    integer :: ncol                  ! number of columns in current chunk
    integer :: lats(pcols)           ! array of latitude indices
    integer :: lons(pcols)           ! array of longitude indices
    integer :: blksiz                ! number of columns in 2D block
    integer :: tsize                 ! amount of data per grid point passed to physics
    integer, allocatable, dimension(:,:) :: bpter
                                     ! offsets into block buffer for packing data
    integer :: cpter(pcols,0:pver)   ! offsets into chunk buffer for unpacking data

    real(r8) :: rlat(pcols)          ! array of latitudes (radians)
    real(r8) :: rlon(pcols)          ! array of longitudes (radians)
    real(r8) :: qmavl                ! available q at level pver-1
    real(r8) :: dqreq                ! q change at pver-1 required to remove q<qmin at pver
    real(r8) :: qbot                 ! bottom level q before change
    real(r8) :: qbotm1               ! bottom-1 level q before change
    real(r8) :: pic(pcols)           ! ps**cappa
    real(r8), allocatable :: u3(:, :, :)       ! u-wind on a-grid
    real(r8), allocatable :: v3(:, :, :)       ! v-wind on a-grid
    real(r8), allocatable, dimension(:) :: bbuffer, cbuffer
                                     ! transpose buffers

    integer  :: im, jm, km, kmp1, iam
    integer  :: ifirstxy, ilastxy, jfirstxy, jlastxy
    integer  :: ic, jc
    integer  :: astat
    integer  :: boff

#if ( defined WACCM_GHG || defined WACCM_MOZART )
    integer :: ip_gm1(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy)
    integer :: ip_gm1_chnk(pcols)
#endif

#if ( defined WACCM_GHG || defined WACCM_MOZART )
    real(r8) :: vth(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: wth(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: uv(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: uw(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: uzm(plev,grid%jfirstxy:grid%jlastxy)
    real(r8) :: u2d(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: v2d(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: th2d(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: w2d(plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: thi(grid%ifirstxy:grid%ilastxy,plevp,grid%jfirstxy:grid%jlastxy)
    real(r8) :: vth_chnk(pcols,plevp)
    real(r8) :: wth_chnk(pcols,plevp)
    real(r8) :: uv_chnk(pcols,plevp)
    real(r8) :: uw_chnk(pcols,plevp)
    real(r8) :: u2d_chnk(pcols,plevp)
    real(r8) :: v2d_chnk(pcols,plevp)
    real(r8) :: th2d_chnk(pcols,plevp)
    real(r8) :: w2d_chnk(pcols,plevp)
    real(r8) :: thi_chnk(pcols,plevp)
#endif

#if (! defined SPMD)
    integer  :: block_buf_nrecs = 0
    integer  :: chunk_buf_nrecs = 0
    integer  :: mpicom = 0
    logical  :: local_dp_map=.true. 
#endif
!---------------------------End Local workspace-------------------------

    phisxy   => dyn_out%phis
    psxy     => dyn_out%ps
    u3sxy    => dyn_out%u3s
    v3sxy    => dyn_out%v3s
    ptxy     => dyn_out%pt
    tracer   => dyn_out%tracer

    omgaxy   => dyn_out%omga
    pexy     => dyn_out%pe
    pelnxy   => dyn_out%peln
    pkxy     => dyn_out%pk
    pkzxy    => dyn_out%pkz

    im       = grid%im
    jm       = grid%jm
    km       = grid%km
    kmp1     = km + 1

    ifirstxy = grid%ifirstxy
    ilastxy  = grid%ilastxy
    jfirstxy = grid%jfirstxy
    jlastxy  = grid%jlastxy

    iam      = grid%iam
!-----------------------------------------------------------------------
! Transform dynamics staggered winds to physics grid (D=>A)
!-----------------------------------------------------------------------

    call t_startf ('d2a3dikj')
    allocate (u3(ifirstxy:ilastxy, km, jfirstxy:jlastxy))
    allocate (v3(ifirstxy:ilastxy, km, jfirstxy:jlastxy))

    call d2a3dikj( grid, u3sxy,  v3sxy, u3, v3 )
    call t_stopf  ('d2a3dikj')

#if ( defined WACCM_GHG || defined WACCM_MOZART )
    call t_startf('DP_CPLN: ctem')
    if ( tracer(1)%is_r4 ) then
       call endrun ('DP_COUPLING:d_p_coupling -- real 4 tracers not yet supported, aborting')
    else
       call ctem_driver( u3, v3, omgaxy, ptxy(:,jfirstxy:jlastxy,:), tracer(1)%content(:,jfirstxy:jlastxy,:), &
                         vth, wth, uv, uw, uzm, u2d, &
                         v2d, th2d, w2d, ip_gm1, thi, &
                         psxy, pexy, jfirstxy, jlastxy, ifirstxy, ilastxy, grid )
    endif
    call t_stopf('DP_CPLN: ctem')
#endif
         
!-----------------------------------------------------------------------
! Copy data from dynamics data structure to physics data structure
!-----------------------------------------------------------------------
has_local_map : &
    if (local_dp_map) then

#if ( defined WACCM_GHG || defined WACCM_MOZART )
!$omp parallel do private (lchnk, ncol, i, k, m, ic, jc, lons, lats, pic, ip_gm1_chnk, vth_chnk, &
!$omp                      wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, th2d_chnk, w2d_chnk, thi_chnk)
#else
!$omp parallel do private (lchnk, ncol, i, k, m, ic, jc, lons, lats, pic)
#endif
chnk_loop1 : &
       do lchnk = begchunk,endchunk
          ncol = phys_state(lchnk)%ncol
          call get_lon_all_p(lchnk, ncol, lons)
          call get_lat_all_p(lchnk, ncol, lats)

          do i=1,ncol
             ic = lons(i)
             jc = lats(i)
             phys_state(lchnk)%ps(i)   = psxy(ic,jc)
             phys_state(lchnk)%phis(i) = phisxy(ic,jc)
             pic(i) = pkxy(ic,jc,pver+1)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             ip_gm1_chnk(i)            = ip_gm1(ic,jc)
             vth_chnk(i,pver+1)        = vth(pver+1,jc)
             wth_chnk(i,pver+1)        = wth(pver+1,jc)
             uv_chnk(i,pver+1)         = uv(pver+1,jc)
             uw_chnk(i,pver+1)         = uw(pver+1,jc)
             u2d_chnk(i,pver+1)        = u2d(pver+1,jc)
             v2d_chnk(i,pver+1)        = v2d(pver+1,jc)
             th2d_chnk(i,pver+1)       = th2d(pver+1,jc)
             w2d_chnk(i,pver+1)        = w2d(pver+1,jc)
             thi_chnk(i,pver+1)        = thi(ic,pver+1,jc)
#endif
          enddo
          if ( tracer(1)%is_r4 ) then
            call endrun ('DP_COUPLING:d_p_coupling -- real 4 tracers not yet supported, aborting')
          else
             do k=1,km
                do i=1,ncol
                   ic = lons(i)
                   jc = lats(i)
                   phys_state(lchnk)%u    (i,k) = u3(ic,k,jc)
                   phys_state(lchnk)%v    (i,k) = v3(ic,k,jc)
                   phys_state(lchnk)%omega(i,k) = omgaxy(ic,k,jc)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                   vth_chnk(i,k)                = vth(k,jc)
                   wth_chnk(i,k)                = wth(k,jc)
                   uv_chnk(i,k)                 = uv(k,jc)
                   uw_chnk(i,k)                 = uw(k,jc)
                   phys_state(lchnk)%uzm(i,k)   = uzm(k,jc)
                   u2d_chnk(i,k)                = u2d(k,jc)
                   v2d_chnk(i,k)                = v2d(k,jc)
                   th2d_chnk(i,k)               = th2d(k,jc)
                   w2d_chnk(i,k)                = w2d(k,jc)
                   thi_chnk(i,k)                = thi(ic,k,jc)
#endif
                   if (full_phys) then
                     phys_state(lchnk)%t    (i,k) = ptxy(ic,jc,k) / (D1_0 + zvir*tracer(1)%content(ic,jc,k))
                     phys_state(lchnk)%exner(i,k) = pic(i) / pkzxy(ic,jc,k) 
                   else
                      phys_state(lchnk)%t    (i,k) = ptxy(ic,jc,k) * pkzxy(ic,jc,k)
                   end if
                end do
             end do
          endif

          do k=1,kmp1
             do i=1,ncol
!
! edge-level pressure arrays: copy from the arrays computed by dynpkg
!
                ic = lons(i)
                jc = lats(i)
                phys_state(lchnk)%pint  (i,k) = pexy  (ic,k,jc)
                phys_state(lchnk)%lnpint(i,k) = pelnxy(ic,k,jc)
             end do
          end do

!
! Copy constituents
! Dry types converted from moist to dry m.r. at bottom of this routine
!
          do m=1,pcnst
             if ( tracer(m)%is_r4 ) then
                do k=1,km
                   do i=1,ncol
                      phys_state(lchnk)%q(i,k,m) = &
                         tracer(m)%content_r4(lons(i),lats(i),k)
                   end do
                end do
             else
                do k=1,km
                   do i=1,ncol
                      phys_state(lchnk)%q(i,k,m) = &
                         tracer(m)%content(lons(i),lats(i),k)
                   end do
                end do
             endif
          end do
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call ctem_output( lchnk, ncol, lons, ip_gm1_chnk, vth_chnk, &
                            wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, &
                            th2d_chnk, w2d_chnk, thi_chnk ) 
#endif
 
       end do chnk_loop1

    else has_local_map
#if ( defined WACCM_GHG || defined WACCM_MOZART )
       tsize = 17 + pcnst
       boff  = 15
#else
       tsize = 7 + pcnst
       boff  = 6
#endif
 
       blksiz = (jlastxy-jfirstxy+1)*(ilastxy-ifirstxy+1)
       allocate( bpter(blksiz,0:km),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate bpter; error = ',astat
          call endrun
       end if
       allocate( bbuffer(tsize*block_buf_nrecs),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate bbuffer; error = ',astat
          call endrun
       end if
       allocate( cbuffer(tsize*chunk_buf_nrecs),stat=astat )
       if( astat /= 0 ) then
          write(*,*) 'd_p_coupling: failed to allocate cbuffer; error = ',astat
          call endrun
       end if

       call block_to_chunk_send_pters( iam+1, blksiz, kmp1, tsize, bpter )

!dir$ concurrent
       do j=jfirstxy,jlastxy
!dir$ concurrent
          do i=ifirstxy,ilastxy
             ib = (j-jfirstxy)*(ilastxy-ifirstxy+1) + (i-ifirstxy+1)

             bbuffer(bpter(ib,0))   = pexy(i,kmp1,j)
             bbuffer(bpter(ib,0)+1) = pelnxy(i,kmp1,j)
             bbuffer(bpter(ib,0)+2) = psxy(i,j)
             bbuffer(bpter(ib,0)+3) = phisxy(i,j)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             bbuffer(bpter(ib,0)+4) = ip_gm1(i,j)
             bbuffer(bpter(ib,0)+7) = vth(plevp,j)
             bbuffer(bpter(ib,0)+8) = wth(plevp,j)
             bbuffer(bpter(ib,0)+9) = uv(plevp,j)
             bbuffer(bpter(ib,0)+10) = uw(plevp,j)
             bbuffer(bpter(ib,0)+11) = u2d(plevp,j)
             bbuffer(bpter(ib,0)+12) = v2d(plevp,j)
             bbuffer(bpter(ib,0)+13) = th2d(plevp,j)
             bbuffer(bpter(ib,0)+14) = w2d(plevp,j)
             bbuffer(bpter(ib,0)+15) = thi(i,plevp,j)
#endif

!dir$ concurrent
             do k=1,km

                bbuffer(bpter(ib,k))   = pexy(i,k,j)
                bbuffer(bpter(ib,k)+1) = pelnxy(i,k,j)
                bbuffer(bpter(ib,k)+2) = u3    (i,k,j)
                bbuffer(bpter(ib,k)+3) = v3    (i,k,j)
                bbuffer(bpter(ib,k)+4) = omgaxy(i,k,j)
                if (full_phys) then    ! For time being, forget about is_r4
                   bbuffer(bpter(ib,k)+5) = ptxy(i,j,k) / (D1_0 + zvir*tracer(1)%content(i,j,k))
                   bbuffer(bpter(ib,k)+6) = pkxy(i,j,pver+1) / pkzxy(i,j,k) 
                else
                   bbuffer(bpter(ib,k)+6) = ptxy(i,j,k) * pkzxy(i,j,k)
                end if
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                bbuffer(bpter(ib,k)+7)  = vth(k,j)
                bbuffer(bpter(ib,k)+8)  = wth(k,j)
                bbuffer(bpter(ib,k)+9)  = uv(k,j)
                bbuffer(bpter(ib,k)+10) = uw(k,j)
                bbuffer(bpter(ib,k)+11) = u2d(k,j)
                bbuffer(bpter(ib,k)+12) = v2d(k,j)
                bbuffer(bpter(ib,k)+13) = th2d(k,j)
                bbuffer(bpter(ib,k)+14) = w2d(k,j)
                bbuffer(bpter(ib,k)+15) = thi(i,k,j)
                bbuffer(bpter(ib,k)+16) = uzm(k,j)
#endif

                do m=1,pcnst    ! For time being, forget about is_r4
                   bbuffer(bpter(ib,k)+boff+m) = tracer(m)%content(i,j,k)
                end do

             end do
          end do
       end do

       call t_barrierf('sync_blk_to_chk', mpicom)
       call t_startf ('block_to_chunk')
       call transpose_block_to_chunk(tsize, bbuffer, cbuffer)
       call t_stopf  ('block_to_chunk')

chnk_loop2 : &
       do lchnk = begchunk,endchunk
          ncol = phys_state(lchnk)%ncol
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call get_lon_all_p( lchnk, ncol, lons )
#endif

          call block_to_chunk_recv_pters(lchnk,pcols,pver+1,tsize,cpter)

          do i=1,ncol

             phys_state(lchnk)%pint  (i,pver+1) = cbuffer(cpter(i,0))
             phys_state(lchnk)%lnpint(i,pver+1) = cbuffer(cpter(i,0)+1)
             phys_state(lchnk)%ps(i)            = cbuffer(cpter(i,0)+2)
             phys_state(lchnk)%phis(i)          = cbuffer(cpter(i,0)+3)
#if ( defined WACCM_GHG || defined WACCM_MOZART )
             ip_gm1_chnk(i)                     = cbuffer(cpter(i,0)+4)
             vth_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+7)
             wth_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+8)
             uv_chnk(i,pver+1)                  = cbuffer(cpter(i,0)+9)
             uw_chnk(i,pver+1)                  = cbuffer(cpter(i,0)+10)
             u2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+11)
             v2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+12)
             th2d_chnk(i,pver+1)                = cbuffer(cpter(i,0)+13)
             w2d_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+14)
             thi_chnk(i,pver+1)                 = cbuffer(cpter(i,0)+15)
#endif

             do k=1,km

                phys_state(lchnk)%pint  (i,k) = cbuffer(cpter(i,k))
                phys_state(lchnk)%lnpint(i,k) = cbuffer(cpter(i,k)+1)
                phys_state(lchnk)%u     (i,k) = cbuffer(cpter(i,k)+2)
                phys_state(lchnk)%v     (i,k) = cbuffer(cpter(i,k)+3)
                phys_state(lchnk)%omega (i,k) = cbuffer(cpter(i,k)+4)
                if (full_phys) then
                   phys_state(lchnk)%t    (i,k) = cbuffer(cpter(i,k)+5)
                   phys_state(lchnk)%exner(i,k) = cbuffer(cpter(i,k)+6)
                else
                   phys_state(lchnk)%t    (i,k) = cbuffer(cpter(i,k)+6)
                end if
#if ( defined WACCM_GHG || defined WACCM_MOZART )
                vth_chnk(i,k)                   = cbuffer(cpter(i,k)+7)
                wth_chnk(i,k)                   = cbuffer(cpter(i,k)+8)
                uv_chnk(i,k)                    = cbuffer(cpter(i,k)+9)
                uw_chnk(i,k)                    = cbuffer(cpter(i,k)+10)
                u2d_chnk(i,k)                   = cbuffer(cpter(i,k)+11)
                v2d_chnk(i,k)                   = cbuffer(cpter(i,k)+12)
                th2d_chnk(i,k)                  = cbuffer(cpter(i,k)+13)
                w2d_chnk(i,k)                   = cbuffer(cpter(i,k)+14)
                thi_chnk(i,k)                   = cbuffer(cpter(i,k)+15)
                phys_state(lchnk)%uzm(i,k)      = cbuffer(cpter(i,k)+16)
#endif
                
                ! dry type constituents converted from moist to dry at bottom of routine
                do m=1,pcnst
                   phys_state(lchnk)%q(i,k,m) = cbuffer(cpter(i,k)+boff+m)
                end do

             end do
          end do
#if ( defined WACCM_GHG || defined WACCM_MOZART )
          call ctem_output( lchnk, ncol, lons, ip_gm1_chnk, vth_chnk, &
                            wth_chnk, uv_chnk, uw_chnk, u2d_chnk, v2d_chnk, &
                            th2d_chnk, w2d_chnk, thi_chnk ) 
#endif

       end do chnk_loop2

       deallocate(bpter)
       deallocate(bbuffer)
       deallocate(cbuffer)

    endif has_local_map

!
! Evaluate derived quantities
!
    call t_startf ('derived_fields')
    do lchnk = begchunk,endchunk
       ncol = phys_state(lchnk)%ncol
       do k=1,km
          do i=1,ncol
             phys_state(lchnk)%pdel (i,k) = phys_state(lchnk)%pint(i,k+1) - phys_state(lchnk)%pint(i,k)
             phys_state(lchnk)%rpdel(i,k) = D1_0/phys_state(lchnk)%pdel(i,k)
             phys_state(lchnk)%pmid (i,k) = D0_5*(phys_state(lchnk)%pint(i,k) + phys_state(lchnk)%pint(i,k+1))
             phys_state(lchnk)%lnpmid(i,k) = log(phys_state(lchnk)%pmid(i,k))
          end do
       end do

! Attempt to remove negative constituents in bottom layer only by moving from next level
! This is a BAB kludge to avoid masses of warning messages for cloud water and ice, since
! the vertical remapping operator currently being used for cam is not strictly monotonic 
! at the endpoints.
       do m=1,pcnst
          do i=1,ncol
             if (phys_state(lchnk)%q(i,pver,m) < qmin(m)) then
! available q in 2nd level
                qmavl = phys_state(lchnk)%q (i,pver-1,m) - qmin(m)
! required q change in bottom level rescaled to mass fraction in 2nd level
                dqreq = (qmin(m) - phys_state(lchnk)%q(i,pver,m))                         &
                      * phys_state(lchnk)%pdel(i,pver) / phys_state(lchnk)%pdel(i,pver-1)
                qbot   = phys_state(lchnk)%q(i,pver  ,m)
                qbotm1 = phys_state(lchnk)%q(i,pver-1,m)
                if (dqreq < qmavl) then
                   phys_state(lchnk)%q(i,pver  ,m) = qmin(m)
                   phys_state(lchnk)%q(i,pver-1,m) = phys_state(lchnk)%q(i,pver-1,m) - dqreq
                   if (dqreq>1.e-14_r8 .and. iam .eq. 0) write (6,*) 'dpcoup dqreq', m, lchnk, i, qbot, qbotm1, dqreq
                else 
                   if (iam .eq. 0) write (6,*) 'dpcoup cant adjust', m, lchnk, i, qbot, qbotm1, dqreq
                end if
             end if
          end do
       end do
                   
!
! Compute initial geopotential heights
       call geopotential_t (phys_state(lchnk)%lnpint, phys_state(lchnk)%lnpmid  , phys_state(lchnk)%pint  , &
                            phys_state(lchnk)%pmid  , phys_state(lchnk)%pdel    , phys_state(lchnk)%rpdel , &
                            phys_state(lchnk)%t     , phys_state(lchnk)%q(1,1,1), rair,  gravit,  zvir    , &
                            phys_state(lchnk)%zi    , phys_state(lchnk)%zm      , ncol                )

! Compute initial dry static energy, include surface geopotential
       do k = 1, pver
          do i=1,ncol
             phys_state(lchnk)%s(i,k) = cpair*phys_state(lchnk)%t(i,k) &
                                      + gravit*phys_state(lchnk)%zm(i,k) + phys_state(lchnk)%phis(i)
          end do
       end do

!
! Convert dry type constituents from moist to dry mixing ratio
!
       call set_state_pdry(phys_state(lchnk))    ! First get dry pressure to use for this timestep
       call set_wet_to_dry(phys_state(lchnk))    ! Dynamics had moist, physics wants dry.


! Compute energy and water integrals of input state
       call check_energy_timestep_init(phys_state(lchnk), phys_tend(lchnk), pbuf)

    end do
    call t_stopf('derived_fields')

    deallocate (u3)
    deallocate (v3)

!EOC
  end subroutine d_p_coupling
!-----------------------------------------------------------------------

!-----------------------------------------------------------------------
!BOP
! !IROUTINE: p_d_coupling --- convert physics output to dynamics input
!
! !INTERFACE: 
  subroutine p_d_coupling(grid, phys_state, phys_tend, full_phys, adiabatic,&
                          dyn_in,    dtime, zvir,      cappa,     ptop )

! !USES:
#if ( defined OFFLINE_DYN )
   use metdata,     only: get_met_fields
#endif
!-----------------------------------------------------------------------
    implicit none

! Variables ending in xy are xy-decomposition instanciations.

    type(T_FVDYCORE_GRID), intent(in) :: grid ! FV Dynamics grid

! !INPUT PARAMETERS:
    type(physics_state), intent(inout), dimension(begchunk:endchunk) :: phys_state
    type(physics_tend),  intent(inout), dimension(begchunk:endchunk) :: phys_tend
    type(dyn_import_t),  intent(inout)   :: dyn_in

    logical,  intent(in) :: full_phys
    logical,  intent(in) :: adiabatic
    real(r8), intent(in) :: dtime
    real(r8), intent(in) :: zvir
    real(r8), intent(in) :: cappa
    real(r8), intent(in) :: ptop

! !DESCRIPTION:
!
!   Coupler for converting physics output variables into dynamics input variables
!
! !REVISION HISTORY:
!   00.06.01   Boville    Creation
!   01.06.08   AAM        Compactified
!   01.07.13   AAM        Some support for multi-2D decompositions
!   02.03.01   Worley     Support for nontrivial physics remapping
!   02.08.06   Sawyer     T3 added -- updated to current temperature
!   05.07.12   Sawyer     Added dyn_state as argument
!   05.09.23   Sawyer     Transitioned to XY decomposition vars. only
!   05.10.31   Sawyer     Replaced dyn_state with dyn_interface
!
!EOP
!-----------------------------------------------------------------------
!BOC
! !LOCAL VARIABLES:

! Variables from the dynamics import container

    real(r8), pointer :: psxy(:,:)
    real(r8), pointer :: u3sxy(:,:,:)
    real(r8), pointer :: v3sxy(:,:,:)
    real(r8), pointer :: t3xy(:,:,:)                  !  Temperature
    real(r8), pointer :: ptxy(:,:,:)                  !  Virt. pot. temp.
    type(T_TRACERS), pointer :: tracer(:)             !  Constituents

    real(r8), pointer :: pexy(:,:,:)
    real(r8), pointer :: delpxy(:,:,:)
    real(r8), pointer :: pkxy(:,:,:)
    real(r8), pointer :: pkzxy(:,:,:)

! Local workspace

    real(r8):: dudtxy(grid%ifirstxy:grid%ilastxy,&
                      grid%km,grid%jfirstxy:grid%jlastxy)
    real(r8):: dvdtxy(grid%ifirstxy:grid%ilastxy,&
                      grid%km,grid%jfirstxy:grid%jlastxy)
    real(r8):: dummy_pelnxy(grid%ifirstxy:grid%ilastxy,grid%km+1, &
                            grid%jfirstxy:grid%jlastxy)

    integer :: i, ib, k, m, j, lchnk  ! indices
    integer :: ncol                   ! number of columns in current chunk
    integer :: lats(pcols)            ! array of latitude indices
    integer :: lons(pcols)            ! array of longitude indices
    integer :: blksiz                 ! number of columns in 2D block
    integer :: tsize                  ! amount of data per grid point passed to physics
    integer, allocatable, dimension(:,:) :: bpter
                                     ! offsets into block buffer for unpacking data
    integer :: cpter(pcols,0:pver)   ! offsets into chunk buffer for packing data
    integer :: iqa, iqb, iqc, iqd, mq     ! used for tracer transpose grouping

    real(r8) :: dt5
    real(r8), allocatable, dimension(:) :: &
       bbuffer, cbuffer               ! transpose buffers
#if (! defined SPMD)
    integer  :: block_buf_nrecs = 0
    integer  :: chunk_buf_nrecs = 0
    integer  :: mpicom = 0
    logical  :: local_dp_map=.true. 
#endif
    integer  :: im, jm, km, ng_d, ng_s, iam
    integer  :: ifirstxy, ilastxy, jfirstxy, jlastxy 
    integer  :: jfirst, jlast, kfirst, klast

! Pull the variables out of the dynamics export container

    psxy    => dyn_in%ps
    u3sxy   => dyn_in%u3s
    v3sxy   => dyn_in%v3s
    t3xy    => dyn_in%t3
    ptxy    => dyn_in%pt
    tracer  => dyn_in%tracer

    pexy    => dyn_in%pe
    delpxy  => dyn_in%delp
    pkxy    => dyn_in%pk
    pkzxy   => dyn_in%pkz    

    im   = grid%im
    jm   = grid%jm
    km   = grid%km

    ifirstxy = grid%ifirstxy
    ilastxy  = grid%ilastxy
    jfirstxy = grid%jfirstxy
    jlastxy  = grid%jlastxy

    jfirst   = grid%jfirst
    jlast    = grid%jlast
    kfirst   = grid%kfirst
    klast    = grid%klast

    ng_d     = grid%ng_d
    ng_s     = grid%ng_s

    iam      = grid%iam

!---------------------------End Local workspace-------------------------

#if ( defined OFFLINE_DYN )
!
! set the dyn flds to offline meteorological data
!
      call get_met_fields( phys_state, phys_tend, dtime )
#endif
! -------------------------------------------------------------------------
! Copy temperature, tendencies and constituents to dynamics data structures
! For adiabatic case, compute transposes only (2-D decomposition)
! -------------------------------------------------------------------------

! -------------------------------------------------------------------------
! Copy onto xy decomposition, then transpose to yz decomposition
! -------------------------------------------------------------------------

    if (.not. adiabatic) then

       if (local_dp_map) then

!$omp parallel do private(lchnk, i, k, ncol, m, lons, lats)

          do lchnk = begchunk,endchunk
             ncol = get_ncols_p(lchnk)
             call get_lon_all_p(lchnk, ncol, lons)
             call get_lat_all_p(lchnk, ncol, lats)

             do k = 1, km
                do i = 1, ncol
                   dvdtxy(lons(i),k,lats(i)) = phys_tend(lchnk)%dvdt(i,k)
                   dudtxy(lons(i),k,lats(i)) = phys_tend(lchnk)%dudt(i,k)
                   ptxy  (lons(i),lats(i),k) = phys_state(lchnk)%t(i,k)
                   delpxy(lons(i),lats(i),k) = phys_state(lchnk)%pdel(i,k)
                enddo
             enddo

             do m=1,pcnst
                if ( tracer(m)%is_r4 ) then
                   do k=1,km
                      do i=1,ncol
                         tracer(m)%content_r4(lons(i),lats(i),k) = &
                            phys_state(lchnk)%q(i,k,m)
                      end do
                   end do
                else
                   do k=1,km
                      do i=1,ncol
                         tracer(m)%content(lons(i),lats(i),k) = &
                            phys_state(lchnk)%q(i,k,m)
                      end do
                   end do
                endif
             end do

          enddo

       else

          tsize = 4 + pcnst

          blksiz = (jlastxy-jfirstxy+1)*(ilastxy-ifirstxy+1)
          allocate(bpter(blksiz,0:km))
          allocate(bbuffer(tsize*block_buf_nrecs))
          allocate(cbuffer(tsize*chunk_buf_nrecs))

          do lchnk = begchunk,endchunk
             ncol = get_ncols_p(lchnk)

             call chunk_to_block_send_pters(lchnk,pcols,km+1,tsize,cpter)

!dir$ concurrent
             do k=1,km
!dir$ concurrent
               do i=1,ncol

                   cbuffer(cpter(i,k))   = phys_tend(lchnk)%dvdt(i,k)
                   cbuffer(cpter(i,k)+1) = phys_tend(lchnk)%dudt(i,k)
                   cbuffer(cpter(i,k)+2) = phys_state(lchnk)%t(i,k)
                   cbuffer(cpter(i,k)+3) = phys_state(lchnk)%pdel(i,k)

                   do m=1,pcnst
                      cbuffer(cpter(i,k)+3+m) = phys_state(lchnk)%q(i,k,m)
                   end do

                end do
  
             end do

          end do

          call t_barrierf('sync_chk_to_blk', mpicom)
          call t_startf ('chunk_to_block')
          call transpose_chunk_to_block(tsize, cbuffer, bbuffer)
          call t_stopf  ('chunk_to_block')

          call chunk_to_block_recv_pters(iam+1,blksiz,km+1,tsize,bpter)

!dir$ concurrent
          do j=jfirstxy,jlastxy
!dir$ concurrent
             do k=1,km
!dir$ concurrent
                do i=ifirstxy,ilastxy
                   ib = (j-jfirstxy)*(ilastxy-ifirstxy+1) + (i-ifirstxy+1)

                   dvdtxy(i,k,j) = bbuffer(bpter(ib,k))
                   dudtxy(i,k,j) = bbuffer(bpter(ib,k)+1)
                   ptxy  (i,j,k) = bbuffer(bpter(ib,k)+2)
                   delpxy(i,j,k) = bbuffer(bpter(ib,k)+3)

                   do m=1,pcnst    ! For time being, forget about is_r4
                      tracer(m)%content(i,j,k) = bbuffer(bpter(ib,k)+3+m)
                   end do

                enddo
             enddo
          enddo

          deallocate(bpter)
          deallocate(bbuffer)
          deallocate(cbuffer)

       endif
    endif

    if (.not. adiabatic) then
! WS: 02.08.06: Update t3 to temperature
!$omp parallel do private(i,j,k)
!dir$ concurrent
       do k=1,km
          do j = jfirstxy,jlastxy
             do i = ifirstxy,ilastxy
                t3xy(i,j,k) = ptxy(i,j,k)
             enddo
          enddo
       enddo

       if (.not. full_phys) then
!$omp parallel do private(i, j, k)
!dir$ concurrent
          do k=1,km
             do j=jfirstxy,jlastxy
                do i=ifirstxy,ilastxy
                   ptxy(i,j,k) = ptxy(i,j,k) / pkzxy(i,j,k)
                enddo
             enddo
          enddo
       endif

! -------------------------------------------------------------------------
! Update u3s and v3s from tendencies dudt and dvdt.
! -------------------------------------------------------------------------
       dt5 = D0_5*dtime

       call t_barrierf('sync_uv3s_update', mpicom)
       call t_startf('uv3s_update')
       call uv3s_update( grid, dudtxy, u3sxy, dvdtxy, v3sxy, dt5 )
       call t_stopf('uv3s_update')

    endif

! -------------------------------------------------------------------------
! Compute pt, q3, pe, delp, ps, peln, pkz and pk.
! For 2-D decomposition, delp is transposed to delpxy, pexy is computed
!  from delpxy (and ptop), and pexy is transposed back to pe.
! Note that pt, q3, delp and pe are input parameters as well.
! For ideal or adiabatic physics, fewer quantities are updated.
! -------------------------------------------------------------------------
    call t_barrierf('sync_p_d_adjust', mpicom)
    call t_startf ('p_d_adjust')
    call p_d_adjust(grid, tracer, dummy_pelnxy, pkxy, pkzxy, zvir,  cappa, &
                    delpxy, ptxy, pexy, psxy, ptop, full_phys  )
    call t_stopf  ('p_d_adjust')

!EOC
  end subroutine p_d_coupling
!-----------------------------------------------------------------------
end module dp_coupling
