#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#define _BEGIN_FACE 1
#define _END_FACE   4
#undef _FACE_6
#undef _FACE_5

module cube_mod
  use kinds, only : real_kind, long_kind, longdouble_kind
  use coordinate_systems_mod, only : spherical_polar_t, cartesian3D_t, cartesian2d_t, &
       projectpoint, cubedsphere2cart, spherical_to_cart, sphere_tri_area,dist_threshold, &
       change_coordinates

  use physical_constants, only : dd_pi, rearth
  use control_mod, only : hypervis_scaling, cubed_sphere_map
  use parallel_mod, only : abortmp

  implicit none
  private

  integer,public, parameter :: nfaces = 6          ! number of faces on the cube
  integer,public, parameter :: nInnerElemEdge = 8  ! number of edges for an interior element
  integer,public, parameter :: nCornerElemEdge = 4 ! number of corner elements

  real(kind=real_kind), public, parameter :: cube_xstart = -0.25D0*DD_PI
  real(kind=real_kind), public, parameter :: cube_xend   =  0.25D0*DD_PI
  real(kind=real_kind), public, parameter :: cube_ystart = -0.25D0*DD_PI
  real(kind=real_kind), public, parameter :: cube_yend   =  0.25D0*DD_PI


  type, public :: face_t
     type (spherical_polar_t) :: sphere0       ! tangent point of face on sphere
     type (spherical_polar_t) :: sw            ! sw corner of face on sphere
     type (spherical_polar_t) :: se            ! se corner of face on sphere
     type (spherical_polar_t) :: ne            ! ne corner of face on sphere
     type (spherical_polar_t) :: nw            ! nw corner of face on sphere
     type (cartesian3D_t)     :: P0
     type (cartesian3D_t)     :: X0
     type (cartesian3D_t)     :: Y0
     integer                  :: number
     integer                  :: padding       ! pad the struct
  end type face_t

  type, public :: cube_face_coord_t
     real(real_kind) :: x             ! x coordinate
     real(real_kind) :: y             ! y coordinate
     type (face_t), pointer :: face     ! face
  end type cube_face_coord_t

  ! ==========================================
  ! Public Interfaces
  ! ==========================================

  public :: CubeTopology

  ! Rotate the North Pole:  used for JW baroclinic test case
  ! Settings this only changes Coriolis.  
  ! User must also rotate initial condition
  real (kind=real_kind), public :: rotate_grid = 0

  ! ===============================
  ! Public methods for cube
  ! ===============================

  public  :: cube_init_atomic
  public  :: convert_gbl_index
  public  :: cube_assemble
  public  :: vmap,dmap
  public  :: covariant_rot
  public  :: contravariant_rot
  public  :: set_corner_coordinates
  public  :: assign_node_numbers_to_elem


  public  :: CubeEdgeCount
  public  :: CubeElemCount
  public  :: CubeSetupEdgeIndex
  public  :: rotation_init_atomic
  public  :: ref2sphere

  ! public interface to REFERECE element map
#if HOMME_QUAD_PREC
  interface ref2sphere
     module procedure ref2sphere_double
     module procedure ref2sphere_longdouble
  end interface
#else
  ! both routines have identical arguments in this case, cant use interface
  interface ref2sphere
     module procedure ref2sphere_double
  end interface
#endif


  ! ===============================
  ! Private methods
  ! ===============================
  private :: coordinates_atomic
  private :: metric_atomic
  private :: coreolis_init_atomic

contains

  ! =======================================
  !  cube_init_atomic:
  !
  ! Initialize element descriptors for 
  ! cube sphere case for each element ... 
  ! =======================================
  subroutine cube_init_atomic(elem,gll_points,alpha_in)
    use element_mod, only : element_t
    use dimensions_mod, only : np
    type (element_t),intent(inout) :: elem
    real (kind=real_kind),optional :: alpha_in
    real (kind=real_kind)          :: alpha=1
    real (kind=longdouble_kind)      :: gll_points(np)

    if(present(alpha_in)) alpha=alpha_in
    
    elem%FaceNum=elem%vertex%face_number
    call coordinates_atomic(elem,gll_points)

    call metric_atomic(elem,gll_points,alpha)

    call coreolis_init_atomic(elem)
    elem%desc%use_rotation= 0
!    call solver_weights_atomic(elem)


  end subroutine cube_init_atomic

  ! =======================================
  ! coordinates_atomic:
  !
  ! Initialize element coordinates for
  ! cube-sphere case ... (atomic) 
  !
  ! =======================================

  subroutine coordinates_atomic(elem,gll_points)
    use element_mod, only : element_t, element_var_coordinates
    use dimensions_mod, only : np
    type (element_t) :: elem
    real (kind=longdouble_kind)      :: gll_points(np)


    real (kind=real_kind)      :: area1,area2
    type (cartesian3d_t) :: quad(4)
    integer face_no,i,j

    face_no = elem%vertex%face_number
    ! compute the corners in Cartesian coordinates
    do i=1,4
       elem%corners3D(i)=cubedsphere2cart(elem%corners(i),face_no)
    enddo

    ! =========================================
    ! compute lat/lon coordinates of each GLL point
    ! =========================================
    do i=1,np
    do j=1,np
       elem%spherep(i,j)=ref2sphere(gll_points(i),gll_points(j),elem)
    enddo
    enddo

    ! also compute the [-pi/2,pi/2] cubed sphere coordinates:
    elem%cartp=element_var_coordinates(elem%corners,gll_points)

    ! Matrix describing vector conversion to cartesian
    ! Zonal direction
    elem%vec_sphere2cart(:,:,1,1) = -SIN(elem%spherep(:,:)%lon)
    elem%vec_sphere2cart(:,:,2,1) =  COS(elem%spherep(:,:)%lon)
    elem%vec_sphere2cart(:,:,3,1) =  0.0_real_kind
    ! Meridional direction
    elem%vec_sphere2cart(:,:,1,2) = -SIN(elem%spherep(:,:)%lat)*COS(elem%spherep(:,:)%lon)
    elem%vec_sphere2cart(:,:,2,2) = -SIN(elem%spherep(:,:)%lat)*SIN(elem%spherep(:,:)%lon)
    elem%vec_sphere2cart(:,:,3,2) =  COS(elem%spherep(:,:)%lat)

  end subroutine coordinates_atomic

  ! elem_jacobians:
  !
  ! Calculate Jacobian associated with mapping
  ! from arbitrary quadrilateral to [-1,1]^2
  ! along with its inverse and determinant
  ! ==========================================

  subroutine elem_jacobians(coords, unif2quadmap)

    use dimensions_mod, only : np
    type (cartesian2D_t),  dimension(np,np), intent(in) :: coords
    ! unif2quadmap is the bilinear map from [-1,1]^2 -> arbitrary quadrilateral
    real (kind=real_kind), dimension(4,2), intent(out) :: unif2quadmap
    integer :: ii,jj

    unif2quadmap(1,1)=(coords(1,1)%x+coords(np,1)%x+coords(np,np)%x+coords(1,np)%x)/4.0d0
    unif2quadmap(1,2)=(coords(1,1)%y+coords(np,1)%y+coords(np,np)%y+coords(1,np)%y)/4.0d0
    unif2quadmap(2,1)=(-coords(1,1)%x+coords(np,1)%x+coords(np,np)%x-coords(1,np)%x)/4.0d0
    unif2quadmap(2,2)=(-coords(1,1)%y+coords(np,1)%y+coords(np,np)%y-coords(1,np)%y)/4.0d0
    unif2quadmap(3,1)=(-coords(1,1)%x-coords(np,1)%x+coords(np,np)%x+coords(1,np)%x)/4.0d0
    unif2quadmap(3,2)=(-coords(1,1)%y-coords(np,1)%y+coords(np,np)%y+coords(1,np)%y)/4.0d0
    unif2quadmap(4,1)=(coords(1,1)%x-coords(np,1)%x+coords(np,np)%x-coords(1,np)%x)/4.0d0
    unif2quadmap(4,2)=(coords(1,1)%y-coords(np,1)%y+coords(np,np)%y-coords(1,np)%y)/4.0d0

  end subroutine elem_jacobians

  ! =========================================
  ! metric_atomic:
  !
  ! Initialize cube-sphere metric terms:
  ! equal angular elements (atomic)
  ! initialize:  
  !         metdet, rmetdet  (analytic)    = detD, 1/detD
  !         met                (analytic)    D^t D     (symmetric)
  !         metdet             (analytic)    = detD
  !         metinv             (analytic)    Dinv Dinv^t  (symmetic)
  !         D     (from subroutine vmap)
  !         Dinv  (computed directly from D)
  ! 
  ! ucontra = Dinv * u  =  metinv * ucov   
  ! ucov    = D^t * u   =  met * ucontra
  !
  ! we also compute DE = D*E, where 
  ! E = eigenvectors of metinv as a basis      metinv = E LAMBDA E^t
  !   
  ! ueig = E^t ucov  = E^t D^t u =  (DE)^t u  
  !  
  !
  ! so if we want to tweak the mapping by a factor alpha (so he weights add up to 4pi, for example)
  ! we take:
  !    NEW       OLD     
  !       D = sqrt(alpha) D  and then rederive all quantities.  
  !    detD = alpha detD
  !    
  ! where alpha = 4pi/SEMarea, SEMarea = global sum elem(ie)%mv(i,j)*elem(ie)%metdet(i,j)
  ! 
  ! =========================================

  subroutine metric_atomic(elem,gll_points,alpha)
    use element_mod, only : element_t
    use dimensions_mod, only : np
    use physical_constants, only : rrearth

    type (element_t) :: elem
    real(kind=real_kind) :: alpha
    real (kind=longdouble_kind)      :: gll_points(np)
    ! Local variables
    integer ii
    integer i,j,nn
    integer iptr

    real (kind=real_kind) :: r         ! distance from origin for point on cube tangent to unit sphere

    real (kind=real_kind) :: const, norm
    real (kind=real_kind) :: detD      ! determinant of vector field mapping matrix.  

    real (kind=real_kind) :: x1        ! 1st cube face coordinate
    real (kind=real_kind) :: x2        ! 2nd cube face coordinate
    real (kind=real_kind) :: tmpD(2,2)
    real (kind=real_kind) :: M(2,2),E(2,2),eig(2),DE(2,2),DEL(2,2),V(2,2), nu1, nu2, lamStar1, lamStar2
    integer :: imaxM(2)
    real (kind=real_kind) :: l1, l2, sc     ! eigen values of met

    real (kind=real_kind) :: roundoff_err = 1e-11 !!! OG: this is a temporal fix
    
    ! ==============================================
    ! Initialize differential mapping operator
    ! to and from vector fields on the sphere to 
    ! contravariant vector fields on the cube
    ! i.e. dM/dx^i in Sadourney (1972) and it's 
    ! inverse
    ! ==============================================

    ! MNL: Calculate Jacobians of bilinear map from cubed-sphere to ref element
    if (cubed_sphere_map==0) then
       call elem_jacobians(elem%cartp, elem%u2qmap)
    endif

    elem%max_eig = 0.0d0
    elem%min_eig = 1d99
    elem%max_eig_ratio = 0d0
    do j=1,np
       do i=1,np
          x1=gll_points(i)
          x2=gll_points(j)
          call Dmap(elem%D(:,:,i,j),elem,x1,x2)


          ! Numerical metric tensor based on analytic D: met = D^T times D
          ! (D maps between sphere and reference element)
          elem%met(1,1,i,j) = elem%D(1,1,i,j)*elem%D(1,1,i,j) + &
                              elem%D(2,1,i,j)*elem%D(2,1,i,j)
          elem%met(1,2,i,j) = elem%D(1,1,i,j)*elem%D(1,2,i,j) + &
                              elem%D(2,1,i,j)*elem%D(2,2,i,j)
          elem%met(2,1,i,j) = elem%D(1,1,i,j)*elem%D(1,2,i,j) + &
                              elem%D(2,1,i,j)*elem%D(2,2,i,j)
          elem%met(2,2,i,j) = elem%D(1,2,i,j)*elem%D(1,2,i,j) + &
                              elem%D(2,2,i,j)*elem%D(2,2,i,j)

          ! compute D^-1...
          ! compute determinant of D mapping matrix... if not zero compute inverse

          detD = elem%D(1,1,i,j)*elem%D(2,2,i,j) - elem%D(1,2,i,j)*elem%D(2,1,i,j)      

          elem%Dinv(1,1,i,j) =  elem%D(2,2,i,j)/detD
          elem%Dinv(1,2,i,j) = -elem%D(1,2,i,j)/detD
          elem%Dinv(2,1,i,j) = -elem%D(2,1,i,j)/detD
          elem%Dinv(2,2,i,j) =  elem%D(1,1,i,j)/detD

          ! L2 norm = sqrt max eigenvalue of metinv
          !         = 1/sqrt(min eigenvalue of met)
          ! l1 and l2 are eigenvalues of met
          ! (should both be positive, l1 > l2)
          l1 = (elem%met(1,1,i,j) + elem%met(2,2,i,j) + sqrt(4.0d0*elem%met(1,2,i,j)*elem%met(2,1,i,j) + &
              (elem%met(1,1,i,j) - elem%met(2,2,i,j))**2))/2.0d0
          l2 = (elem%met(1,1,i,j) + elem%met(2,2,i,j) - sqrt(4.0d0*elem%met(1,2,i,j)*elem%met(2,1,i,j) + &
              (elem%met(1,1,i,j) - elem%met(2,2,i,j))**2))/2.0d0
          ! Max L2 norm of Dinv is sqrt of max eigenvalue of metinv
          ! max eigenvalue of metinv is 1/min eigenvalue of met
          norm = 1.0d0/sqrt(min(abs(l1),abs(l2)))
          elem%max_eig = max(norm, elem%max_eig)
          ! Min L2 norm of Dinv is sqrt of min eigenvalue of metinv
          ! min eigenvalue of metinv is 1/max eigenvalue of met
          norm = 1.0d0/sqrt(max(abs(l1),abs(l2)))
          elem%min_eig = min(norm, elem%min_eig)

          ! Need inverse of met if not calculated analytically
          elem%metdet(i,j) = abs(detD)
          elem%rmetdet(i,j) = 1.0D0/abs(detD)

          elem%metinv(1,1,i,j) =  elem%met(2,2,i,j)/(detD*detD)
          elem%metinv(1,2,i,j) = -elem%met(1,2,i,j)/(detD*detD)
          elem%metinv(2,1,i,j) = -elem%met(2,1,i,j)/(detD*detD)
          elem%metinv(2,2,i,j) =  elem%met(1,1,i,j)/(detD*detD)

          ! matricies for tensor hyper-viscosity
          ! compute eigenvectors of metinv (probably same as computed above)
          M = elem%metinv(:,:,i,j)

          eig(1) = (M(1,1) + M(2,2) + sqrt(4.0d0*M(1,2)*M(2,1) + &
              (M(1,1) - M(2,2))**2))/2.0d0
          eig(2) = (M(1,1) + M(2,2) - sqrt(4.0d0*M(1,2)*M(2,1) + &
              (M(1,1) - M(2,2))**2))/2.0d0
          
          ! use DE to store M - Lambda, to compute eigenvectors
          DE=M
          DE(1,1)=DE(1,1)-eig(1)
          DE(2,2)=DE(2,2)-eig(1)

          imaxM = maxloc(abs(DE))
          if (maxval(abs(DE))==0) then
             E(1,1)=1; E(2,1)=0;
          elseif ( imaxM(1)==1 .and. imaxM(2)==1 ) then
             E(2,1)=1; E(1,1) = -DE(2,1)/DE(1,1)
          else   if ( imaxM(1)==1 .and. imaxM(2)==2 ) then
             E(2,1)=1; E(1,1) = -DE(2,2)/DE(1,2)
          else   if ( imaxM(1)==2 .and. imaxM(2)==1 ) then
             E(1,1)=1; E(2,1) = -DE(1,1)/DE(2,1)
          else   if ( imaxM(1)==2 .and. imaxM(2)==2 ) then
             E(1,1)=1; E(2,1) = -DE(1,2)/DE(2,2)
          else
             call abortmp('Impossible error in cube_mod.F90::metric_atomic()')
          endif

          ! the other eigenvector is orthgonal:
          E(1,2)=-E(2,1)
          E(2,2)= E(1,1)

!normalize columns
	  E(:,1)=E(:,1)/sqrt(sum(E(:,1)*E(:,1))); 
	  E(:,2)=E(:,2)/sqrt(sum(E(:,2)*E(:,2))); 


! OBTAINING TENSOR FOR HV:

! Instead of the traditional scalar Laplace operator \grad \cdot \grad
! we introduce \grad \cdot V \grad
! where V = D E LAM LAM^* E^T D^T. 
! Recall (metric_tensor)^{-1}=(D^T D)^{-1} = E LAM E^T.
! Here, LAM = diag( 4/((np-1)dx)^2 , 4/((np-1)dy)^2 ) = diag(  4/(dx_elem)^2, 4/(dy_elem)^2 )
! Note that metric tensors and LAM correspondingly are quantities on a unit sphere.

! This motivates us to use V = D E LAM LAM^* E^T D^T
! where LAM^* = diag( nu1, nu2 ) where nu1, nu2 are HV coefficients scaled like (dx)^{hv_scaling/2}, (dy)^{hv_scaling/2}.
! (Halves in powers come from the fact that HV consists of two Laplace iterations.)

! Originally, we took LAM^* = diag(
!  1/(eig(1)**(hypervis_scaling/4.0d0))*(rearth**(hypervis_scaling/2.0d0))
!  1/(eig(2)**(hypervis_scaling/4.0d0))*(rearth**(hypervis_scaling/2.0d0)) ) = 
!  = diag( lamStar1, lamStar2)
!  \simeq ((np-1)*dx_sphere / 2 )^hv_scaling/2 = SQRT(OPERATOR_HV)
! because 1/eig(...) \simeq (dx_on_unit_sphere)^2 .
! Introducing the notation OPERATOR = lamStar^2 is useful for conversion formulas.

! This leads to the following conversion formula: nu_const is nu used for traditional HV on uniform grids
! nu_tensor = nu_const * OPERATOR_HV^{-1}, so
! nu_tensor = nu_const *((np-1)*dx_sphere / 2 )^{ - hv_scaling} or
! nu_tensor = nu_const *(2/( (np-1) * dx_sphere) )^{hv_scaling} .
! dx_sphere = 2\pi *rearth/(np-1)/4/NE
! [nu_tensor] = [meter]^{4-hp_scaling}/[sec]

! (1) Later developments:
! Apply tensor V only at the second Laplace iteration. Thus, LAM^* should be scaled as (dx)^{hv_scaling}, (dy)^{hv_scaling},
! see this code below:
!          DEL(1:2,1) = (lamStar1**2) *eig(1)*DE(1:2,1)
!          DEL(1:2,2) = (lamStar2**2) *eig(2)*DE(1:2,2)

! (2) Later developments:
! Bringing [nu_tensor] to 1/[sec]:
!	  lamStar1=1/(eig(1)**(hypervis_scaling/4.0d0)) *(rearth**2.0d0)
!	  lamStar2=1/(eig(2)**(hypervis_scaling/4.0d0)) *(rearth**2.0d0)
! OPERATOR_HV = ( (np-1)*dx_unif_sphere / 2 )^{hv_scaling} * rearth^4
! Conversion formula:
! nu_tensor = nu_const * OPERATOR_HV^{-1}, so
! nu_tensor = nu_const *( 2*rearth /((np-1)*dx))^{hv_scaling} * rearth^{-4.0}.

! For the baseline coefficient nu=1e15 for NE30, 
! nu_tensor=7e-8 (BUT RUN TWICE AS SMALL VALUE FOR NOW) for hv_scaling=3.2
! and 
! nu_tensor=1.3e-6 for hv_scaling=4.0.


!matrix D*E
          DE(1,1)=sum(elem%D(1,:,i,j)*E(:,1))
          DE(1,2)=sum(elem%D(1,:,i,j)*E(:,2))
          DE(2,1)=sum(elem%D(2,:,i,j)*E(:,1))
          DE(2,2)=sum(elem%D(2,:,i,j)*E(:,2))

	  lamStar1=1/(eig(1)**(hypervis_scaling/4.0d0)) *(rearth**2.0d0)
	  lamStar2=1/(eig(2)**(hypervis_scaling/4.0d0)) *(rearth**2.0d0)

!matrix (DE) * Lam^* * Lam , tensor HV when V is applied at each Laplace calculation
!          DEL(1:2,1) = lamStar1*eig(1)*DE(1:2,1)
!          DEL(1:2,2) = lamStar2*eig(2)*DE(1:2,2)

!matrix (DE) * (Lam^*)^2 * Lam, tensor HV when V is applied only once, at the last Laplace calculation
!will only work with hyperviscosity, not viscosity
          DEL(1:2,1) = (lamStar1**2) *eig(1)*DE(1:2,1)
          DEL(1:2,2) = (lamStar2**2) *eig(2)*DE(1:2,2)

!matrix (DE) * Lam^* * Lam  *E^t *D^t or (DE) * (Lam^*)^2 * Lam  *E^t *D^t 
          V(1,1)=sum(DEL(1,:)*DE(1,:))
          V(1,2)=sum(DEL(1,:)*DE(2,:))
          V(2,1)=sum(DEL(2,:)*DE(1,:))
          V(2,2)=sum(DEL(2,:)*DE(2,:))

	  elem%tensorVisc(:,:,i,j)=V(:,:)

       end do
    end do

    elem%dx_short = 1.0d0/(elem%max_eig*0.5d0*dble(np-1)*rrearth*1000.0d0)
    elem%dx_long  = 1.0d0/(elem%min_eig*0.5d0*dble(np-1)*rrearth*1000.0d0)
    ! ===============================================
    !
    ! Initialize equal angular metric tensor on each 
    ! on velocity grid for unit sphere.
    !
    ! Initialize gdet = SQRT(ABS(DET(gij)))
    !
    ! These quantities are the same on every face
    ! of the cube.
    !
    ! =================================================

    ! mt: better might be to compute all these quantities directly from D
    ! for consistency?
    !
    ! MNL: done
    elem%D = elem%D * sqrt(alpha) 
    elem%Dinv = elem%Dinv / sqrt(alpha) 
    elem%metdet = elem%metdet * alpha
    elem%rmetdet = elem%rmetdet / alpha
    elem%met = elem%met * alpha
    elem%metinv = elem%metinv / alpha

  end subroutine metric_atomic

  ! =======================================
  ! solver_weights:
  !
  ! For nonstaggered GaussLobatto elements,
  ! compute weights for redundant points in 
  ! cg solver.
  !
  ! =======================================
#if 0
  subroutine solver_weights_atomic(elem)
    use element_mod, only : element_t
    use dimensions_mod, only : np

    type (element_t) :: elem
    real (kind=real_kind) :: x 

    ! Local variables

    integer :: i, j
    ! =========================================
    ! compute cube face coordinates of element
    ! =========================================

    do i=1,np
      do j=1,np
        if (i==1) then
          if (j==1) then
             x = 1.0_real_kind/elem%node_multiplicity(1)
          else if (j==np) then
             x = 1.0_real_kind/elem%node_multiplicity(4)
          else
             x = 0.5_real_kind
          end if
        else if (i==np) then
          if (j==1) then
             x = 1.0_real_kind/elem%node_multiplicity(2)
          else if (j==np) then
             x = 1.0_real_kind/elem%node_multiplicity(3)
          else
             x = 0.5_real_kind
          end if
        else if (j==1 .or. j==np) then
           x = 0.5_real_kind
        else
           x = 1.0_real_kind
        end if
        elem%solver_wts(i,j) = x
      end do
    end do

  end subroutine solver_weights_atomic
#endif

#if 1
  ! ========================================
  ! covariant_rot:
  !
  ! 2 x 2 matrix multiply:  Db^T * Da^-T
  ! for edge rotations: maps face a to face b
  !
  ! ========================================

  function covariant_rot(Da,Db) result(R)

    real (kind=real_kind) :: Da(2,2)
    real (kind=real_kind) :: Db(2,2)
    real (kind=real_kind) :: R(2,2)

    real (kind=real_kind) :: detDa

    detDa = Da(2,2)*Da(1,1) - Da(1,2)*Da(2,1)

    R(1,1)=(Da(2,2)*Db(1,1) - Da(1,2)*Db(2,1))/detDa
    R(1,2)=(Da(1,1)*Db(2,1) - Da(2,1)*Db(1,1))/detDa
    R(2,1)=(Da(2,2)*Db(1,2) - Da(1,2)*Db(2,2))/detDa
    R(2,2)=(Da(1,1)*Db(2,2) - Da(2,1)*Db(1,2))/detDa

  end function covariant_rot
#else

  ! ========================================
  ! covariant_rot:
  !
  ! 2 x 2 matrix multiply:  Db * Da^-1
  ! for edge rotations: maps face a to face b
  !
  ! ========================================

  function covariant_rot(Da,Db) result(R)

    real (kind=real_kind) :: Da(2,2)
    real (kind=real_kind) :: Db(2,2)
    real (kind=real_kind) :: R(2,2)

    real (kind=real_kind) :: detDa

    detDa = Da(2,2)*Da(1,1) - Da(1,2)*Da(2,1)

    R(1,1)=(Da(2,2)*Db(1,1) - Da(2,1)*Db(1,2))/detDa
    R(1,2)=(Da(1,1)*Db(1,2) - Da(1,2)*Db(1,1))/detDa
    R(2,1)=(Da(2,2)*Db(2,1) - Da(2,1)*Db(2,2))/detDa
    R(2,2)=(Da(1,1)*Db(2,2) - Da(1,2)*Db(2,1))/detDa

  end function covariant_rot

#endif

  ! ========================================
  ! contravariant_rot:
  !
  ! 2 x 2 matrix multiply:  Db^-1 * Da
  ! that maps a contravariant vector field
  ! from an edge of cube face a to a contiguous 
  ! edge of cube face b.
  !
  ! ========================================

  function contravariant_rot(Da,Db) result(R)

    real (kind=real_kind) :: Da(2,2)
    real (kind=real_kind) :: Db(2,2)
    real (kind=real_kind) :: R(2,2)

    real (kind=real_kind) :: detDb

    detDb = Db(2,2)*Db(1,1) - Db(1,2)*Db(2,1)

    R(1,1)=(Da(1,1)*Db(2,2) - Da(2,1)*Db(1,2))/detDb
    R(1,2)=(Da(1,2)*Db(2,2) - Da(2,2)*Db(1,2))/detDb
    R(2,1)=(Da(2,1)*Db(1,1) - Da(1,1)*Db(2,1))/detDb
    R(2,2)=(Da(2,2)*Db(1,1) - Da(1,2)*Db(2,1))/detDb

  end function contravariant_rot

  ! ========================================================
  ! Dmap:
  !
  ! Initialize mapping that tranforms contravariant 
  ! vector fields on the reference element onto vector fields on
  ! the sphere. 
  ! ========================================================
  subroutine Dmap(D, elem, a,b)
    use element_mod, only : element_t
    type (element_t) :: elem
    real (kind=real_kind), intent(out)  :: D(2,2)
    real (kind=real_kind), intent(in)     :: a,b
    if (cubed_sphere_map==0) then
       call dmap_equiangular(D,elem,a,b)
    else if (cubed_sphere_map==1) then
       call abortmp('equi-distance gnomonic map not yet implemented')
    else if (cubed_sphere_map==2) then
       call dmap_elementlocal(D,elem,a,b)
    else
       call abortmp('bad value of cubed_sphere_map')
    endif
  end subroutine Dmap



  ! ========================================================
  ! Dmap:
  !
  ! Equiangular Gnomonic Projection
  ! Composition of equiangular Gnomonic projection to cubed-sphere face,
  ! followd by bilinear map to reference element
  ! ========================================================
  subroutine dmap_equiangular(D, elem, a,b)
    use element_mod, only : element_t
    use dimensions_mod, only : np
    type (element_t) :: elem
    real (kind=real_kind), intent(out)  :: D(2,2)
    real (kind=real_kind), intent(in)     :: a,b
    ! local
    real (kind=real_kind)  :: tmpD(2,2), Jp(2,2),x1,x2,pi,pj,qi,qj
    real (kind=real_kind), dimension(4,2) :: unif2quadmap

#if 0
    ! we shoud get rid of elem%u2qmap() and routine cube_mod.F90::elem_jacobian()
    ! and replace with this code below:
    ! but this produces roundoff level changes
    !unif2quadmap(1,1)=(elem%cartp(1,1)%x+elem%cartp(np,1)%x+elem%cartp(np,np)%x+elem%cartp(1,np)%x)/4.0d0
    !unif2quadmap(1,2)=(elem%cartp(1,1)%y+elem%cartp(np,1)%y+elem%cartp(np,np)%y+elem%cartp(1,np)%y)/4.0d0
    unif2quadmap(2,1)=(-elem%cartp(1,1)%x+elem%cartp(np,1)%x+elem%cartp(np,np)%x-elem%cartp(1,np)%x)/4.0d0
    unif2quadmap(2,2)=(-elem%cartp(1,1)%y+elem%cartp(np,1)%y+elem%cartp(np,np)%y-elem%cartp(1,np)%y)/4.0d0
    unif2quadmap(3,1)=(-elem%cartp(1,1)%x-elem%cartp(np,1)%x+elem%cartp(np,np)%x+elem%cartp(1,np)%x)/4.0d0
    unif2quadmap(3,2)=(-elem%cartp(1,1)%y-elem%cartp(np,1)%y+elem%cartp(np,np)%y+elem%cartp(1,np)%y)/4.0d0
    unif2quadmap(4,1)=(elem%cartp(1,1)%x-elem%cartp(np,1)%x+elem%cartp(np,np)%x-elem%cartp(1,np)%x)/4.0d0
    unif2quadmap(4,2)=(elem%cartp(1,1)%y-elem%cartp(np,1)%y+elem%cartp(np,np)%y-elem%cartp(1,np)%y)/4.0d0
    Jp(1,1) = unif2quadmap(2,1) + unif2quadmap(4,1)*b
    Jp(1,2) = unif2quadmap(3,1) + unif2quadmap(4,1)*a
    Jp(2,1) = unif2quadmap(2,2) + unif2quadmap(4,2)*b
    Jp(2,2) = unif2quadmap(3,2) + unif2quadmap(4,2)*a
#else
    ! input (a,b) shold be a point in the reference element [-1,1]
    ! compute Jp(a,b)
    Jp(1,1) = elem%u2qmap(2,1) + elem%u2qmap(4,1)*b
    Jp(1,2) = elem%u2qmap(3,1) + elem%u2qmap(4,1)*a
    Jp(2,1) = elem%u2qmap(2,2) + elem%u2qmap(4,2)*b
    Jp(2,2) = elem%u2qmap(3,2) + elem%u2qmap(4,2)*a
#endif

    ! map (a,b) to the [-pi/2,pi/2] equi angular cube face:  x1,x2
    ! a = gp%points(i)
    ! b = gp%points(j)
    pi = (1-a)/2
    pj = (1-b)/2
    qi = (1+a)/2
    qj = (1+b)/2
    x1 = pi*pj*elem%corners(1)%x &
         + qi*pj*elem%corners(2)%x &
         + qi*qj*elem%corners(3)%x &
         + pi*qj*elem%corners(4)%x 
    x2 = pi*pj*elem%corners(1)%y &
         + qi*pj*elem%corners(2)%y &
         + qi*qj*elem%corners(3)%y &
         + pi*qj*elem%corners(4)%y 
    
    call vmap(tmpD,x1,x2,elem%vertex%face_number)

    ! Include map from element -> ref element in D
    D(1,1) = tmpD(1,1)*Jp(1,1) + tmpD(1,2)*Jp(2,1)
    D(1,2) = tmpD(1,1)*Jp(1,2) + tmpD(1,2)*Jp(2,2)
    D(2,1) = tmpD(2,1)*Jp(1,1) + tmpD(2,2)*Jp(2,1)
    D(2,2) = tmpD(2,1)*Jp(1,2) + tmpD(2,2)*Jp(2,2)
  end subroutine dmap_equiangular



  ! ========================================================
  ! vmap:
  !
  ! Initialize mapping that tranforms contravariant 
  ! vector fields on the cube onto vector fields on
  ! the sphere. This follows Taylor's D matrix 
  !
  !       | cos(theta)dlambda/dx1  cos(theta)dlambda/dx2 |
  !   D = |                                              |
  !       |     dtheta/dx1              dtheta/dx2       |
  !
  ! ========================================================

  subroutine vmap(D, x1, x2, face_no) 
    real (kind=real_kind), intent(inout)  :: D(2,2)
    real (kind=real_kind), intent(in)     :: x1
    real (kind=real_kind), intent(in)     :: x2
    integer              , intent(in)     :: face_no

    ! Local variables

    real (kind=real_kind) :: poledist  ! SQRT(TAN(x1)**2 +TAN(x2)**2)
    real (kind=real_kind) :: r         ! distance from cube point to center of sphere

    real (kind=real_kind) :: D11
    real (kind=real_kind) :: D12
    real (kind=real_kind) :: D21
    real (kind=real_kind) :: D22

    r=SQRT(1.0D0 + TAN(x1)**2 + TAN(x2)**2)

    if (face_no >= 1 .and. face_no <= 4) then

       D11 = 1.0D0/(r*COS(x1))
       D12 = 0.0D0
       D21 = -TAN(x1)*TAN(x2)/(COS(x1)*r*r)        
       D22 = 1.0D0/(r*r*COS(x1)*COS(x2)*COS(x2))

       D(1,1) =  D11
       D(1,2) =  D12
       D(2,1) =  D21
       D(2,2) =  D22


    else if (face_no==6) then
       poledist=SQRT( TAN(x1)**2 + TAN(x2)**2)
       if ( poledist <= DIST_THRESHOLD ) then

          ! we set the D transform to the identity matrix 
          ! which works ONLY for swtc1, phi starting at 
          ! 3*PI/2... assumes lon at pole == 0

          D(1,1) =  1.0D0
          D(1,2) =  0.0D0
          D(2,1) =  0.0D0
          D(2,2) =  1.0D0

       else

          D11 = -TAN(x2)/(poledist*COS(x1)*COS(x1)*r)
          D12 =  TAN(x1)/(poledist*COS(x2)*COS(x2)*r)
          D21 = -TAN(x1)/(poledist*COS(x1)*COS(x1)*r*r)
          D22 = -TAN(x2)/(poledist*COS(x2)*COS(x2)*r*r)

          D(1,1) =  D11
          D(1,2) =  D12
          D(2,1) =  D21
          D(2,2) =  D22

       end if
    else if (face_no==5) then
       poledist=SQRT( TAN(x1)**2 + TAN(x2)**2)
       if ( poledist <= DIST_THRESHOLD ) then

          ! we set the D transform to the identity matrix 
          ! which works ONLY for swtc1, phi starting at 
          ! 3*PI/2... assumes lon at pole == 0, i.e. very specific

          D(1,1) =  1.0D0
          D(1,2) =  0.0D0
          D(2,1) =  0.0D0
          D(2,2) =  1.0D0

       else

          D11 =  TAN(x2)/(poledist*COS(x1)*COS(x1)*r)
          D12 = -TAN(x1)/(poledist*COS(x2)*COS(x2)*r)
          D21 =  TAN(x1)/(poledist*COS(x1)*COS(x1)*r*r)
          D22 =  TAN(x2)/(poledist*COS(x2)*COS(x2)*r*r)

          D(1,1) =  D11
          D(1,2) =  D12
          D(2,1) =  D21
          D(2,2) =  D22

       end if
    end if

  end subroutine vmap




  ! ========================================================
  ! Dmap:
  !
  ! Initialize mapping that tranforms contravariant 
  ! vector fields on the reference element onto vector fields on
  ! the sphere. 
  ! For Gnomonic, followed by bilinear, this code uses the old vmap()
  ! for unstructured grids, this code uses the parametric map that
  ! maps quads on the sphere directly to the reference element
  ! ========================================================
  subroutine dmap_elementlocal(D, elem, a,b)
    use element_mod, only : element_t

    type (element_t) :: elem
    real (kind=real_kind), intent(out)    :: D(2,2)
    real (kind=real_kind), intent(in)     :: a,b

    type (spherical_polar_t)      :: sphere

    type (cartesian3d_t)               ::  corners(4)   
    real(kind=real_kind)               ::  c(3,4), q(4), xx(3), r, lam, th, dd(4,2)
    real(kind=real_kind)               ::  sinlam, sinth, coslam, costh
    real(kind=real_kind)               ::  D1(2,3), D2(3,3), D3(3,2), D4(3,2)
    integer :: i,j

    sphere = ref2sphere(a,b,elem)
    corners = elem%corners3D

    c(1,1)=corners(1)%x;  c(2,1)=corners(1)%y;  c(3,1)=corners(1)%z; 
    c(1,2)=corners(2)%x;  c(2,2)=corners(2)%y;  c(3,2)=corners(2)%z; 
    c(1,3)=corners(3)%x;  c(2,3)=corners(3)%y;  c(3,3)=corners(3)%z; 
    c(1,4)=corners(4)%x;  c(2,4)=corners(4)%y;  c(3,4)=corners(4)%z; 

    q(1)=(1-a)*(1-b); q(2)=(1+a)*(1-b); q(3)=(1+a)*(1+b); q(4)=(1-a)*(1+b);
    q=q/4.0d0;

    do i=1,3
      xx(i)=sum(c(i,:)*q(:))
    enddo

    r=sqrt(xx(1)**2+xx(2)**2+xx(3)**2)

    lam=sphere%lon; th=sphere%lat;
    sinlam=sin(lam); sinth=sin(th);
    coslam=cos(lam); costh=cos(th);

    D1(1,1)=-sinlam; D1(1,2)=coslam; D1(1,3)=0.0d0; 
    D1(2,1)=0.0d0;  D1(2,2)=0.0d0;    D1(2,3)=1.0d0;

    D2(1,1)=(sinlam**2)*(costh**2)+sinth**2; D2(1,2)=-sinlam*coslam*(costh**2); D2(1,3)=-coslam*sinth*costh;
    D2(2,1)=-sinlam*coslam*(costh**2); D2(2,2)=(coslam**2)*(costh**2)+sinth**2; D2(2,3)=-sinlam*sinth*costh;
    D2(3,1)=-coslam*sinth;           D2(3,2)=-sinlam*sinth;               D2(3,3)=costh;

    dd(1,1)=-1+b; dd(1,2)=-1+a;
    dd(2,1)=1-b; dd(2,2)=-1-a;
    dd(3,1)=1+b; dd(3,2)=1+a;
    dd(4,1)=-1-b; dd(4,2)=1-a;

    dd=dd/4.0d0

    do i=1,3
      do j=1,2
	D3(i,j)=sum(c(i,:)*dd(:,j))
      enddo
    enddo

    do i=1,3
      do j=1,2
	D4(i,j)=sum(D2(i,:)*D3(:,j))
      enddo
    enddo   

    do i=1,2
      do j=1,2
	D(i,j)=sum(D1(i,:)*D4(:,j))
      enddo
    enddo

    D=D/r
  end subroutine dmap_elementlocal







  ! ========================================
  ! coreolis_init_atomic:
  !
  ! Initialize coreolis term ...
  !
  ! ========================================

  subroutine coreolis_init_atomic(elem)
    use element_mod, only : element_t
    use dimensions_mod, only : np
    use physical_constants, only : omega

    type (element_t) :: elem

    ! Local variables

    integer                  :: i,j
    real (kind=real_kind) :: lat,lon,rangle

    rangle = rotate_grid*DD_PI/180
    do j=1,np
       do i=1,np
             if ( rotate_grid /= 0) then
                lat = elem%spherep(i,j)%lat
                lon = elem%spherep(i,j)%lon
             	elem%fcor(i,j)= 2*omega* &
                     (-cos(lon)*cos(lat)*sin(rangle) + sin(lat)*cos(rangle))
             else
                elem%fcor(i,j) = 2.0D0*omega*SIN(elem%spherep(i,j)%lat)
             endif
       end do
    end do

  end subroutine coreolis_init_atomic

  ! =========================================
  ! rotation_init_atomic:
  !
  ! Initialize cube rotation terms resulting
  ! from changing cube face coordinate systems
  !
  ! =========================================


 subroutine rotation_init_atomic(elem, rot_type)
    use element_mod, only : element_t
    use dimensions_mod, only : np
    use control_mod, only : north, south, east, west, neast, seast, swest, nwest

    type (element_t) :: elem
    character(len=*) rot_type

    ! =======================================
    ! Local variables
    ! =======================================

    integer :: myface_no        ! current element face number
    integer :: nbrface_no       ! neighbor element face number
    integer :: inbr
    integer :: nrot,irot
    integer :: ii,i,j,k
    integer :: ir,jr
    integer :: start, cnt

    real (kind=real_kind) :: Dloc(2,2,np)
    real (kind=real_kind) :: Drem(2,2,np)
    real (kind=real_kind) :: x1,x2


    myface_no = elem%vertex%face_number

    nrot   = 0

    do inbr=1,8
        cnt = elem%vertex%nbrs_ptr(inbr+1) -  elem%vertex%nbrs_ptr(inbr) 
        start =  elem%vertex%nbrs_ptr(inbr) 

        do k = 0, cnt-1
           nbrface_no = elem%vertex%nbrs_face(start+k)
           if (myface_no /= nbrface_no) nrot=nrot+1
        end do

    end do

    if(associated(elem%desc%rot)) then
       if (size(elem%desc%rot) > 0) then
          !         deallocate(elem%desc%rot)
          NULLIFY(elem%desc%rot)
       endif
    end if

    ! =====================================================
    ! If there are neighbors on other cube faces, allocate 
    ! an array of rotation matrix structs.
    ! =====================================================

    if (nrot > 0) then
       allocate(elem%desc%rot(nrot))
       elem%desc%use_rotation=1
       irot=0          

       do inbr=1,8
          cnt = elem%vertex%nbrs_ptr(inbr+1) -  elem%vertex%nbrs_ptr(inbr) 
          start =  elem%vertex%nbrs_ptr(inbr) 

          do k= 0, cnt-1

             nbrface_no = elem%vertex%nbrs_face(start+k)
             ! The cube edge (myface_no,nbrface_no) and inbr defines 
             ! a unique rotation given by (D^-1) on myface_no x (D) on nbrface_no

             if (myface_no /= nbrface_no .and. elem%vertex%nbrs(start+k) /= -1 ) then           
                irot=irot+1

                if (inbr <= 4) then      
                   allocate(elem%desc%rot(irot)%R(2,2,np))  ! edge
                else                     
                   allocate(elem%desc%rot(irot)%R(2,2,1 ))   ! corner
                end if

                ! must compute Dloc on my face, Drem on neighbor face, 
                ! for each point on edge or corner.
                
                ! ==================================== 
                ! Equatorial belt east/west neighbors
                ! ==================================== 
                
                if (nbrface_no <= 4 .and. myface_no <= 4) then
                   
                   if (inbr == west) then
                      do j=1,np
                         x1 = elem%cartp(1,j)%x
                         x2 = elem%cartp(1,j)%y
                         call Vmap(Dloc(1,1,j), x1,x2,myface_no)
                         call Vmap(Drem(1,1,j),-x1,x2,nbrface_no)
                      end do
                   else if (inbr == east) then
                      do j=1,np
                         x1 = elem%cartp(np,j)%x
                         x2 = elem%cartp(np,j)%y
                         call Vmap(Dloc(1,1,j), x1,x2,myface_no)
                         call Vmap(Drem(1,1,j),-x1,x2,nbrface_no)
                      end do
                   else if (inbr == swest ) then
                      x1 = elem%cartp(1,1)%x
                      x2 = elem%cartp(1,1)%y
                      call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                      call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                   else if (inbr == nwest ) then
                      x1 = elem%cartp(1,np)%x
                      x2 = elem%cartp(1,np)%y
                      call Vmap(Dloc(1,1,1), x1,x2,myface_no)
                      call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                   else if (inbr == seast ) then
                      x1 = elem%cartp(np,1)%x
                      x2 = elem%cartp(np,1)%y
                      call Vmap(Dloc(1,1,1), x1,x2,myface_no)
                      call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                   else if (inbr == neast ) then
                      x1 = elem%cartp(np,np)%x
                      x2 = elem%cartp(np,np)%y
                      call Vmap(Dloc(1,1,1), x1,x2,myface_no)
                      call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                   end if
                   
                end if
                
                ! Northern Neighbors of Equatorial Belt
                
                if ( myface_no <= 4 .and. nbrface_no == 6 ) then
                   if (inbr == north) then
                      do i=1,np
                         ir=np+1-i
                         x1 = elem%cartp(i,np)%x
                         x2 = elem%cartp(i,np)%y
                         if ( myface_no == 1) then
                            call Vmap(Dloc(1,1,i), x1,x2,myface_no)
                            call Vmap(Drem(1,1,i),x1,-x2,nbrface_no)
                         end if
                         if ( myface_no == 2) then
                            call Vmap(Dloc(1,1,i),x1,x2,myface_no)
                            call Vmap(Drem(1,1,i),x2,x1,nbrface_no)
                            
                         end if
                         if ( myface_no == 3) then
                            call Vmap(Dloc(1,1,ir), x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x1,x2,nbrface_no)
                         end if
                         if ( myface_no == 4) then
                            call Vmap(Dloc(1,1,ir), x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x2,-x1,nbrface_no)
                         end if
                      end do
                   else if (inbr == nwest) then
                      x1 = elem%cartp(1,np)%x
                      x2 = elem%cartp(1,np)%y
                      call Vmap(Dloc(1,1,1), x1,x2,myface_no)
                      if ( myface_no == 1) call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      if ( myface_no == 2) call Vmap(Drem(1,1,1),x2, x1,nbrface_no)
                      if ( myface_no == 3) call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      if ( myface_no == 4) call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                   else if (inbr == neast) then
                      x1 = elem%cartp(np,np)%x
                      x2 = elem%cartp(np,np)%y
                      call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                      if ( myface_no == 1) call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      if ( myface_no == 2) call Vmap(Drem(1,1,1),x2, x1,nbrface_no)
                      if ( myface_no == 3) call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      if ( myface_no == 4) call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                   end if
                   
                end if
                
                ! Southern Neighbors of Equatorial Belt
                
                if ( myface_no <= 4 .and. nbrface_no == 5 ) then
                   if (inbr == south) then
                      do i=1,np
                         ir=np+1-i
                         x1 = elem%cartp(i,1)%x
                         x2 = elem%cartp(i,1)%y
                         if ( myface_no == 1) then
                            call Vmap(Dloc(1,1,i), x1, x2,myface_no)
                            call Vmap(Drem(1,1,i), x1,-x2,nbrface_no)
                         end if
                         if ( myface_no == 2) then
                            call Vmap(Dloc(1,1,ir),x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x2,-x1,nbrface_no)
                         end if
                         if ( myface_no == 3) then
                            call Vmap(Dloc(1,1,ir), x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x1,x2,nbrface_no)
                         end if
                         if ( myface_no == 4) then
                            call Vmap(Dloc(1,1,i), x1,x2,myface_no)
                            call Vmap(Drem(1,1,i), x2,x1,nbrface_no)
                         end if
                      end do
                   else if (inbr == swest) then
                      x1 = elem%cartp(1,1)%x
                      x2 = elem%cartp(1,1)%y
                      call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                      
                      
                      if ( myface_no == 1) call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      if ( myface_no == 2) call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      if ( myface_no == 3) call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      if ( myface_no == 4) call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                      
                   else if (inbr == seast) then
                      x1 = elem%cartp(np,1)%x
                      x2 = elem%cartp(np,1)%y
                      call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                      if ( myface_no == 1) call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      if ( myface_no == 2) call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      if ( myface_no == 3) call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      if ( myface_no == 4) call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                   end if
                   
                end if
                
                ! Neighbors of Northern Capping Face Number 6
                
                if ( myface_no == 6 ) then
                   if (nbrface_no == 1) then
                      if (inbr == south) then
                         do i=1,np
                            x1 = elem%cartp(i,1)%x
                            x2 = elem%cartp(i,1)%y
                            call Vmap(Dloc(1,1,i),x1,x2,myface_no)
                            call Vmap(Drem(1,1,i),x1,-x2,nbrface_no)
                         end do
                      else if (inbr == swest) then
                         x1 = elem%cartp(1,1)%x
                         x2 = elem%cartp(1,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      else if (inbr == seast) then
                         x1 = elem%cartp(np,1)%x
                         x2 = elem%cartp(np,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      end if
                   else if (nbrface_no == 2) then
                      if (inbr == east) then
                         do j=1,np
                            x1 = elem%cartp(np,j)%x
                            x2 = elem%cartp(np,j)%y
                            call Vmap(Dloc(1,1,j),x1,x2,myface_no)
                            call Vmap(Drem(1,1,j),x2,x1,nbrface_no)
                         end do
                      else if (inbr == seast) then
                         x1 = elem%cartp(np,1)%x
                         x2 = elem%cartp(np,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                      else if (inbr == neast) then
                         x1 = elem%cartp(np,np)%x
                         x2 = elem%cartp(np,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                      end if
                   else if (nbrface_no == 3) then
                      if (inbr == north) then
                         do i=1,np
                            ir =np+1-i
                            x1 = elem%cartp(i,np)%x
                            x2 = elem%cartp(i,np)%y
                            call Vmap(Dloc(1,1,ir),x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x1,x2,nbrface_no)
                         end do
                      else if (inbr == nwest) then
                         x1 = elem%cartp(1,np)%x
                         x2 = elem%cartp(1,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      else if (inbr == neast) then
                         x1 = elem%cartp(np,np)%x
                         x2 = elem%cartp(np,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      end if
                   else if (nbrface_no == 4) then
                      if (inbr == west) then
                         do j=1,np
                            jr=np+1-j
                            x1 = elem%cartp(1,j)%x
                            x2 = elem%cartp(1,j)%y
                            call Vmap(Dloc(1,1,jr), x1, x2,myface_no )
                            call Vmap(Drem(1,1,jr),-x2,-x1,nbrface_no)
                         end do
                      else if (inbr == swest) then
                         x1 = elem%cartp(1,1)%x
                         x2 = elem%cartp(1,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      else if (inbr == nwest) then
                         x1 = elem%cartp(1,np)%x
                         x2 = elem%cartp(1,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      end if
                   end if
                end if
                
                ! Neighbors of South Capping Face Number 5

                if ( myface_no == 5 ) then
                   if (nbrface_no == 1) then
                      if (inbr == north) then
                         do i=1,np
                            x1 = elem%cartp(i,np)%x
                            x2 = elem%cartp(i,np)%y
                            call Vmap(Dloc(1,1,i),x1,x2,myface_no)
                            call Vmap(Drem(1,1,i),x1,-x2,nbrface_no)
                         end do
                      else if (inbr == nwest) then
                         x1 = elem%cartp(1,np)%x
                         x2 = elem%cartp(1,np)%y
                         call Vmap(Dloc(:,:,1),x1,x2,myface_no)
                         call Vmap(Drem(:,:,1),x1,-x2,nbrface_no)
                      else if (inbr == neast) then
                         x1 = elem%cartp(np,np)%x
                         x2 = elem%cartp(np,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x1,-x2,nbrface_no)
                      end if
                   else if (nbrface_no == 2) then
                      if (inbr == east) then
                         do j=1,np
                            jr=np+1-j
                            x1 = elem%cartp(np,j)%x
                            x2 = elem%cartp(np,j)%y
                            call Vmap(Dloc(1,1,jr),x1,  x2,myface_no)
                            call Vmap(Drem(1,1,jr),-x2,-x1,nbrface_no)
                         end do
                      else if (inbr == seast) then
                         x1 = elem%cartp(np,1)%x
                         x2 = elem%cartp(np,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      else if (inbr == neast) then
                         x1 = elem%cartp(np,np)%x
                         x2 = elem%cartp(np,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x2,-x1,nbrface_no)
                      end if
                   else if (nbrface_no == 3) then
                      if (inbr == south) then
                         do i=1,np
                            ir=np+1-i
                            x1 = elem%cartp(i,1)%x
                            x2 = elem%cartp(i,1)%y
                            call Vmap(Dloc(1,1,ir),x1,x2,myface_no)
                            call Vmap(Drem(1,1,ir),-x1,x2,nbrface_no)
                         end do
                      else if (inbr == swest) then
                         x1 = elem%cartp(1,1)%x
                         x2 = elem%cartp(1,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      else if (inbr == seast) then
                         x1 = elem%cartp(np,1)%x
                         x2 = elem%cartp(np,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),-x1,x2,nbrface_no)
                      end if
                   else if (nbrface_no == 4) then
                      if (inbr == west) then
                         do j=1,np
                            x1 = elem%cartp(1,j)%x
                            x2 = elem%cartp(1,j)%y
                            call Vmap(Dloc(1,1,j),x1,x2,myface_no)
                            call Vmap(Drem(1,1,j),x2,x1,nbrface_no)
                         end do
                      else if (inbr == swest) then
                         x1 = elem%cartp(1,1)%x
                         x2 = elem%cartp(1,1)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                      else if (inbr == nwest) then
                         x1 = elem%cartp(1,np)%x
                         x2 = elem%cartp(1,np)%y
                         call Vmap(Dloc(1,1,1),x1,x2,myface_no)
                         call Vmap(Drem(1,1,1),x2,x1,nbrface_no)
                      end if
                   end if
                end if
                
                elem%desc%rot(irot)%nbr = inbr
                if (rot_type == "covariant") then
                   do i=1,SIZE(elem%desc%rot(irot)%R(:,:,:),3)
                      elem%desc%rot(irot)%R(:,:,i)=covariant_rot(Dloc(:,:,i),Drem(:,:,i))
                   end do
                else if (rot_type == "contravariant") then
                   do i=1,SIZE(elem%desc%rot(irot)%R(:,:,:),3)
                      elem%desc%rot(irot)%R(:,:,i)=contravariant_rot(Dloc(:,:,i),Drem(:,:,i))
                   end do
                end if
                
             endif ! end of a unique rotation
          end do !k loop over neighbors in that direction
       end do !inbr loop
    end if !nrot > 0
    
  end subroutine rotation_init_atomic
  

  subroutine set_corner_coordinates(elem)
    use element_mod,    only : element_t 
    use dimensions_mod, only : ne
 
    type (element_t) :: elem 

    ! Local variables
    integer  i,ie,je,face_no,nn
    real (kind=real_kind)  :: dx,dy, startx, starty

    if (0==ne) call abortmp('Error in set_corner_coordinates: ne is zero')

    ! ========================================
    ! compute cube face coordinates of element
    ! =========================================

    call convert_gbl_index(elem%vertex%number,ie,je,face_no)

    elem%vertex%face_number = face_no 
    dx = (cube_xend-cube_xstart)/ne
    dy = (cube_yend-cube_ystart)/ne

    startx = cube_xstart+ie*dx
    starty = cube_ystart+je*dy

    elem%corners(1)%x = startx
    elem%corners(1)%y = starty
    elem%corners(2)%x = startx+dx
    elem%corners(2)%y = starty
    elem%corners(3)%x = startx+dx
    elem%corners(3)%y = starty+dy
    elem%corners(4)%x = startx   
    elem%corners(4)%y = starty+dy

    do i=1,4
       elem%node_multiplicity(i) = 4
    end do  
    ie = ie + 1
    je = je + 1
    if      (ie ==  1 .and. je ==  1) then 
       elem%node_multiplicity(1) = 3
    else if (ie == ne .and. je ==  1) then 
       elem%node_multiplicity(2) = 3
    else if (ie == ne .and. je == ne) then
       elem%node_multiplicity(3) = 3
    else if (ie ==  1 .and. je == ne) then
       elem%node_multiplicity(4) = 3
    end if  

  end subroutine set_corner_coordinates


  subroutine assign_node_numbers_to_elem(elements, GridVertex)
    use dimensions_mod, only : ne
    use element_mod,    only : element_t
    use control_mod,    only : north, south, east, west, neast, seast, swest, nwest
    use gridgraph_mod,  only : GridVertex_t
    implicit none
    type (element_t), intent(inout)    :: elements(:)
    type (GridVertex_t), intent(in)    :: GridVertex(:)

    type (GridVertex_t)                :: vertex
    integer                            :: connectivity(6*ne*ne, 4)
    integer                            :: nn(4), en(4)
    integer el, i, n, direction
    integer current_node_num, tot_ne
    integer :: start, cnt

    current_node_num = 0
    tot_ne = 6*ne*ne

    if (0==ne)      call abortmp('Error in assign_node_numbers_to_elem: ne is zero')
    if (tot_ne /= SIZE(GridVertex)) call abortmp('Error in assign_node_numbers_to_elem: GridVertex not correct length')

    connectivity = 0 

    do el = 1,tot_ne  
       vertex = GridVertex(el)
       en = 0 
       do direction = 1,8
          cnt = vertex%nbrs_ptr(direction+1) -  vertex%nbrs_ptr(direction) 
          start =  vertex%nbrs_ptr(direction) 

          do i=0, cnt-1
             n = vertex%nbrs(start+i)
             if (n /= -1) then
                nn = connectivity(n,:)
                select case (direction)
                case (north)
                   if (nn(1)/=0) en(4) = nn(1)
                   if (nn(2)/=0) en(3) = nn(2)
                case (south)
                   if (nn(4)/=0) en(1) = nn(4)
                   if (nn(3)/=0) en(2) = nn(3)
                case (east)
                   if (nn(1)/=0) en(2) = nn(1)
                   if (nn(4)/=0) en(3) = nn(4)
                case (west)
                   if (nn(2)/=0) en(1) = nn(2)
                   if (nn(3)/=0) en(4) = nn(3)
                case (neast)
                   if (nn(1)/=0) en(3) = nn(1)
                case (seast)
                   if (nn(4)/=0) en(2) = nn(4)
                case (swest)
                   if (nn(3)/=0) en(1) = nn(3)
                case (nwest)
                   if (nn(2)/=0) en(4) = nn(2)
                end select
             end if
          end do
       end do !direction

       do i=1,4
          if (en(i) == 0) then
             current_node_num = current_node_num + 1
             en(i) = current_node_num
          end if
       end do
       connectivity(el,:) = en
    end do

    if (current_node_num /= (6*ne*ne+2)) then
       call abortmp('Error in assignment of node numbers: Failed Euler test')
    end if
    do el = 1,SIZE(elements)
      elements(el)%node_numbers = connectivity(elements(el)%vertex%number, :)
    end do
  end subroutine assign_node_numbers_to_elem


  ! ================================================
  ! convert_gbl_index:
  !
  ! Convert global element index to cube index
  ! ================================================

  subroutine convert_gbl_index(number,ie,je,face_no)
    use dimensions_mod, only : ne
    integer, intent(in)  :: number
    integer, intent(out) :: ie,je,face_no

    if (0==ne) call abortmp('Error in cube_mod:convert_gbl_index: ne is zero')

    !  inverse of the function:      number = 1 + ie + ne*je + ne*ne*(face_no-1)
    face_no=((number-1)/(ne*ne))+1
    ie=MODULO(number-1,ne)
    je=(number-1)/ne - (face_no-1)*ne

  end subroutine convert_gbl_index
   
  subroutine CubeTopology(GridEdge, GridVertex)
    use params_mod, only : RECURSIVE, SFCURVE
    use control_mod, only: partmethod
    use gridgraph_mod, only : GridEdge_t, GridVertex_t, initgridedge, PrintGridEdge, &
         allocate_gridvertex_nbrs, deallocate_gridvertex_nbrs 
    use dimensions_mod, only : np, ne
    use spacecurve_mod, only :  IsFactorable, genspacecurve
    use control_mod, only : north, south, east, west, neast, seast, swest, nwest
    !-----------------------
    implicit none

    ! Since GridVertex fields must be allocated before calling this, it
    ! must be intent(inout).
!og: is 'target' here necessary?
!GridEdge : changed its 'out' attribute to 'inout'
    type (GridEdge_t),   intent(inout),target     :: GridEdge(:)
    type (GridVertex_t), intent(inout),target     :: GridVertex(:)


    integer,allocatable       :: Mesh(:,:)
    integer,allocatable       :: Mesh2(:,:),Mesh2_map(:,:,:),sfcij(:,:)
    type (GridVertex_t),allocatable        :: GridElem(:,:,:)
    integer                   :: i,j,k,ll,number,irev,ne2,i2,j2,sfc_index
    integer                   :: EdgeWgtP,CornerWgt
    integer                   :: ielem, nedge
    integer                   :: offset, ierr, loc
    logical, allocatable      :: nbrs_used(:,:,:,:)
    

    if (0==ne) call abortmp('Error in CubeTopology: ne is zero')

    allocate(GridElem(ne,ne,nfaces),stat=ierr)
    do k = 1, nfaces
       do j = 1, ne
          do i = 1, ne
             call allocate_gridvertex_nbrs(GridElem(i,j,k))
          end do
       end do
    end do

    if(ierr/=0) then
       call abortmp('error in allocation of GridElem structure')
    end if

    allocate(nbrs_used(ne,ne,nfaces,8))
    nbrs_used = .false.


    number=1
    EdgeWgtP   = np
    CornerWgt = 1
    do k=1,nfaces
       do j=1,ne
          do i=1,ne
             ! ====================================
             ! Number elements
             ! ====================================
             GridElem(i,j,k)%nbrs(:)=0
             GridElem(i,j,k)%nbrs_wgt(:)=0
             GridElem(i,j,k)%nbrs_ptr(:)=0
             GridElem(i,j,k)%nbrs_wgt_ghost(:)=1  ! always this value
             GridElem(i,j,k)%SpaceCurve=0
             GridElem(i,j,k)%number=number 
             number=number+1

          end do
       end do
    end do
    

    !    print *,'CubeTopology: Ne, IsFactorable, IsLoadBalanced : ',ne,IsFactorable(ne),IsLoadBalanced(nelem,npart)

    allocate(Mesh(ne,ne))
    if(IsFactorable(ne)) then
       call GenspaceCurve(Mesh)
       !      call PrintCurve(Mesh) 
    else
       ! find the smallest ne2 which is a power of 2 and ne2>ne
       ne2=2**ceiling( log(real(ne))/log(2d0) )
       if (ne2<ne) call abortmp('Fatel SFC error')

       allocate(Mesh2(ne2,ne2))
       allocate(Mesh2_map(ne2,ne2,2))
       allocate(sfcij(0:ne2*ne2,2))

       call GenspaceCurve(Mesh2)  ! SFC partition for ne2

       ! associate every element on the ne x ne mesh (Mesh)
       ! with its closest element on the ne2 x ne2 mesh (Mesh2)
       ! Store this as a map from Mesh2 -> Mesh in Mesh2_map.
       ! elements in Mesh2 which are not mapped get assigned a value of 0
       Mesh2_map=0
       do j=1,ne
          do i=1,ne
             ! map this element to an (i2,j2) element
             ! [ (i-.5)/ne , (j-.5)/ne ]  = [ (i2-.5)/ne2 , (j2-.5)/ne2 ]
             i2=nint( ((i-.5)/ne)*ne2 + .5 )
             j2=nint( ((j-.5)/ne)*ne2 + .5 )
             if (i2<1) i2=1
             if (i2>ne2) i2=ne2
             if (j2<1) j2=1
             if (j2>ne2) j2=ne2
             Mesh2_map(i2,j2,1)=i
             Mesh2_map(i2,j2,2)=j
          enddo
       enddo

       ! create a reverse index array for Mesh2
       ! k = Mesh2(i,j) 
       ! (i,j) = (sfcij(k,1),sfci(k,2)) 
       do j=1,ne2
          do i=1,ne2
             k=Mesh2(i,j)
             sfcij(k,1)=i
             sfcij(k,2)=j
          enddo
       enddo

       ! generate a SFC for Mesh with the same ordering as the 
       ! elements in Mesh2 which map to Mesh.
       sfc_index=0
       do k=0,ne2*ne2-1
          i2=sfcij(k,1)
          j2=sfcij(k,2)
          i=Mesh2_map(i2,j2,1)
          j=Mesh2_map(i2,j2,2)
          if (i/=0) then
             ! (i2,j2) element maps to (i,j) element
             Mesh(i,j)=sfc_index
             sfc_index=sfc_index+1
          endif
       enddo
#if 0
       print *,'SFC Mapping to non powers of 2,3 used.  Mesh:'  
       do j=1,ne
          write(*,'(99i3)') (Mesh(i,j),i=1,ne)
       enddo
       call PrintCurve(Mesh2) 
#endif
       deallocate(Mesh2)
       deallocate(Mesh2_map)
       deallocate(sfcij)
    endif


    ! -------------------------------------------
    !  Setup the space-filling curve for face 1
    ! -------------------------------------------
    offset=0
    do j=1,ne
       do i=1,ne
          GridElem(i,j,1)%SpaceCurve = offset + Mesh(i,ne-j+1)
       enddo
    enddo

    ! -------------------------------------------
    !  Setup the space-filling curve for face 2
    ! -------------------------------------------
    offset = offset + ne*ne
    do j=1,ne
       do i=1,ne
          GridElem(i,j,2)%SpaceCurve = offset + Mesh(i,ne-j+1)
       enddo
    enddo

    ! -------------------------------------------
    !  Setup the space-filling curve for face 6
    ! -------------------------------------------
    offset = offset + ne*ne
    do j=1,ne
       do i=1,ne
          GridElem(i,j,6)%SpaceCurve = offset + Mesh(ne-i+1,ne-j+1)
       enddo
    enddo

    ! -------------------------------------------
    !  Setup the space-filling curve for face 4
    ! -------------------------------------------
    offset = offset + ne*ne
    do j=1,ne
       do i=1,ne
          GridElem(i,j,4)%SpaceCurve = offset + Mesh(ne-j+1,i)
       enddo
    enddo

    ! -------------------------------------------
    !  Setup the space-filling curve for face 5
    ! -------------------------------------------
    offset = offset + ne*ne
    do j=1,ne
       do i=1,ne
          GridElem(i,j,5)%SpaceCurve = offset + Mesh(i,j)
       enddo
    enddo


    ! -------------------------------------------
    !  Setup the space-filling curve for face 3
    ! -------------------------------------------
    offset = offset + ne*ne
    do j=1,ne
       do i=1,ne
          GridElem(i,j,3)%SpaceCurve = offset + Mesh(i,j)
       enddo
    enddo

    ! ==================
    ! face interiors
    ! ==================
    do k=1,6
       ! setup  SOUTH, WEST, SW neighbors
       do j=2,ne
          do i=2,ne
             nbrs_used(i,j,k,west) = .true.
             nbrs_used(i,j,k,south) = .true.
             nbrs_used(i,j,k,swest) = .true.


             GridElem(i,j,k)%nbrs(west)  = GridElem(i-1,j,k)%number
             GridElem(i,j,k)%nbrs_face(west)  = k
             GridElem(i,j,k)%nbrs_wgt(west)       = EdgeWgtP
             GridElem(i,j,k)%nbrs(south) = GridElem(i,j-1,k)%number
             GridElem(i,j,k)%nbrs_face(south) = k
             GridElem(i,j,k)%nbrs_wgt(south)      = EdgeWgtP
             GridElem(i,j,k)%nbrs(swest) = GridElem(i-1,j-1,k)%number
             GridElem(i,j,k)%nbrs_face(swest) = k
             GridElem(i,j,k)%nbrs_wgt(swest)      = CornerWgt
          end do
       end do

       !  setup EAST, NORTH, NE neighbors
       do j=1,ne-1
          do i=1,ne-1
             nbrs_used(i,j,k,east) = .true.
             nbrs_used(i,j,k,north) = .true.
             nbrs_used(i,j,k,neast) = .true.
             
             GridElem(i,j,k)%nbrs(east)   = GridElem(i+1,j,k)%number
             GridElem(i,j,k)%nbrs_face(east)   = k
             GridElem(i,j,k)%nbrs_wgt(east)        = EdgeWgtP
             GridElem(i,j,k)%nbrs(north)  = GridElem(i,j+1,k)%number
             GridElem(i,j,k)%nbrs_face(north)  = k
             GridElem(i,j,k)%nbrs_wgt(north)       = EdgeWgtP
             GridElem(i,j,k)%nbrs(neast) = GridElem(i+1,j+1,k)%number
             GridElem(i,j,k)%nbrs_face(neast)  = k
             GridElem(i,j,k)%nbrs_wgt(neast)       = CornerWgt
          end do
       end do

       ! Setup the remaining SOUTH, EAST, and SE neighbors
       do j=2,ne
          do i=1,ne-1
             nbrs_used(i,j,k,south) = .true.
             nbrs_used(i,j,k,east) = .true.
             nbrs_used(i,j,k,seast) = .true.
             
             
             
             GridElem(i,j,k)%nbrs(south)  = GridElem(i,j-1,k)%number
             GridElem(i,j,k)%nbrs_face(south)  = k
             GridElem(i,j,k)%nbrs_wgt(south)       = EdgeWgtP
             GridElem(i,j,k)%nbrs(east)   = GridElem(i+1,j,k)%number
             GridElem(i,j,k)%nbrs_face(east)   = k
             GridElem(i,j,k)%nbrs_wgt(east)        = EdgeWgtP
             GridElem(i,j,k)%nbrs(seast)  = GridElem(i+1,j-1,k)%number
             GridElem(i,j,k)%nbrs_face(seast)  = k
             GridElem(i,j,k)%nbrs_wgt(seast)       = CornerWgt
          enddo
       enddo

       ! Setup the remaining NORTH, WEST, and NW neighbors
       do j=1,ne-1
          do i=2,ne
             nbrs_used(i,j,k,north) = .true.
             nbrs_used(i,j,k,west) = .true.
             nbrs_used(i,j,k,nwest) = .true.
             
             
             
             GridElem(i,j,k)%nbrs(north)  = GridElem(i,j+1,k)%number
             GridElem(i,j,k)%nbrs_face(north)  = k
             GridElem(i,j,k)%nbrs_wgt(north)       = EdgeWgtP
             GridElem(i,j,k)%nbrs(west)   = GridElem(i-1,j,k)%number
             GridElem(i,j,k)%nbrs_face(west)   = k
             GridElem(i,j,k)%nbrs_wgt(west)        = EdgeWgtP
             GridElem(i,j,k)%nbrs(nwest)  = GridElem(i-1,j+1,k)%number
             GridElem(i,j,k)%nbrs_face(nwest)  = k
             GridElem(i,j,k)%nbrs_wgt(nwest)       = CornerWgt
          enddo
       enddo
    end do

    ! ======================
    ! west/east "belt" edges
    ! ======================

    do k=1,4
       do j=1,ne
          nbrs_used(1,j,k,west) = .true.
          nbrs_used(ne,j,k,east) = .true.
          
          
          GridElem(1 ,j,k)%nbrs(west) = GridElem(ne,j,MODULO(2+k,4)+1)%number
          GridElem(1 ,j,k)%nbrs_face(west) = MODULO(2+k,4)+1
          GridElem(1 ,j,k)%nbrs_wgt(west)  = EdgeWgtP
          GridElem(ne,j,k)%nbrs(east) = GridElem(1 ,j,MODULO(k  ,4)+1)%number
          GridElem(ne,j,k)%nbrs_face(east) = MODULO(k  ,4)+1
          GridElem(ne,j,k)%nbrs_wgt(east)  = EdgeWgtP

          !  Special rules for corner 'edges'
          if( j /= 1) then
             nbrs_used(1,j,k,swest) = .true.
             nbrs_used(ne,j,k,seast) = .true.
             
             
             GridElem(1 ,j,k)%nbrs(swest)   = GridElem(ne,j-1,MODULO(2+k,4)+1)%number
             GridElem(1 ,j,k)%nbrs_face(swest)   = MODULO(2+k,4)+1
             GridElem(1 ,j,k)%nbrs_wgt(swest)        = CornerWgt
             GridElem(ne,j,k)%nbrs(seast)   = GridElem(1 ,j-1,MODULO(k  ,4)+1)%number
             GridElem(ne,j,k)%nbrs_face(seast)   = MODULO(k  ,4)+1
             GridElem(ne,j,k)%nbrs_wgt(seast)        = CornerWgt
          endif
          if( j /= ne) then
             nbrs_used(1,j,k,nwest) = .true.
             nbrs_used(ne,j,k,neast) = .true.
             
             
             GridElem(1 ,j,k)%nbrs(nwest)   = GridElem(ne,j+1,MODULO(2+k,4)+1)%number
             GridElem(1 ,j,k)%nbrs_face(nwest)   = MODULO(2+k,4)+1
             GridElem(1 ,j,k)%nbrs_wgt(nwest)        = CornerWgt
             GridElem(ne,j,k)%nbrs(neast)   = GridElem(1 ,j+1,MODULO(k  ,4)+1)%number
             GridElem(ne,j,k)%nbrs_face(neast)   = MODULO(k  ,4)+1
             GridElem(ne,j,k)%nbrs_wgt(neast)        = CornerWgt
          endif
       end do
    end do


    ! ==================================
    ! south edge of 1 / north edge of 5
    ! ==================================

    do i=1,ne
       nbrs_used(i,1,1,south) = .true.
       nbrs_used(i,ne,5,north) = .true.
              
       GridElem(i,1 ,1)%nbrs(south) = GridElem(i,ne,5)%number
       GridElem(i,1 ,1)%nbrs_face(south) = 5
       GridElem(i,1 ,1)%nbrs_wgt(south)      = EdgeWgtP
       GridElem(i,ne,5)%nbrs(north) = GridElem(i,1 ,1)%number
       GridElem(i,ne,5)%nbrs_face(north) = 1
       GridElem(i,ne,5)%nbrs_wgt(north)      = EdgeWgtP

       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,1,1,swest) = .true.
          nbrs_used(i,ne,5,nwest) = .true.
          
          GridElem(i,1 ,1)%nbrs(swest)    = GridElem(i-1,ne,5)%number
          GridElem(i,1 ,1)%nbrs_face(swest)    = 5
          GridElem(i,1 ,1)%nbrs_wgt(swest)         = CornerWgt
          GridElem(i,ne,5)%nbrs(nwest)    = GridElem(i-1,1 ,1)%number
          GridElem(i,ne,5)%nbrs_face(nwest)    = 1
          GridElem(i,ne,5)%nbrs_wgt(nwest)         = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,1,1,seast) = .true.
          nbrs_used(i,ne,5,neast) = .true.
          
          GridElem(i,1 ,1)%nbrs(seast)    = GridElem(i+1,ne,5)%number
          GridElem(i,1 ,1)%nbrs_face(seast)    = 5
          GridElem(i,1 ,1)%nbrs_wgt(seast)         = CornerWgt
          GridElem(i,ne,5)%nbrs(neast)    = GridElem(i+1,1 ,1)%number
          GridElem(i,ne,5)%nbrs_face(neast)    = 1
          GridElem(i,ne,5)%nbrs_wgt(neast)         = CornerWgt
       endif

    end do

    ! ==================================
    ! south edge of 2 / east edge of 5
    ! ==================================

    do i=1,ne
       irev=ne+1-i
       nbrs_used(i,1,2,south) = .true.
       nbrs_used(ne,i,5,east) = .true.
       
       
       GridElem(i,1 ,2)%nbrs(south) = GridElem(ne,irev,5)%number
       GridElem(i,1 ,2)%nbrs_face(south) = 5
       GridElem(i,1 ,2)%nbrs_wgt(south)      = EdgeWgtP
       GridElem(ne,i,5)%nbrs(east)  = GridElem(irev,1 ,2)%number
       GridElem(ne,i,5)%nbrs_face(east)  = 2
       GridElem(ne,i,5)%nbrs_wgt(east)       = EdgeWgtP

       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,1,2,swest) = .true.
          nbrs_used(ne,i,5,seast) = .true.
          
          
          GridElem(i,1 ,2)%nbrs(swest) = GridElem(ne,irev+1,5)%number
          GridElem(i,1 ,2)%nbrs_face(swest) = 5
          GridElem(i,1 ,2)%nbrs_wgt(swest)      = CornerWgt
          GridElem(ne,i,5)%nbrs(seast) = GridElem(irev+1,1 ,2)%number
          GridElem(ne,i,5)%nbrs_face(seast) = 2
          GridElem(ne,i,5)%nbrs_wgt(seast)      = CornerWgt
       endif
       if(i /= ne) then
          nbrs_used(i,1,2,seast) = .true.
          nbrs_used(ne,i,5,neast) = .true.
          
          
          GridElem(i,1 ,2)%nbrs(seast)   = GridElem(ne,irev-1,5)%number
          GridElem(i,1 ,2)%nbrs_face(seast)   = 5
          GridElem(i,1 ,2)%nbrs_wgt(seast)        = CornerWgt
          GridElem(ne,i,5)%nbrs(neast)   = GridElem(irev-1,1 ,2)%number
          GridElem(ne,i,5)%nbrs_face(neast)   = 2
          GridElem(ne,i,5)%nbrs_wgt(neast)        = CornerWgt
       endif
    enddo
    ! ==================================
    ! south edge of 3 / south edge of 5
    ! ==================================

    do i=1,ne
       irev=ne+1-i
       nbrs_used(i,1,3,south) = .true.
       nbrs_used(i,1,5,south) = .true.
       
       GridElem(i,1,3)%nbrs(south) = GridElem(irev,1,5)%number
       GridElem(i,1,3)%nbrs_face(south) = 5
       GridElem(i,1,3)%nbrs_wgt(south)      = EdgeWgtP
       GridElem(i,1,5)%nbrs(south) = GridElem(irev,1,3)%number
       GridElem(i,1,5)%nbrs_face(south) = 3
       GridElem(i,1,5)%nbrs_wgt(south)      = EdgeWgtP

       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,1,3,swest) = .true.
          nbrs_used(i,1,5,swest) = .true.
          
          
          GridElem(i,1,3)%nbrs(swest) = GridElem(irev+1,1,5)%number
          GridElem(i,1,3)%nbrs_face(swest) = 5
          GridElem(i,1,3)%nbrs_wgt(swest)      = CornerWgt
          GridElem(i,1,5)%nbrs(swest) = GridElem(irev+1,1,3)%number
          GridElem(i,1,5)%nbrs_face(swest) = 3
          GridElem(i,1,5)%nbrs_wgt(swest)      = CornerWgt
       endif
       if(i /= ne) then
          nbrs_used(i,1,3,seast) = .true.
          nbrs_used(i,1,5,seast) = .true.
          
          GridElem(i,1,3)%nbrs(seast)    = GridElem(irev-1,1,5)%number
          GridElem(i,1,3)%nbrs_face(seast)    = 5
          GridElem(i,1,3)%nbrs_wgt(seast)         = CornerWgt
          GridElem(i,1,5)%nbrs(seast)    = GridElem(irev-1,1,3)%number
          GridElem(i,1,5)%nbrs_face(seast)    = 3
          GridElem(i,1,5)%nbrs_wgt(seast)         = CornerWgt
       endif
    end do

    ! ==================================
    ! south edge of 4 / west edge of 5
    ! ==================================

    do i=1,ne
       irev=ne+1-i
       nbrs_used(i,1,4,south) = .true.
       nbrs_used(1,i,5,west) = .true.
       
       GridElem(i,1,4)%nbrs(south) = GridElem(1,i,5)%number
       GridElem(i,1,4)%nbrs_face(south) = 5
       GridElem(i,1,4)%nbrs_wgt(south)      = EdgeWgtP
       GridElem(1,i,5)%nbrs(west)  = GridElem(i,1,4)%number
       GridElem(1,i,5)%nbrs_face(west)  = 4
       GridElem(1,i,5)%nbrs_wgt(west)       = EdgeWgtP
       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,1,4,swest) = .true.
          nbrs_used(1,i,5,swest) = .true.
          
          GridElem(i,1,4)%nbrs(swest)    = GridElem(1,i-1,5)%number
          GridElem(i,1,4)%nbrs_face(swest)    = 5
          GridElem(i,1,4)%nbrs_wgt(swest)         = CornerWgt
          GridElem(1,i,5)%nbrs(swest)    = GridElem(i-1,1,4)%number
          GridElem(1,i,5)%nbrs_face(swest)    = 4
          GridElem(1,i,5)%nbrs_wgt(swest)         = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,1,4,seast) = .true.
          nbrs_used(1,i,5,nwest) = .true.
          
          GridElem(i,1,4)%nbrs(seast) = GridElem(1,i+1,5)%number
          GridElem(i,1,4)%nbrs_face(seast) = 5
          GridElem(i,1,4)%nbrs_wgt(seast)      = CornerWgt
          GridElem(1,i,5)%nbrs(nwest) = GridElem(i+1,1,4)%number
          GridElem(1,i,5)%nbrs_face(nwest) = 4
          GridElem(1,i,5)%nbrs_wgt(nwest)      = CornerWgt
       endif
    end do

    ! ==================================
    ! north edge of 1 / south edge of 6
    ! ==================================

    do i=1,ne
       nbrs_used(i,ne,1,north) = .true.
       nbrs_used(i,1,6,south) = .true.
       
       
       GridElem(i,ne,1)%nbrs(north) = GridElem(i,1 ,6)%number
       GridElem(i,ne,1)%nbrs_face(north) = 6
       GridElem(i,ne,1)%nbrs_wgt(north)      = EdgeWgtP
       GridElem(i,1 ,6)%nbrs(south) = GridElem(i,ne,1)%number
       GridElem(i,1 ,6)%nbrs_face(south) = 1
       GridElem(i,1 ,6)%nbrs_wgt(south)      = EdgeWgtP
       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,ne,1,nwest) = .true.
          nbrs_used(i,1,6,swest) = .true.
          
          GridElem(i,ne,1)%nbrs(nwest) = GridElem(i-1,1 ,6)%number
          GridElem(i,ne,1)%nbrs_face(nwest) = 6
          GridElem(i,ne,1)%nbrs_wgt(nwest)      = CornerWgt
          GridElem(i,1 ,6)%nbrs(swest) = GridElem(i-1,ne,1)%number
          GridElem(i,1 ,6)%nbrs_face(swest) = 1
          GridElem(i,1 ,6)%nbrs_wgt(swest)      = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,ne,1,neast) = .true.
          nbrs_used(i,1,6,seast) = .true.
          
          
          GridElem(i,ne,1)%nbrs(neast) = GridElem(i+1,1 ,6)%number
          GridElem(i,ne,1)%nbrs_face(neast) = 6
          GridElem(i,ne,1)%nbrs_wgt(neast)      = CornerWgt
          GridElem(i,1 ,6)%nbrs(seast) = GridElem(i+1,ne,1)%number
          GridElem(i,1 ,6)%nbrs_face(seast) = 1
          GridElem(i,1 ,6)%nbrs_wgt(seast)      = CornerWgt
       endif
    end do

    ! ==================================
    ! north edge of 2 / east edge of 6
    ! ==================================

    do i=1,ne
       nbrs_used(i,ne,2,north) = .true.
       nbrs_used(ne,i,6,east ) = .true.
       
       GridElem(i,ne,2)%nbrs(north) = GridElem(ne,i,6)%number
       GridElem(i,ne,2)%nbrs_face(north) = 6
       GridElem(i,ne,2)%nbrs_wgt(north)      = EdgeWgtP
       GridElem(ne,i,6)%nbrs(east)  = GridElem(i,ne,2)%number
       GridElem(ne,i,6)%nbrs_face(east)  = 2
       GridElem(ne,i,6)%nbrs_wgt(east)       = EdgeWgtP
       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,ne,2,nwest) = .true.
          nbrs_used(ne,i,6,seast) = .true.
          
          GridElem(i,ne,2)%nbrs(nwest)    = GridElem(ne,i-1,6)%number
          GridElem(i,ne,2)%nbrs_face(nwest)    = 6
          GridElem(i,ne,2)%nbrs_wgt(nwest)         = CornerWgt
          GridElem(ne,i,6)%nbrs(seast)    = GridElem(i-1,ne,2)%number
          GridElem(ne,i,6)%nbrs_face(seast)    = 2
          GridElem(ne,i,6)%nbrs_wgt(seast)         = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,ne,2,neast) = .true.
          nbrs_used(ne,i,6,neast) = .true.
          
          
          GridElem(i,ne,2)%nbrs(neast) = GridElem(ne,i+1,6)%number
          GridElem(i,ne,2)%nbrs_face(neast) = 6
          GridElem(i,ne,2)%nbrs_wgt(neast)      = CornerWgt
          GridElem(ne,i,6)%nbrs(neast) = GridElem(i+1,ne,2)%number
          GridElem(ne,i,6)%nbrs_face(neast) = 2
          GridElem(ne,i,6)%nbrs_wgt(neast)      = CornerWgt
       endif
    end do

    ! ===================================
    ! north edge of 3 / north edge of 6
    ! ===================================

    do i=1,ne
       irev=ne+1-i
       nbrs_used(i,ne,3,north) = .true.
       nbrs_used(i,ne,6,north) = .true.
       
       GridElem(i,ne,3)%nbrs(north) = GridElem(irev,ne,6)%number
       GridElem(i,ne,3)%nbrs_face(north) = 6
       GridElem(i,ne,3)%nbrs_wgt(north)      = EdgeWgtP
       GridElem(i,ne,6)%nbrs(north) = GridElem(irev,ne,3)%number
       GridElem(i,ne,6)%nbrs_face(north) = 3
       GridElem(i,ne,6)%nbrs_wgt(north)      = EdgeWgtP
       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,ne,3,nwest) = .true.
          nbrs_used(i,ne,6,nwest) = .true.
          
          GridElem(i,ne,3)%nbrs(nwest) = GridElem(irev+1,ne,6)%number
          GridElem(i,ne,3)%nbrs_face(nwest) = 6
          GridElem(i,ne,3)%nbrs_wgt(nwest)      = CornerWgt
          GridElem(i,ne,6)%nbrs(nwest) = GridElem(irev+1,ne,3)%number
          GridElem(i,ne,6)%nbrs_face(nwest) = 3
          GridElem(i,ne,6)%nbrs_wgt(nwest)      = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,ne,3,neast) = .true.
          nbrs_used(i,ne,6,neast) = .true.
          
          GridElem(i,ne,3)%nbrs(neast) = GridElem(irev-1,ne,6)%number
          GridElem(i,ne,3)%nbrs_face(neast) = 6
          GridElem(i,ne,3)%nbrs_wgt(neast)      = CornerWgt
          GridElem(i,ne,6)%nbrs(neast) = GridElem(irev-1,ne,3)%number
          GridElem(i,ne,6)%nbrs_face(neast) = 3
          GridElem(i,ne,6)%nbrs_wgt(neast)      = CornerWgt
       endif
    end do

    ! ===================================
    ! north edge of 4 / west edge of 6
    ! ===================================

    do i=1,ne
       irev=ne+1-i
       nbrs_used(i,ne,4,north) = .true.
       nbrs_used(1,i,6,west) = .true.
       
       GridElem(i,ne,4)%nbrs(north) = GridElem(1,irev,6)%number
       GridElem(i,ne,4)%nbrs_face(north) = 6
       GridElem(i,ne,4)%nbrs_wgt(north)      = EdgeWgtP
       GridElem(1,i,6)%nbrs(west)   = GridElem(irev,ne,4)%number
       GridElem(1,i,6)%nbrs_face(west)   = 4
       GridElem(1,i,6)%nbrs_wgt(west)        = EdgeWgtP
       !  Special rules for corner 'edges'
       if( i /= 1) then
          nbrs_used(i,ne,4,nwest) = .true.
          nbrs_used(1,i,6,swest) = .true.
          
          GridElem(i,ne,4)%nbrs(nwest) = GridElem(1,irev+1,6)%number
          GridElem(i,ne,4)%nbrs_face(nwest) = 6
          GridElem(i,ne,4)%nbrs_wgt(nwest)      = CornerWgt
          GridElem(1,i,6)%nbrs(swest)  = GridElem(irev+1,ne,4)%number
          GridElem(1,i,6)%nbrs_face(swest)  = 4
          GridElem(1,i,6)%nbrs_wgt(swest)       = CornerWgt
       endif
       if( i /= ne) then
          nbrs_used(i,ne,4,neast) = .true.
          nbrs_used(1,i,6,nwest) = .true.
          
          GridElem(i,ne,4)%nbrs(neast) = GridElem(1,irev-1,6)%number
          GridElem(i,ne,4)%nbrs_face(neast) = 6
          GridElem(i,ne,4)%nbrs_wgt(neast)      = CornerWgt
          GridElem(1,i,6)%nbrs(nwest)  = GridElem(irev-1,ne,4)%number
          GridElem(1,i,6)%nbrs_face(nwest)  = 4
          GridElem(1,i,6)%nbrs_wgt(nwest)       = CornerWgt
       endif
    end do
    

    ielem = 1                       ! Element counter
    do k=1,6
       do j=1,ne
          do i=1,ne
             GridVertex(ielem)%nbrs_ptr(1) = 1
             do ll=1,8
                loc =  GridVertex(ielem)%nbrs_ptr(ll)
                if (nbrs_used(i,j,k,ll)) then
                   GridVertex(ielem)%nbrs(loc)       = GridElem(i,j,k)%nbrs(ll)
                   GridVertex(ielem)%nbrs_face(loc)  = GridElem(i,j,k)%nbrs_face(ll)
                   GridVertex(ielem)%nbrs_wgt(loc)       = GridElem(i,j,k)%nbrs_wgt(ll)
                   GridVertex(ielem)%nbrs_wgt_ghost(loc) = GridElem(i,j,k)%nbrs_wgt_ghost(ll)

                   GridVertex(ielem)%nbrs_ptr(ll+1) = GridVertex(ielem)%nbrs_ptr(ll)+1
                else
                   GridVertex(ielem)%nbrs_ptr(ll+1) = GridVertex(ielem)%nbrs_ptr(ll)
                end if
             end do
             GridVertex(ielem)%number     = GridElem(i,j,k)%number
             GridVertex(ielem)%processor_number  = 0
             GridVertex(ielem)%SpaceCurve = GridElem(i,j,k)%SpaceCurve
             ielem=ielem+1
          end do
       end do
    end do

    DEALLOCATE(Mesh)
     do k = 1, nfaces
       do j = 1, ne
          do i = 1, ne
             call deallocate_gridvertex_nbrs(GridElem(i,j,k))
          end do
       end do
    end do
    DEALLOCATE(GridElem)
    DEALLOCATE(nbrs_used)

#if 0
    if(OutputFiles) then
       close(7)
       close(8)
    endif
#endif

    ! =======================================
    ! Generate cube graph...
    ! =======================================

#if 0
    if(OutputFiles) then
       write(9,*)nelem,2*nelem      ! METIS requires this first line
    endif
#endif

    ! ============================================
    !  Setup the Grid edges (topology independent)
    ! ============================================
    call initgridedge(GridEdge,GridVertex)

    ! ============================================
    !  Setup the Grid edge Indirect addresses
    !          (topology dependent)
    ! ============================================
    nedge = SIZE(GridEdge)
    do i=1,nedge
       call CubeSetupEdgeIndex(GridEdge(i))
    enddo

  end subroutine CubeTopology




  ! =======================================
  ! cube_assemble:
  !
  ! Assemble the cube field element by element
  ! this routine is assumed to be single 
  ! threaded...
  ! =======================================

  function cube_assemble(gbl,fld,elem,par,nelemd,nelem,ielem) result(ierr)
    use element_mod, only : element_t

#ifdef _MPI
    use parallel_mod, only : parallel_t, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_STATUS_SIZE, MPI_REAL8,MPI_TAG
#else
    use parallel_mod, only : parallel_t
#endif
    real (kind=real_kind) :: gbl(:,:,:,:)    ! global output field 
    real (kind=real_kind) :: fld(:,:,:)      ! local model field  
    type (element_t)      :: elem            ! element to assemble 
    type (parallel_t)     :: par             ! parallel structure 
    integer               :: nelemd          ! number of elements on the node
    integer               :: nelem           ! number of elements on the node
    integer               :: ielem           ! local element ctr 
    integer               :: ierr            ! returned error code

    ! Local variables

    integer :: ie,je,face_no
    integer :: ibase,jbase
    integer :: i,j,k
    integer :: elem_number

    integer :: ne1,ne2    ! element dimensions
    integer :: n1,n2      ! gbl face dimensions
    integer :: nface      ! number of faces (must be 6)
    integer :: nlyr       ! number of layers

#if defined(_MPI)
    integer :: ectr       ! global element counter
    integer tag
    integer :: count      ! w/o "::", triggers PGI 3.1 F90 bug 
    integer pe
    integer status(MPI_STATUS_SIZE)
    integer mpi_err
#endif      

    call abortmp('Because convert_gbl_index is not used cube_assemble is broken. ')
    ne1   = SIZE(fld,1)
    ne2   = SIZE(fld,2)
    nlyr  = SIZE(fld,3)

    n1    = SIZE(gbl,1)
    n2    = SIZE(gbl,2)
    nface = SIZE(gbl,3)

    ! =========================
    ! Enforce certain rules...
    ! =========================

    ierr=0

    if (MODULO(n1,ne1) /= 0) then
       ierr=-1
       return
    end if

    if (MODULO(n2,ne2) /= 0) then 
       ierr=-2
       return
    end if

    if (nface /= 6) then
       ierr=-3
       return
    end if

    ! =========================================================
    ! Perform global assembly procedure element by element ...
    ! =========================================================

    if (par%rank==par%root) then

       if (ielem<=nelemd) then
          elem_number = elem%vertex%number

          call convert_gbl_index(elem_number,ie,je,face_no)
          if (face_no /= elem%vertex%face_number) call abortmp('Error in getting face number')

          ibase=ie*ne1
          jbase=je*ne2

          do k=1,nlyr
             do j=1,ne2
                do i=1,ne1
                   gbl(i+ibase,j+jbase,face_no,k)=fld(i,j,k)
                end do
             end do
          end do
       end if

#if defined(_MPI)
       if (ielem==nelemd) then
          ectr=nelemd
          do while(ectr<nelem)
             pe    = MPI_ANY_SOURCE
             tag   = MPI_ANY_TAG
             count = ne1*ne2*nlyr
             call MPI_RECV(fld(1,1,1),   &
                  count,        &
                  MPI_REAL8,    &
                  pe,           &
                  tag,          &  
                  par%comm,     &
                  status,       &
                  mpi_err) 

             elem_number = status(MPI_TAG)
             ! call convert_gbl_index(elem_number,ie,je,face_no)
             call abortmp('Because convert_gbl_index is not used for neghbors, the _MPI version needs to be fixed')

             ibase=ie*ne1
             jbase=je*ne2

             do k=1,nlyr
                do j=1,ne2
                   do i=1,ne1
                      gbl(i+ibase,j+jbase,face_no,k)=fld(i,j,k)
                   end do
                end do
             end do

             ectr=ectr+1
          end do
       end if

    else

       pe    = par%root
       tag   = elem%vertex%number
       count = ne1*ne2*nlyr
       call MPI_SEND(fld(1,1,1),    &
            count,         &
            MPI_REAL8,     &
            pe,            &
            tag,           &
            par%comm,      &
            mpi_err)
#endif
    end if

  end function cube_assemble

  ! ===================================================================
  ! CubeEdgeCount:
  !
  !  Determine the number of Grid Edges
  !
  ! ===================================================================

  function CubeEdgeCount()  result(nedge)
    use dimensions_mod, only     : ne
    implicit none
    integer                     :: nedge

    if (0==ne) call abortmp('Error in CubeEdgeCount: ne is zero')
    nedge = nfaces*(ne*ne*nInnerElemEdge - nCornerElemEdge)

  end function CubeEdgeCount

  ! ===================================================================
  ! CubeElemCount:
  !
  !  Determine the number of Grid Elem
  !
  ! ===================================================================

  function CubeElemCount()  result(nelem)

    use dimensions_mod, only     : ne

    implicit none
    integer                     :: nelem
    if (0==ne) call abortmp('Error in CubeElemCount: ne is zero')

    nelem = nfaces*ne*ne
  end function CubeElemCount

  subroutine CubeSetupEdgeIndex(Edge)
    use gridgraph_mod, only : gridedge_t
    use dimensions_mod, only : np
    use control_mod, only : north, south, east, west, neast, seast, swest, nwest
    type (GridEdge_t),target           :: Edge

    integer                            :: np0,sFace,dFace
    logical                            :: reverse
    integer,allocatable                :: forwardV(:), forwardP(:)
    integer,allocatable                :: backwardV(:), backwardP(:)
    integer                            :: i,ii

    ii=Edge%tail_face

    !map to correct location - for now all on same nbr side have same wgt, so take the first one
    ii = Edge%tail%nbrs_ptr(ii)
  
    np0 = Edge%tail%nbrs_wgt(ii)

    sFace = Edge%tail_face
    dFace = Edge%head_face
    ! Do not reverse the indices
    reverse=.FALSE.

    ! Under special conditions use index reversal
    if(       (SFace == south .AND. dFace == east)  &
         .OR. (sFace == east  .AND. dFace == south) &
         .OR. (sFace == north .AND. dFace == west)  &
         .OR. (sFace == west  .AND. dFace == north) &
         .OR. (sFace == south .AND. dFace == south) &
         .OR. (sFace == north .AND. dFace == north) &
         .OR. (sFace == east  .AND. dFace == east ) &
         .OR. (sFace == west  .AND. dFace == west ) ) then
       reverse=.TRUE.
       Edge%reverse=.TRUE.
    endif


  end subroutine CubeSetupEdgeIndex

! 
!  HOMME mapping from sphere (or other manifold) to reference element
!  one should be able to add any mapping here.  For each new map, 
!  an associated dmap() routine (which computes the map derivative matrix) 
!  must also be written
!  Note that for conservation, the parameterization of element edges must be
!  identical for adjacent elements.  (this is violated with HOMME's default
!  equi-angular cubed-sphere mapping for non-cubed sphere grids, hence the 
!  need for a new map)
!
  function ref2sphere_double(a,b, elem) result(sphere)         
    use element_mod, only : element_t
    type (element_t)      :: elem
    real(kind=real_kind)    :: a,b
    type (spherical_polar_t)      :: sphere

    if (cubed_sphere_map==0) then
       sphere = ref2sphere_equiangular_double(a,b,elem%corners,elem%facenum)
    elseif (cubed_sphere_map==1) then
!       sphere = ref2sphere_gnomonic_double(a,b,corners,face_no)
    elseif (cubed_sphere_map==2) then
       sphere = ref2sphere_elementlocal_double(a,b,elem)
    else
       call abortmp('ref2sphere_double(): bad value of cubed_sphere_map')
    endif
  end function

  function ref2sphere_longdouble(a,b, elem) result(sphere)         
    use element_mod, only : element_t
    type (element_t)      :: elem
    real(kind=longdouble_kind)    :: a,b
    type (spherical_polar_t)      :: sphere

    if (cubed_sphere_map==0) then
       sphere = ref2sphere_equiangular_longdouble(a,b,elem%corners,elem%facenum)
    elseif (cubed_sphere_map==1) then
!       sphere = ref2sphere_gnomonic_longdouble(a,b,corners,face_no)
    elseif (cubed_sphere_map==2) then
       sphere = ref2sphere_elementlocal_longdouble(a,b,elem)
    else
       call abortmp('ref2sphere_double(): bad value of cubed_sphere_map')
    endif
  end function



!
! map a point in the referece element to the sphere
!
  function ref2sphere_equiangular_double(a,b, corners, face_no) result(sphere)         
    implicit none
    real(kind=real_kind)    :: a,b
    integer,intent(in)            :: face_no
    type (spherical_polar_t)      :: sphere
    type (cartesian2d_t)          :: corners(4)
    ! local
    real(kind=real_kind)               :: pi,pj,qi,qj
    type (cartesian2d_t)                 :: cart   

    ! map (a,b) to the [-pi/2,pi/2] equi angular cube face:  x1,x2
    ! a = gp%points(i)
    ! b = gp%points(j)
    pi = (1-a)/2
    pj = (1-b)/2
    qi = (1+a)/2
    qj = (1+b)/2
    cart%x = pi*pj*corners(1)%x &
         + qi*pj*corners(2)%x &
         + qi*qj*corners(3)%x &
         + pi*qj*corners(4)%x 
    cart%y = pi*pj*corners(1)%y &
         + qi*pj*corners(2)%y &
         + qi*qj*corners(3)%y &
         + pi*qj*corners(4)%y 
    ! map from [pi/2,pi/2] equ angular cube face to sphere:   
    sphere=projectpoint(cart,face_no)

  end function ref2sphere_equiangular_double




!
! map a point in the referece element to the sphere
!
  function ref2sphere_equiangular_longdouble(a,b, corners, face_no) result(sphere)         
    implicit none
    real(kind=longdouble_kind)    :: a,b
    integer,intent(in)            :: face_no
    type (spherical_polar_t)      :: sphere
    type (cartesian2d_t)          :: corners(4)
    ! local
    real(kind=real_kind)               :: pi,pj,qi,qj
    type (cartesian2d_t)                 :: cart   

    ! map (a,b) to the [-pi/2,pi/2] equi angular cube face:  x1,x2
    ! a = gp%points(i)
    ! b = gp%points(j)
    pi = (1-a)/2
    pj = (1-b)/2
    qi = (1+a)/2
    qj = (1+b)/2
    cart%x = pi*pj*corners(1)%x &
         + qi*pj*corners(2)%x &
         + qi*qj*corners(3)%x &
         + pi*qj*corners(4)%x 
    cart%y = pi*pj*corners(1)%y &
         + qi*pj*corners(2)%y &
         + qi*qj*corners(3)%y &
         + pi*qj*corners(4)%y 
    ! map from [pi/2,pi/2] equ angular cube face to sphere:   
    sphere=projectpoint(cart,face_no)

  end function ref2sphere_equiangular_longdouble



!-----------------------------------------------------------------------------------------
! ELEMENT LOCAL MAP (DOES NOT USE CUBE FACES)
! unlike gnomonic equiangular map, this map will map all straight lines to
! great circle arcs
!
! map a point in the referece element to the quad on the sphere by a
! general map, without using faces the map works this way: first, fix
! a coordinate (say, X). Map 4 corners of the ref element (corners are
! (-1,-1),(-1,1),(1,1), and (1,-1)) into 4 X-components of the quad in
! physical space via a bilinear map. Do so for Y and Z components as
! well. It produces a map: Ref element (\xi, \eta) ---> A quad in XYZ
! (ess, a piece of a twisted plane) with vertices of our target quad.  though
! the quad lies in a plane and not on the sphere manifold, its
! vertices belong to the sphere (by initial conditions). The last step
! is to utilize a map (X,Y,X) --> (X,Y,Z)/SQRT(X**2+Y**2+Z**2) to
! project the quad to the unit sphere.
! -----------------------------------------------------------------------------------------
  function ref2sphere_elementlocal_double(a,b, elem) result(sphere)
    use element_mod, only : element_t
    implicit none
    real(kind=real_kind)    :: a,b
    type (element_t) :: elem
    type (spherical_polar_t)      :: sphere
    real(kind=real_kind)               ::  q(4) ! local

    q(1)=(1-a)*(1-b); q(2)=(1+a)*(1-b); q(3)=(1+a)*(1+b); q(4)=(1-a)*(1+b);
    q=q/4.0d0;
    sphere=ref2sphere_elementlocal_q(q,elem%corners3D)
  end function 
  function ref2sphere_elementlocal_longdouble(a,b, elem) result(sphere)
    use element_mod, only : element_t
    implicit none
    real(kind=longdouble_kind)    :: a,b
    type (element_t) :: elem
    type (spherical_polar_t)      :: sphere
    real(kind=real_kind)               ::  q(4) ! local

    q(1)=(1-a)*(1-b); q(2)=(1+a)*(1-b); q(3)=(1+a)*(1+b); q(4)=(1-a)*(1+b);
    q=q/4.0d0;
    sphere=ref2sphere_elementlocal_q(q,elem%corners3D)
  end function 

  function ref2sphere_elementlocal_q(q, corners) result(sphere)
    implicit none
    real(kind=real_kind)          :: q(4)
    type (spherical_polar_t)      :: sphere
    type (cartesian3d_t)          :: corners(4)
    ! local
    type (cartesian3d_t)                 :: cart   
    real(kind=real_kind)               ::  c(3,4),  xx(3), r
    integer :: i

!3D corners fo the quad
    c(1,1)=corners(1)%x;  c(2,1)=corners(1)%y;  c(3,1)=corners(1)%z; 
    c(1,2)=corners(2)%x;  c(2,2)=corners(2)%y;  c(3,2)=corners(2)%z; 
    c(1,3)=corners(3)%x;  c(2,3)=corners(3)%y;  c(3,3)=corners(3)%z; 
    c(1,4)=corners(4)%x;  c(2,4)=corners(4)%y;  c(3,4)=corners(4)%z; 

!physical point on a plane (sliced), not yet on the sphere
    do i=1,3
      xx(i)=sum(c(i,:)*q(:))
    enddo

!distance from the plane point to the origin
    r=sqrt(xx(1)**2+xx(2)**2+xx(3)**2)

!projecting the plane point to the sphere
    cart%x=xx(1)/r; cart%y=xx(2)/r; cart%z=xx(3)/r;

!XYZ coords of the point to lon/lat
    sphere=change_coordinates(cart)

  end function 



end module cube_mod



