#!/usr/bin/perl
#
use Getopt::Long;
use Cwd;
use File::Copy;
#
# Script name without its directory part (used as a prefix in messages):
($progname = $0) =~ s!.*/!!;
#
# Login name: try getlogin() first, then the passwd entry of the real uid.
# It is used for the default wrkdir and the default mail address.
$login = getlogin() || (getpwuid($<))[0] || die "Could not get login..\n";
#
# Directory the script was started from (captured before any chdir):
$cwd = getcwd();
#
# Option defaults.  The model name is quoted: a bareword here is only
# accepted because the script runs without "use strict".
#
my $model   = 'tiegcm'; # default model
my $help    = 0;        # default is no help
my $cvs     = 0;        # no cvs checkout
my $noclean = 0;        # 0 = clean a pre-existing wrkdir on entry
my $batch   = 0;        # submit batch job rather than interactive
my $monitor = 0;        # watch for completion status in output
$jobname = "";
#
# Parse the command line.  Options declared with ":s" take an optional
# string value; "model=s" requires one.  An unrecognized option ends the
# run with the usage message.
#
GetOptions(
   "model=s"     => \$model,     # model name (in tgcmroot)
   "tgcmroot:s"  => \$tgcmroot,  # tgcm root directory
   "wrkdir:s"    => \$wrkdir,    # working directory
   "execdir:s"   => \$execdir,   # exec directory
   "input:s"     => \$input,     # optional namelist input file
   "mail:s"      => \$mail,      # report to this email address
   "jobname:s"   => \$jobname,   # optional job name (used in output file names)
   "cvs"         => \$cvs,       # if set, do cvs checkout to obtain source
   "noclean"     => \$noclean,   # if set, do not remove pre-existing wrkdir
   "batch"       => \$batch,     # if set, submit batch job
   "monitor"     => \$monitor,   # if set, watch for completion status in output file
   "h|help"      => \$help,
) or usage();
#
if ($help) { usage(); } # provide usage if help was requested
#
# Identify the platform.  $ln and $bv flag the lightning and bluevista
# hosts, recognized by the first two letters of the node name.
# (chomp removes only a trailing newline; chop would remove the last
# character whatever it is.)
#
$OS = `uname -s`; chomp $OS;
$nodename = `uname -n`; chomp $nodename;
$ln = ($nodename =~ /^ln/) ? 1 : 0;	# is lightning
$bv = ($nodename =~ /^bv/) ? 1 : 0;	# is bluevista
#
# Get tgcmroot.  Order of preference: -tgcmroot option, $TGCMROOT in the
# environment, then the first of the two default directories that exists.
#
$tgcmroot_default_hao = "/home/tgcm";
$tgcmroot_default_scd = "/fis/hao/tgcm";
if (! $tgcmroot) {
  if (defined $ENV{TGCMROOT}) {
    $tgcmroot = $ENV{TGCMROOT};
  } elsif (-d $tgcmroot_default_hao) {
    $tgcmroot = $tgcmroot_default_hao;
  } elsif (-d $tgcmroot_default_scd) {
    $tgcmroot = $tgcmroot_default_scd;
  } else {
    die "\n>>> $progname: Cannot determine TGCMROOT directory.\n\n";
  }
}
$tgcmroot = abspath($tgcmroot);
if (! -d $tgcmroot) {
  die "\n>>> Cannot find tgcmroot directory $tgcmroot\n\n";
}
#
# Determine tgcmdata dir at hao for default wrkdir.  Falls back to the
# directory the script was started from when neither $TGCMDATA nor the
# default directory is available.
#
$tgcmdata_default_hao = "/toshi/ftp/pub/tgcm/data";
if (defined $ENV{TGCMDATA}) {
  $tgcmdata = $ENV{TGCMDATA};
} elsif (-d $tgcmdata_default_hao) {
  $tgcmdata = $tgcmdata_default_hao;
} else {
  print "Note: Cannot determine TGCMDATA directory -- default to cwd\n";
  $tgcmdata = $cwd;
}
#
# A namelist input file is optional, but if one was given it must exist:
#
if ($input and ! -e $input) {
  die "\n>>> Could not find input file $input\n";
}
#
# Report to stdout:
#
$date = `date`; chomp($date);
print "\n$progname beginning $date\n";
print "  tgcmroot = $tgcmroot\n";
print "  model    = $model\n";
print "  OS       = $OS\n";
print "  nodename = $nodename\n";
print "  input    = $input\n";
print "  cvs      = $cvs\n";
print "  login    = $login\n";
print "  cwd      = $cwd\n";
#
# Verify existence of model, source, and script directories:
# (unless cvs checkout has been requested, in which case they do not
# exist yet)
#
if (! $cvs) {
  foreach my $required (
      [ "root model directory",    "$tgcmroot/$model"         ],
      [ "model source directory",  "$tgcmroot/$model/src"     ],
      [ "model scripts directory", "$tgcmroot/$model/scripts" ]) {
    my ($what, $dir) = @$required;
    if (! -d $dir) { die "\n>>> Cannot find $what $dir\n\n"; }
  }
}
#
# Set up working directory and go there:
# (/ptmp is available on scd lightning Linux (ln), but not on hao Linux,
# where the default is placed under $tgcmdata instead)
#
if (! defined $wrkdir or $wrkdir eq "") {        # use default wrkdir
  if ($OS eq 'Linux' and ! $ln) {
    $wrkdir = "$tgcmdata/system_tests/$model";
  } else {
    $wrkdir = "/ptmp/$login/system_tests/$model";
  }
}
$wrkdir = abspath($wrkdir);
print "  wrkdir   = $wrkdir\n";
print "  jobname  = $jobname\n\n";
#
# Clean pre-existing wrkdir (if it is not the cwd, and unless -noclean).
# As with a shell "$wrkdir/*", entries whose names start with "." are left
# alone.  rm is given its arguments as a list, so no shell is involved and
# a wrkdir containing spaces or shell metacharacters cannot redirect the
# removal to some other path.
#
if (-e $wrkdir and $wrkdir ne '.' and $wrkdir ne $cwd and ! $noclean) {
  print "Clearing wrkdir $wrkdir...\n";
  opendir(my $dh, $wrkdir) or
    die "$progname: Could not clean wrkdir directory $wrkdir\n\n";
  my @entries = map { "$wrkdir/$_" } grep { ! /^\./ } readdir($dh);
  closedir($dh);
  if (@entries) {
    $stat = system("rm", "-rf", @entries);
    if ($stat) { die "$progname: Could not clean wrkdir directory $wrkdir\n\n"; }
  }
}
$stat = system("mkdir", "-p", $wrkdir);
if ($stat) { die "$progname: Could not make directory $wrkdir\n\n"; }
#
# "or" rather than "||" here: "chdir $wrkdir || die" applies the || to
# $wrkdir, so a failed chdir would go unnoticed.
#
chdir($wrkdir) or die "$progname: Could not chdir to directory $wrkdir\n\n";
print "Moved to wrkdir $wrkdir\n";
#
# Do cvs checkout if requested.  A failed checkout is only reported as a
# warning; the checks on the job script below stop the run if the model
# did not arrive.
#
if ($cvs) {
  $command = "csh -c \"cvs checkout $model\"";
  print "Executing: $command\n";
  $stat = system($command);
  if ($stat) {
    print "\n>>> WARNING: Error from system call executing: $command: stat=$stat\n\n";
  }
  $tgcmroot = $wrkdir;  # model is now in wrkdir (cwd)
} # cvs
#
# Determine model class: the first known class name that the model name
# starts with.  An unrecognized model is rejected here rather than being
# left to produce a job script name with an empty prefix.
#
$model_class = undef;
foreach my $class (qw(tiegcm timegcm mtgcm titan glbmean)) {
  if ($model =~ /^$class/) { $model_class = $class; last; }
}
if (! defined $model_class) {
  die "$progname: Could not determine model class for model $model\n\n";
}
#
# jobpath is directory containing job script to copy:
#
$jobpath = "$tgcmroot/$model/scripts";
#
# Get job script appropriate for model and OS:
# (Linux, IBM AIX and SGI IRIX are the supported systems)
#
my %os_tag = ('Linux' => 'linux', 'AIX' => 'ibm', 'IRIX64' => 'sgi');
if (! exists $os_tag{$OS}) {
  die "Could not determine jobscript name given OS=$OS and model=$model\n\n";
}
$jobscript = $model_class . '-' . $os_tag{$OS} . '.job';
#
# On lightning, prefer the old "-linux-mpi" job script name when that
# file is present:
if ($OS eq 'Linux' and $ln) {
  $file = "$jobpath/$model_class" . "-linux-mpi.job";
  if (-e $file) { $jobscript = $model_class . "-linux-mpi.job"; }
}
#
# Job script must exist:
if (! -e "$jobpath/$jobscript") {
  die "Could not find job script $jobpath/$jobscript\n\n";
}
#
# Copy job script (we are still in $wrkdir):
copy("$jobpath/$jobscript",$jobscript) or
  die ">>> Error copying job script $jobpath/$jobscript\n\n";
print "Copied job script $jobpath/$jobscript\n";
#
# Point the copy at this tgcmroot/model/input, then make it executable.
# (chmod is called with parentheses and "or": written as
# "chmod 0755, $jobscript || die" the || applies to $jobscript and a
# failure is never reported.)
#
edit_jobscript($jobscript,$tgcmroot,$model,$input);
chmod(0755, $jobscript) or die ">>> Could not chmod 755 $jobscript\n\n";
#
# Execute a default run:
# (csh job script is executed interactively, except for lightning and
# bluevista, where bsub is used)
# Stdout from an interactive job script is redirected to $outfile in the
# wrkdir; the file name includes the job name when one was given.
#
$outfile = "$jobscript.out";
if ($jobname) { $outfile = "${jobscript}_$jobname.out"; }
#
$command = "csh -c \"$jobscript >& $outfile\"";
#
# Lightning Linux cluster and IBM bluevista can be run only with LSF bsub batch:
if ($ln or $bv) { $command = "csh -c \"bsub < $jobscript\""; }
#
# Batch jobs on IBM or SGI (cannot do batch under regular Linux):
# If AIX, bluesky uses LoadLeveler (llsubmit), whereas bluevista uses LSF (bsub):
#
if ($batch) {
  if ($OS eq 'AIX') {
    if ($bv) {
      $command = "csh -c \"bsub < $jobscript\"";
    } else {
      $command = "csh -c \"llsubmit $jobscript\"";
    }
  } elsif ($OS eq 'IRIX64') {
    $command = "csh -c \"qsub $jobscript\"";
  } else {
    print"\nNote: don\'t know how to submit batch job for OS=$OS\n\n";
  }
}
#
# Report the directory the command really runs in.  ($cwd still holds the
# directory the script was started from, not the wrkdir we moved to.)
#
print "\nCurrent directory: " . getcwd() . "\n";
print "Executing: $command\n";
$stat = system($command); # system() will return on completion of $command
if ($stat) {
  print "\n>>> WARNING $progname: Error from system call executing: $command: stat=$stat\n\n";
}
#
# Watch stdout files for completion status:
# (only for interactive runs away from lightning, and only if -monitor)
#
if (! $ln and ! $batch and $monitor) { monitor("$wrkdir/$outfile",$model); }
exit;
#-----------------------------------------------------------------------
sub edit_jobscript {
#
# Edit job script in place to set tgcmroot and model (and optionally the
# namelist input, job output and exec directory).
#
# Arguments:
#   $jobscript  path of the job script to rewrite
#   $tgcmroot   value written to the "set tgcmroot" line
#   $model      value written to the "set model" line
#   $input      value for the "set input" line (line is kept as is if false)
#
# Also reads the file-level variables $jobname and $execdir: when set they
# replace the "set output" and "set execdir" lines.  The #BSUB -o and
# #BSUB -e lines are always pointed at "$jobscript.out".
#
# The edited text is written to "$jobscript.new", which is then renamed
# over the original.  Dies if a file cannot be opened, written or renamed.
#
  my ($jobscript, $tgcmroot, $model, $input) = @_ ;
  if (! -e $jobscript) { die "\nedit_jobscript: Cannot find jobscript $jobscript\n\n"; }
#
  open(my $in, '<', $jobscript) or
    die "Could not open jobscript $jobscript for reading\n\n";
  my $outfile = "$jobscript.new";
  open(my $out, '>', $outfile) or
    die "Could not open new jobscript $outfile for writing\n\n";
#
# A "set" line is recognized when exactly one character (whitespace, "*"
# or "#") precedes the word "set" at the start of the line.
#
  while (my $line = <$in>) {
    if ($line =~ /^[\s*#]set model\s*=/) {                       # model name
      print $out " set model = $model\n";
    } elsif ($line =~ /^[\s*#]set tgcmroot\s*=/) {               # tgcmroot
      print $out " set tgcmroot = $tgcmroot\n";
    } elsif ($line =~ /^[\s*#]set input\s*=/ and $input) {       # namelist input
      print $out " set input = $input\n";
    } elsif ($line =~ /^[\s*#]set output\s*=/ and $jobname) {    # job output
      print $out " set output = ${model}_$jobname.out \n";
    } elsif ($line =~ /^[\s*#]set execdir\s*=/ and $execdir) {   # exec dir
      print $out " set execdir = $execdir\n";
    } elsif ($line =~ /^\s*#+BSUB\s*-o/) {                       # LSF output (ln)
      print $out "#BSUB -o $jobscript.out\n";
    } elsif ($line =~ /^\s*#+BSUB\s*-e/) {                       # LSF error output (ln)
      print $out "#BSUB -e $jobscript.out\n";
    } else {
      print $out $line;
    }
  } # while input lines
  close($in);
#
# Buffered write errors only show up at close, so check it before the
# new file replaces the original:
  close($out) or
    die "Could not finish writing new jobscript $outfile\nError code: $!\n\n";

  rename($outfile, $jobscript) or
    die "Could not rename $outfile to $jobscript\nError code: $!\n\n";

  print "Rewrote $jobscript to include:\n";
  print "  set tgcmroot = $tgcmroot\n";
  print "  set model = $model\n";
  if ($input) { print "  set input = $input\n"; }

} # end edit_jobscript
#-----------------------------------------------------------------------
sub usage {
#
# Print the command synopsis and option summary, then end the program.
# The text is issued through die, so it goes to stderr and the exit
# status is non-zero.  Every option accepted by the option parser is
# listed here.
#
  die <<EOF;

SYNOPSIS
  $0 [options]

PURPOSE			Perform a short default test job to build and run the
			requested model.
OPTIONS
  -tgcmroot [path] 	Directory of TGCM root distribution (default \$TGCMROOT).
			(ignored if cvs is set) (\$model should be a subdir)

  -model [name]		Name of requested model 
			(src/ and scripts/ subdirs should be in \$tgcmroot/\$model)

  -wrkdir [path]  	Working directory (will be cleared before test, unless it is cwd)
			Default wrkdir at SCD = /ptmp/\$login/system_tests/\$model
			Default wrkdir at HAO = \$TGCMDATA/system_tests/\$model

  -execdir [path]	Optional exec directory (replaces the "set execdir" line
			of the job script)

  -input [file]		Optional file to use for namelist input

  -jobname [name]	Optional job name (used in output file names)

  -cvs			If set, obtain source code and scripts with cvs checkout to wrkdir
			(\$tgcmroot is ignored if cvs is set)

  -noclean		If set, do not clean wrkdir prior to running job script, 
			thereby allowing use of pre-existing execdir, etc.
			Default: noclean not set (clean wrkdir and force build)

  -batch		If set, submit job script to a batch queue

  -monitor		If set, watch the job output file for completion status
			and mail a report (not on lightning, and not with -batch)

  -mail [address]	Address for the -monitor report (default \$login\@ucar.edu)

  -h, -help		Print this message

EOF
}
#-----------------------------------------------------------------------
sub abspath {
#
# Convert a pathname into an absolute pathname, expanding any . or ..
# components and dropping empty ones (doubled or trailing slashes).
# Assumes pathnames refer to a local filesystem.
# Assumes the directory separator is "/".
# Works on the text of the path only: nothing is looked up on disk, so
# symbolic links are not resolved and the path need not exist.
#
# Argument: $path, absolute or relative to the current working directory.
# Returns:  the normalized absolute path ("/" for the root), or "" when
#           the path is empty after whitespace stripping.
#
  my ($path) = @_;
  my $cwd = getcwd();  # current working directory
#
# Strip off any leading or trailing whitespace.
# (This pattern won't match if there's embedded whitespace, in which case
# the path is left as given.)
#
  $path =~ s!^\s*(\S*)\s*$!$1!;
  if ($path eq "") { return ""; }
#
# Convert relative to absolute path.  This covers ".", "./x", ".." and
# "../x" as well: the components added here are resolved below.
#
  if ($path !~ m!^/!) { $path = "$cwd/$path"; }
#
# Walk the components, building the list of those that remain.
#
  my @kept;
  foreach my $dir (split "/", $path) {
    if ($dir eq "" or $dir eq ".") {
      next;               # empty component or "." adds nothing
    } elsif ($dir eq "..") {
      pop @kept;          # remove previous dir; at the root there is
                          # nothing to remove and ".." stays at the root
    } else {
      push @kept, $dir;
    }
  }
  return "/" . join('/', @kept);
}
#-----------------------------------------------------------------------
sub monitor {
#
# Watch for existence of file $jobout (job script and config output).
# When found, search $jobout for completion status as determined by
# searching $jobout for predefined substrings, then mail a short report.
#
# Arguments:
#   $jobout  path of the job output file to watch
#   $model   model name (used in the report text)
#
# Reads the file-level variables $OS, $ln, $mail, $login and $progname.
# The report is written to "mail.msg" in the current directory and sent
# with the "mail" command to $mail, or to $login\@ucar.edu when no address
# was given.
#
# Dies if $jobout never appears, if a file cannot be opened or written,
# or if the mail command fails.
#
  my ($jobout,$model) = @_;
  my $max_attempts = 90;   # 90 attempts at 10 secs each allows 15 minutes
  my $nap_seconds  = 10;
#
# Watch for appearance of file $jobout:
#
  my $attempts = 1;
  while (! -e $jobout) {
    print "monitor: Sleeping while watching for $jobout...\n";
    $attempts = $attempts+1;
    if ($attempts > $max_attempts) { last; }
    sleep $nap_seconds;
  }
  if (! -e $jobout) {
    die "monitor: Could not find file $jobout after $attempts attempts\n";
  }
#
# Check for completion status in $jobout.  The file is re-read from the
# top on every attempt; the first line containing one of the marker
# strings decides the status.
#
  my $status = "";         # becomes "makefail", "failure" or "success"
  $attempts = 1;
  CHECKING: while (1) {
    open(my $out, '<', $jobout) or die "\nmonitor: Error opening output file $jobout\n";
    while (my $line = <$out>) {
#
# Ordering here may be important (a line is tested for the gmake error
# first, then FAILED, then completion):
      if    (index($line,"Error return from gmake") >= 0) { $status = "makefail"; }
      elsif (index($line,"FAILED") >= 0)                  { $status = "failure"; }
      elsif (index($line,"Completed execution") >= 0)     { $status = "success"; }
      if ($status) { last; }
    }
    close($out);           # closed on every path, also when a status was found
    if ($status) { last CHECKING; }
    print "Checking for completion status in $jobout (attempts=$attempts)\n";
    if ($attempts > $max_attempts) {
      print "monitor quitting after $attempts attempts..\n";
      last CHECKING;
    }
    $attempts = $attempts+1;
    sleep $nap_seconds;
  }
#
# Send email to user indicating completion status:
#
  my $msgfile = "mail.msg";
  open(my $msg, '>', $msgfile) or die "\n>>> $0: Cannot open file $msgfile for writing\n";
  my $date = `date`; chomp $date;
  my $os = $OS;
  if ($OS eq 'Linux') { $os = $os . " (ln=" . $ln . ")"; }
  print "$date\n";
#
# Console line, plus the two paragraphs of the mail message:
#
  my ($summary, $detail);
  if ($status eq "success") {
    print "\nSuccessful completion status in $jobout\n";
    $summary = "System test of model $model succeeded (OS = $os).";
    $detail  = "See output in $jobout";
  } elsif ($status eq "failure") {
#   Error return by job script (probably model failure)
    print "\n>>> Failure completion status in $jobout\n";
    $summary = "System test of model $model failed (OS = $os).";
    $detail  = "Failure completion status detected in $jobout";
  } elsif ($status eq "makefail") {
#   Error in build
    print "\n>>> gmake error occurred in $jobout\n";
    $summary = "System test of model $model failed in the build (OS = $os).";
    $detail  = "See gmake error in output file $jobout";
  } else {
#   Could not determine completion status
    print "\nCould not find completion status in $jobout after $attempts attempts.\n";
    $summary = "System test of model $model (OS=$os)";
    $detail  = "Could not determine completion status of $jobout";
  }
  print $msg "\n$date\n\n$summary\n\n$detail\n\n";
  close($msg) or die "\n>>> $0: Error writing file $msgfile\n";
#
# Email message to user:
# NOTE(review): the address is placed in a shell command line as given on
# the command line; it is not quoted or validated here.
#
  my $address = $mail ? $mail : $login . '@ucar.edu';
  my $cmd = "mail " . $address . " < " . $msgfile;
  my $stat = system($cmd);
  if ($stat) { die "$progname: Error sending mail to $address\n Command: $cmd\n"; }
  print "$progname: Sent mail to $address\n";

} # end sub monitor
