#!/usr/bin/perl
use Getopt::Long;
#
# Given an executable, and 1 or more namelist read input files, submit multiple
# runs under either LSF (e.g., bluevista or lightning), or LL (e.g., bluesky).
# If the system is not running LL or LSF, or interactive runs are requested,
# this script will attempt simple "command-line" executions of $exec.
# See the "multiinp" command for assistance creating multiple namelist
# read files for input to this script.
#
# Exit status: 0 on success, non-zero when the arguments are unusable or a
# batch submission fails.
#
my $wallclock   = "00:30"; # 30-minute default wall clock limit
my $nosubmit    = 0;       # default is to submit the jobs
my $nnodes      = 1;       # default number of nodes (LL only)
my $ntasks      = 4;       # default total number of MPI tasks
my $interactive = 0;       # default is to submit to batch system
my $serial      = 0;       # default is parallel jobs (not serial)
my $sequential  = 0;       # default is simultaneous jobs (not sequential)

# Options without a default.  These are file-scoped lexicals so that the
# subroutines further down (which read $basename, $nodename and @infiles
# directly) still see them.
my ($exec, $basename, $queue, $help);

GetOptions(
  "x|exec=s"       => \$exec,
  "bn|basename=s"  => \$basename,
  "wc|wallclock=s" => \$wallclock,
  "q|queue=s"      => \$queue,
  "nn|nnodes=i"    => \$nnodes,
  "nt|ntasks=i"    => \$ntasks,
  "ns|nosubmit"    => \$nosubmit,
  "ia|interactive" => \$interactive,
  "ser|serial"     => \$serial,
  "seq|sequential" => \$sequential,
  "h|help"         => \$help,
) or die "\n>>> $0: Error parsing options.\nPlease execute \"$0 -h\" for a usage message.\n\n";
#
if ($help) { usage(); } # provide usage if help was requested
#
# User must provide an executable:
#
if (! $exec) {
  print "\n>>> $0: Please provide an executable file.\n";
  print "(Use the command \"$0 -h\" for a usage message)\n\n";
  exit 1;
}
# Expand "~" and wildcards.  List assignment takes the first match without
# leaving glob's scalar-context iterator half consumed.
my ($expanded_exec) = glob($exec);
$exec = $expanded_exec;
if (! -x $exec) {
  print "\n>>> $0: Executable $exec must have execution permission.\n";
  print "(Use the command \"$0 -h\" for a usage message)\n\n";
  exit 1;
}
#
# Remaining args are input files (user must provide at least one):
#
if (! @ARGV) {
  print "\n>>> $0: Please provide at least one namelist input file.\n";
  print "(Use the command \"$0 -h\" for a usage message)\n\n";
  exit 1;
}
#
# Save input file names in @infiles (this empties @ARGV):
#
my @infiles = splice(@ARGV);
my $njobs   = scalar @infiles;
#
# Get nodename (may be needed to set #BSUB -a):
#
my $nodename = `uname -n`;
chomp $nodename;           # strip only the trailing newline
#
# Determine batch system type and set submit command accordingly:
#
my ($batchsys, $submitcommand);
#
# LSF:
#
my $LoadShare = $ENV{"LSF_ENVDIR"};
if ($LoadShare) { # LSF system
  $batchsys = "LSF";
  $submitcommand = "bsub < ";
  if (! $basename) { $basename = "LSF"; }
  if (! $queue)    { $queue = "regular"; }
  print " Batch system: Load Sharing Facility ($batchsys)\n";
#
  if ($interactive and ! $serial) {
    print "\n>>> Sorry, I cannot execute interactive parallel jobs under LSF\n";
    exit 1;
  }
#
# Loadleveler:
#
} elsif (-d "/usr/lpp/LoadL") { # LoadLeveler system
  $batchsys = "LL";
  $submitcommand = "llsubmit ";
  if (! $basename) { $basename = "LL"; }
  if (! $queue)    { $queue = "com_rg8"; }
  print " Batch system: LoadLeveler ($batchsys)\n";
#
# Unknown batch system -> attempt to execute interactively:
#
} else { # unknown batch system
  if (! $interactive) {
    print "\n$0: cannot determine batch system:\n";
    print "I can submit to either Load Sharing Facility (LSF), or LoadLeveler (LL):\n";
    print " Is not Load Share Facility (LSF), because env var LSF_ENVDIR is not set\n";
    print " Is not LoadLeveler because directory /usr/lpp/LoadL does not exist.\n\n";
    print "\nWill attempt direct execution of $exec..\n\n";
  }
  $interactive = 1; # force interactive when batch system is unknown
  if (! $basename) { $basename = "multi"; }
}
if ($ntasks == 1) { $serial = 1; } # a single task is a serial job
#
# Report options to stdout:
#
print "\n$0:\n";
print " Exec file: $exec\n";
print " Njobs: $njobs\n";
print " Input files: @infiles\n";
print " Submit command: \"$submitcommand\"\n";
print " Base name: \"$basename\"\n";
print " Wallclock: \"$wallclock\"\n";
print " Queue: \"$queue\"\n";
print " Nnodes: \"$nnodes\"\n";
print " Ntasks: \"$ntasks\"\n";
print " Serial: $serial\n";
print " Nosubmit: $nosubmit\n";
print " Interactive: $interactive\n";
print " Sequential: $sequential\n";
#
# If submitting batch under LL and sequential is requested, build single
# LoadLeveler job script with multiple foreground executions, submit the
# job, and exit:
#
if ($batchsys eq "LL" and $sequential and ! $interactive) {
  my $job_csh = mkjob_LLseq($exec,$queue,$wallclock,$nnodes,$ntasks,
                            $serial,$njobs);
  print "Made LL job script $job_csh with $njobs executions of $exec.\n";
  if (! $nosubmit) {
    my $command = "$submitcommand $job_csh";
    my $stat = system($command);
    if ($stat) { die ">>> Error executing \"$command\"\n"; }
    # Backticks are not executed inside a double-quoted string, so take the
    # time stamp from Perl instead of printing a literal `date`.
    my $now = localtime;
    print "Submitted job script $job_csh to LoadLeveler at $now\n";
  }
  exit;
}
#
# Loop through input files:
#
my $i = 1; # 1-based job counter
print "\n";
foreach my $infile (@infiles) {
#
# Make batch job script if not interactive:
#
  my $job_csh;
  if (! $interactive) {
    $job_csh = mkjob($batchsys,$exec,$infile,$queue,$wallclock,$nnodes,$ntasks,
                     $serial,$interactive,$sequential,$i);
    print"Input file $i: $infile Job script: $job_csh\n";
  } else {
    print"Input file $i: $infile\n";
  }
#
# Submit current job (unless nosubmit is set):
#
  if (! $nosubmit) {
#
# Submit to batch system:
    if (! $interactive) { # submit batch job
      my $command = "$submitcommand $job_csh";
      my $stat = system($command);
      if ($stat) { die ">>> Error executing \"$command\"\n"; }
#
# Execute interactively. If sequential option is set, run interactive
# jobs in foreground, otherwise run in background.
#
    } else { # make interactive run
      my $ii = mknum($i);
      my $outfile = "${basename}${ii}.out"; # stdout of model run
      sleep 1; # add small time space between interactive runs
      my $command;
#
# Start non-sequential interactive jobs in background:
      if (! $sequential) { # non-sequential jobs
        $command = "csh -c \"$exec < $infile >&! $outfile &\"";
        print "Starting interactive exec of $exec in background (job $i)..\n";
#
# Execute sequential interactive jobs in foreground:
      } else { # sequential jobs
        $command = "csh -c \"$exec < $infile >&! $outfile\"";
        print "Executing $exec interactively in foreground",
              " to force sequential execution (job $i)..\n";
      } # sequential interactive or not
      # Report a failed run but keep going with the remaining input files.
      my $stat = system($command);
      if ($stat) { warn ">>> Error executing \"$command\"\n"; }
    } # interactive or not
  } # not nosubmit
#
# Increment job counter:
#
  $i++;
} # foreach $infile
exit;
#
#-------------------------------------------------------------------------
sub mkjob {
#
# Build LL or LSF batch job script for current job.
#
# Arguments (positional): batch system ("LL" or "LSF"), executable, input
# file, queue, wallclock limit, number of nodes, number of tasks, serial
# flag, interactive flag (unused here), sequential flag, 1-based job number.
# Also reads the file-level $basename and $nodename.
#
# Returns the name of the job script written, or nothing when the batch
# system is neither LL nor LSF.  Dies if the script cannot be written.
#
  my ($batchsys,$exec,$infile,$queue,$wallclock,$nnodes,$ntasks,$serial,
      $ia,$sequential,$ijob) = @_;
#
  my $jobscript = "${basename}_$ijob.job"; # job script returned by this function
  my $outfile   = "${basename}_$ijob.out"; # stdout of model run
  my $jobname   = "${basename}_$ijob";     # step or job name passed to batch sys
  my $prev_job     = $ijob - 1;
  my $prev_jobname = "${basename}_$prev_job"; # job that an LSF dependency waits on
#
  open (JOBSCRIPT, '>', $jobscript)
    or die "mkjob: cannot open file $jobscript: $!\n";
#
# - - - - - - - - - - - - Build LoadLeveler job script - - - - - - - - - -
  if ($batchsys eq "LL") { # Load Leveler
#
# Default parallel job:
#
    my $LL_node      = "# @ node = $nnodes";
    my $LL_ntasks    = "# @ total_tasks = $ntasks";
    my $LL_nodeusage = "# @ node_usage = not_shared";
    my $LL_jobtype   = "# @ job_type = parallel";
    my $LL_network   = "# @ network.MPI = csss,shared,us";
#
# Serial job:
#
    if ($serial) {
      $LL_jobtype   = "# @ job_type = serial";
      $LL_node      = "";
      $LL_ntasks    = "";
      $LL_nodeusage = "";
      $LL_network   = "";
      if ($queue eq "com_rg8") { $queue = "share"; }
    }
#
# NOTE(review): the here-document below is incomplete in this copy of the
# file (its opening delimiter and the job-script header lines are missing).
# The surviving text is kept exactly as found -- restore it from the original.
    print JOBSCRIPT <&! $outfile || \\
  echo ">>> Execution of $exec FAILED at `date`" && echo "See output in $outfile"
EOF
#
# - - - - - - - - - - - - - - Build LSF job script - - - - - - - - - - - -
  } elsif ($batchsys eq "LSF") {
#
# Default parallel job:
#
    my $bsub_a = "poe";                               # bluevista
    if ($nodename =~ /^ln/) { $bsub_a = "mpich_gm"; } # lightning (node names start with "ln")
    my $LSF_jobtype   = "#BSUB -a $bsub_a";
    my $LSF_ntasks    = "#BSUB -n $ntasks";
    my $LSF_nodeusage = "#BSUB -x";
#
# Serial job if $ntasks==1:
#
    if ($serial) {
      $LSF_jobtype   = "";
      $LSF_ntasks    = "";
      $LSF_nodeusage = "";
    }
#
# Add dependency on previous job (#BSUB -w) if sequential runs are requested:
#
    my $LSF_dependency = "";
    if ($sequential and $ijob > 1) {
      $LSF_dependency = "#BSUB -w done($prev_jobname)";
    }
#
# NOTE(review): the here-document below is incomplete in this copy of the
# file (its opening delimiter and the #BSUB header lines are missing).
# The surviving text is kept exactly as found -- restore it from the original.
    print JOBSCRIPT <&! $outfile || \\
  echo ">>> Execution (mpirun.lsf) of $exec FAILED at `date`" && echo "See output in $outfile"
EOF1
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  } else { # unknown batch system -> main program will attempt
           # interactive "command-line" execution of $exec, so
    close(JOBSCRIPT);
    return; # return silently
  } # batch sys
#
# Flush the script to disk before it is made executable and handed to the
# submit command; a failed close means the script is incomplete.
#
  close(JOBSCRIPT) or die "mkjob: error writing $jobscript: $!\n";
#
# Return executable csh script:
#
  chmod 0755, $jobscript;
  return $jobscript;
} # mkjob
#-------------------------------------------------------------------------
sub mkjob_LLseq {
#
# Make batch job script for sequential LL runs.
# This will be a single-step LL job, with multiple foreground executions.
# Note wallclock limit is 6 hours for the *total* job (if 6 hour limit is
# needed for *each* job, you must use an LSF system).
#
# Arguments (positional): executable, queue, wallclock limit, number of
# nodes, number of tasks, serial flag, number of jobs.  Also reads the
# file-level $basename.  Returns the name of the job script written; dies
# if the script cannot be written.
#
  my ($exec,$queue,$wallclock,$nnodes,$ntasks,$serial,$njobs) = @_;
  my $jobscript = "${basename}.job"; # job script returned by this function
  my $stepname  = $jobscript;
  my $output    = "${basename}.out"; # script output

  my $LL_node      = "# @ node = $nnodes";
  my $LL_ntasks    = "# @ total_tasks = $ntasks";
  my $LL_nodeusage = "# @ node_usage = not_shared";
  my $LL_jobtype   = "# @ job_type = parallel";
  my $LL_network   = "# @ network.MPI = csss,shared,us";
#
# Serial job if $ntasks==1:
#
  if ($serial) {
    $LL_jobtype   = "# @ job_type = serial";
    $LL_node      = "";
    $LL_ntasks    = "";
    $LL_nodeusage = "";
    $LL_network   = "";
    if ($queue eq "com_rg8") { $queue = "share"; }
  }

  open (JOBSCRIPT, '>', $jobscript)
    or die "mkjob_LLseq: cannot open file $jobscript: $!\n";
#
# NOTE(review): the text between this print and the closing EOF is
# incomplete in this copy of the file.  The job-script header, the start of
# the loop that the "foreach infile" brace below closes, and the opening
# here-document delimiters are all missing.  The surviving text is kept
# exactly as found -- restore it from the original.
  print JOBSCRIPT <&! $outfile || \\
  echo ">>> Execution of $exec FAILED at `date`" && exit
echo "Completed execution of job $ijob at `date`"
EOF
    $ijob++;
  } # foreach infile
#
# Flush the script to disk before it is made executable and submitted.
#
  close(JOBSCRIPT) or die "mkjob_LLseq: error writing $jobscript: $!\n";
  chmod 0755, $jobscript;
  return $jobscript;
}
#-------------------------------------------------------------------------
#
sub usage {
  die <