#! /bin/csh
#
# This script may be submitted to blackforest using $TGCMROOT/bin/submit.
# (as of 11/00, the rcp of output file job step does not work)
#
# Execute this on IBM SP system (e.g. blackforest or babyblue)
# as a csh script (submit command will do this). It will build
# LoadLeveler command files for serial build, parallel execution,
# and rcp of output file to remotedir. This builds the csh scripts
# for each loadleveler step. It then submits the loadleveler job
# using llsubmit.
#
# Files written by this script:
#   $home/submitdir.dat, remotedir.dat, outfile.dat (read back by rcp_step.csh)
#   $submitdir/loadlev.job     LoadLeveler command file (3 job steps)
#   $submitdir/build_step.csh  step 0: serial build
#   $submitdir/exec_step.csh   step 1: parallel execution
#   $submitdir/rcp_step.csh    step 2: rcp of output to remotedir
#
# Note: here-document terminators are written with their quotes (e.g.
# 'EOF_LL') because csh matches the terminator line against the delimiter
# word literally; quoting the word also suppresses variable and command
# substitution inside the here-document.
#
echo " "
date
# cd with no argument moves to the login directory, which becomes $home below.
cd
set home = `pwd`
echo "Begin $0 in home directory $home"
#
# submitdir: Submit directory on execution machine (IBM SP).
#            (if submitdir is changed here, it must be changed in loadleveler
#            job steps below)
# remotedir: Machine:path for rcp of outfile to originating workstation.
# outfile:   Output file name (will be in $submitdir).
#
# job: use same for loadleveler job_name below
set job       = tgcm15
set submitdir = $home/submit
set remotedir = vishnu.hao.ucar.edu:tiegcm/$job
set outfile   = $job.out
#
# Save the settings where rcp_step.csh can read them back (it runs as a
# separate job step and does not see this script's variables).
#
echo $submitdir >! $home/submitdir.dat
echo $remotedir >! $home/remotedir.dat
echo $outfile   >! $home/outfile.dat
#
if (! -d $submitdir) then
  mkdir -p $submitdir
  if ($status != 0) then
#   Nothing below can be written without the submit directory, so stop here.
    echo "ERROR: could not make directory $submitdir -- job not submitted."
    exit 1
  endif
else
# csh treats an unmatched glob as an error ("No match"), which happens
# whenever the submit directory is already empty. With nonomatch set the
# pattern is passed through unchanged, and rm -f ignores the missing name.
  set nonomatch
  /usr/bin/rm -f $submitdir/*
  unset nonomatch
  echo "Cleaned $submitdir"
endif
echo "Submit directory = $submitdir"
#
# Make 3-step loadleveler job script (build, exec, rcp):
#
echo "Making $submitdir/loadlev.job"
cat << 'EOF_LL' >! $submitdir/loadlev.job
#
# LoadLeveler step 0: make the executable (serial)
#
# @ job_name = tgcm15
# @ step_name = build_step
# @ class = share
# It typically takes < 4 mins wallclock to build tgcm15
# (maybe 2+ mins user cpu)
# @ wall_clock_limit = 00:05:00
# @ job_type = serial
# @ executable = ~/submit/build_step.csh
# @ output = ~/submit/$(job_name)_build.out
# @ error = ~/submit/$(job_name)_build.out
# @ notification = error
## @ account_no = 93300840
# @ queue
#
# LoadLeveler step 1: execute (parallel)
#
# @ step_name = exec_step
# @ dependency = (build_step == 0)
# @ class = com_reg
## @ class = csl_reg
## @ class = share
## @ class = com_pr
# Set wall clock limit as low as possible to get through the queue
# (wall_clock_limit = hr:min:sec -- 6 hour max)
# tgcm15 can take ~30min wallclock per 1-day sim w/ 12 tasks.
## @ wall_clock_limit = 06:00:00
# @ wall_clock_limit = 03:30:00
## @ wall_clock_limit = 02:00:00
## @ wall_clock_limit = 00:30:00
## @ wall_clock_limit = 00:15:00
# @ job_type = parallel
# @ network.MPI = css0,shared,us
# @ total_tasks = 12
# @ node = 3
# @ executable = ~/submit/exec_step.csh
# @ output = ~/submit/$(job_name)_exec.out
# @ error = ~/submit/$(job_name)_exec.out
# @ ja_report = yes
# @ notification = complete
## @ account_no = 93300840
# @ queue
#
# Step 2: rcp output back to home workstation
# (must be interactive class):
#
# @ step_name = rcp_step
# @ dependency = (exec_step == 0)
# @ job_type = serial
# @ class = interactive
# @ wall_clock_limit = 00:00:30
# @ executable = ~/submit/rcp_step.csh
# @ output = ~/submit/$(job_name)_rcp.out
# @ error = ~/submit/$(job_name)_rcp.out
# @ notification = never
# @ queue
#
'EOF_LL'
#-------------------------------------------------------------
#
# Make csh script for step 0:
#
echo "Making $submitdir/build_step.csh"
cat << 'EOF_BUILD_STEP' >! $submitdir/build_step.csh
#
set tgcm_version = tgcm15 # model version name
#set COMPILE_ALL          # recompile all code, not just mods
#set EXEC                 # do NOT execute (execute in next LL job step)
#set empty_tempdir        # set to empty tempdir before exec
#set save_bld
#
# Setup script establishes working directories (blddir, execdir, and
# tempdir), cd's to blddir, and gets fixed source and object code
# from mss:
#
INCLUDE $TGCMROOT/bld/tgcm.setup
#
# Include any source code to be overlayed on fixed source and recompiled:
#
#INCLUDE -h $TGCMROOT/tgcm15/*.h $TGCMROOT/tgcm15/*.F
#INCLUDE -h ~/tiegcm/tgcm15/mods/*.F ~/tiegcm/tgcm15/mods/*.h
#INCLUDE -h ~/tiegcm/tgcm15/mods/rmfile
#
# Exec script builds executable. It executes only if EXEC is set above,
# so for AIX, always leave EXEC unset -- model is executed in loadleveler
# job step 2 below.
#
INCLUDE $TGCMROOT/bld/tgcm.exec
#
'EOF_BUILD_STEP'
#-------------------------------------------------------------
echo "Making $submitdir/exec_step.csh"
cat << 'EOF_EXEC_STEP' >! $submitdir/exec_step.csh
#
# Csh script for parallel exec job step 1:
#
# execdir was set by tgcm.setup
set execdir = `cat ~/execdir.dat`
if (-d $execdir) then
  cd $execdir
  echo "Moved to execdir $execdir"
else
  echo "WARNING: execdir $execdir does not exist."
endif
#
echo " "
echo "Begin exec step"
set input = tgcm.inp
#INCLUDE tgcm15.inp
#INCLUDE tgcm15.precons.inp
#INCLUDE tgcm15.cons.inp
cat << 'EOFINP' >! $input
INCLUDE tgcm15.inp
'EOFINP'
#
# Set poe env vars that are not set by loadleveler commands:
# These MP_ env vars are also set in tgcm.exec script, where
# they work for interactive runs, but under loadleveler, those
# are executed from a different shell, so they have to be repeated
# here before execution.
#
setenv MP_PGMMODEL SPMD
echo "set MP_PGMMODEL SPMD"
setenv MP_LABELIO YES
echo "set MP_LABELIO YES"
setenv MP_SHARED_MEMORY YES
echo "set MP_SHARED_MEMORY YES"
#setenv MP_STDOUTMODE ORDERED
#setenv MP_INFOLEVEL 3
#
set exec = tgcm.aix # must match exec target in Makefile
/usr/bin/ls -l $exec $input
echo "Executing $exec < $input ..."
date
timex $exec < $input || goto fail_exec
date
echo "Execution of $exec completed normally."
exit 0
#
# Execution error trap:
fail_exec:
date
echo "Execution of $exec failed."
exit 1
'EOF_EXEC_STEP'
#-------------------------------------------------------------
echo "Making $submitdir/rcp_step.csh"
cat << 'EOF_RCP_STEP' >! $submitdir/rcp_step.csh
#! /bin/csh
#
# Cat output from build and exec steps to output file
# and send to remote directory (on originating workstation):
#
set submitdir = `cat ~/submitdir.dat`
set outfile   = `cat ~/outfile.dat`
set remotedir = `cat ~/remotedir.dat`
echo " "
echo "Begin rcp_step at `date`"
echo " submitdir = $submitdir"
echo " outfile = $outfile"
echo " remotedir = $remotedir"
cd $submitdir
echo " "
echo "rcp_step: Moved to submitdir $submitdir"
if (-e $outfile) then
  rm $outfile
  echo "Removed pre-existing outfile $outfile"
endif
echo "ls -l of \*build.out and \*exec.out.."
ls -l *build.out *exec.out
cat *build.out *exec.out > $outfile || \
  echo "Error cat of \*build.out and \*exec.out to $outfile"
echo "ls -l of outfile $outfile.."
ls -l $outfile
#
# Run mklogs on the output file:
#
INCLUDE -h $TGCMROOT/bld/mklogs
mklogs $outfile
echo "ls -l of all .out files after mklogs.."
ls -l *.out
#
# Do the rcp:
# (remotedir is always "set" by the assignment above, so test for a
# non-empty value rather than for existence)
#
if ("$remotedir" != "") then
  set err = 0
  rcp $outfile $remotedir/$outfile || set err = 1
  if ($err == 0) then
    echo "Successful rcp of $outfile to $remotedir/$outfile"
  else
    echo "Error return from the following rcp command:"
    echo "rcp $outfile $remotedir/$outfile"
  endif
else
  echo "remotedir not set -- $outfile not sent."
endif
exit 0
'EOF_RCP_STEP'
#-------------------------------------------------------------
#
# Add execution permission to scripts, in case they are to be
# executed from an interactive session. This is not generally
# done, however, see comment below.
#
chmod u+x $submitdir/build_step.csh
chmod u+x $submitdir/exec_step.csh
chmod u+x $submitdir/rcp_step.csh
#
# Submit to loadleveler:
#
# If tgcm15_aix.job.submit is to be executed interactively
# (e.g., on command line from ~/submit), it will submit all
# 3 job steps to the queue, and the build step (step 0)
# should begin immediately in the shared queue. It will compile
# in ~/tgcm15.$$ (with stdout in ~/submit), but will not execute
# until the parallel job step (step 1) begins. After the build,
# one may either let the exec step go through the queue and
# begin, or kill it and the remaining steps (use llcancel),
# and execute interactively from the execution dir (typically
# under /ptmp/$login). Before executing interactively from
# /ptmp, set MP_xxx env vars, as in tgcm.exec.
#
echo "Submitting $submitdir/loadlev.job to LoadLeveler.."
llsubmit $submitdir/loadlev.job
exit