#! /bin/csh
#
# tgcm.exec: run make and execute the model (if EXEC is not set, build only).
# Assumes source and fixed object files are already in cwd (see tgcm.setup).
# This script requires that $inp be the model input file.
#
# Variables read in this part: OS (required), ncpus (optional, defaults to 1),
# LOADLBATCH (aix only), TGCMROOT (used on the INCLUDE lines).
#
# 4/10/02: replacing all exits with "goto end_of_script", so
#   error output can be returned to user by loadleveler csh executable.
#
# NOTE(review): this file was recovered from a copy whose line breaks had
#   been collapsed. Where a lone "#" preceded a command it was ambiguous
#   whether the command was commented out; each such spot is marked below.
#
set return_code = 0
echo " "
set date = `date`
echo "Enter tgcm.exec $date"
#
# Check ncpus:
# (ncpus not used under aix batch -- loadleveler does this for aix)
#
if ($OS != 'aix') then
  if (! $?ncpus) then
    set ncpus = 1
    echo "Set default ncpus = $ncpus"
    echo "(If you want to run multi-tasked, please set ncpus > 1)"
  endif
  echo tgcm.exec\: ncpus = $ncpus
endif
#
# Set sgi/irix env vars:
# Can use setenv TRAP_FPE "${TRAP_FPE};OVERFL=MAX,TRACE"
#   to set overflows to max real value (1.79e309). This
#   is esp useful for S4 calculation in chapmn.f.
# Module purge is recommended to clear modules loaded
#   by .cshrc or elsewhere -- i.e. to get a "clean slate".
# It is important to load MIPSpro first:
#
if ($OS == 'irix') then
  echo "OS = $OS"
  echo "Executing module purge followed by "
  echo " module load MIPSpro mpt nqe"
  module purge
  module load MIPSpro mpt nqe
#
# Number of cpus (from user script).
# NOTE(review): treated as live because OMP_NUM_THREADS is reported by the
#   "env | grep" below -- confirm it was not commented out in the original.
  setenv OMP_NUM_THREADS $ncpus
#
# Use TRAP_FPE to trap and/or count, trace, and replace
# floating point exceptions (see man fsigfpe):
#
# setenv TRAP_FPE "OFF"
# setenv TRAP_FPE "ALL=COUNT"
#
# Reset TRAP_FPE to empty first so the appends below start from a clean
# value (the first append has no leading ";").
  setenv TRAP_FPE
  setenv TRAP_FPE "${TRAP_FPE}UNDERFL=FLUSH_ZERO"
  setenv TRAP_FPE "${TRAP_FPE};OVERFL=MAX"
  setenv TRAP_FPE "${TRAP_FPE};DIVZERO=ABORT,TRACE"
  setenv TRAP_FPE "${TRAP_FPE};INVALID=ABORT,TRACE"
#
# Below are recommended by the scd doc "Getting started on ute"
# (however, _DSM_MUSTRUN is not good for time-gcm)
# NOTE(review): _DSM_VERBOSE is assumed to be commented out along with
#   _DSM_MUSTRUN -- confirm against the original.
# setenv _DSM_MUSTRUN
# setenv _DSM_VERBOSE ON
  setenv _DSM_PLACEMENT ROUND_ROBIN
  setenv _DSM_MIGRATION OFF
  setenv MPC_GANG OFF
  setenv MP_SUGNUMTHD OFF
  setenv _DSM_BARRIER FOP
  setenv _DSM_WAIT SPIN
#
# Report env vars:
  echo " "
  echo "DSM, MP, and TRAP Environment Vars:"
  env | grep OMP_NUM_THREADS
  env | grep DSM
  env | grep MP_
  env | grep MPC_
  env | grep TRAP
#
# Set Cray/unicos env vars:
#
else if ($OS == 'unicos') then
# Number of cpus (from user script):
  setenv NCPUS $ncpus
  setenv OMP_NUM_THREADS $ncpus
  echo "Setenv NCPUS to ncpus=$ncpus"
  echo "Setenv OMP_NUM_THREADS to ncpus=$ncpus"
#
# Traceback and core options (see REPRIEVE man page):
#   TRACEBK2: get a traceback
#   TRBKVEC:  enhanced traceback
#   TRBKCORE: get both traceback and core dump
  setenv TRACEBK2 1
  setenv TRBKVEC 1
  setenv TRBKCORE 1
  echo "TRACEBK2 = $TRACEBK2 TRBKVEC = $TRBKVEC TRBKCORE = $TRBKCORE"
#
# 7/21/99: Load new module with OpenMP fix (Crays only)
  module load cf90.3.2.1.0
#
# Set IBM AIX env vars (some may be done by loadleveler
# if batch job). Note: if batch, these should also be set
# in the loadleveler exec script.
#
else if ($OS == 'aix') then
  if ($?LOADLBATCH) then
    echo "loadleveler batch job under $OS"
    setenv MP_LABELIO YES
    setenv MP_PGMMODEL SPMD
    setenv MP_SHARED_MEMORY YES
  else
#
# Interactive (blackforest max MP_PROCS=4).
# The ncpus default above is skipped under aix, so default it here;
# otherwise MP_PROCS would reference an undefined variable.
    if (! $?ncpus) then
      set ncpus = 1
      echo "Set default ncpus = $ncpus"
    endif
    setenv MP_PROCS $ncpus
    setenv MP_RMPOOL 1
    setenv MP_TASKS_PER_NODE 4
    setenv MP_PGMMODEL SPMD
    setenv MP_LABELIO YES
    setenv MP_SHARED_MEMORY YES
  endif
  echo "MP env under ${OS}:"
  env | grep MP_
#
# Increasing data size is necessary only for time-gcm:
  limit data unlimited
  echo "OS=$OS set data unlimited"
#
# Compaq osf (or tru-64):
#
else if ($OS == 'osf') then
#
# For all omp (can run only single node (4 procs) on compaq):
# setenv OMP_NUM_THREADS $ncpus
# echo "Set OMP_NUM_THREADS $ncpus under OS $OS"
#
# For hybrid omp/mpi, set both ncpn and OMP_NUM_THREADS
# For mpi only, set ncpn, but do not set OMP_NUM_THREADS
#
# Set number of threads equal to number of processors per node:
# (see also -c4 in prun command at execution below)
# (Also set this for mpi only)
# ncpn = number of cpus per node (also used in the prun command at execution)
  set ncpn = 4
  echo "Set ncpn = $ncpn (number of cpus per node) under OS $OS"
#
# Set num threads for omp:
# setenv OMP_NUM_THREADS $ncpn
# echo "Set OMP_NUM_THREADS $ncpn under OS $OS"
#
# Shared-mem Sun (e.g., k2):
else if ($OS == 'SunOS') then
  setenv OMP_NUM_THREADS $ncpus
  echo "Set OMP_NUM_THREADS $ncpus under OS $OS"
#
# Stacksize (kb) should be divisible by 512. 12800 works for
# tgcm14 with 12 threads (default on Sun is not enough):
  setenv STACKSIZE 12800
  echo "Set STACKSIZE $STACKSIZE under OS $OS"
else
  echo "Note: no env vars set for OS $OS"
endif
#
# fft991.f must not be used if on unicos.
# (multi-tasked unicos prefers the lib version)
#
if ($OS == 'unicos') then
  if (-e fft991.f) then
    echo "Removing fft991.f (lib version will be used under $OS)"
    rm fft991.f
  endif
  if (-e fft991.F) then
    echo "Removing fft991.F (lib version will be used under $OS)"
    rm fft991.F
  endif
endif
#
# If the file "rmfile" exists, remove files listed in rmfile.
#
if (-e rmfile) then
  echo " "
  echo "Remove file rmfile exists (list of files to remove):"
  echo "Listing of rmfile:"
  ls -l rmfile
  echo "Contents of rmfile:"
  cat rmfile
  set err = 0
  echo "Removing files listed in rmfile.."
  rm -f `cat rmfile` || set err = 1
  if ($err == 0) then
    echo "Remove of files listed in rmfile was successful."
  else
    echo "WARNING: error removing files listed in rmfile."
  endif
endif
#
# Include Makefile unless user did:
# (must use goto to avoid conflict with endif's in the
#  gnu Makefile)
# NOTE(review): INCLUDE is not a csh command; presumably these lines are
#   expanded by a preprocessor before the script runs -- keep them verbatim.
#
if (-e Makefile) then
  echo "Will use user-provided Makefile:"
  /bin/ls -l Makefile
  goto gotmakefile
else
INCLUDE -h $TGCMROOT/bld/Makefile
endif
gotmakefile:
#
# Make object list and dependencies for makefile:
#
INCLUDE -h $TGCMROOT/bld/mkdep
echo Executing mkdep ...
perl mkdep || goto fail_mkdep
#
# Compile new/modified code and load executable:
# (note executable is always tgcm*x)
# (get separate ja report for make)
#
echo " "
echo Making executable ...
#
# Build the executable with the make flavor appropriate for $OS, then
# (if EXEC is set) copy it to $execdir and run it with $inp on stdin.
# Variables read here: OS, EXEC, RETURN, execdir, blddir, job, inp,
# ncpus, ncpn (osf), postclean_bld, postclean_exec.
# On failure control jumps to fail_make or fail_exec.
#
# $exec should be the default target in the Makefile:
set exec = tgcm.${OS}
if ($OS == 'unicos') then
#
# Can use multiple processors NPROC on unicos make
# (NPROC will not work with gnumake)
# Use keywords imbedded in the gnu Makefile to parse
# out unicos flags, and the generic targets and rules.
#
  set err = 0
  sed -n '/CRAY_begin/,/CRAY_end/p' Makefile >! Makefile.cray || set err = 1
  sed -n '/EXEC_begin/,/EXEC_end/p' Makefile >> Makefile.cray || set err = 1
  ja
  if ($err == 0) then
#
# Replace the Makefile only after a successful parse, so the gnumake
# fallback below still sees the original gnu Makefile.
    mv Makefile.cray Makefile
    set nproc = 3
    echo "OS = ${OS}: using make NPROC=$nproc"
    make -f Makefile NPROC=$nproc || goto fail_make
  else
    echo ">>> NOTE: OS=${OS}, but Makefile parse failed."
    echo " Cannot use NPROC. Will use gnumake..."
    gnumake || goto fail_make
  endif
  echo "Completed successful make under OS $OS"
  ja -st
else if ($OS == 'osf') then
#
# Compile on compaq:
  timex gmake || goto fail_make
else if ($OS == 'SunOS') then
#
# Extract the SunOS-specific and generic sections into Makefile.SunOS:
  set err = 0
  sed -n "/${OS}_begin/,/${OS}_end/p" Makefile >! Makefile.${OS} \
    || set err = 1
  sed -n "/EXEC_begin/,/EXEC_end/p" Makefile >> Makefile.${OS} \
    || set err = 1
  if ($err == 1) then
    echo ">>> Error parsing Makefile for OS $OS"
    goto fail_make
  endif
  echo "Executing make -f Makefile.${OS} EXEC=$exec OS=$OS..."
  make -f Makefile.${OS} EXEC=$exec OS=$OS || goto fail_make
else
#
# irix or aix:
  timex gnumake || goto fail_make
  echo "Completed successful make under OS $OS"
endif
if (! -e $exec) then
  echo "WARNING: exec file $exec does not exist (check Makefile targets)"
else
  set cwd = `pwd`
  echo "cwd: $cwd Exec file: $exec"
  ls -l $exec
endif
#
# 3/00: copy executable to $execdir even if EXEC not set.
#   This permits aix loadleveler job to find the executable
#   in the execution job step. (input file is put in
#   execdir by user step 2 loadleveler job before execution)
# 2/15/01: Clean out exec dir even if EXEC not set. This is for
#   ibm jobs, where EXEC is not set, but execution is done in
#   exec job step.
#
# (Earlier version of the no-EXEC branch, kept for reference:)
#if (! $?EXEC) then
#  if (-d $execdir) then
#    if ($execdir != $blddir ) then
#      rm $execdir/* # remove any pre-existing files
#      echo "Cleaned out execdir $execdir."
#    endif
#    set err = 0
#    cp $exec $execdir || set err = 1
#    if ($err == 0) then
#      echo "Copied executable $exec to exec directory $execdir"
#    else
#      echo ">>> WARNING: could not copy $exec to $execdir"
#    endif
#  endif
##
## Clean out build dir if requested:
#  if ($?postclean_bld && ($?blddir)) then
#    if (-d $blddir) then
#      if ($blddir != $execdir) then # see comments in tgcm.setup
#        cd $execdir # so the blddir directory can be removed w/ -r
#        rm -fr $blddir
#        echo "Removed build dir $blddir prior to execution."
#      endif
#    endif
#  endif
#
# Not executing: copy the executable to execdir (if there is one) and leave.
#
if (! $?EXEC) then
#
# $?execdir is tested separately because csh substitutes $execdir before
# evaluating the expression, so an unset execdir would be an error.
  if ($?execdir) then
    if (-d $execdir) then
      set err = 0
      cp $exec $execdir || set err = 1
      if ($err == 0) then
        echo "Copied executable $exec to exec directory $execdir"
      else
        echo ">>> WARNING: could not copy $exec to $execdir"
      endif
    endif
  endif
  if ($?RETURN) then
    echo Return without executing because EXEC is not set.
    echo \(see set EXEC in job script\)
    goto $RETURN
  else
    echo Exit without executing because EXEC is not set.
    echo \(see set EXEC in job script\)
  endif
  goto end_of_script
endif
#
# Execute (Makefile's 1st target must be $exec = tgcm_${CF}${ARCH}.x)
#
execute:
if (! $?execdir) then
  echo ' '
  echo '>>> tgcm.exec: execdir not set.'
  goto fail_exec
endif
set cwd = `pwd`
if ($cwd != $execdir) then
#
# Remove any pre-existing files:
  rm $execdir/*
  echo "Cleaned out execdir $execdir."
#
# Report "Copied" only when cp actually succeeded:
  set err = 0
  cp $exec $execdir || set err = 1
  if ($err == 0) then
    echo "Copied executable $exec to exec directory $execdir"
  else
    echo ">>> WARNING: could not copy $exec to $execdir"
  endif
  if (-e $job.inp) then
    set err = 0
    cp $job.inp $execdir || set err = 1
    if ($err == 0) then
      echo "Copied input file $job.inp to exec directory $execdir"
    else
      echo ">>> WARNING: could not copy $job.inp to $execdir"
    endif
  else
    echo ">>> WARNING: cannot find input file $job.inp"
  endif
  cd $execdir
  echo "Moved to exec directory $execdir"
endif
if (-e HISTCPY) then
  rm HISTCPY
endif
#
# The model reads $inp on stdin, so an unset inp cannot be run; fail in a
# controlled way instead of letting csh stop on an undefined variable.
if (! $?inp) then
  echo '>>> WARNING: input file shell var not set.'
  goto fail_exec
endif
if (! -e $inp) then
  echo ">>> WARNING: cannot find input file inp = $inp"
endif
#
# Clean out build dir prior to execution if requested
# (see comments in tgcm.setup regarding blddir vs execdir):
if ($?postclean_bld && ($?blddir)) then
  if (-d $blddir) then
    if ($blddir != $execdir) then
      rm -fr $blddir
      echo "Removed build dir $blddir prior to execution."
    endif
  endif
endif
#
if ($OS == 'unicos') then
#
# unicos has ja and timex:
  ja
# echo "Executing timex $exec < $inp under OS $OS.."
# timex $exec < $inp || goto fail_exec
  echo "Executing $exec < $inp under OS $OS.."
  $exec < $inp || goto fail_exec
  ja -cdst
else if ($OS == 'osf') then
#
# Request core dump:
  setenv decfort_dump_flag y
  echo "Set decfort_dump_flag yes to force core dump."
#
# MP_STACK_SIZE is stack size in bytes for each thread
# (1/01: new qrj stack is about 5.6 MB)
# setenv MP_STACK_SIZE 8000000
  setenv MP_STACK_SIZE 10000000
  echo "Set MP_STACK_SIZE $MP_STACK_SIZE"
#
# Interactive hybrid runs on Compaq prospect using prun command e.g.,
# if 8 cpus are requested ($ncpus), use prun -n2 -c4 for 2 nodes
# (MPI tasks) and 4 threads per node:
# prun -n2 -c4 -B1 -t -v sh -c "tgcm.osf < tgcm14a.inp" >&! tgcm.osf.out &
# -B1 means start at node 1 (node 0 is used for i/o, etc)
# OMP_NUM_THREADS was set above and should be the same as -c in the prun
# command
#
# ncpn = number of cpus per node (set above).
# Determine number of tasks (nodes) required given the requested
# number of total processors ncpus:
# e.g.: ntasks = (12 total / 4 cpus per node) = 3 tasks
# This will be used differently in the prun command depending
# on hybrid vs mpi-only (see prun commands below)
#
# Integer division, rounded up when ncpus is not a multiple of ncpn:
  @ ntasks = $ncpus / $ncpn
  if ($ncpus % $ncpn > 0) @ ntasks++
#
# NOTE(review): the header echo below is assumed live (the indented detail
#   lines after it are) -- confirm it was not commented out.
  echo "Execution under OS ${OS}:"
  echo " ncpus = $ncpus (total number of requested processes)"
  echo " ncpn = $ncpn (number of cpus per node)"
  echo " ntasks = $ntasks (number of nodes required)"
#
# For hybrid mpi/omp:
#echo "Executing for hybrid mode on OS $OS with the following prun command:"
# echo " prun -n $ntasks -c $ncpn -t -v sh -c $exec < $inp.."
# prun -n $ntasks -c $ncpn -t -v sh -c "$exec < $inp" || goto fail_exec
#
# For mpi only:
  echo "Executing for mpi only on OS $OS with the following prun command:"
  echo " prun -n $ncpus -N $ntasks -t -v sh -c $exec < $inp"
  prun -n $ncpus -N $ntasks -t -v sh -c "$exec < $inp" || goto fail_exec
else
#
# IBM-SP aix, or SGI-O2K irix (any OS other than unicos/osf lands here):
  limit data unlimited
  ulimit -a
  echo "Executing timex $exec < $inp under OS $OS.."
  timex $exec < $inp || goto fail_exec
endif
#
# Clean exec dir if necessary:
# (cd with no argument moves out of execdir before it is removed)
#
if ($?postclean_exec) then
  cd
  set err = 0
  rm -fr $execdir || set err = 1
  if ($err == 1) then
    echo "WARNING: Error cleaning and removing exec dir $execdir"
  else
    echo "Cleaned and removed exec dir $execdir"
  endif
endif
#
# Return to parent (user) job script if requested:
#
if ($?RETURN) then
  goto $RETURN
endif
goto end_of_script
#-----------------------------------------------
#
# Error traps: each sets return_code and jumps to end_of_script.
#
fail_getbin:
echo Msread of executable $msread_bin failed
set return_code = 1
goto end_of_script
#
fail_modify:
echo Modify failed
set return_code = 2
goto end_of_script
#
fail_mkdep:
echo tgcm.exec\: mkdep failed.
set return_code = 3
goto end_of_script
#
fail_make:
echo tgcm.exec\: make failed.
set return_code = 4
goto end_of_script
#
# Model execution failed. The return code is set for every OS; only the
# core-file handling, debugview and ja report are unicos-specific.
#
fail_exec:
echo Model execution failed
set return_code = 5
if ($OS == 'unicos') then
#
# Save a copy of the core file under /tmp/$user, tagged with this
# shell's process id ($$):
  if (-e core) then
    cp core /tmp/$user/core.$$
    echo "Copied core file to /tmp/$user/core.$$"
  endif
  debugview -c core $exec -B
  ja -cdst
endif
#
# Common exit point: report status (no exit call, so control can fall
# through to whatever follows this script).
#
end_of_script:
if ($return_code == 0) then
  echo "tgcm.exec returning."
else
  echo ">>> tgcm.exec error return_code = $return_code"
endif