;
; See also ~/timegcm/tgcm23/save/speedup.pro
;--------------------------------------------------------------------
;
; addtext: write the elements of a string array as consecutive lines
; of text on the current graphics device.
;
;   text   - string array (or scalar); one output line per element.
;            If it has no elements nothing is drawn.
;   xstart - x position of every line, in normalized coordinates.
;   ystart - y position of the first line, in normalized coordinates.
;            Each following line is placed spacedown (.03) lower.
;
pro addtext,text,xstart,ystart
;xstart = .5
;ystart = .9
  spacedown = .03
  ntext = n_elements(text)
;print,'addtext: ntext=',ntext
  if (ntext eq 0) then return
  for i=0,ntext-1 do begin
    ; Line i sits i*spacedown below the first line.
    xyouts,xstart,ystart-i*spacedown,text(i),/normal,$
      charsize=1.2
;   print,'addtext: i=',i,' text(i)=',text(i)
  endfor
end
;--------------------------------------------------------------------
;
; Main:
;
; perf: plot average wallclock seconds per simulated time step against
; the total number of processes for four sets of timings (SGI, IBM,
; CRAY, Compaq), label the last point of each curve, and add two
; blocks of explanatory text.
;
;   ps - keyword. If set, output goes to the PostScript file 'perf.ps'
;        and the graphics device that was active on entry is restored
;        after the file is closed. Otherwise the 'x' device is
;        selected and left active.
;
pro perf,ps=ps
;
; Remember the device that is active on entry so that PostScript
; output does not leave the session pointing at the 'ps' device.
;
  entry_device = !d.name
  use_ps = keyword_set(ps)
;
; Set device (ps or x)
;
  if (use_ps) then begin
    set_plot,'ps'
    device,file='perf.ps'
  endif else begin
    set_plot,'x'
  endelse
;
; Plot nprocs on x-axis, wall-clock time on y-axis:
;
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
;
; spval/10. is passed to plot/oplot as max_value, so any y value
; larger than that is treated as missing.
  spval = 9999.
  niter = 480
;
; envvars is not referenced by the plotting code below.
; NOTE(review): presumably a record of the run-time environment used
; for some of the timings -- confirm before removing.
  envvars = [ $
    " _DSM_PLACEMENT=ROUND_ROBIN",$
    " _DSM_VERBOSE=ON ",$
    " _DSM_MIGRATION=OFF ",$
    " _DSM_BARRIER=FOP ",$
    " _DSM_WAIT=SPIN ",$
    " MP_SUGNUMTHD=OFF ",$
    " MPC_GANG=OFF ",$
    " TRAP_FPE=UNDERFL=FLUSH_ZERO;OVERFL=MAX;",$
    " DIVZERO=ABORT,TRACE;INVALID=ABORT,TRACE"]
;
; x-axis explanation:
  xdesc = [ $
    "OpenMP: n_threads ",$
    "MPI : n_tasks ",$
    "Hybrid: ntasks (nodes) x ncpus/node (threads)"]
  pmethod = [ $
    "CRAY/J90 : OpenMP only",$
    "Compaq/ES40: Hybrid MPI+OpenMP",$
    "IBM/WH2 : MPI only",$
    "SGI/O2K : OpenMP only"]
;xtitle='Number of threads (SGI) or tasks (IBM)'
  xtitle='Total number of processes'
;xrange = [0,25]
  xrange = [0,30]
;
; x = number of cpus, y = wall-clock (secs)
;x_sgi = [1, 2, 4, 6, 9, 12, 18]
;y_sgi = [3971., 2441., 1332., 1071., 895., 803., 791.]
;x_ibm = [1, 2, 4, 6, 9, 12, 18]
;y_ibm = [3662., 2116., 1590., 1494., 1240., 1077., 962.]
;ytitle='Wall clock time (seconds)'
;yrange = [0,4000]
;
; x = number of cpus, y = average wallclock secs per step
  x_sgi = [1, 2, 4, 6, 9, 12, 18]
  y_sgi = [8.3, 5.0, 2.8, 2.2, 1.8, 1.7, 1.6]
  x_ibm = [1, 2, 4, 6, 9, 12, 18]
  y_ibm = [7.6, 4.4, 3.3, 3.1, 2.6, 2.2, 2.0]
  x_cray = [1, 2, 4, 6, 9, 12, 18]
  y_cray = [21., 11.4, 8.2, 7.3, 3.5, 3.2, 3.1]
  x_compaq = [8, 12, 24]
  y_compaq = [4.1, 2.7, 2.0]
  ytitle='Average wallclock seconds per simulated time step'
  yrange = [0,10]
;
; Index of the last run in each data set, taken from the arrays
; themselves so it cannot drift out of step with the data.
  last_sgi    = n_elements(x_sgi)-1
  last_ibm    = n_elements(x_ibm)-1
  last_cray   = n_elements(x_cray)-1
  last_compaq = n_elements(x_compaq)-1
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
;
  chsize = 1.2
  if (use_ps) then device,font_size=10.,xsize=19.,ysize=20.
  main_title = 'TIEGCM (tgcm14a): (' + $
    string(format="(i3)",niter) + ' 3-minute timesteps)'
;
; Negative psym values draw the symbol and connect the points with
; lines. The first curve (SGI) also sets up the axes; the plot area
; leaves room below it for the xdesc text.
  plot,x_sgi,y_sgi,yrange=yrange,xrange=xrange,title=main_title,$
    charsize=chsize,xcharsize=chsize,ycharsize=chsize,$
    xtitle=xtitle,ytitle=ytitle,$
    psym=-4,max_value=spval/10.,$
    position=[.1,.25,.95,.95],/norm
  oplot,x_ibm,y_ibm,psym=-6,max_value=spval/10.
  oplot,x_cray,y_cray,psym=-5,max_value=spval/10.
  oplot,x_compaq,y_compaq,psym=-2,max_value=spval/10.
;oplot,[0,20],[0,20],linestyle=2
;
; Label each curve just to the right of its last point.
  xyouts,x_sgi(last_sgi)+0.5,y_sgi(last_sgi),'SGI/O2K'
  xyouts,x_ibm(last_ibm)+0.5,y_ibm(last_ibm),'IBM/WH2'
  xyouts,x_cray(last_cray)+0.5,y_cray(last_cray),'CRAY/J90'
  xyouts,x_compaq(last_compaq)+0.5,y_compaq(last_compaq),'Compaq/ES40'
  addtext,pmethod,.5,.9
  addtext,xdesc,.36,.15
;
; Close ps file and return to the device that was active on entry:
;
  if (use_ps) then begin
    device,/close
    set_plot,entry_device
  endif
end