/* !-------------------------------------------------------------------------! ! ! ! N A S P A R A L L E L B E N C H M A R K S 3.0 ! ! ! ! J A V A V E R S I O N ! ! ! ! MG ! ! ! !-------------------------------------------------------------------------! ! ! ! This benchmark is a serial/multithreaded version of the ! ! NPB_JAV MG code. ! ! ! ! Permission to use, copy, distribute and modify this software ! ! for any purpose with or without fee is hereby granted. We ! ! request, however, that all derived work reference the NAS ! ! Parallel Benchmarks 3.0. This software is provided "as is" ! ! without express or implied warranty. ! ! ! ! Information on NPB 3.0, including the Technical Report NAS-02-008 ! ! "Implementation of the NAS Parallel Benchmarks in Java", ! ! original specifications, source code, results and information ! ! on how to submit new results, is available at: ! ! ! ! http://www.nas.nasa.gov/Software/NPB/ ! ! ! ! Send comments or suggestions to npb@nas.nasa.gov ! ! ! ! NAS Parallel Benchmarks Group ! ! NASA Ames Research Center ! ! Mail Stop: T27A-1 ! ! Moffett Field, CA 94035-1000 ! ! ! ! E-mail: npb@nas.nasa.gov ! ! Fax: (650) 604-3957 ! ! ! !-------------------------------------------------------------------------! ! Authors: E. Barszcz ! ! P. Frederickson ! ! A. Woo ! ! M. Yarrow ! ! Translation to Java and MultiThreaded Code ! ! M. Frumkin ! ! M. Schultz ! !-------------------------------------------------------------------------! 
*/ package NPB_JAV; import NPB_JAV.MGThreads.*; import NPB_JAV.BMInOut.*; import java.io.*; import java.text.*; /** Driver for the MG benchmark described in the file header. Builds on MGBase and runs the solver either through the serial routines or through the ...Master variants, selected by the serial flag. NOTE(review): this copy of the file is collapsed onto a few physical lines, several spans of code are missing, and the class is cut off at the end; the code text is left untouched and only comments are added. */ public class MG extends MGBase{ public int bid=-1; public BMResults results; public boolean serial=false; public boolean timeron=false; public double rnm2, rnmu, epsilon; public int n1, n2, n3, nn; int verified; String t_names[]; int is1, is2, is3, ie1, ie2, ie3; int nsizes[]; /** Passes clss, np and ser to the MGBase constructor and stores ser in this class's own serial field. */ public MG(char clss,int np,boolean ser ){ super(clss,np,ser); serial=ser; } /** Command-line entry point. Parses argv with BMArgs.ParseCmdLineArgs, constructs an MG from BMArgs.CLASS, BMArgs.num_threads and BMArgs.serial, and calls runBenchMark(). If construction throws OutOfMemoryError, BMArgs.outOfMemoryMessage() is called and the JVM exits with status 0. */ public static void main(String argv[] ){ MG mg=null; BMArgs.ParseCmdLineArgs(argv,BMName); char CLSS=BMArgs.CLASS; int np=BMArgs.num_threads; boolean serial=BMArgs.serial; try{ mg = new MG(CLSS,np,serial); }catch(OutOfMemoryError e){ BMArgs.outOfMemoryMessage(); System.exit(0); } mg.runBenchMark(); } /** Calls runBenchMark(). */ public void run(){runBenchMark();} /** Runs the whole benchmark. Prints the banner, reads parameters with getInputPars(), sizes the grids with setup() and copies nsizes into n1, n2, n3. Under timer T_init it initialises u (zero3) and v (zran3), calls setupThreads(this) when not serial, and performs a start-up pass (residual, one mg3P cycle, residual). u and v are then re-initialised and, under timer T_bench, the initial residual plus nit cycles of mg3P followed by resid are executed; the ...Master variants are used when serial is false. Afterwards the value returned by norm2u3 is checked by verify() and a BMResults report is printed, followed by printTimers() when timeron is set. Note: the value returned by getInputPars() is stored in the local niter, but the loop and the report use nit. */ public void runBenchMark(){ BMArgs.Banner(BMName,CLASS,serial,num_threads); int niter=getInputPars(); nsizes=new int[3]; setup(nsizes); n1=nsizes[0]; n2=nsizes[1]; n3=nsizes[2]; setTimers(); timer.resetAllTimers(); timer.start(T_init); zero3(u,0,n1,n2,n3); zran3(v,n1,n2,n3,nx[lt-1],ny[lt-1]); if(!serial) setupThreads(this); if(serial) resid(u,v,r,0,n1,n2,n3); else residMaster(u,v,r,0,n1,n2,n3); //-------------------------------------------------------------------- // One iteration for startup //-------------------------------------------------------------------- if(serial){ mg3P(u,v,r,n1,n2,n3); resid(u,v,r,0,n1,n2,n3); }else{ mg3Pmaster(u,v,r,n1,n2,n3); residMaster(u,v,r,0,n1,n2,n3); } zero3(u,0,n1,n2,n3); zran3(v,n1,n2,n3,nx[lt-1],ny[lt-1]); timer.stop(T_init); timer.start(T_bench); if (timeron) timer.start(T_resid2); if(serial) resid(u,v,r,0,n1,n2,n3); else residMaster(u,v,r,0,n1,n2,n3); if (timeron) timer.stop(T_resid2); for(int it=1;it<=nit;it++){ if (timeron) timer.start(T_mg3P); if(serial) mg3P(u,v,r,n1,n2,n3); else mg3Pmaster(u,v,r,n1,n2,n3); if (timeron) timer.stop(T_mg3P); if (timeron) timer.start(T_resid2); if(serial) resid(u,v,r,0,n1,n2,n3); else 
residMaster(u,v,r,0,n1,n2,n3); if (timeron) timer.stop(T_resid2); } timer.stop(T_bench); double tinit = timer.readTimer(T_init); System.out.println(" Initialization time: "+tinit+" seconds"); rnm2=norm2u3(r,n1,n2,n3,rnmu,nx[lt-1],ny[lt-1],nz[lt-1]); verified=verify(rnm2); double tm = timer.readTimer(T_bench); results=new BMResults("MG", CLASS, nx[lt-1], ny[lt-1], nz[lt-1], nit, tm, getMFLOPS(tm,nit), "floating point", verified, serial, num_threads, bid); results.print(); if (timeron) printTimers(); } /** Checks rnm2 against the reference value for CLASS. For CLASS 'S', 'W', 'A', 'B' or 'C' a hard-coded reference is selected, and verified is set to 1 when the absolute difference between rnm2 and the reference is below epsilon (set here to 1.0E-8), otherwise to 0. For CLASS 'U' verified is set to -1 and nothing is compared. The status is then reported through BMResults.printVerificationStatus. @param rnm2 the norm to check. @return the value stored in verified (1, 0 or -1). NOTE(review): the tolerance is absolute; for class W the reference (about 2.5E-18) is far below epsilon, so any norm smaller than epsilon in magnitude passes. A relative comparison may have been intended - confirm against the benchmark specification. A CLASS value other than those listed (and other than 'U') is compared against 0.0. */ public int verify(double rnm2){ double verify_value=0.0; epsilon = 1.0E-8; if (CLASS != 'U') { if(CLASS=='S') { verify_value = 0.530770700573E-4; }else if(CLASS=='W') { verify_value = 0.250391406439E-17; }else if(CLASS=='A') { verify_value = 0.2433365309E-5; }else if(CLASS=='B') { verify_value = 0.180056440132E-5; }else if(CLASS=='C') { verify_value = 0.570674826298E-6; } System.out.println(" L2 Norm is "+rnm2); if( Math.abs( rnm2 - verify_value ) < epsilon ) { verified = 1; System.out.println(" Deviation is "+(rnm2 - verify_value)); }else{ verified = 0; System.out.println(" The correct L2 Norm is "+verify_value); } }else{ verified = -1; } BMResults.printVerificationStatus(CLASS,verified,BMName); return verified; } /** Returns the rate computed as 58.0*n1*n2*n3*niter divided by (tm*1000000.0), or 0.0 when tm is not positive. @param tm elapsed time; the caller passes the T_bench timer reading. @param niter iteration count. */ public double getMFLOPS(double tm,int niter){ double mflops = 0.0; if( tm > 0.0 ) { mflops = 58.0*n1*n2*n3; mflops *= niter / (tm*1000000.0); } return mflops; } /** Reads the run parameters. When a file named mg.input exists, five ints are read with DataInputStream.readInt() in the order lt, lnx, lny, lnz, nit (the JVM exits with status 0 if lt exceeds maxlevel), and CLASS is then re-derived from lnx, lny, lnz and nit, falling back to 'U' when no known combination matches. When the file is absent a message says the compiled defaults are used. In both cases the grid size of level lt-1 and nit are printed. @return nit. NOTE(review): readInt() consumes binary 4-byte values, not text. Any exception while reading is reported only as a message on System.err and execution continues with whatever was read so far; fis.close() is skipped in that case. lnx, lny and lnz are used in this method only to choose CLASS. */ public int getInputPars(){ int lnx=32,lny=32,lnz=32; File f2 = new File("mg.input"); if ( f2.exists() ){ System.out.println("Reading from input file mg.input"); try{ FileInputStream fis = new FileInputStream(f2); DataInputStream datafile = new DataInputStream(fis); lt = datafile.readInt(); if(lt>maxlevel) { System.out.println("lt="+lt+" Maximum allowable="+maxlevel); System.exit(0); } lnx = datafile.readInt(); lny = datafile.readInt(); lnz = datafile.readInt(); nit = datafile.readInt(); fis.close(); }catch(Exception e){ System.err.println("Error reading from file mg.input"); } if 
(lnx!=lny||lnx!=lnz){ CLASS = 'U'; }else if( lnx==32&&nit==4 ){ CLASS = 'S'; }else if( lnx==64&&nit==40 ){ CLASS = 'W'; }else if( lnx==256&&nit==20 ){ CLASS = 'B'; }else if( lnx==512&&nit==20 ){ CLASS = 'C'; }else if( lnx==256&&nit==4 ){ CLASS = 'A'; }else{ CLASS = 'U'; } }else{ System.out.println(" No input file mg.input, Using compiled defaults"); } System.out.println(" Size: "+nx[lt-1]+"x"+ny[lt-1]+"x"+nz[lt-1] +" Iterations: " + nit ); return nit; } /** Enables timing when a file named timer.flag exists: sets timeron and creates t_names (16 slots) with labels for T_init, T_bench, T_mg3P, T_psinv, T_resid, T_rprj3, T_interp and T_norm2. No label is stored for T_resid2; printTimers() treats that index separately. */ public void setTimers(){ File f1 = new File("timer.flag"); if( f1.exists() ){ timeron = true; t_names = new String[16]; t_names[T_init] = "init"; t_names[T_bench] = "benchmark"; t_names[T_mg3P] = "mg3P"; t_names[T_psinv] = "psinv"; t_names[T_resid] = "resid"; t_names[T_rprj3] = "rprj3"; t_names[T_interp] = "interp"; t_names[T_norm2] = "norm2"; } } /** Prints one line per timer index from T_bench through T_last: the time formatted with pattern 0.000 and its percentage of the T_bench time (1.0 is used as the divisor when that time is 0.0). For index T_resid2 the printed value is the T_resid time minus the T_resid2 time, labelled as total mg-resid; every other index is labelled from t_names. */ public void printTimers(){ //% of the time should be fixed DecimalFormat fmt = new DecimalFormat("0.000"); System.out.println(" SECTION Time (secs)"); double tmax = timer.readTimer(T_bench); if (tmax == 0.0) tmax = 1.0; for (int i=T_bench;i<=T_last;i++){ double t = timer.readTimer(i); if (i==T_resid2) { t = timer.readTimer(T_resid) - t; System.out.println(" --> total mg-resid "+fmt.format(t)+ " ("+fmt.format(t*100./tmax)+"%)"); }else{ System.out.println(" "+t_names[i]+" "+fmt.format(t)+ " ("+fmt.format(t*100./tmax)+"%)"); } } } /** Grid set-up. NOTE(review): this routine is not intact in this copy - the loop headers over ax are truncated and a span before the final loop over j is absent. Visible fragments only: lb is set to 1; ng is seeded from nx, ny and nz of level lt-1 and halved for each coarser level; nx, ny and nz of levels lt-2 down to 0 are filled from ng; ir[j] is accumulated as ir[j+1] plus the product of m1, m2 and m3 at level j+1. */ public void setup(int nsizes[]){ int k; int d, i, j; int ax; int size1=3,size2=10; int mi[]=new int[size1*size2]; int ng[]=new int[size1*size2]; int s, dir; lb = 1; ng[ (lt-1)*size1]=nx[lt-1]; ng[1+(lt-1)*size1]=ny[lt-1]; ng[2+(lt-1)*size1]=nz[lt-1]; for(ax=0;ax=0;k--) ng[ax+k*size1]=ng[ax+(k+1)*size1]/2; for(k=lt-2;k>=0;k--){ nx[k]=ng[ k*size1]; ny[k]=ng[1+k*size1]; nz[k]=ng[2+k*size1]; } for(k=lt-1;k>=0;k--){ for(ax=0;ax=0;j--){ ir[j]=ir[j+1]+m1[j+1]*m2[j+1]*m3[j+1]; } } /** NOTE(review): only the signature and the start of the i3 loop of zero3 are present in this copy. The text that follows belongs to a different routine whose header is missing; it works on ten, j1, j2, j3 and jg and calls bubble and comm3. */ public void zero3(double z[],int off,int n1,int n2,int n3){ int i1, i2, i3; for(i3=0;i3 ten[mm] ){ ten[mm] = z[i1+n1*(i2+n2*i3)]; j1[mm] = i1; j2[mm] = i2; j3[mm] = i3; bubble( 
ten, j1, j2, j3, mm, 1 ); } if( z[i1+n1*(i2+n2*i3)] < ten[0] ){ ten[0] = z[i1+n1*(i2+n2*i3)]; j1[0] = i1; j2[0] = i2; j3[0] = i3; bubble( ten, j1, j2, j3, mm, 0 ); } } } } //c--------------------------------------------------------------------- //c Now which of these are globally best? //c--------------------------------------------------------------------- i1 = mm; i0 = mm; for(i=mm-1;i>=0;i--){ best = z[j1[i1-1+mm]+n1*(j2[i1-1+mm]+n2*(j3[i1-1+mm]))]; if(best==z[j1[i1-1+mm]+n1*(j2[i1-1+mm]+n2*(j3[i1-1+mm]))]){ jg[4*(i+mm)] = 0; jg[1+4*(i+mm)] = is1 - 2 + j1[i1-1+mm]; jg[2+4*(i+mm)] = is2 - 2 + j2[i1-1+mm]; jg[3+4*(i+mm)] = is3 - 2 + j3[i1-1+mm]; i1 = i1-1; }else{ jg[4*(i+mm)] = 0; jg[1+4*(i+mm)] = 0; jg[2+4*(i+mm)] = 0; jg[3+4*(i+mm)] = 0; } ten[i+mm] = best; best = z[j1[i0-1]+n1*(j2[i0-1]+n2*(j3[i0-1]))]; if(best==z[j1[i0-1]+n1*(j2[i0-1]+n2*(j3[i0-1]))]){ jg[4*i] = 0; jg[1+4*i] = is1 - 2 + j1[i0-1]; jg[2+4*i] = is2 - 2 + j2[i0-1]; jg[3+4*i] = is3 - 2 + j3[i0-1]; i0 = i0-1; }else{ jg[4*i] = 0; jg[1+4*i] = 0; jg[2+4*i] = 0; jg[3+4*i] = 0; } ten[i] = best; } m1 = i1+1; m0 = i0+1; for(i3=0;i3=m0;i--) z[j1[i-1]+n1*(j2[i-1]+n2*(j3[i-1]))] = -1.0; for(i=mm;i>=m1;i--) z[j1[i-1+mm]+n1*(j2[i-1+mm]+n2*(j3[i-1+mm]))] = 1.0; comm3(z,0,n1,n2,n3); } /** Norm routine; see the description that follows the signature. Visible in this copy: timer T_norm2 is started when timeron is set, and the parameter rnmu and the local rnm2 are initialised to 0.0. Because rnmu is a double parameter, assignments to it inside this method are not seen by the caller. NOTE(review): the rest of the body is missing from this copy. */ public double norm2u3(double r[],int n1,int n2,int n3, double rnmu,int nx,int ny,int nz){ //c--------------------------------------------------------------------- //c norm2u3 evaluates approximations to the L2 norm and the //c uniform (or L-infinity or Chebyshev) norm, under the //c assumption that the boundaries are periodic or zero. Add the //c boundaries in with half weight (quarter weight on the edges //c and eighth weight at the corners) for inhomogeneous boundaries. 
//c--------------------------------------------------------------------- // double precision r(n1,n2,n3) if (timeron) timer.start(T_norm2); rnmu = 0.0; double rnm2=0.0; for(int i3=1;i3 ten[i+1+m*ind] ){ temp = ten[i+1+m*ind]; ten[i+1+m*ind] = ten[i+m*ind]; ten[i+m*ind] = temp; j_temp = j1[i+1+m*ind]; j1[i+1+m*ind] = j1[i+m*ind]; j1[i+m*ind] = j_temp; j_temp = j2[i+1+m*ind]; j2[i+1+m*ind] = j2[i+m*ind]; j2[i+m*ind] = j_temp; j_temp = j3[ i+1+m*ind ]; j3[i+1+m*ind] = j3[ i+m*ind ]; j3[i+m*ind] = j_temp; }else { return; } } }else{ for(i=0;i - a[1] * ( u(i1-1,i2,i3) + u(i1+1,i2,i3) //c > + u1(i1) ) //c--------------------------------------------------------------------- - a[2] * ( u2[i1] + u1[i1-1] + u1[i1+1] ) - a[3] * ( u2[i1-1] + u2[i1+1] ); } } //c--------------------------------------------------------------------- //c exchange boundary data //c--------------------------------------------------------------------- comm3(r,off,n1,n2,n3); if (timeron) timer.stop(T_resid); } /** Serial V-cycle. Visible in this copy: for k from lt-1 down to lb, rprj3 is applied to r from level k to level k-1 using the ir, m1, m2 and m3 entries of both levels; then level lb-1 of u is cleared with zero3 and psinv is applied on that level. NOTE(review): the text after the start of the following loop over k is missing, and what comes next appears to be the tail of the ...Master version of this routine. */ public void mg3P(double u[],double v[],double r[],int n1,int n2,int n3){ //c--------------------------------------------------------------------- //c multigrid V-cycle routine //c--------------------------------------------------------------------- // double precision u(nr),v(nv),r(nr) int j,k; //c--------------------------------------------------------------------- //c down cycle. 
//c restrict the residual from the fine grid to the coarse //c--------------------------------------------------------------------- for(k=lt-1;k>=lb;k--){ j = k-1; rprj3(r,ir[k],m1[k],m2[k],m3[k],ir[j],m1[j],m2[j],m3[j]); } k = lb-1; //c--------------------------------------------------------------------- //c compute an approximate solution on the coarsest grid //c--------------------------------------------------------------------- zero3(u,ir[k],m1[k],m2[k],m3[k]); psinv(r,ir[k],u,ir[k],m1[k],m2[k],m3[k]); for(k=lb;k=lb;k--){ j = k-1; rprj3Master(r,ir[k],m1[k],m2[k],m3[k],ir[j],m1[j],m2[j],m3[j]); } k = lb-1; //c--------------------------------------------------------------------- //c compute an approximate solution on the coarsest grid //c--------------------------------------------------------------------- zero3(u,ir[k],m1[k],m2[k],m3[k]); psinvMaster(r,ir[k],u,ir[k],m1[k],m2[k],m3[k]); for(k=lb;k + c(3) * ( r2(i1-1) + r2(i1+1) ) //c--------------------------------------------------------------------- } } } //c--------------------------------------------------------------------- //c exchange boundary points //c--------------------------------------------------------------------- comm3(u,uoff,n1,n2,n3); if (timeron) timer.stop(T_psinv); } /** Residual entry point used when serial is false. Starts timer T_resid when timeron is set and, when num_threads is 1, simply calls resid() with the same arguments. Otherwise it records in visr whether v and r are the same array and enters a block synchronized on this. NOTE(review): the remainder of the multi-thread branch is cut off at the end of this copy. */ public void residMaster(double u[],double v[],double r[], int off,int n1,int n2,int n3){ if (timeron) timer.start(T_resid); if(num_threads==1) resid(u,v,r,off,n1,n2,n3); else{ boolean visr=false; if(v==r)visr=true; synchronized(this){ for(int l=0;l