c-------------------------------------------------------------------------c c c c G R I D N P B 3.1 c c c c N A S G R I D B E N C H M A R K S c c c c F 7 7 / S E R I A L V E R S I O N c c c c M G c c c c-------------------------------------------------------------------------c c c c MG solves a (slightly modified) version of the NAS Parallel c c Benchmarks MG code. c c c c Permission to use, copy, distribute and modify this software c c for any purpose with or without fee is hereby granted. We c c request, however, that all derived work reference the NAS c c Grid Benchmarks (NGB) 3.1. This software is provided "as is" c c without express or implied warranty. c c c c Information on NGB 3.1, including the Technical Reports c c NAS-02-005 "NAS Grid Benchmarks Version 1.0" and c c NAS-04-005 "Evaluating the Information Power Grid using the c c NAS Grid Benchmarks", original specifications, source code, c c results and information on how to submit new results, c c is available at: c c c c http://www.nas.nasa.gov/Software/NPB/ c c c c Send comments or suggestions to npb@nas.nasa.gov c c c c NAS Parallel Benchmarks Group c c NASA Ames Research Center c c Mail Stop: T27A-1 c c Moffett Field, CA 94035-1000 c c c c E-mail: npb@nas.nasa.gov c c Fax: (650) 604-3957 c c c c--------------------------------------------------------------------- c c Authors: R. Van der Wijngaart c M. Frumkin c E. Barszcz c P. Frederickson c A. Woo c M. Yarrow c c--------------------------------------------------------------------- c--------------------------------------------------------------------- program mg c--------------------------------------------------------------------- implicit none include 'globals.h' c---------------------------------------------------------------------------c c k is the current level. It is passed down through subroutine args c and is NOT global. it is the current iteration c---------------------------------------------------------------------------c integer k, it, init_con, pidlen, width, depth, ascii, $ col_indx, verbose character graphname*2, pid*16 data pidlen /16/ external timer_read double precision t, tinit, mflops, timer_read c---------------------------------------------------------------------------c c These arrays are in common because they are quite large c and probably shouldn't be allocated on the stack. They c are always passed as subroutine args. c---------------------------------------------------------------------------c double precision u(nr),v(nv),r(nr),a(0:3),c(0:3) common /noautom/ u,v,r double precision rnm2, rnmu, old2, oldu, epsilon integer n1, n2, n3, nn, nit double precision verify_value logical verified integer ierr,i, fstatus integer T_bench, T_init parameter (T_bench=1, T_init=2) read *, ascii read (*,99) graphname read (*,99) class c skip over reading the NGB class read * read *, init_con read *, width read *, depth read (*,99) pid read *, verbose 99 format(a) call timer_clear(T_bench) call timer_clear(T_init) call timer_start(T_init) c--------------------------------------------------------------------- c Set input data c--------------------------------------------------------------------- write (*, 1000) 1000 format(//,' NAS Parallel Benchmarks 3.1-SER', > ' - MG Benchmark', /) lt = lt_default nit = nit_default nx(lt) = nx_default ny(lt) = ny_default nz(lt) = nz_default nit = max(1,nit/width) if (graphname .eq. 'MB') then C WE CHANGE THE NUMBER OF ITERATIONS WITHIN THE LAYER OF MG C NODES IN THE MB DATA FLOW GRAPH, BASED ON THE RELATIVE LOCATION C OF THE NODE IN THE LAYER (COLUMN INDEX) col_indx = mod(init_con,width) nit = max(1,int(nit*(1.0-0.5*col_indx/(1.0+col_indx)))) endif a(0) = -8.0D0/3.0D0 a(1) = 0.0D0 a(2) = 1.0D0/6.0D0 a(3) = 1.0D0/12.0D0 if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then c--------------------------------------------------------------------- c Coefficients for the S(a) smoother c--------------------------------------------------------------------- c(0) = -3.0D0/8.0D0 c(1) = +1.0D0/32.0D0 c(2) = -1.0D0/64.0D0 c(3) = 0.0D0 else c--------------------------------------------------------------------- c Coefficients for the S(b) smoother c--------------------------------------------------------------------- c(0) = -3.0D0/17.0D0 c(1) = +1.0D0/33.0D0 c(2) = -1.0D0/61.0D0 c(3) = 0.0D0 endif lb = 1 k = lt call setup(n1,n2,n3,k) call zero3(u,n1,n2,n3) call zran3(v,n1,n2,n3,k, ascii, graphname, init_con, $ pid, pidlen) call norm2u3(v,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt)) write (*, 1001) nx(lt),ny(lt),nz(lt), Class write (*, 1002) nit 1001 format(' Size: ', i3, 'x', i3, 'x', i3, ' (class ', A, ')' ) 1002 format(' Iterations: ', i3) call resid(u,v,r,n1,n2,n3,a,k) call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt)) old2 = rnm2 oldu = rnmu call timer_stop(T_init) call timer_start(T_bench) old2 = rnm2 oldu = rnmu do it=1,nit call mg3P(u,v,r,a,c,n1,n2,n3,k) call resid(u,v,r,n1,n2,n3,a,k) enddo call timer_stop(T_bench) t = timer_read(T_bench) tinit = timer_read(T_init) write( *,'(/A,F15.3,A/)' ) > ' Initialization time: ',tinit, ' seconds' write(*,100) 100 format(' Benchmark completed ') if (verbose .eq. 1) then verified = .true. call norm2u3(r,n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt)) write(*, 301) rnm2 301 format(' L2 Norm is ', E20.12) write(*, 2022) 2022 format(' No reference values provided') nn = nx(lt)*ny(lt)*nz(lt) if( t .ne. 0. ) then mflops = 58.*nit*nn*1.0D-6 /t else mflops = 0.0 endif call print_results('MG', class, nx(lt), ny(lt), nz(lt), > nit, t, > mflops, ' floating point', > verified, npbversion, compiletime, > cs1, cs2, cs3, cs4, cs5, cs6) endif if ((graphname .eq. 'VP') .or. (graphname .eq. 'MB')) then call dump_solution(ascii, graphname, init_con, pid, $ pidlen, u, n1, n2, n3) endif end subroutine dump_solution(ascii, name, init_con, pid, pidlen, $ u,n1,n2,n3) include 'globals.h' integer init_con, pidlen, taillen, outfilelen, i, j, k, stat character name*2, outfile*100, tail*10, fileform*12 character*(*) pid data taillen, outfilelen /10,100/ integer n1, n2, n3, ascii double precision u(n1,n2,n3) call blankout(outfile, outfilelen) call blankout(tail, taillen) outfile = name call integer_to_string(tail, taillen, init_con) call join(outfile, outfilelen, tail, taillen) call join(outfile, outfilelen, pid, pidlen) print *, 'File to be dumped is named:', outfile if (ascii .eq. 0) then fileform = 'unformatted' elseif (ascii .eq. 1) then fileform = 'formatted' else print *, 'Failure: Illegal data format: ', ascii stop 970 endif open(10,file=outfile,form=fileform,status='unknown', $ iostat=stat) if (stat .ne. 0) then print *, 'Failure: Could not open mg output file' stop 971 elseif (ascii .eq. 0) then write (10,iostat=stat) $ (((u(i,j,k), i=2,n1-1), j=2,n2-1), k=2,n3-1) else write (10,99,iostat=stat) $ (((u(i,j,k), i=2,n1-1), j=2,n2-1), k=2,n3-1) 99 format(500(e20.13)) endif if (stat .lt. 0) then print *, 'EOF encountered while writing mg output file' stop 972 elseif (stat .gt. 0) then print *, 'Error encountered while writing mg output file' stop 973 endif return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine setup(n1,n2,n3,k) c--------------------------------------------------------------------- c--------------------------------------------------------------------- implicit none include 'globals.h' integer is1, is2, is3, ie1, ie2, ie3 common /grid/ is1,is2,is3,ie1,ie2,ie3 integer n1,n2,n3,k integer d, i, j integer ax, mi(3,10) integer ng(3,10) integer s, dir,ierr ng(1,lt) = nx(lt) ng(2,lt) = ny(lt) ng(3,lt) = nz(lt) do ax=1,3 do k=lt-1,1,-1 ng(ax,k) = ng(ax,k+1)/2 enddo enddo 61 format(10i4) do k=lt,1,-1 nx(k) = ng(1,k) ny(k) = ng(2,k) nz(k) = ng(3,k) enddo do k = lt,1,-1 do ax = 1,3 mi(ax,k) = 2 + ng(ax,k) enddo m1(k) = mi(1,k) m2(k) = mi(2,k) m3(k) = mi(3,k) enddo k = lt is1 = 2 + ng(1,k) - ng(1,lt) ie1 = 1 + ng(1,k) n1 = 3 + ie1 - is1 is2 = 2 + ng(2,k) - ng(2,lt) ie2 = 1 + ng(2,k) n2 = 3 + ie2 - is2 is3 = 2 + ng(3,k) - ng(3,lt) ie3 = 1 + ng(3,k) n3 = 3 + ie3 - is3 ir(lt)=1 do j = lt-1, 1, -1 ir(j)=ir(j+1)+m1(j+1)*m2(j+1)*m3(j+1) enddo k = lt return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine mg3P(u,v,r,a,c,n1,n2,n3,k) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c multigrid V-cycle routine c--------------------------------------------------------------------- implicit none include 'globals.h' integer n1, n2, n3, k double precision u(nr),v(nv),r(nr) double precision a(0:3),c(0:3) integer j c--------------------------------------------------------------------- c down cycle. c restrict the residual from the find grid to the coarse c--------------------------------------------------------------------- do k= lt, lb+1 , -1 j = k-1 call rprj3(r(ir(k)),m1(k),m2(k),m3(k), > r(ir(j)),m1(j),m2(j),m3(j),k) enddo k = lb c--------------------------------------------------------------------- c compute an approximate solution on the coarsest grid c--------------------------------------------------------------------- call zero3(u(ir(k)),m1(k),m2(k),m3(k)) call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k) do k = lb+1, lt-1 j = k-1 c--------------------------------------------------------------------- c prolongate from level k-1 to k c--------------------------------------------------------------------- call zero3(u(ir(k)),m1(k),m2(k),m3(k)) call interp(u(ir(j)),m1(j),m2(j),m3(j), > u(ir(k)),m1(k),m2(k),m3(k),k) c--------------------------------------------------------------------- c compute residual for level k c--------------------------------------------------------------------- call resid(u(ir(k)),r(ir(k)),r(ir(k)),m1(k),m2(k),m3(k),a,k) c--------------------------------------------------------------------- c apply smoother c--------------------------------------------------------------------- call psinv(r(ir(k)),u(ir(k)),m1(k),m2(k),m3(k),c,k) enddo 200 continue j = lt - 1 k = lt call interp(u(ir(j)),m1(j),m2(j),m3(j),u,n1,n2,n3,k) call resid(u,v,r,n1,n2,n3,a,k) call psinv(r,u,n1,n2,n3,c,k) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine psinv( r,u,n1,n2,n3,c,k) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c psinv applies an approximate inverse as smoother: u = u + Cr c c This implementation costs 15A + 4M per result, where c A and M denote the costs of Addition and Multiplication. c Presuming coefficient c(3) is zero (the NPB assumes this, c but it is thus not a general case), 2A + 1M may be eliminated, c resulting in 13A + 3M. c Note that this vectorizes, and is also fine for cache c based machines. c--------------------------------------------------------------------- implicit none include 'globals.h' integer n1,n2,n3,k double precision u(n1,n2,n3),r(n1,n2,n3),c(0:3) integer i3, i2, i1 double precision r1(m), r2(m) do i3=2,n3-1 do i2=2,n2-1 do i1=1,n1 r1(i1) = r(i1,i2-1,i3) + r(i1,i2+1,i3) > + r(i1,i2,i3-1) + r(i1,i2,i3+1) r2(i1) = r(i1,i2-1,i3-1) + r(i1,i2+1,i3-1) > + r(i1,i2-1,i3+1) + r(i1,i2+1,i3+1) enddo do i1=2,n1-1 u(i1,i2,i3) = u(i1,i2,i3) > + c(0) * r(i1,i2,i3) > + c(1) * ( r(i1-1,i2,i3) + r(i1+1,i2,i3) > + r1(i1) ) > + c(2) * ( r2(i1) + r1(i1-1) + r1(i1+1) ) c--------------------------------------------------------------------- c Assume c(3) = 0 (Enable line below if c(3) not= 0) c--------------------------------------------------------------------- c > + c(3) * ( r2(i1-1) + r2(i1+1) ) c--------------------------------------------------------------------- enddo enddo enddo c--------------------------------------------------------------------- c exchange boundary points c--------------------------------------------------------------------- call comm3(u,n1,n2,n3,k) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine resid( u,v,r,n1,n2,n3,a,k ) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c resid computes the residual: r = v - Au c c This implementation costs 15A + 4M per result, where c A and M denote the costs of Addition (or Subtraction) and c Multiplication, respectively. c Presuming coefficient a(1) is zero (the NPB assumes this, c but it is thus not a general case), 3A + 1M may be eliminated, c resulting in 12A + 3M. c Note that this vectorizes, and is also fine for cache c based machines. c--------------------------------------------------------------------- implicit none include 'globals.h' integer n1,n2,n3,k double precision u(n1,n2,n3),v(n1,n2,n3),r(n1,n2,n3),a(0:3) integer i3, i2, i1 double precision u1(m), u2(m) do i3=2,n3-1 do i2=2,n2-1 do i1=1,n1 u1(i1) = u(i1,i2-1,i3) + u(i1,i2+1,i3) > + u(i1,i2,i3-1) + u(i1,i2,i3+1) u2(i1) = u(i1,i2-1,i3-1) + u(i1,i2+1,i3-1) > + u(i1,i2-1,i3+1) + u(i1,i2+1,i3+1) enddo do i1=2,n1-1 r(i1,i2,i3) = v(i1,i2,i3) > - a(0) * u(i1,i2,i3) c--------------------------------------------------------------------- c Assume a(1) = 0 (Enable 2 lines below if a(1) not= 0) c--------------------------------------------------------------------- c > - a(1) * ( u(i1-1,i2,i3) + u(i1+1,i2,i3) c > + u1(i1) ) c--------------------------------------------------------------------- > - a(2) * ( u2(i1) + u1(i1-1) + u1(i1+1) ) > - a(3) * ( u2(i1-1) + u2(i1+1) ) enddo enddo enddo c--------------------------------------------------------------------- c exchange boundary data c--------------------------------------------------------------------- call comm3(r,n1,n2,n3,k) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine rprj3( r,m1k,m2k,m3k,s,m1j,m2j,m3j,k ) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c rprj3 projects onto the next coarser grid, c using a trilinear Finite Element projection: s = r' = P r c c This implementation costs 20A + 4M per result, where c A and M denote the costs of Addition and Multiplication. c Note that this vectorizes, and is also fine for cache c based machines. c--------------------------------------------------------------------- implicit none include 'globals.h' integer m1k, m2k, m3k, m1j, m2j, m3j,k double precision r(m1k,m2k,m3k), s(m1j,m2j,m3j) integer j3, j2, j1, i3, i2, i1, d1, d2, d3, j double precision x1(m), y1(m), x2,y2 if(m1k.eq.3)then d1 = 2 else d1 = 1 endif if(m2k.eq.3)then d2 = 2 else d2 = 1 endif if(m3k.eq.3)then d3 = 2 else d3 = 1 endif do j3=2,m3j-1 i3 = 2*j3-d3 C i3 = 2*j3-1 do j2=2,m2j-1 i2 = 2*j2-d2 C i2 = 2*j2-1 do j1=2,m1j i1 = 2*j1-d1 C i1 = 2*j1-1 x1(i1-1) = r(i1-1,i2-1,i3 ) + r(i1-1,i2+1,i3 ) > + r(i1-1,i2, i3-1) + r(i1-1,i2, i3+1) y1(i1-1) = r(i1-1,i2-1,i3-1) + r(i1-1,i2-1,i3+1) > + r(i1-1,i2+1,i3-1) + r(i1-1,i2+1,i3+1) enddo do j1=2,m1j-1 i1 = 2*j1-d1 C i1 = 2*j1-1 y2 = r(i1, i2-1,i3-1) + r(i1, i2-1,i3+1) > + r(i1, i2+1,i3-1) + r(i1, i2+1,i3+1) x2 = r(i1, i2-1,i3 ) + r(i1, i2+1,i3 ) > + r(i1, i2, i3-1) + r(i1, i2, i3+1) s(j1,j2,j3) = > 0.5D0 * r(i1,i2,i3) > + 0.25D0 * ( r(i1-1,i2,i3) + r(i1+1,i2,i3) + x2) > + 0.125D0 * ( x1(i1-1) + x1(i1+1) + y2) > + 0.0625D0 * ( y1(i1-1) + y1(i1+1) ) enddo enddo enddo j = k-1 call comm3(s,m1j,m2j,m3j,j) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine interp( z,mm1,mm2,mm3,u,n1,n2,n3,k ) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c interp adds the trilinear interpolation of the correction c from the coarser grid to the current approximation: u = u + Qu' c c Observe that this implementation costs 16A + 4M, where c A and M denote the costs of Addition and Multiplication. c Note that this vectorizes, and is also fine for cache c based machines. Vector machines may get slightly better c performance however, with 8 separate "do i1" loops, rather than 4. c--------------------------------------------------------------------- implicit none include 'globals.h' integer mm1, mm2, mm3, n1, n2, n3,k double precision z(mm1,mm2,mm3),u(n1,n2,n3) integer i3, i2, i1, d1, d2, d3, t1, t2, t3 c note that m = 1037 in globals.h but for this only need to be c 535 to handle up to 1024^3 c integer m c parameter( m=535 ) double precision z1(m),z2(m),z3(m) if( n1 .ne. 3 .and. n2 .ne. 3 .and. n3 .ne. 3 ) then do i3=1,mm3-1 do i2=1,mm2-1 do i1=1,mm1 z1(i1) = z(i1,i2+1,i3) + z(i1,i2,i3) z2(i1) = z(i1,i2,i3+1) + z(i1,i2,i3) z3(i1) = z(i1,i2+1,i3+1) + z(i1,i2,i3+1) + z1(i1) enddo do i1=1,mm1-1 u(2*i1-1,2*i2-1,2*i3-1)=u(2*i1-1,2*i2-1,2*i3-1) > +z(i1,i2,i3) u(2*i1,2*i2-1,2*i3-1)=u(2*i1,2*i2-1,2*i3-1) > +0.5d0*(z(i1+1,i2,i3)+z(i1,i2,i3)) enddo do i1=1,mm1-1 u(2*i1-1,2*i2,2*i3-1)=u(2*i1-1,2*i2,2*i3-1) > +0.5d0 * z1(i1) u(2*i1,2*i2,2*i3-1)=u(2*i1,2*i2,2*i3-1) > +0.25d0*( z1(i1) + z1(i1+1) ) enddo do i1=1,mm1-1 u(2*i1-1,2*i2-1,2*i3)=u(2*i1-1,2*i2-1,2*i3) > +0.5d0 * z2(i1) u(2*i1,2*i2-1,2*i3)=u(2*i1,2*i2-1,2*i3) > +0.25d0*( z2(i1) + z2(i1+1) ) enddo do i1=1,mm1-1 u(2*i1-1,2*i2,2*i3)=u(2*i1-1,2*i2,2*i3) > +0.25d0* z3(i1) u(2*i1,2*i2,2*i3)=u(2*i1,2*i2,2*i3) > +0.125d0*( z3(i1) + z3(i1+1) ) enddo enddo enddo else if(n1.eq.3)then d1 = 2 t1 = 1 else d1 = 1 t1 = 0 endif if(n2.eq.3)then d2 = 2 t2 = 1 else d2 = 1 t2 = 0 endif if(n3.eq.3)then d3 = 2 t3 = 1 else d3 = 1 t3 = 0 endif do i3=d3,mm3-1 do i2=d2,mm2-1 do i1=d1,mm1-1 u(2*i1-d1,2*i2-d2,2*i3-d3)=u(2*i1-d1,2*i2-d2,2*i3-d3) > +z(i1,i2,i3) enddo do i1=1,mm1-1 u(2*i1-t1,2*i2-d2,2*i3-d3)=u(2*i1-t1,2*i2-d2,2*i3-d3) > +0.5D0*(z(i1+1,i2,i3)+z(i1,i2,i3)) enddo enddo do i2=1,mm2-1 do i1=d1,mm1-1 u(2*i1-d1,2*i2-t2,2*i3-d3)=u(2*i1-d1,2*i2-t2,2*i3-d3) > +0.5D0*(z(i1,i2+1,i3)+z(i1,i2,i3)) enddo do i1=1,mm1-1 u(2*i1-t1,2*i2-t2,2*i3-d3)=u(2*i1-t1,2*i2-t2,2*i3-d3) > +0.25D0*(z(i1+1,i2+1,i3)+z(i1+1,i2,i3) > +z(i1, i2+1,i3)+z(i1, i2,i3)) enddo enddo enddo do i3=1,mm3-1 do i2=d2,mm2-1 do i1=d1,mm1-1 u(2*i1-d1,2*i2-d2,2*i3-t3)=u(2*i1-d1,2*i2-d2,2*i3-t3) > +0.5D0*(z(i1,i2,i3+1)+z(i1,i2,i3)) enddo do i1=1,mm1-1 u(2*i1-t1,2*i2-d2,2*i3-t3)=u(2*i1-t1,2*i2-d2,2*i3-t3) > +0.25D0*(z(i1+1,i2,i3+1)+z(i1,i2,i3+1) > +z(i1+1,i2,i3 )+z(i1,i2,i3 )) enddo enddo do i2=1,mm2-1 do i1=d1,mm1-1 u(2*i1-d1,2*i2-t2,2*i3-t3)=u(2*i1-d1,2*i2-t2,2*i3-t3) > +0.25D0*(z(i1,i2+1,i3+1)+z(i1,i2,i3+1) > +z(i1,i2+1,i3 )+z(i1,i2,i3 )) enddo do i1=1,mm1-1 u(2*i1-t1,2*i2-t2,2*i3-t3)=u(2*i1-t1,2*i2-t2,2*i3-t3) > +0.125D0*(z(i1+1,i2+1,i3+1)+z(i1+1,i2,i3+1) > +z(i1 ,i2+1,i3+1)+z(i1 ,i2,i3+1) > +z(i1+1,i2+1,i3 )+z(i1+1,i2,i3 ) > +z(i1 ,i2+1,i3 )+z(i1 ,i2,i3 )) enddo enddo enddo endif return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine norm2u3(r,n1,n2,n3,rnm2,rnmu,nx,ny,nz) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c norm2u3 evaluates approximations to the L2 norm and the c uniform (or L-infinity or Chebyshev) norm, under the c assumption that the boundaries are periodic or zero. Add the c boundaries in with half weight (quarter weight on the edges c and eighth weight at the corners) for inhomogeneous boundaries. c--------------------------------------------------------------------- implicit none integer n1, n2, n3, nx, ny, nz double precision rnm2, rnmu, r(n1,n2,n3) double precision s, a, ss integer i3, i2, i1, ierr integer n n = nx*ny*nz s=0.0D0 rnmu = 0.0D0 do i3=2,n3-1 do i2=2,n2-1 do i1=2,n1-1 s=s+r(i1,i2,i3)**2 a=abs(r(i1,i2,i3)) if(a.gt.rnmu)rnmu=a enddo enddo enddo rnm2=sqrt( s / float( n )) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine rep_nrm(u,n1,n2,n3,title,kk) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c report on norm c--------------------------------------------------------------------- implicit none include 'globals.h' integer n1, n2, n3, kk double precision u(n1,n2,n3) character*8 title double precision rnm2, rnmu call norm2u3(u,n1,n2,n3,rnm2,rnmu,nx(kk),ny(kk),nz(kk)) write(*,7)kk,title,rnm2,rnmu 7 format(' Level',i2,' in ',a8,': norms =',D21.14,D21.14) return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine comm3(u,n1,n2,n3,kk) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c comm3 organizes the communication on all borders c--------------------------------------------------------------------- implicit none include 'globals.h' integer n1, n2, n3, kk double precision u(n1,n2,n3) integer axis do axis = 1, 3 call comm1p( axis, u, n1, n2, n3, kk ) enddo return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine comm1p( axis, u, n1, n2, n3, kk ) c--------------------------------------------------------------------- c--------------------------------------------------------------------- implicit none include 'globals.h' integer axis, dir, n1, n2, n3 double precision u( n1, n2, n3 ) integer i3, i2, i1, buff_len,buff_id integer i, kk, indx dir = -1 buff_id = 3 + dir buff_len = nm2 do i=1,nm2 buff(i,buff_id) = 0.0D0 enddo dir = +1 buff_id = 3 + dir buff_len = nm2 do i=1,nm2 buff(i,buff_id) = 0.0D0 enddo dir = +1 buff_id = 2 + dir buff_len = 0 if( axis .eq. 1 )then do i3=2,n3-1 do i2=2,n2-1 buff_len = buff_len + 1 buff(buff_len, buff_id ) = u( n1-1, i2,i3) enddo enddo endif if( axis .eq. 2 )then do i3=2,n3-1 do i1=1,n1 buff_len = buff_len + 1 buff(buff_len, buff_id )= u( i1,n2-1,i3) enddo enddo endif if( axis .eq. 3 )then do i2=1,n2 do i1=1,n1 buff_len = buff_len + 1 buff(buff_len, buff_id ) = u( i1,i2,n3-1) enddo enddo endif dir = -1 buff_id = 2 + dir buff_len = 0 if( axis .eq. 1 )then do i3=2,n3-1 do i2=2,n2-1 buff_len = buff_len + 1 buff(buff_len,buff_id ) = u( 2, i2,i3) enddo enddo endif if( axis .eq. 2 )then do i3=2,n3-1 do i1=1,n1 buff_len = buff_len + 1 buff(buff_len, buff_id ) = u( i1, 2,i3) enddo enddo endif if( axis .eq. 3 )then do i2=1,n2 do i1=1,n1 buff_len = buff_len + 1 buff(buff_len, buff_id ) = u( i1,i2,2) enddo enddo endif do i=1,nm2 buff(i,4) = buff(i,3) buff(i,2) = buff(i,1) enddo dir = -1 buff_id = 3 + dir indx = 0 if( axis .eq. 1 )then do i3=2,n3-1 do i2=2,n2-1 indx = indx + 1 u(n1,i2,i3) = buff(indx, buff_id ) enddo enddo endif if( axis .eq. 2 )then do i3=2,n3-1 do i1=1,n1 indx = indx + 1 u(i1,n2,i3) = buff(indx, buff_id ) enddo enddo endif if( axis .eq. 3 )then do i2=1,n2 do i1=1,n1 indx = indx + 1 u(i1,i2,n3) = buff(indx, buff_id ) enddo enddo endif dir = +1 buff_id = 3 + dir indx = 0 if( axis .eq. 1 )then do i3=2,n3-1 do i2=2,n2-1 indx = indx + 1 u(1,i2,i3) = buff(indx, buff_id ) enddo enddo endif if( axis .eq. 2 )then do i3=2,n3-1 do i1=1,n1 indx = indx + 1 u(i1,1,i3) = buff(indx, buff_id ) enddo enddo endif if( axis .eq. 3 )then do i2=1,n2 do i1=1,n1 indx = indx + 1 u(i1,i2,1) = buff(indx, buff_id ) enddo enddo endif return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine zran3(z,n1,n2,n3,k, $ ascii, name, init_con, pid, pidlen) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c Read the initial solution from a file c--------------------------------------------------------------------- implicit none integer init_con, pidlen, taillen, infilelen, stat, ascii character name*2, infile*100, tail*10, fileform*12 character*(*) pid data taillen, infilelen /10,100/ integer n1, n2, n3, k, ii, jj, kk double precision z(n1,n2,n3) if ((name .eq. 'VP') .or. (name .eq. 'MB')) then c construct the name of the input file and read it call blankout(infile, infilelen) call blankout(tail, taillen) infile = name call integer_to_string(tail, taillen, init_con) call join(infile,infilelen,tail,taillen) call join(infile,infilelen,pid,pidlen) call join(infile,infilelen+pidlen,'IN',2) print *, 'File to be read is named:', infile if (ascii .eq. 0) then fileform = 'unformatted' elseif (ascii .eq. 1) then fileform = 'formatted' else print *, 'Failure: Illegal data format: ', ascii stop 966 endif open(10,file=infile,form=fileform,status='old', $ iostat=stat) if (stat .ne. 0) then print *, 'Failure: Could not open mg input file' stop 967 elseif (ascii .eq. 0) then read (10,iostat=stat) $ (((z(ii,jj,kk),ii=2,n1-1),jj=2,n2-1),kk=2,n3-1) else read (10,99,iostat=stat) $ (((z(ii,jj,kk),ii=2,n1-1),jj=2,n2-1),kk=2,n3-1) 99 format(500(e20.13)) endif if (stat .lt. 0) then print *, 'EOF encountered while reading mg input file' stop 968 elseif (stat .gt. 0) then print *, 'Error encountered while reading mg input file' stop 969 endif call comm3(z,n1,n2,n3,k) return endif c return end c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine zero3(z,n1,n2,n3) c--------------------------------------------------------------------- c--------------------------------------------------------------------- implicit none integer n1, n2, n3 double precision z(n1,n2,n3) integer i1, i2, i3 do i3=1,n3 do i2=1,n2 do i1=1,n1 z(i1,i2,i3)=0.0D0 enddo enddo enddo return end c----- end of program ------------------------------------------------