NCAR HPC Tutorial

Example 7 - shallow7.F - Advanced hybrid parallelization example.

Download shallow7.F code.
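
Before diving into the listing, it may help to see how the model divides the 65x65 grid among MPI tasks. The short free-form Fortran program below is a standalone sketch, not part of shallow7.F: it simply reproduces the row-decomposition index arithmetic from the listing (js/je and jsj/jej). The file name and the task count of 4 are arbitrary choices for illustration.

    ! decomp.f90 -- standalone sketch, not part of shallow7.F.
    ! Reproduces the row-decomposition arithmetic from the listing so you
    ! can see which rows of the 65x65 grid each MPI task works on.
    program decomp
      implicit none
      integer, parameter :: n1 = 65, n2 = 65
      integer :: n, numtasks, taskid, js, je, jsj, jej

      n = n1 - 1                            ! 64, as set in the model
      numtasks = 4                          ! hypothetical task count for illustration

      do taskid = 0, numtasks - 1
         js  = 1 + (n/numtasks + 1)*taskid     ! bounds used in the periodic-
         je  = min(n2, js + n/numtasks)        ! continuation loops and halo indexing
         jsj = 1 + (n/numtasks)*taskid         ! bounds used in the main worker
         jej = min(n2, jsj + n/numtasks)       ! loops and the gathers
         print '(a,i2,4(a,i3))', 'task ', taskid, ': js=', js, ' je=', je, &
               ' jsj=', jsj, ' jej=', jej
      end do
    end program decomp

Each task then applies the OpenMP parallel do directives in the listing to its own range of rows, which is what makes the example hybrid.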

1.  	      PROGRAM SHALOW 
2.  	C     BENCHMARK WEATHER PREDICTION PROGRAM FOR COMPARING THE
3.  	C     PERFORMANCE OF CURRENT SUPERCOMPUTERS. THE MODEL IS
4.  	C     BASED ON THE PAPER - THE DYNAMICS OF FINITE-DIFFERENCE
5.  	C     MODELS OF THE SHALLOW-WATER EQUATIONS, BY ROBERT SADOURNY
6.  	C     J. ATM. SCIENCES, VOL 32, NO 4, APRIL 1975.
7.  	C     
8.  	C     CODE BY PAUL N. SWARZTRAUBER, NATIONAL CENTER FOR
9.  	C     ATMOSPHERIC RESEARCH, BOULDER, CO,  OCTOBER 1984.
10. 	C     
11. 	C     MODIFIED BY R. K. SATO, NCAR, APRIL 7, 1986 FOR MULTITASKING.
12. 	*     Modified by Juliana Rew, May 2006.
13. 	*     This version of the model, shallow7.F, can be run in parallel
14. 	*     on a variable number of processors rather than just two.
15. 	*     The goal is to obtain results in less total wallclock time.
16. 	*     Initial and calculated values
17. 	*     of U, V, and P are written to a netCDF file
18. 	*     for later use in visualizing the results. The netCDF data
19. 	*     management library is freely available from
20. 	*     http://www.unidata.ucar.edu/software/netcdf
21. 	*     OpenMP parallel do directives are used in the major worker
22. 	*     loops.
23. 	*     The modified code uses portable intrinsic Fortran 90 timing routines.
24. 	*     This can be compiled on the IBM SP using:
25. 	*     mpxlf90_r -qmaxmem=-1 -g -o shallow7 -qsmp=omp -qfixed=132 -qsclk=micro \
26. 	*     -I/usr/local/include shallow7.F -L/usr/local/lib64/r4i4 -l netcdf
27. 	*     where mpxlf90_r is the variant of XLF on the IBM that brings in
28. 	*     the MPI library, the -L and -I point to the local installation of netCDF,
29. 	*     and -qsmp=omp is XLF's option for OpenMP threading.
30. 	*     The MPI gathers, the printout of results, and the netCDF file
31. 	*     writes were commented out when making timings. This is because
32. 	*     writing to a file must be done serially to avoid each task clobbering
33. 	*     the results of the other tasks. A future enhancement
34. 	*     would be to parallelize writing of the results to the data file.
35. 	
36. 	      integer N1,N2
37. 	      PARAMETER (N1=65, N2=65)
38. 	      real, dimension(N1,N2) ::  U, V, P,UNEW, VNEW,PNEW, UOLD,VOLD,
39. 	     + POLD,CU, CV,Z, H, PSI
40. 	C     
41. 	      integer ncid,p_id,u_id,v_id
42. 	      integer istart(3)
43. 	      integer icount(3)
44. 	      
45. 	*     Prepare netCDF file to receive model output data
46. 	*     File writing will be done by MPI task 0, not in parallel
47. 	      
48. 	      include 'netcdf.inc'
49. 	
50. 	      include "mpif.h"
51. 	       integer js,je,jsj,jej,taskid,numtasks,req(16),
52. 	     1     istat(MPI_STATUS_SIZE,16)
53. 	 
54. 	      integer m,n
55. 	      integer size
56. 	      double precision MFS100, MFS200,MFS300
57. 	      double precision T100,T200,T300,TSTART,CTIME,TCYC
58. 	      integer c1,c2,r,max
59. 	C     
60. 	      real DT,TDT,DX,DY,A,ALPHA,EL,PI,TPI,DI,DJ,PCF
61. 	      integer MASTER, itmax,mprint,mp1,np1
62. 	      parameter (MASTER=0)
63. 	      integer ierr, i,
64. 	     &     chunksize, extra
65. 	      integer  status(3)
66. 	
67. 	******receive buffers for MPI gather calls of p,u,v values
68. 	      real, dimension(:,:),allocatable:: precv, urecv,vrecv
69. 	******variables to be used in gather before printing results
70. 	      integer, allocatable:: idisp(:),irecvcounts(:),mychunk(:)
71. 	** set up netcdf file
72. 	      call netcdf_setup(N1,N2,ncid,p_id,u_id,v_id,istart,icount)
73. 	******initialize MPI and get number of tasks working on this.
74. 	      cALL MPI_INIT( ierr )
75. 	      
76. 	      cALL MPI_COMM_RANK( MPI_COMM_WORLD, taskid, ierr )
77. 	      cALL MPI_COMM_SIZE( MPI_COMM_WORLD, numtasks, ierr )
78. 	C     
79. 	C     Initializing array req with MPI_REQUEST_NULL
80. 	C     
81. 	      do i=1,16
82. 	         req(i) = MPI_REQUEST_NULL
83. 	      end do
84. 	      m=n2-1
85. 	      n=n1-1
86. 	C     
87. 	C     INITIALIZE CONSTANTS AND ARRAYS
88. 	C     
89. 	C     NOTE BELOW THAT TWO DELTA T (TDT) IS SET TO DT ON THE FIRST
90. 	C     CYCLE AFTER WHICH IT IS RESET TO DT+DT.
91. 	C     
92. 	      DT = 90.
93. 	      
94. 	      DX = 1.E5
95. 	      DY = 1.E5
96. 	      
97. 	      A = 1.E6
98. 	      ALPHA = .001
99. 	      ITMAX = 4000
100.	      MPRINT = 100
101.	      M = N1 - 1
102.	      N = N2 - 1
103.	      TDT = DT
104.	      MP1 = M+1
105.	      NP1 = N+1
106.	      EL = FLOAT(N)*DX
107.	      PI = 4.*ATAN(1.)
108.	      TPI = PI+PI
109.	      DI = TPI/FLOAT(M)
110.	      DJ = TPI/FLOAT(N)
111.	      PCF = PI*PI*A*A/(EL*EL)
112.	      js = 1 + (N/numtasks+1)*taskid
113.	      je = min(n2,js+N/numtasks)
114.	*     index for the gather
115.	      jsj = 1 + (N/numtasks)*taskid
116.	      jej = min(n2,jsj + N/numtasks)
117.	C     
118.	C     INITIAL VALUES OF THE STREAM FUNCTION AND P
119.	C     
120.	      DO J=jsj,jej
121.	         DO I=1,mp1
122.	            PSI(I,J) = A*SIN((FLOAT(I)-.5)*DI)*SIN((FLOAT(J)-.5)*DJ)
123.	            P(I,J) = PCF*(COS(2.*FLOAT(I-1)*DI)
124.	     1           +COS(2.*FLOAT(J-1)*DJ))+50000.
125.	            if(j.eq.n)then
126.	               P(I,J+1) = PCF*(COS(2.*FLOAT(I-1)*DI)
127.	     1              +COS(2.*FLOAT(J)*DJ))+50000.
128.	            endif
129.	         end do
130.	 50   end do
131.	C     
132.	C     INITIALIZE VELOCITIES
133.	C     
134.	      if(taskid.lt.numtasks-1)then
135.	         cALL mpi_irecv(psi(1,jej+1),n1,MPI_DOUBLE_PRECISION,
136.	     1        taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
137.	      endif
138.	      if(taskid.gt.0)then
139.	         cALL mpi_isend(psi(1,jsj),n1,MPI_DOUBLE_PRECISION,
140.	     1        taskid-1,1,MPI_COMM_WORLD,req(2),ierr)
141.	      endif
142.	      cALL MPI_WAITALL(2,req,istat,ierr)
143.	      DO J=jsj,jej
144.	         DO I=1,M
145.	            if(i.eq.m) then
146.	               U(I+1,J) = -(PSI(I+1,J+1)-PSI(I+1,J))/DY
147.	            else
148.	               U(I,J) = -(PSI(I+1,J)-PSI(I,J))/DY
149.	            endif
150.	            if(j.gt.1)then
151.	               V(I,J) = (PSI(I+1,J)-PSI(I,J))/DX
152.	            endif
153.	            if(j.eq.n)then
154.	               V(I,J+1) = (PSI(I+1,J+1)-PSI(I,J+1))/DX
155.	            endif
156.	         end do
157.	 60   end do
158.	C     
159.	C     PERIODIC CONTINUATION for U and V
160.	C     
161.	      DO J=1,N
162.	         U(1,J) = U(M+1,J)
163.	
164.	         V(M+1,J) = V(1,J)
165.	         if(j.eq.n)then
166.	            V(M+1,J+1) = V(1,J+1)
167.	         endif
168.	 70   end do
169.	      if(taskid.eq.0)then
170.	         cALL mpi_irecv(v(1,1),n1,MPI_DOUBLE_PRECISION,
171.	     1        numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
172.	      endif
173.	      if(taskid.eq.numtasks-1)then
174.	         cALL mpi_irecv(u(1,n+1),n1,MPI_DOUBLE_PRECISION,
175.	     1        0,2,MPI_COMM_WORLD,req(2),ierr)
176.	      endif
177.	      if(taskid.eq.numtasks-1)then
178.	         cALL mpi_isend(v(1,n+1),n1,MPI_DOUBLE_PRECISION,
179.	     1        0,1,MPI_COMM_WORLD,req(3),ierr)
180.	      endif
181.	      if(taskid.eq.0)then
182.	         cALL mpi_isend(u(1,1),n1,MPI_DOUBLE_PRECISION,
183.	     1        numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
184.	      endif
185.	      if(taskid.eq.0.or.taskid.eq.numtasks-1)then
186.	         cALL MPI_WAITALL(4,req,istat,ierr)
187.	      endif
188.	      if(taskid.eq.0)then
189.	         cALL mpi_irecv(v(m+1,1),1,MPI_DOUBLE_PRECISION,
190.	     1        numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
191.	      endif
192.	      if(taskid.eq.numtasks-1)then
193.	         cALL mpi_irecv(u(1,n+1),1,MPI_DOUBLE_PRECISION,
194.	     1        0,2,MPI_COMM_WORLD,req(2),ierr)
195.	      endif
196.	      if(taskid.eq.numtasks-1)then
197.	         cALL mpi_isend(v(1,n+1),1,MPI_DOUBLE_PRECISION,
198.	     1        0,1,MPI_COMM_WORLD,req(3),ierr)
199.	      endif
200.	      if(taskid.eq.0)then
201.	         cALL mpi_isend(u(M+1,1),1,MPI_DOUBLE_PRECISION,
202.	     1        numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
203.	      endif
204.	      if(taskid.eq.0.or.taskid.eq.numtasks-1)then
205.	         cALL MPI_WAITALL(4,req,istat,ierr)
206.	      endif
207.	      DO J=jsj,jej
208.	         DO I=1,mp1
209.	            UOLD(I,J) = U(I,J)
210.	            VOLD(I,J) = V(I,J)
211.	            POLD(I,J) = P(I,J)
212.	            if(j.eq.n)then
213.	               UOLD(I,J+1) = U(I,J+1)
214.	               VOLD(I,J+1) = V(I,J+1)
215.	               POLD(I,J+1) = P(I,J+1)
216.	            endif
217.	         end do
218.	 86   end do
219.	C     END OF INITIALIZATION
220.	C     
221.	
222.	C     PRINT INITIAL VALUES
223.	C     
224.	*     Each task has been working on 1/numtasks of the values. Printing
225.	*     will require gathering the values from each task.
226.	*     Gathering is also necessary if we want to create images
227.	*     from the data.
228.	*     Gathering is expensive (similar to doing I/O), so if you
229.	*     are "benchmarking" (seeing how fast the model performs), the
230.	*     gathering and printing should be commented out.
231.	*     
232. 	*     The pold, uold, and vold arrays we wish to print are dimensioned
233. 	*     65x65, which is not evenly divisible by the number of tasks. Each task's
234. 	*     chunk will be approximately equal in size; any leftover goes to the final task.
235.	      
236.	      extra=mod(n1*n2,numtasks)
237.	      if(taskid.eq. numtasks -1) then
238.	         chunksize=n1*n2/numtasks + extra
239.	      else
240.	         chunksize=n1*n2/numtasks
241.	      endif
242.	      allocate (mychunk(numtasks))
243.	      mychunk = chunksize
244.	      
245.	*     We then specify in the MPI gather call how big the chunks
246.	*     are and the displacement between chunks.
247. 	*     Counts and displacements are required only on the master;
248. 	*     gatherv is called by all of the tasks, however.
249.	      if(taskid == master)then
250.	         allocate(irecvcounts(0:numtasks-1))
251.	         allocate(idisp(0:numtasks-1))
252.	      endif
253.	*     gather the counts to the master
254.	      
255.	      call MPI_Gather(mychunk(1),1,MPI_INTEGER,
256.	     +     irecvcounts,1, MPI_INTEGER,
257.	     +     master, MPI_COMM_WORLD, ierr)
258.	*     calculate displacements and size of receive buffer
259.	*     (urecv array)
260.	      if(taskid == master) then
261.	         idisp(0) = 0
262.	         do i=1,numtasks-1,1
263.	            idisp(i)=irecvcounts(i-1)+idisp(i-1)
264.	         end do
265.	*     allocate receive buffers
266.	         allocate(precv(n1,n2))
267.	         allocate(urecv(n1,n2))
268.	         allocate(vrecv(n1,n2))
269.	      endif
270.	      
271.	      call mpi_gatherv(pold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
272.	     +     precv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
273.	     +     master,MPI_COMM_WORLD,ierr)
274.	      
275.	      call mpi_gatherv(uold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
276.	     +     urecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
277.	     +     master,MPI_COMM_WORLD,ierr)
278.	      
279.	      call mpi_gatherv(vold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
280.	     +     vrecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
281.	     +     master,MPI_COMM_WORLD,ierr)
282.	      
283.	      if(taskid.eq.MASTER)then
284.	         WRITE(6,390) N,M,DX,DY,DT,ALPHA,ITMAX
285.	 390     FORMAT(' NUMBER OF POINTS IN THE X DIRECTION', I8/
286.	     1        ' NUMBER OF POINTS IN THE Y DIRECTION', I8/
287.	     2        ' GRID SPACING IN THE X DIRECTION    ', F8.0/
288.	     3        ' GRID SPACING IN THE Y DIRECTION    ', F8.0/
289.	     4        ' TIME STEP                          ', F8.0/
290.	     5        ' TIME FILTER PARAMETER              ', F8.3/
291.	     6        ' NUMBER OF ITERATIONS               ', I8)
292.	      endif         
293.	
294.	         MNMIN = MIN0(M,N)
295.	      if(taskid.eq.master) then
296.	         WRITE(6,391) (PRECV(I,I),I=1,MNMIN)
297.	 391     FORMAT(/' INITIAL DIAGONAL ELEMENTS OF P ', //(8E15.7))
298.	         WRITE(6,392) (URECV(I,I),I=1,MNMIN)
299.	 392     FORMAT(/' INITIAL DIAGONAL ELEMENTS OF U ', //(8E15.7))
300.	         WRITE(6,393) (VRECV(I,I),I=1,MNMIN)
301.	 393     FORMAT(/' INITIAL DIAGONAL ELEMENTS OF V ', //(8E15.7))
302.	      endif
303.	      
304.	*     write initial values of p, u, and v into a netCDF file
305.	      if(taskid.eq.MASTER) then
306.	       call my_ncwrite(ncid,p_id,istart,icount,precv,N1,N2)
307.	       call my_ncwrite(ncid,u_id,istart,icount,urecv,N1,N2)
308.	       call my_ncwrite(ncid,v_id,istart,icount,vrecv,N1,N2)
309.	      endif
310.	      TIME = 0
311.	      NCYCLE = 0
312.	      call system_clock (count=c1, count_rate=r, count_max=max)
313.	      TSTART = c1
314.	      T300 = 1.
315.	      do NCYCLE = 1,itmax
316.	C     
317.	C     COMPUTE CAPITAL  U, CAPITAL V, Z AND H
318.	C
319.	         FSDX = 4./DX
320.	         FSDY = 4./DY
321.	C     - ------------------------------------------------
322.	         if(taskid.lt.numtasks-1)then
323.	            cALL mpi_irecv(V(1,je+1),n1,MPI_DOUBLE_PRECISION,
324.	     1           taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
325.	
326.	         endif
327.	         if(taskid.gt.0)then
328.	            cALL mpi_isend(V(1,js),n1,MPI_DOUBLE_PRECISION,
329.	     1           taskid-1,1,MPI_COMM_WORLD,req(4),ierr)
330.	         endif
331.	         if(taskid.gt.0)then
332.	            cALL mpi_irecv(U(1,js-1),n1,MPI_DOUBLE_PRECISION,
333.	     1           taskid-1,2,MPI_COMM_WORLD,req(2),ierr)
334.	            cALL mpi_irecv(P(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
335.	     1           taskid-1,3,MPI_COMM_WORLD,req(3),ierr)
336.	         endif
337.	         if(taskid.lt.numtasks-1)then
338.	            cALL mpi_isend(U(1,je),n1,MPI_DOUBLE_PRECISION,
339.	     1           taskid+1,2,MPI_COMM_WORLD,req(5),ierr)
340.	            cALL mpi_isend(P(1,jej),n1,MPI_DOUBLE_PRECISION,
341.	     1           taskid+1,3,MPI_COMM_WORLD,req(6),ierr)
342.	         endif
343.	         cALL MPI_WAITALL(6,req,istat,ierr)
344. 	C$OMP PARALLEL DO
345.	C$OMP&SHARED (FSDY,FSDX,M,N,U,V,P,CU,CV,Z,H,js,je,jsj,jej)
346.	C$OMP&PRIVATE (I,J)
347.	         DO J=jsj,jej
348.	            DO I=1,M
349.	               CU(I+1,J) = .5*(P(I+1,J)+P(I,J))*U(I+1,J)
350.	               if(j.gt.1)then 
351.	                  CV(I,J)  = .5*(P(I,J)+P(I,J-1))*V(I,J)
352.	                  Z(I+1,J) = (FSDX*(V(I+1,J)-V(I,J))-FSDY*(U(I+1,J)
353.	     1                 -U(I+1,J-1)))/(P(I,J-1)+P(I+1,J-1)+P(I+1,J)+P(I,J))
354.	               endif
355.	               if(j.eq.n)then
356.	                  CV(I,J+1)  = .5*(P(I,J+1)+P(I,J))*V(I,J+1)
357.	                  Z(I+1,J+1) = (FSDX*(V(I+1,J+1)-V(I,J+1))-FSDY*(U(I+1,J+1)
358.	     1                 -U(I+1,J)))/(P(I,J)+P(I+1,J)+P(I+1,J+1)+P(I,J+1))
359.	               endif
360.	               H(I,J) = P(I,J)+.25*(U(I+1,J)*U(I+1,J)+U(I,J)*U(I,J)
361.	     1              +V(I,J+1)*V(I,J+1)+V(I,J)*V(I,J))
362.	            end do
363.	 100     end do
364.	C     - -------------------------------------------
365.	C     
366.	C     PERIODIC CONTINUATION
367.	C     
368.	         DO J=js,je
369.	            CU(1,J) = CU(M+1,J)
370.	            H(M+1,J) = H(1,J)
371.	            if(j.gt.1)then
372.	               Z(1,J) = Z(M+1,J)
373.	               CV(M+1,J) = CV(1,J)
374.	            endif
375.	            if(j.eq.n)then
376.	               Z(1,J+1) = Z(M+1,J+1)
377.	               CV(M+1,J+1) = CV(1,J+1)
378.	            endif
379.	 110     end do
380.	         if(taskid.eq.0)then
381.	            cALL mpi_irecv(cv(1,1),n1,MPI_DOUBLE_PRECISION,
382.	     1           numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
383.	            cALL mpi_irecv(z(1,1),n1,MPI_DOUBLE_PRECISION,
384.	     1           numtasks-1,2,MPI_COMM_WORLD,req(2),ierr)
385.	         endif
386.	         if(taskid.eq.numtasks-1)then
387.	            cALL mpi_irecv(cu(1,n+1),n1,MPI_DOUBLE_PRECISION,
388.	     1           0,3,MPI_COMM_WORLD,req(3),ierr)
389.	            cALL mpi_irecv(h(1,n+1),n1,MPI_DOUBLE_PRECISION,
390.	     1           0,4,MPI_COMM_WORLD,req(4),ierr)
391.	         endif
392.	         if(taskid.eq.numtasks-1)then
393.	            cALL mpi_isend(cv(1,n+1),n1,MPI_DOUBLE_PRECISION,
394.	     1           0,1,MPI_COMM_WORLD,req(5),ierr)
395.	            cALL mpi_isend(z(1,n+1),n1,MPI_DOUBLE_PRECISION,
396.	     1           0,2,MPI_COMM_WORLD,req(6),ierr)
397.	         endif
398.	         if(taskid.eq.0)then
399.	            cALL mpi_isend(cu(1,1),n1,MPI_DOUBLE_PRECISION,
400.	     1           numtasks-1,3,MPI_COMM_WORLD,req(7),ierr)
401.	            cALL mpi_isend(h(1,1),n1,MPI_DOUBLE_PRECISION,
402.	     1           numtasks-1,4,MPI_COMM_WORLD,req(8),ierr)
403.	         endif
404.	         if(taskid.eq.0.or.taskid.eq.numtasks-1)then
405.	            cALL MPI_WAITALL(8,req,istat,ierr)
406.	         endif
407.	         if(taskid.eq.numtasks-1)then
408.	            cALL mpi_irecv(Cu(1,N+1),1,MPI_DOUBLE_PRECISION,
409.	     1           0,1,MPI_COMM_WORLD,req(1),ierr)
410.	            cALL mpi_irecv(H(M+1,N+1),1,MPI_DOUBLE_PRECISION,
411.	     1           0,2,MPI_COMM_WORLD,req(2),ierr)
412.	         endif
413.	         if(taskid.eq.0)then
414.	            cALL mpi_irecv(Cv(M+1,1),1,MPI_DOUBLE_PRECISION,
415.	     1           numtasks-1,3,MPI_COMM_WORLD,req(3),ierr)
416.	            cALL mpi_irecv(Z(1,1),1,MPI_DOUBLE_PRECISION,
417.	     1           numtasks-1,4,MPI_COMM_WORLD,req(4),ierr)
418.	         endif
419.	         if(taskid.eq.numtasks-1)then
420.	            cALL mpi_isend(Cv(1,N+1),1,MPI_DOUBLE_PRECISION,
421.	     1           0,3,MPI_COMM_WORLD,req(5),ierr)
422.	            cALL mpi_isend(z(m+1,n+1),1,MPI_DOUBLE_PRECISION,
423.	     1           0,4,MPI_COMM_WORLD,req(6),ierr)
424.	         endif
425.	         if(taskid.eq.0)then
426.	            cALL mpi_isend(Cu(M+1,1),1,MPI_DOUBLE_PRECISION,
427.	     1           numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)
428.	            cALL mpi_isend(h(1,1),1,MPI_DOUBLE_PRECISION,
429.	     1           numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
430.	         endif
431.	         if(taskid.eq.0.or.taskid.eq.numtasks-1)then
432.	            cALL MPI_WAITALL(8,req,istat,ierr)
433.	         endif
434.	         
435.	C     COMPUTE NEW VALUES U,V AND P
436.	         TDTS8 = TDT/8.
437.	         TDTSDX = TDT/DX
438.	         TDTSDY = TDT/DY
439.	        call system_clock(count=c1,count_rate=r,count_max=max)
440.	        T200 = c1
441.	C---------------------------------------------------
442.	         if(taskid.lt.numtasks-1)then
443.	            cALL mpi_irecv(CV(1,jej+1),n1,MPI_DOUBLE_PRECISION,
444.	     1           taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
445.	            cALL mpi_irecv(Z(1,jej+1),n1,MPI_DOUBLE_PRECISION,
446.	     1           taskid+1,2,MPI_COMM_WORLD,req(2),ierr)
447.	         endif
448.	         if(taskid.gt.0)then
449.	            cALL mpi_isend(CV(1,jsj),n1,MPI_DOUBLE_PRECISION,
450.	     1           taskid-1,1,MPI_COMM_WORLD,req(5),ierr)
451.	            cALL mpi_isend(Z(1,jsj),n1,MPI_DOUBLE_PRECISION,
452.	     1           taskid-1,2,MPI_COMM_WORLD,req(6),ierr)
453.	         endif
454.	         if(taskid.gt.0)then
455.	            cALL mpi_irecv(H(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
456.	     1           taskid-1,3,MPI_COMM_WORLD,req(3),ierr)
457.	            cALL mpi_irecv(CU(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
458.	     1           taskid-1,4,MPI_COMM_WORLD,req(4),ierr)
459.	        endif
460.	         if(taskid.lt.numtasks-1)then
461.	            cALL mpi_isend(H(1,jej),n1,MPI_DOUBLE_PRECISION,
462.	     1           taskid+1,3,MPI_COMM_WORLD,req(7),ierr)
463.	            cALL mpi_isend(CU(1,jej),n1,MPI_DOUBLE_PRECISION,
464.	     1           taskid+1,4,MPI_COMM_WORLD,req(8),ierr)
465.	         endif
466.	         cALL MPI_WAITALL(8,req,istat,ierr)
467.	C-------------------------------------------
468.	C     
469.	C     PERIODIC CONTINUATION
470.	C$OMP PARALLEL
471.	C$OMP&SHARED (TDTSDX,TDTS8,TDTSDY,M,N,UNEW,VNEW,PNEW,UOLD,VOLD,POLD)
472.	C$OMP&SHARED(CU,CV,Z,H,js,je,jsj,jej)
473.	C$OMP&PRIVATE (I,J)
474.	C$OMP DO
475.	         DO J=jsj,jej
476.	            DO I=1,M
477.	               UNEW(I+1,J) = UOLD(I+1,J)+
478.	     1              TDTS8*(Z(I+1,J+1)+Z(I+1,J))*(CV(I+1,J+1)+CV(I,J+1)+CV(I,J)
479.	     2              +CV(I+1,J))-TDTSDX*(H(I+1,J)-H(I,J))
480.	               if(j.gt.1)then
481.	                  VNEW(I,J) = VOLD(I,J)-TDTS8*(Z(I+1,J)+Z(I,J))
482.	     1                 *(CU(I+1,J)+CU(I,J)+CU(I,J-1)+CU(I+1,J-1))
483.	     2                 -TDTSDY*(H(I,J)-H(I,J-1))
484.	               endif
485.	               if(j.eq.n)then
486.	                  VNEW(I,J+1) = VOLD(I,J+1)-TDTS8*(Z(I+1,J+1)+Z(I,J+1))
487.	     1                 *(CU(I+1,J+1)+CU(I,J+1)+CU(I,J)+CU(I+1,J))
488.	     2                 -TDTSDY*(H(I,J+1)-H(I,J))
489.	               endif
490.	               PNEW(I,J) = POLD(I,J)-TDTSDX*(CU(I+1,J)-CU(I,J))
491.	     1              -TDTSDY*(CV(I,J+1)-CV(I,J))
492.	            end do
493.	         end do
494.	C$OMP END PARALLEL
495.	       call system_clock(count=c2, count_rate=r,count_max=max)
496.	       T200 = dble(c2 - T200)/dble(r)
497.	C-----------------------------------------------------
498.	C     
499.	C     PERIODIC CONTINUATION
500.	C     
501.	         DO J=1,N
502.	            UNEW(1,J) = UNEW(M+1,J)
503.	            PNEW(M+1,J) = PNEW(1,J)
504.	            if(j.gt.1)then
505.	               VNEW(M+1,J) = VNEW(1,J)
506.	            endif
507.	            if(j.eq.n)then
508.	               VNEW(M+1,J+1) = VNEW(1,J+1)
509.	            endif
510.	 210     end do
511.	         if(taskid.eq.0)then
512.	            cALL mpi_irecv(VNEW(1,1),n1,MPI_DOUBLE_PRECISION,
513.	     1           numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
514.	         endif
515.	         if(taskid.eq.numtasks-1)then
516.	            cALL mpi_irecv(UNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
517.	     1           0,2,MPI_COMM_WORLD,req(2),ierr)
518.	            cALL mpi_irecv(PNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
519.	     1           0,3,MPI_COMM_WORLD,req(3),ierr)
520.	         endif
521.	         if(taskid.eq.numtasks-1)then
522.	            cALL mpi_isend(VNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
523.	     1           0,1,MPI_COMM_WORLD,req(4),ierr)
524.	         endif
525.	         if(taskid.eq.0)then
526.	            cALL mpi_isend(UNEW(1,1),n1,MPI_DOUBLE_PRECISION,
527.	     1           numtasks-1,2,MPI_COMM_WORLD,req(5),ierr)
528.	            cALL mpi_isend(PNEW(1,1),n1,MPI_DOUBLE_PRECISION,
529.	     1           numtasks-1,3,MPI_COMM_WORLD,req(6),ierr)
530.	         endif
531.	         if(taskid.eq.0.or.taskid.eq.numtasks-1)then
532.	            cALL MPI_WAITALL(6,req,istat,ierr)
533.	         endif
534.	         if(taskid.eq.numtasks-1)then
535.	            cALL mpi_irecv(UNEW(1,N+1),1,MPI_DOUBLE_PRECISION,
536.	     1           0,2,MPI_COMM_WORLD,req(1),ierr)
537.	            cALL mpi_irecv(PNEW(M+1,N+1),1,MPI_DOUBLE_PRECISION,
538.	     1           0,3,MPI_COMM_WORLD,req(2),ierr)
539.	         endif
540.	         if(taskid.eq.0)then
541.	            cALL mpi_irecv(VNEW(M+1,1),1,MPI_DOUBLE_PRECISION,
542.	     1           numtasks-1,1,MPI_COMM_WORLD,req(3),ierr)
543.	         endif
544.	         if(taskid.eq.0)then
545.	            cALL mpi_isend(UNEW(M+1,1),1,MPI_DOUBLE_PRECISION,
546.	     1           numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
547.	            cALL mpi_isend(PNEW(1,1),1,MPI_DOUBLE_PRECISION,
548.	     1           numtasks-1,3,MPI_COMM_WORLD,req(5),ierr)
549.	         endif
550.	         if(taskid.eq.numtasks-1)then
551.	            cALL mpi_isend(VNEW(1,n+1),1,MPI_DOUBLE_PRECISION,
552.	     1           0,1,MPI_COMM_WORLD,req(6),ierr)
553.	         endif
554.	         if(taskid.eq.0.or.taskid.eq.numtasks-1)then
555.	            cALL MPI_WAITALL(6,req,istat,ierr)
556.	         endif
557.	C     
558.	         TIME = TIME + DT
559.	* gather and write results
560.	      call mpi_gatherv(pnew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
561.	     +     precv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
562.	     +     master,MPI_COMM_WORLD,ierr)
563.	      
564.	      call mpi_gatherv(unew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
565.	     +     urecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
566.	     +     master,MPI_COMM_WORLD,ierr)
567.	      
568.	      call mpi_gatherv(vnew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
569.	     +     vrecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
570.	     +     master,MPI_COMM_WORLD,ierr)
571.	
572.	         PTIME = TIME/3600.
573.	         if(taskid .eq. MASTER) then
574.	            IF(MOD(NCYCLE,MPRINT) .EQ. 0) then
575.	               WRITE(6,355) (PRECV(I,I),I=1,MNMIN)
576.	 355           FORMAT(/' DIAGONAL ELEMENTS OF P ', //(8E15.7))
577.	               WRITE(6,360) (URECV(I,I),I=1,MNMIN)
578.	 360           FORMAT(/' DIAGONAL ELEMENTS OF U ', //(8E15.7))
579.	               WRITE(6,365) (VRECV(I,I),I=1,MNMIN)
580.	 365           FORMAT(/' DIAGONAL ELEMENTS OF V ', //(8E15.7))
581.	
582.	C     
583.	         MFS310 = 24.*M*N/T310/1.D5
584.	         MFS100 = 24.*M*N/T100/1.D5
585.	         MFS200 = 26.*M*N/T200/1.D5
586.	         MFS300 = 15.*M*N/T300/1.D5
587.	         call system_clock(count=c2,count_rate=r,count_max=max)
588.	         CTIME = dble(c2 - TSTART)/dble(r)
589.	         TCYC = CTIME/FLOAT(NCYCLE)
590.	         WRITE(6,375) NCYCLE,CTIME,TCYC
591.	 375      FORMAT(' CYCLE NUMBER', I5,' TOTAL COMPUTER TIME', D15.6,
592.	     +          'TIME PER CYCLE', D15.6)
593.	            endif
594.	         istart(3) = istart(3) + 1
595.	*     shape of record to be written (one ncycle at a time)
596.	      call my_ncwrite(ncid,p_id,istart,icount,precv,N1,N2)
597.	      call my_ncwrite(ncid,u_id,istart,icount,urecv,N1,N2)
598.	      call my_ncwrite(ncid,v_id,istart,icount,vrecv,N1,N2)
599.	
600.	c this endif is for whether task 0 is writing the output
601.	         endif
602.	 
603.
604.	   
605.	C     
606.	C     TIME SMOOTHING AND UPDATE FOR NEXT CYCLE
607.	C     
608.	         IF(NCYCLE .EQ. 1) THEN
609.	C     TIME SMOOTHER FOR FIRST ITERATION
610.	C     
611.	            TDT = TDT+TDT
612.	            call system_clock(count=c1, count_rate=r,count_max=max)
613.	            T310 = c1
614.	            DO J=jsj,jej
615.	               DO I=1,MP1
616.	                  UOLD(I,J) = U(I,J)
617.	                  VOLD(I,J) = V(I,J)
618.	                  POLD(I,J) = P(I,J)
619.	                  U(I,J)    = UNEW(I,J)
620.	                  V(I,J)    = VNEW(I,J)
621.	                  P(I,J)    = PNEW(I,J)
622.	                  if(j.eq.n)then
623.	                     UOLD(I,J+1) = U(I,J+1)
624.	                     VOLD(I,J+1) = V(I,J+1)
625.	                     POLD(I,J+1) = P(I,J+1)
626.	                     U(I,J+1)    = UNEW(I,J+1)
627.	                     V(I,J+1)    = VNEW(I,J+1)
628.	                     P(I,J+1)    = PNEW(I,J+1)
629.	                  endif
630.	               end do
631.	 400        end do
632.	            call system_clock(count=c2, count_rate=r, count_max=max)
633.	            T310 = dble(c2 - T310)/dble(r)
634.	         ELSE
635.	           call system_clock(count=c1,count_rate=r,count_max=max)
636.	           T300 = c1
637.	C     
638.	C     TIME SMOOTHING AND UPDATE FOR NEXT CYCLE
639.	C     
640.	C$OMP PARALLEL
641.	C$OMP&SHARED (ALPHA,M,N,U,V,P,UNEW,VNEW,PNEW,UOLD,VOLD,POLD)
642.	C$OMP&SHARED (JS,JE,jsj,jej)
643.	C$OMP&PRIVATE (I,J)
644.	            I = 30+omp_get_thread_num()
645.	C$OMP DO
646.	            DO J=jsj,jej
647.	               DO I=1,M
648.	                  UOLD(I,J) = U(I,J)+ALPHA*(UNEW(I,J)-2.*U(I,J)+UOLD(I,J))
649.	                  VOLD(I,J) = V(I,J)+ALPHA*(VNEW(I,J)-2.*V(I,J)+VOLD(I,J))
650.	                  POLD(I,J) = P(I,J)+ALPHA*(PNEW(I,J)-2.*P(I,J)+POLD(I,J))
651.	                  U(I,J) = UNEW(I,J)
652.	                  V(I,J) = VNEW(I,J)
653.	                  P(I,J) = PNEW(I,J)
654.	               end do
655.	 300        end do
656.	C$OMP END PARALLEL
657.	         call system_clock(count=c2,count_rate=r, count_max=max)
658.	         T300 = dble(c2 - T300)/dble(r)
659.	C-------------------------------------------
660.	C
661.	C     PERIODIC CONTINUATION
662.	C
663.	C----------------------------------------------
664.	            DO J=1,N
665.	               UOLD(M+1,J) = UOLD(1,J)
666.	               VOLD(M+1,J) = VOLD(1,J)
667.	               POLD(M+1,J) = POLD(1,J)
668.	               U(M+1,J) = U(1,J)
669.	               V(M+1,J) = V(1,J)
670.	               P(M+1,J) = P(1,J)
671.	 320        end do
672.	C------------------------------------------------------
673.	            if(taskid.eq.numtasks-1)then                        
674.	               cALL mpi_irecv(v(1,n+1),n1,MPI_DOUBLE_PRECISION,     
675.	     1              0,1,MPI_COMM_WORLD,req(1),ierr)
676.	               cALL mpi_irecv(u(1,n+1),n1,MPI_DOUBLE_PRECISION,     
677.	     1              0,2,MPI_COMM_WORLD,req(2),ierr)         
678.	               cALL mpi_irecv(p(1,n+1),n1,MPI_DOUBLE_PRECISION,     
679.	     1              0,3,MPI_COMM_WORLD,req(3),ierr)         
680.	               cALL mpi_irecv(vold(1,n+1),n1,MPI_DOUBLE_PRECISION,     
681.	     1              0,4,MPI_COMM_WORLD,req(4),ierr)
682.	               cALL mpi_irecv(uold(1,n+1),n1,MPI_DOUBLE_PRECISION,     
683.	     1              0,5,MPI_COMM_WORLD,req(5),ierr)         
684.	               cALL mpi_irecv(pold(1,n+1),n1,MPI_DOUBLE_PRECISION,     
685.	     1              0,6,MPI_COMM_WORLD,req(6),ierr)         
686.	            endif
687.	            if(taskid.eq.0)then
688.	               cALL mpi_isend(v(1,1),n1,MPI_DOUBLE_PRECISION,      
689.	     1              numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)                        
690.	               cALL mpi_isend(u(1,1),n1,MPI_DOUBLE_PRECISION,      
691.	     1              numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
692.	               cALL mpi_isend(p(1,1),n1,MPI_DOUBLE_PRECISION,      
693.	     1              numtasks-1,3,MPI_COMM_WORLD,req(9),ierr)
694.	               cALL mpi_isend(vold(1,1),n1,MPI_DOUBLE_PRECISION,      
695.	     1              numtasks-1,4,MPI_COMM_WORLD,req(10),ierr)                        
696.	               cALL mpi_isend(uold(1,1),n1,MPI_DOUBLE_PRECISION,      
697.	     1              numtasks-1,5,MPI_COMM_WORLD,req(11),ierr)
698.	               cALL mpi_isend(pold(1,1),n1,MPI_DOUBLE_PRECISION,      
699.	     1              numtasks-1,6,MPI_COMM_WORLD,req(12),ierr)
700.	            endif 
701.	            if(taskid.eq.0.or.taskid.eq.numtasks-1)then
702.	               cALL MPI_WAITALL(12,req,istat,ierr)
703.	            endif
704.	            if(taskid.eq.numtasks-1)then                        
705.	               cALL mpi_irecv(v(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
706.	     1              0,1,MPI_COMM_WORLD,req(1),ierr)
707.	               cALL mpi_irecv(u(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
708.	     1              0,2,MPI_COMM_WORLD,req(2),ierr)         
709.	               cALL mpi_irecv(p(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
710.	     1              0,3,MPI_COMM_WORLD,req(3),ierr)         
711.	               cALL mpi_irecv(vold(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
712.	     1              0,4,MPI_COMM_WORLD,req(4),ierr)
713.	               cALL mpi_irecv(uold(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
714.	     1              0,5,MPI_COMM_WORLD,req(5),ierr)         
715.	               cALL mpi_irecv(pold(m+1,n+1),1,MPI_DOUBLE_PRECISION,     
716.	     1              0,6,MPI_COMM_WORLD,req(6),ierr)         
717.	            endif
718.	            if(taskid.eq.0)then
719.	               cALL mpi_isend(v(1,1),1,MPI_DOUBLE_PRECISION,      
720.	     1              numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)                        
721.	               cALL mpi_isend(u(1,1),1,MPI_DOUBLE_PRECISION,      
722.	     1              numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
723.	               cALL mpi_isend(p(1,1),1,MPI_DOUBLE_PRECISION,      
724.	     1              numtasks-1,3,MPI_COMM_WORLD,req(9),ierr)
725.	               cALL mpi_isend(vold(1,1),1,MPI_DOUBLE_PRECISION,      
726.	     1              numtasks-1,4,MPI_COMM_WORLD,req(10),ierr)                        
727.	               cALL mpi_isend(uold(1,1),1,MPI_DOUBLE_PRECISION,      
728.	     1              numtasks-1,5,MPI_COMM_WORLD,req(11),ierr)
729.	               cALL mpi_isend(pold(1,1),1,MPI_DOUBLE_PRECISION,      
730.	     1              numtasks-1,6,MPI_COMM_WORLD,req(12),ierr)
731.	            endif
732.	            if(taskid.eq.0.or.taskid.eq.numtasks-1)then
733.	               cALL MPI_WAITALL(12,req,istat,ierr)
734.	            endif
735.	         endif
736.	C     
737.	*     end big loop
738.	      end do
739.	         
740.	*     close the netCDF file
741.	      iret = nf_close(ncid)
742.	      call check_err(iret)
743.	
744.	      CALL MPI_FINALIZE(ierr)
745.	      STOP
746.	      end
747.	*-------netcdf helper routines----------------------
748.	      subroutine check_err(iret)
749.	      integer iret
750.	      include 'netcdf.inc'
751.	      if(iret .ne. NF_NOERR) then
752.	         print *, nf_strerror(iret)
753.	         stop
754.	      endif
755.	      end
756.	      
757.	      subroutine netcdf_setup(N1,N2,ncid,p_id,u_id,v_id,istart,icount)
758.	*     Input args: N1, N2
759.	*     Output args: ncid, p_id, u_id, v_id, istart, icount
760.	      integer N1,N2
761.	*     declarations for netCDF library
762.	      include 'netcdf.inc'
763.	*     error status return
764.	      integer iret
765.	*     netCDF id
766.	      integer ncid
767.	*     dimension ids
768.	      integer m_dim
769.	      integer n_dim
770.	      integer time_dim      
771.	*     variable ids
772.	      integer p_id
773.	      integer u_id
774.	      integer v_id
775.	*     rank (number of dimensions) for each variable
776.	      integer p_rank, u_rank, v_rank
777.	      parameter (p_rank = 3)
778.	      parameter (u_rank = 3)
779.	      parameter (v_rank = 3)
780.	*     variable shapes
781.	      integer p_dims(p_rank)
782.	      integer u_dims(u_rank)
783.	      integer v_dims(v_rank)
784.	      integer istart(p_rank)
785.	      integer icount(p_rank)
786.	      
787.	*     enter define mode
788.	      iret = nf_create('shallowdat.nc', NF_CLOBBER,
789.	     +     ncid)
790.	      call check_err(iret)
791.	*     define dimensions
792.	      iret = nf_def_dim(ncid, 'm', N1, m_dim)
793.	      call check_err(iret)
794.	      iret = nf_def_dim(ncid, 'n', N2, n_dim)
795.	      call check_err(iret)
796.	      iret = nf_def_dim(ncid, 'time', NF_UNLIMITED, time_dim)
797.	      call check_err(iret)
798.	*     define variables
799.	      p_dims(1) = m_dim
800.	      p_dims(2) = n_dim
801.	      p_dims(3) = time_dim
802.	      iret = nf_def_var(ncid, 'p', NF_DOUBLE, p_rank, p_dims, p_id)
803.	      call check_err(iret)
804.	      u_dims(1) = m_dim
805.	      u_dims(2) = n_dim
806.	      u_dims(3) = time_dim
807.	      iret = nf_def_var(ncid, 'u', NF_DOUBLE, u_rank, u_dims, u_id)
808.	      call check_err(iret)
809.	      v_dims(1) = m_dim
810.	      v_dims(2) = n_dim
811.	      v_dims(3) = time_dim
812.	      iret = nf_def_var(ncid, 'v', NF_DOUBLE, v_rank, v_dims, v_id)
813.	      call check_err(iret)
814.	*     start netCDF write at the first (1,1,1) position of the array
815.	      istart(1) = 1
816.	      istart(2) = 1
817.	      istart(3) = 1
818.	*     shape of record to be written (one ncycle at a time)
819.	      icount(1) = N1
820.	      icount(2) = N2
821.	      icount(3) = 1
822.	      
823.	*     leave define mode
824.	      iret = nf_enddef(ncid)
825.	      call check_err(iret)
826.	      
827.	*     end of netCDF definitions
828.	      end subroutine netcdf_setup
829.	      
830.	      subroutine my_ncwrite(id,varid,istart,icount,var,N1,N2)
831.	*     Input args: id, varid,istart,icount,var,N1,N2
832.	*     Write a whole array out to the netCDF file
833.	      integer id,varid,iret
834.	      integer icount(3)
835.	      integer istart(3)
836.	      real var (N1,N2)
837.	      iret = nf_put_vara_double(id,varid,istart,icount,var)
838.	      call check_err(iret)
839.	      
840.	      end subroutine my_ncwrite
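
All of the boundary-row exchanges in the listing follow the same nonblocking pattern: post mpi_irecv for the rows a task needs from its neighbors, post mpi_isend for the rows it owns, then call MPI_WAITALL before the next loop touches that data. The free-form program below is a minimal standalone sketch of that pattern, not part of shallow7.F; the file name, the use of the 'use mpi' module, and the single-precision MPI_REAL buffers are illustrative choices, not taken from the listing.

    ! halo_sketch.f90 -- standalone sketch of the nonblocking exchange pattern.
    program halo_sketch
      use mpi
      implicit none
      integer, parameter :: n1 = 65, nreq = 4
      real    :: field(n1,0:3)                 ! two owned rows (1,2) plus halo rows (0,3)
      integer :: req(nreq), istat(MPI_STATUS_SIZE,nreq)
      integer :: ierr, taskid, numtasks

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, taskid, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, numtasks, ierr)

      req   = MPI_REQUEST_NULL                 ! unused slots stay null, as in the listing
      field = real(taskid)

      ! receive the neighbors' edge rows into my halo rows
      if (taskid > 0)          call MPI_Irecv(field(1,0), n1, MPI_REAL, taskid-1, 1, &
                                              MPI_COMM_WORLD, req(1), ierr)
      if (taskid < numtasks-1) call MPI_Irecv(field(1,3), n1, MPI_REAL, taskid+1, 2, &
                                              MPI_COMM_WORLD, req(2), ierr)
      ! send my edge rows to the neighbors
      if (taskid < numtasks-1) call MPI_Isend(field(1,2), n1, MPI_REAL, taskid+1, 1, &
                                              MPI_COMM_WORLD, req(3), ierr)
      if (taskid > 0)          call MPI_Isend(field(1,1), n1, MPI_REAL, taskid-1, 2, &
                                              MPI_COMM_WORLD, req(4), ierr)

      call MPI_Waitall(nreq, req, istat, ierr)
      if (taskid == 0) print *, 'halo exchange complete on', numtasks, 'tasks'
      call MPI_Finalize(ierr)
    end program halo_sketch

Initializing the request array to MPI_REQUEST_NULL, as the listing does, lets MPI_WAITALL safely ignore the slots a given task never posts.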

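The gathers of results onto task 0 follow a second reusable pattern: MPI_Gather collects each task's chunk size, the master builds a displacement array from those counts, and MPI_Gatherv then assembles the unequal chunks, with the leftover elements assigned to the last task. The free-form program below is a standalone sketch of that sequence, not part of shallow7.F; again the file name, 'use mpi', and MPI_REAL are assumptions made only for illustration.

    ! gatherv_sketch.f90 -- standalone sketch of the counts/displacements gather.
    program gatherv_sketch
      use mpi
      implicit none
      integer, parameter :: ntot = 65*65        ! total number of values, as in the model
      integer :: ierr, taskid, numtasks, chunksize, extra, i
      integer :: sendcount(1)
      integer, allocatable :: irecvcounts(:), idisp(:)
      real,    allocatable :: mydata(:), recvbuf(:)

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, taskid, ierr)
      call MPI_Comm_size(MPI_COMM_WORLD, numtasks, ierr)

      ! the last task takes the leftover elements, as in the listing
      extra     = mod(ntot, numtasks)
      chunksize = ntot/numtasks
      if (taskid == numtasks-1) chunksize = chunksize + extra

      allocate(mydata(chunksize))
      mydata = real(taskid)                     ! dummy payload

      ! gather every task's chunk size onto the master
      allocate(irecvcounts(0:numtasks-1), idisp(0:numtasks-1))
      sendcount(1) = chunksize
      call MPI_Gather(sendcount, 1, MPI_INTEGER, irecvcounts, 1, MPI_INTEGER, &
                      0, MPI_COMM_WORLD, ierr)

      ! master computes displacements and allocates the receive buffer
      if (taskid == 0) then
         idisp(0) = 0
         do i = 1, numtasks-1
            idisp(i) = idisp(i-1) + irecvcounts(i-1)
         end do
         allocate(recvbuf(ntot))
      else
         allocate(recvbuf(1))                   ! ignored on non-root tasks
      end if

      call MPI_Gatherv(mydata, chunksize, MPI_REAL, recvbuf, irecvcounts, &
                       idisp, MPI_REAL, 0, MPI_COMM_WORLD, ierr)

      if (taskid == 0) print *, 'gathered', sum(irecvcounts), 'values on task 0'
      call MPI_Finalize(ierr)
    end program gatherv_sketch

The same counts and displacements can be reused for every gather in the time-step loop, which is why the listing computes them once before the loop begins.
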
If you have questions about this document, please contact SCD Customer Support. You can reach us by telephone 24 hours a day, seven days a week at 303-497-1278, by email at consult1@ucar.edu, or in person during business hours in NCAR Mesa Lab Suite 39.

© Copyright 2006. University Corporation for Atmospheric Research (UCAR). All Rights Reserved.