|
|
Example 7 - shallow7.F - Advanced hybrid parallelization example.Download shallow7.F code.
1. PROGRAM SHALOW
2. C BENCHMARK WEATHER PREDICTION PROGRAM FOR COMPARING THE
3. C PREFORMANCE OF CURRENT SUPERCOMPUTERS. THE MODEL IS
4. C BASED OF THE PAPER - THE DYNAMICS OF FINITE-DIFFERENCE
5. C MODELS OF THE SHALLOW-WATER EQUATIONS, BY ROBERT SADOURNY
6. C J. ATM. SCIENCES, VOL 32, NO 4, APRIL 1975.
7. C
8. C CODE BY PAUL N. SWARZTRAUBER, NATIONAL CENTER FOR
9. C ATMOSPHERIC RESEARCH, BOULDER, CO, OCTOBER 1984.
10. C
11. C MODIFIED BY R. K. SATO, NCAR, APRIL 7, 1986 FOR MULTITASKING.
12. * Modified by Juliana Rew, May 2006.
13. * This version of the model, shallow7.F, can be run in parallel
14. * on a variable number of processors rather than just two.
15. * The goal is to obtain results in less total wallclock time
16. * Initial and calculated values
17. * of U, V, and P are written to a netCDF file
18. * for later use in visualizing the results. The netCDF data
19. * management library is freely available from
20. * http://www.unidata.ucar.edu/software/netcdf
21. * OpenMP parallel dos are used in the major worker
22. * loops.
23. * The modified code uses portable intrinsic Fortran 90 timing routines
24. * This can be compiled on the IBM SP using:
25. * mpxlf90_r -qmaxmem=-1 -g -o shallow7 -qsmp=omp -qfixed=132 -qsclk=micro \
26. * -I/usr/local/include shallow7.F -L/usr/local/lib64/r4i4 -l netcdf
27. * where mpxlf90_r is the variant of XLF on the IBM that brings in
28. * the MPI library, the -L and -I point to local installation of netCDF,
29. * and -qsmp=omp is XLF's option for OpenMP threading
30. * The MPI gathers and printout of results were commented out
31. * to make timings, as well as netCDF file writes. This is because
32. * writing to a file must be done serially to avoid each task clobbering
33. * the results of the other tasks. A future enhancement
34. * would be to parallelize writing of the results to the data file.
35.
36. integer N1,N2
37. PARAMETER (N1=65, N2=65)
38. real, dimension(N1,N2) :: U, V, P,UNEW, VNEW,PNEW, UOLD,VOLD,
39. + POLD,CU, CV,Z, H, PSI
40. C
41. integer ncid,p_id,u_id,v_id
42. integer istart(3)
43. integer icount(3)
44.
45. * Prepare netCDF file to receive model output data
46. * File writing will be done by MPI task 0, not in parallel
47.
48. include 'netcdf.inc'
49.
50. include "mpif.h"
51. integer js,je,jsj,jej,taskid,numtasks,req(16),
52. 1 istat(MPI_STATUS_SIZE,16)
53.
54. integer m,n
55. integer size
56. double precision MFS100, MFS200,MFS300
57. double precision T100,T200,T300,TSTART,CTIME,TCYC
58. integer c1,c2,r,max
59. C
60. real DT,TDT,DX,DY,A,ALPHA,EL,PI,TPI,DI,DJ,PCF
61. integer MASTER, itmax,mprint,mp1,np1
62. parameter (MASTER=0)
63. integer ierr, i,
64. & chunksize, extra
65. integer status(3)
66.
67. ******receive buffers for MPI gather calls of p,u,v values
68. real, dimension(:,:),allocatable:: precv, urecv,vrecv
69. ******variables to be used in gather before printing results
70. integer, allocatable:: idisp(:),irecvcounts(:),mychunk(:)
71. ** set up netcdf file
72. call netcdf_setup(N1,N2,ncid,p_id,u_id,v_id,istart,icount)
73. ******initialize MPI and get number of tasks working on this.
74. cALL MPI_INIT( ierr )
75.
76. cALL MPI_COMM_RANK( MPI_COMM_WORLD, taskid, ierr )
77. cALL MPI_COMM_SIZE( MPI_COMM_WORLD, numtasks, ierr )
78. C
79. C Initializing array req with MPI_REQUEST_NULL
80. C
81. do i=1,16
82. req(i) = MPI_REQUEST_NULL
83. end do
84. m=n2-1
85. n=n1-1
86. C
87. C INITIALIZE CONSTANTS AND ARRAYS
88. C
89. C NOTE BELOW THAT TWO DELTA T (TDT) IS SET TO DT ON THE FIRST
90. C CYCLE AFTER WHICH IT IS RESET TO DT+DT.
91. C
92. DT = 90.
93.
94. DX = 1.E5
95. DY = 1.E5
96.
97. A = 1.E6
98. ALPHA = .001
99. ITMAX = 4000
100. MPRINT = 100
101. M = N1 - 1
102. N = N2 - 1
103. TDT = DT
104. MP1 = M+1
105. NP1 = N+1
106. EL = FLOAT(N)*DX
107. PI = 4.*ATAN(1.)
108. TPI = PI+PI
109. DI = TPI/FLOAT(M)
110. DJ = TPI/FLOAT(N)
111. PCF = PI*PI*A*A/(EL*EL)
112. js = 1 + (N/numtasks+1)*taskid
113. je = min(n2,js+N/numtasks)
114. * index for the gather
115. jsj = 1 + (N/numtasks)*taskid
116. jej = min(n2,jsj + N/numtasks)
117. C
118. C INITIAL VALUES OF THE STREAM FUNCTION AND P
119. C
120 DO J=jsj,jej
121. DO I=1,mp1
122. PSI(I,J) = A*SIN((FLOAT(I)-.5)*DI)*SIN((FLOAT(J)-.5)*DJ)
123. P(I,J) = PCF*(COS(2.*FLOAT(I-1)*DI)
124. 1 +COS(2.*FLOAT(J-1)*DJ))+50000.
125. if(j.eq.n)then
126. P(I,J+1) = PCF*(COS(2.*FLOAT(I-1)*DI)
127. 1 +COS(2.*FLOAT(J)*DJ))+50000.
128. endif
129. end do
130. 50 end do
131. C
132. C INITIALIZE VELOCITIES
133. C
134. if(taskid.lt.numtasks-1)then
135. cALL mpi_irecv(psi(1,jej+1),n1,MPI_DOUBLE_PRECISION,
136. 1 taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
137. endif
138. if(taskid.gt.0)then
139. cALL mpi_isend(psi(1,jsj),n1,MPI_DOUBLE_PRECISION,
140. 1 taskid-1,1,MPI_COMM_WORLD,req(2),ierr)
141. endif
142. cALL MPI_WAITALL(2,req,istat,ierr)
143. DO J=jsj,jej
144. DO I=1,M
145. if(i.eq.m) then
146. U(I+1,J) = -(PSI(I+1,J+1)-PSI(I+1,J))/DY
147. else
148. U(I,J) = -(PSI(I+1,J)-PSI(I,J))/DY
149. endif
150. if(j.gt.1)then
151. V(I,J) = (PSI(I+1,J)-PSI(I,J))/DX
152. endif
153. if(j.eq.n)then
154. V(I,J+1) = (PSI(I+1,J+1)-PSI(I,J+1))/DX
155. endif
156. end do
157. 60 end do
158. C
159. C PERIODIC CONTINUATION for U and V
160. C
161. DO J=1,N
162. U(1,J) = U(M+1,J)
163.
164. V(M+1,J) = V(1,J)
165. if(j.eq.n)then
166. V(M+1,J+1) = V(1,J+1)
167. endif
168. 70 end do
169. if(taskid.eq.0)then
170. cALL mpi_irecv(v(1,1),n1,MPI_DOUBLE_PRECISION,
171. 1 numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
172. endif
173. if(taskid.eq.numtasks-1)then
174. cALL mpi_irecv(u(1,n+1),n1,MPI_DOUBLE_PRECISION,
175. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
176. endif
177. if(taskid.eq.numtasks-1)then
178. cALL mpi_isend(v(1,n+1),n1,MPI_DOUBLE_PRECISION,
179. 1 0,1,MPI_COMM_WORLD,req(3),ierr)
180. endif
181. if(taskid.eq.0)then
182. cALL mpi_isend(u(1,1),n1,MPI_DOUBLE_PRECISION,
183. 1 numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
184. endif
185. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
186. cALL MPI_WAITALL(4,req,istat,ierr)
187. endif
188. if(taskid.eq.0)then
189. cALL mpi_irecv(v(m+1,1),1,MPI_DOUBLE_PRECISION,
190. 1 numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
191. endif
192. if(taskid.eq.numtasks-1)then
193. cALL mpi_irecv(u(1,n+1),1,MPI_DOUBLE_PRECISION,
194. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
195. endif
196. if(taskid.eq.numtasks-1)then
197. cALL mpi_isend(v(1,n+1),1,MPI_DOUBLE_PRECISION,
198. 1 0,1,MPI_COMM_WORLD,req(3),ierr)
199. endif
200. if(taskid.eq.0)then
201. cALL mpi_isend(u(M+1,1),1,MPI_DOUBLE_PRECISION,
202. 1 numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
203. endif
204. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
205. cALL MPI_WAITALL(4,req,istat,ierr)
206. endif
207. DO J=jsj,jej
208. DO I=1,mp1
209. UOLD(I,J) = U(I,J)
210. VOLD(I,J) = V(I,J)
211. POLD(I,J) = P(I,J)
212. if(j.eq.n)then
213. UOLD(I,J+1) = U(I,J+1)
214. VOLD(I,J+1) = V(I,J+1)
215. POLD(I,J+1) = P(I,J+1)
216. endif
217. end do
218. 86 end do
219. C END OF INITIALIZATION
220. C
221.
222. C PRINT INITIAL VALUES
223. C
224. * Each task has been working on 1/numtasks of the values. Printing
225. * will require gathering the values from each task.
226. * Gathering is also necessary if we want to create images
227. * from the data.
228. * Gathering is expensive (similar to doing I/O), so if you
229. * are "benchmarking" (seeing how fast the model performs), the
230. * gathering and printing should be commented out.
231. *
232. * The pold, uold, and vold arrays we wish to print are dimensioned
233. * 65x65, not evenly divisible. Each task's chunks will be approximately
234. * equal in size, but we will give any leftover to the final task
235.
236. extra=mod(n1*n2,numtasks)
237. if(taskid.eq. numtasks -1) then
238. chunksize=n1*n2/numtasks + extra
239. else
240. chunksize=n1*n2/numtasks
241. endif
242. allocate (mychunk(numtasks))
243. mychunk = chunksize
244.
245. * We then specify in the MPI gather call how big the chunks
246. * are and the displacement between chunks.
247. * Counts and displacements are only required on master
248. * gatherv is called by all the tasks,however
249. if(taskid == master)then
250. allocate(irecvcounts(0:numtasks-1))
251. allocate(idisp(0:numtasks-1))
252. endif
253. * gather the counts to the master
254.
255. call MPI_Gather(mychunk(1),1,MPI_INTEGER,
256. + irecvcounts,1, MPI_INTEGER,
257. + master, MPI_COMM_WORLD, ierr)
258. * calculate displacements and size of receive buffer
259. * (urecv array)
260. if(taskid == master) then
261. idisp(0) = 0
262. do i=1,numtasks-1,1
263. idisp(i)=irecvcounts(i-1)+idisp(i-1)
264. end do
265. * allocate receive buffers
266. allocate(precv(n1,n2))
267. allocate(urecv(n1,n2))
268. allocate(vrecv(n1,n2))
269. endif
270.
271. call mpi_gatherv(pold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
272. + precv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
273. + master,MPI_COMM_WORLD,ierr)
274.
275. call mpi_gatherv(uold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
276. + urecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
277. + master,MPI_COMM_WORLD,ierr)
278.
279. call mpi_gatherv(vold(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
280. + vrecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
281. + master,MPI_COMM_WORLD,ierr)
282.
283. if(taskid.eq.MASTER)then
284. WRITE(6,390) N,M,DX,DY,DT,ALPHA,ITMAX
285. 390 FORMAT(' NUMBER OF POINTS IN THE X DIRECTION', I8/
286. 1 ' NUMBER OF POINTS IN THE Y DIRECTION', I8/
287. 2 ' GRID SPACING IN THE X DIRECTION ', F8.0/
288. 3 ' GRID SPACING IN THE Y DIRECTION ', F8.0/
289. 4 ' TIME STEP ', F8.0/
290. 5 ' TIME FILTER PARAMETER ', F8.3/
291. 6 ' NUMBER OF ITERATIONS ', I8)
292. endif
293.
294. MNMIN = MIN0(M,N)
295. if(taskid.eq.master) then
296. WRITE(6,391) (PRECV(I,I),I=1,MNMIN)
297. 391 FORMAT(/' INITIAL DIAGONAL ELEMENTS OF P ', //(8E15.7))
298. WRITE(6,392) (URECV(I,I),I=1,MNMIN)
299. 392 FORMAT(/' INITIAL DIAGONAL ELEMENTS OF U ', //(8E15.7))
200. WRITE(6,393) (VRECV(I,I),I=1,MNMIN)
301. 393 FORMAT(/' INITIAL DIAGONAL ELEMENTS OF V ', //(8E15.7))
302. endif
303.
304. * write initial values of p, u, and v into a netCDF file
305. if(taskid.eq.MASTER) then
306. call my_ncwrite(ncid,p_id,istart,icount,precv,N1,N2)
307. call my_ncwrite(ncid,u_id,istart,icount,urecv,N1,N2)
308. call my_ncwrite(ncid,v_id,istart,icount,vrecv,N1,N2)
309. endif
310. TIME = 0
311. NCYCLE = 0
312. call system_clock (count=c1, count_rate=r, count_max=max)
313. TSTART = c1
314. T300 = 1.
315. do NCYCLE = 1,itmax
316. C
317. C COMPUTE CAPITAL U, CAPITAL V, Z AND H
318. C
319. FSDX = 4./DX
320. FSDY = 4./DY
321. C - ------------------------------------------------
322. if(taskid.lt.numtasks-1)then
323. cALL mpi_irecv(V(1,je+1),n1,MPI_DOUBLE_PRECISION,
324. 1 taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
325.
326. endif
327. if(taskid.gt.0)then
328. cALL mpi_isend(V(1,js),n1,MPI_DOUBLE_PRECISION,
329. 1 taskid-1,1,MPI_COMM_WORLD,req(4),ierr)
330. endif
331. if(taskid.gt.0)then
332. cALL mpi_irecv(U(1,js-1),n1,MPI_DOUBLE_PRECISION,
333. 1 taskid-1,2,MPI_COMM_WORLD,req(2),ierr)
334. cALL mpi_irecv(P(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
335. 1 taskid-1,3,MPI_COMM_WORLD,req(3),ierr)
336. endif
337. if(taskid.lt.numtasks-1)then
338. cALL mpi_isend(U(1,je),n1,MPI_DOUBLE_PRECISION,
339. 1 taskid+1,2,MPI_COMM_WORLD,req(5),ierr)
340. cALL mpi_isend(P(1,jej),n1,MPI_DOUBLE_PRECISION,
341. 1 taskid+1,3,MPI_COMM_WORLD,req(6),ierr)
342. endif
343. cALL MPI_WAITALL(6,req,istat,ierr)
344. C$OMP PARALLELDO
345. C$OMP&SHARED (FSDY,FSDX,M,N,U,V,P,CU,CV,Z,H,js,je,jsj,jej)
346. C$OMP&PRIVATE (I,J)
347. DO J=jsj,jej
348. DO I=1,M
349. CU(I+1,J) = .5*(P(I+1,J)+P(I,J))*U(I+1,J)
350. if(j.gt.1)then
351. CV(I,J) = .5*(P(I,J)+P(I,J-1))*V(I,J)
352. Z(I+1,J) = (FSDX*(V(I+1,J)-V(I,J))-FSDY*(U(I+1,J)
353. 1 -U(I+1,J-1)))/(P(I,J-1)+P(I+1,J-1)+P(I+1,J)+P(I,J))
354. endif
355. if(j.eq.n)then
356. CV(I,J+1) = .5*(P(I,J+1)+P(I,J))*V(I,J+1)
357. Z(I+1,J+1) = (FSDX*(V(I+1,J+1)-V(I,J+1))-FSDY*(U(I+1,J+1)
358. 1 -U(I+1,J)))/(P(I,J)+P(I+1,J)+P(I+1,J+1)+P(I,J+1))
359. endif
360. H(I,J) = P(I,J)+.25*(U(I+1,J)*U(I+1,J)+U(I,J)*U(I,J)
361. 1 +V(I,J+1)*V(I,J+1)+V(I,J)*V(I,J))
362. end do
363. 100 end do
364. C - -------------------------------------------
365. C
366. C PERIODIC CONTINUATION
367. C
368. DO J=js,je
369. CU(1,J) = CU(M+1,J)
370. H(M+1,J) = H(1,J)
371. if(j.gt.1)then
372. Z(1,J) = Z(M+1,J)
373. CV(M+1,J) = CV(1,J)
374. endif
375. if(j.eq.n)then
376. Z(1,J+1) = Z(M+1,J+1)
377. CV(M+1,J+1) = CV(1,J+1)
378. endif
379. 110 end do
380. if(taskid.eq.0)then
381. cALL mpi_irecv(cv(1,1),n1,MPI_DOUBLE_PRECISION,
382. 1 numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
383. cALL mpi_irecv(z(1,1),n1,MPI_DOUBLE_PRECISION,
384. 1 numtasks-1,2,MPI_COMM_WORLD,req(2),ierr)
385. endif
386. if(taskid.eq.numtasks-1)then
387. cALL mpi_irecv(cu(1,n+1),n1,MPI_DOUBLE_PRECISION,
388. 1 0,3,MPI_COMM_WORLD,req(3),ierr)
389. cALL mpi_irecv(h(1,n+1),n1,MPI_DOUBLE_PRECISION,
390. 1 0,4,MPI_COMM_WORLD,req(4),ierr)
391. endif
392. if(taskid.eq.numtasks-1)then
393. cALL mpi_isend(cv(1,n+1),n1,MPI_DOUBLE_PRECISION,
394. 1 0,1,MPI_COMM_WORLD,req(5),ierr)
395. cALL mpi_isend(z(1,n+1),n1,MPI_DOUBLE_PRECISION,
396. 1 0,2,MPI_COMM_WORLD,req(6),ierr)
397. endif
398. if(taskid.eq.0)then
399. cALL mpi_isend(cu(1,1),n1,MPI_DOUBLE_PRECISION,
400. 1 numtasks-1,3,MPI_COMM_WORLD,req(7),ierr)
401. cALL mpi_isend(h(1,1),n1,MPI_DOUBLE_PRECISION,
402. 1 numtasks-1,4,MPI_COMM_WORLD,req(8),ierr)
403. endif
404. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
405. cALL MPI_WAITALL(8,req,istat,ierr)
406. endif
407. if(taskid.eq.numtasks-1)then
408. cALL mpi_irecv(Cu(1,N+1),1,MPI_DOUBLE_PRECISION,
409. 1 0,1,MPI_COMM_WORLD,req(1),ierr)
410. cALL mpi_irecv(H(M+1,N+1),1,MPI_DOUBLE_PRECISION,
411. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
412. endif
413. if(taskid.eq.0)then
414. cALL mpi_irecv(Cv(M+1,1),1,MPI_DOUBLE_PRECISION,
415. 1 numtasks-1,3,MPI_COMM_WORLD,req(3),ierr)
416. cALL mpi_irecv(Z(1,1),1,MPI_DOUBLE_PRECISION,
417. 1 numtasks-1,4,MPI_COMM_WORLD,req(4),ierr)
418. endif
419. if(taskid.eq.numtasks-1)then
420. cALL mpi_isend(Cv(1,N+1),1,MPI_DOUBLE_PRECISION,
421. 1 0,3,MPI_COMM_WORLD,req(5),ierr)
422. cALL mpi_isend(z(m+1,n+1),1,MPI_DOUBLE_PRECISION,
423. 1 0,4,MPI_COMM_WORLD,req(6),ierr)
424. endif
425. if(taskid.eq.0)then
426. cALL mpi_isend(Cu(M+1,1),1,MPI_DOUBLE_PRECISION,
427. 1 numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)
428. cALL mpi_isend(h(1,1),1,MPI_DOUBLE_PRECISION,
429. 1 numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
430. endif
431. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
432. cALL MPI_WAITALL(8,req,istat,ierr)
433. endif
434.
435. C COMPUTE NEW VALUES U,V AND P
436. TDTS8 = TDT/8.
437. TDTSDX = TDT/DX
438. TDTSDY = TDT/DY
439. call system_clock(count=c1,count_rate=r,count_max=max)
440. T200 = c1
441. C---------------------------------------------------
442. if(taskid.lt.numtasks-1)then
443. cALL mpi_irecv(CV(1,jej+1),n1,MPI_DOUBLE_PRECISION,
444. 1 taskid+1,1,MPI_COMM_WORLD,req(1),ierr)
445. cALL mpi_irecv(Z(1,jej+1),n1,MPI_DOUBLE_PRECISION,
446. 1 taskid+1,2,MPI_COMM_WORLD,req(2),ierr)
447. endif
448. if(taskid.gt.0)then
449. cALL mpi_isend(CV(1,jsj),n1,MPI_DOUBLE_PRECISION,
450. 1 taskid-1,1,MPI_COMM_WORLD,req(5),ierr)
451. cALL mpi_isend(Z(1,jsj),n1,MPI_DOUBLE_PRECISION,
452. 1 taskid-1,2,MPI_COMM_WORLD,req(6),ierr)
453. endif
454. if(taskid.gt.0)then
455. cALL mpi_irecv(H(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
456. 1 taskid-1,3,MPI_COMM_WORLD,req(3),ierr)
457. cALL mpi_irecv(CU(1,jsj-1),n1,MPI_DOUBLE_PRECISION,
458. 1 taskid-1,4,MPI_COMM_WORLD,req(4),ierr)
459. endif
460. if(taskid.lt.numtasks-1)then
461. cALL mpi_isend(H(1,jej),n1,MPI_DOUBLE_PRECISION,
462. 1 taskid+1,3,MPI_COMM_WORLD,req(7),ierr)
463. cALL mpi_isend(CU(1,jej),n1,MPI_DOUBLE_PRECISION,
464. 1 taskid+1,4,MPI_COMM_WORLD,req(8),ierr)
465. endif
466. cALL MPI_WAITALL(8,req,istat,ierr)
467. C-------------------------------------------
468. C
469. C PERIODIC CONTINUATION
470. C$OMP PARALLEL
471. C$OMP&SHARED (TDTSDX,TDTS8,TDTSDY,M,N,UNEW,VNEW,PNEW,UOLD,VOLD,POLD)
472. C$OMP&SHARED(CU,CV,Z,H,js,je,jsj,jej)
473. C$OMP&PRIVATE (I,J)
474. C$OMP DO
475. DO J=jsj,jej
476. DO I=1,M
477. UNEW(I+1,J) = UOLD(I+1,J)+
478. 1 TDTS8*(Z(I+1,J+1)+Z(I+1,J))*(CV(I+1,J+1)+CV(I,J+1)+CV(I,J)
479. 2 +CV(I+1,J))-TDTSDX*(H(I+1,J)-H(I,J))
480. if(j.gt.1)then
481. VNEW(I,J) = VOLD(I,J)-TDTS8*(Z(I+1,J)+Z(I,J))
482. 1 *(CU(I+1,J)+CU(I,J)+CU(I,J-1)+CU(I+1,J-1))
483. 2 -TDTSDY*(H(I,J)-H(I,J-1))
484. endif
485. if(j.eq.n)then
486. VNEW(I,J+1) = VOLD(I,J+1)-TDTS8*(Z(I+1,J+1)+Z(I,J+1))
487. 1 *(CU(I+1,J+1)+CU(I,J+1)+CU(I,J)+CU(I+1,J))
488. 2 -TDTSDY*(H(I,J+1)-H(I,J))
489. endif
490. PNEW(I,J) = POLD(I,J)-TDTSDX*(CU(I+1,J)-CU(I,J))
491. 1 -TDTSDY*(CV(I,J+1)-CV(I,J))
492. end do
493. end do
494. C$OMP END PARALLEL
495. call system_clock(count=c2, count_rate=r,count_max=max)
496. T200 = dble(c2 - T200)/dble(r)
497. C-----------------------------------------------------
498. C
499. C PERIODIC CONTINUATION
400. C
501. DO J=1,N
502. UNEW(1,J) = UNEW(M+1,J)
503. PNEW(M+1,J) = PNEW(1,J)
504. if(j.gt.1)then
505. VNEW(M+1,J) = VNEW(1,J)
506. endif
507. if(j.eq.n)then
508. VNEW(M+1,J+1) = VNEW(1,J+1)
509. endif
510. 210 end do
511. if(taskid.eq.0)then
512. cALL mpi_irecv(VNEW(1,1),n1,MPI_DOUBLE_PRECISION,
513. 1 numtasks-1,1,MPI_COMM_WORLD,req(1),ierr)
514. endif
515. if(taskid.eq.numtasks-1)then
516. cALL mpi_irecv(UNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
517. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
518. cALL mpi_irecv(PNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
519. 1 0,3,MPI_COMM_WORLD,req(3),ierr)
520. endif
521. if(taskid.eq.numtasks-1)then
522. cALL mpi_isend(VNEW(1,n+1),n1,MPI_DOUBLE_PRECISION,
523. 1 0,1,MPI_COMM_WORLD,req(4),ierr)
524. endif
525. if(taskid.eq.0)then
526. cALL mpi_isend(UNEW(1,1),n1,MPI_DOUBLE_PRECISION,
527. 1 numtasks-1,2,MPI_COMM_WORLD,req(5),ierr)
528. cALL mpi_isend(PNEW(1,1),n1,MPI_DOUBLE_PRECISION,
529. 1 numtasks-1,3,MPI_COMM_WORLD,req(6),ierr)
530. endif
531. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
532. cALL MPI_WAITALL(6,req,istat,ierr)
533. endif
534. if(taskid.eq.numtasks-1)then
535. cALL mpi_irecv(UNEW(1,N+1),1,MPI_DOUBLE_PRECISION,
536. 1 0,2,MPI_COMM_WORLD,req(1),ierr)
537. cALL mpi_irecv(PNEW(M+1,N+1),1,MPI_DOUBLE_PRECISION,
538. 1 0,3,MPI_COMM_WORLD,req(2),ierr)
539. endif
540. if(taskid.eq.0)then
541. cALL mpi_irecv(VNEW(M+1,1),1,MPI_DOUBLE_PRECISION,
542. 1 numtasks-1,1,MPI_COMM_WORLD,req(3),ierr)
543. endif
544. if(taskid.eq.0)then
545. cALL mpi_isend(UNEW(M+1,1),1,MPI_DOUBLE_PRECISION,
546. 1 numtasks-1,2,MPI_COMM_WORLD,req(4),ierr)
547. cALL mpi_isend(PNEW(1,1),1,MPI_DOUBLE_PRECISION,
548. 1 numtasks-1,3,MPI_COMM_WORLD,req(5),ierr)
549. endif
550. if(taskid.eq.numtasks-1)then
551. cALL mpi_isend(VNEW(1,n+1),1,MPI_DOUBLE_PRECISION,
552. 1 0,1,MPI_COMM_WORLD,req(6),ierr)
553. endif
554. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
555. cALL MPI_WAITALL(6,req,istat,ierr)
556. endif
557. C
558. TIME = TIME + DT
559. * gather and write results
560. call mpi_gatherv(pnew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
561. + precv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
562. + master,MPI_COMM_WORLD,ierr)
563.
564. call mpi_gatherv(unew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
565. + urecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
566. + master,MPI_COMM_WORLD,ierr)
567.
568. call mpi_gatherv(vnew(jsj,jsj),chunksize,MPI_DOUBLE_PRECISION,
569. + vrecv,irecvcounts,idisp,MPI_DOUBLE_PRECISION,
570. + master,MPI_COMM_WORLD,ierr)
571.
572. PTIME = TIME/3600.
573. if(taskid .eq. MASTER) then
574. IF(MOD(NCYCLE,MPRINT) .EQ. 0) then
575. WRITE(6,355) (PRECV(I,I),I=1,MNMIN)
576. 355 FORMAT(/' DIAGONAL ELEMENTS OF P ', //(8E15.7))
577. WRITE(6,360) (URECV(I,I),I=1,MNMIN)
578. 360 FORMAT(/' DIAGONAL ELEMENTS OF U ', //(8E15.7))
579. WRITE(6,365) (VRECV(I,I),I=1,MNMIN)
580. 365 FORMAT(/' DIAGONAL ELEMENTS OF V ', //(8E15.7))
581.
582. C
583. MFS310 = 24.*M*N/T310/1.D5
584. MFS100 = 24.*M*N/T100/1.D5
585. MFS200 = 26.*M*N/T200/1.D5
586. MFS300 = 15.*M*N/T300/1.D5
587. call system_clock(count=c2,count_rate=r,count_max=max)
588. CTIME = dble(c2 - TSTART)/dble(r)
589. TCYC = CTIME/FLOAT(NCYCLE)
590. WRITE(6,375) NCYCLE,CTIME,TCYC
591. 375 FORMAT(' CYCLE NUMBER', I5,' TOTAL COMPUTER TIME', D15.6,
592. + 'TIME PER CYCLE', D15.6)
593. endif
594. istart(3) = istart(3) + 1
595. * shape of record to be written (one ncycle at a time)
596. call my_ncwrite(ncid,p_id,istart,icount,precv,N1,N2)
597. call my_ncwrite(ncid,u_id,istart,icount,urecv,N1,N2)
598. call my_ncwrite(ncid,v_id,istart,icount,vrecv,N1,N2)
599.
600. c this endif is for whether task 0 is writing the output
601. endif
602.
603.
604.
605. C
606. C TIME SMOOTHING AND UPDATE FOR NEXT CYCLE
607. C
608. IF(NCYCLE .EQ. 1) THEN
609. C TIME SMOOTHER FOR FIRST ITERATION
610. C
611. TDT = TDT+TDT
612. call system_clock(count=c1, count_rate=r,count_max=max)
613. T310 = c1
614. DO J=jsj,jej
615. DO I=1,MP1
616. UOLD(I,J) = U(I,J)
617. VOLD(I,J) = V(I,J)
618. POLD(I,J) = P(I,J)
619. U(I,J) = UNEW(I,J)
620. V(I,J) = VNEW(I,J)
621. P(I,J) = PNEW(I,J)
622. if(j.eq.n)then
623. UOLD(I,J+1) = U(I,J+1)
624. VOLD(I,J+1) = V(I,J+1)
625. POLD(I,J+1) = P(I,J+1)
626. U(I,J+1) = UNEW(I,J+1)
627. V(I,J+1) = VNEW(I,J+1)
628. P(I,J+1) = PNEW(I,J+1)
629. endif
630. end do
631. 400 end do
632. call system_clock(count=c2, count_rate=r, count_max=max)
633. T310 = dble(c2 - T310)/dble(r)
634. ELSE
635. call system_clock(count=c1,count_rate=r,count_max=max)
636. T300 = c1
637. C
638. C TIME SMOOTHING AND UPDATE FOR NEXT CYCLE
639. C
640. C$OMP PARALLEL
641. C$OMP&SHARED (ALPHA,M,N,U,V,P,UNEW,VNEW,PNEW,UOLD,VOLD,POLD)
642. C$OMP&SHARED (JS,JE,jsj,jej)
643. C$OMP&PRIVATE (I,J)
644. I = 30+omp_get_thread_num()
645. C$OMP DO
646. DO J=jsj,jej
647. DO I=1,M
648. UOLD(I,J) = U(I,J)+ALPHA*(UNEW(I,J)-2.*U(I,J)+UOLD(I,J))
649. VOLD(I,J) = V(I,J)+ALPHA*(VNEW(I,J)-2.*V(I,J)+VOLD(I,J))
650. POLD(I,J) = P(I,J)+ALPHA*(PNEW(I,J)-2.*P(I,J)+POLD(I,J))
651. U(I,J) = UNEW(I,J)
652. V(I,J) = VNEW(I,J)
653. P(I,J) = PNEW(I,J)
654. end do
655. 300 end do
656. C$OMP END PARALLEL
657. call system_clock(count=c2,count_rate=r, count_max=max)
658. T300 = dble(c2 - T300)/dble(r)
659. C-------------------------------------------
660. C
661. C PERIODIC CONTINUATION
662. C
663. C----------------------------------------------
664. DO J=1,N
665. UOLD(M+1,J) = UOLD(1,J)
666. VOLD(M+1,J) = VOLD(1,J)
667. POLD(M+1,J) = POLD(1,J)
668. U(M+1,J) = U(1,J)
669. V(M+1,J) = V(1,J)
670. P(M+1,J) = P(1,J)
671. 320 end do
672. C------------------------------------------------------
673. if(taskid.eq.numtasks-1)then
674. cALL mpi_irecv(v(1,n+1),n1,MPI_DOUBLE_PRECISION,
675. 1 0,1,MPI_COMM_WORLD,req(1),ierr)
676. cALL mpi_irecv(u(1,n+1),n1,MPI_DOUBLE_PRECISION,
677. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
678. cALL mpi_irecv(p(1,n+1),n1,MPI_DOUBLE_PRECISION,
679. 1 0,3,MPI_COMM_WORLD,req(3),ierr)
680. cALL mpi_irecv(vold(1,n+1),n1,MPI_DOUBLE_PRECISION,
681. 1 0,4,MPI_COMM_WORLD,req(4),ierr)
682. cALL mpi_irecv(uold(1,n+1),n1,MPI_DOUBLE_PRECISION,
683. 1 0,5,MPI_COMM_WORLD,req(5),ierr)
684. cALL mpi_irecv(pold(1,n+1),n1,MPI_DOUBLE_PRECISION,
685. 1 0,6,MPI_COMM_WORLD,req(6),ierr)
686. endif
687. if(taskid.eq.0)then
688. cALL mpi_isend(v(1,1),n1,MPI_DOUBLE_PRECISION,
689. 1 numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)
690. cALL mpi_isend(u(1,1),n1,MPI_DOUBLE_PRECISION,
691. 1 numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
692. cALL mpi_isend(p(1,1),n1,MPI_DOUBLE_PRECISION,
693. 1 numtasks-1,3,MPI_COMM_WORLD,req(9),ierr)
694. cALL mpi_isend(vold(1,1),n1,MPI_DOUBLE_PRECISION,
695. 1 numtasks-1,4,MPI_COMM_WORLD,req(10),ierr)
696. cALL mpi_isend(uold(1,1),n1,MPI_DOUBLE_PRECISION,
697. 1 numtasks-1,5,MPI_COMM_WORLD,req(11),ierr)
698. cALL mpi_isend(pold(1,1),n1,MPI_DOUBLE_PRECISION,
699. 1 numtasks-1,6,MPI_COMM_WORLD,req(12),ierr)
700. endif
701. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
702. cALL MPI_WAITALL(12,req,istat,ierr)
703. endif
704. if(taskid.eq.numtasks-1)then
705. cALL mpi_irecv(v(m+1,n+1),1,MPI_DOUBLE_PRECISION,
706. 1 0,1,MPI_COMM_WORLD,req(1),ierr)
707. cALL mpi_irecv(u(m+1,n+1),1,MPI_DOUBLE_PRECISION,
708. 1 0,2,MPI_COMM_WORLD,req(2),ierr)
709. cALL mpi_irecv(p(m+1,n+1),1,MPI_DOUBLE_PRECISION,
710. 1 0,3,MPI_COMM_WORLD,req(3),ierr)
711. cALL mpi_irecv(vold(m+1,n+1),1,MPI_DOUBLE_PRECISION,
712. 1 0,4,MPI_COMM_WORLD,req(4),ierr)
713. cALL mpi_irecv(uold(m+1,n+1),1,MPI_DOUBLE_PRECISION,
714. 1 0,5,MPI_COMM_WORLD,req(5),ierr)
715. cALL mpi_irecv(pold(m+1,n+1),1,MPI_DOUBLE_PRECISION,
716. 1 0,6,MPI_COMM_WORLD,req(6),ierr)
717. endif
718. if(taskid.eq.0)then
719. cALL mpi_isend(v(1,1),1,MPI_DOUBLE_PRECISION,
720. 1 numtasks-1,1,MPI_COMM_WORLD,req(7),ierr)
721. cALL mpi_isend(u(1,1),1,MPI_DOUBLE_PRECISION,
722. 1 numtasks-1,2,MPI_COMM_WORLD,req(8),ierr)
723. cALL mpi_isend(p(1,1),1,MPI_DOUBLE_PRECISION,
724. 1 numtasks-1,3,MPI_COMM_WORLD,req(9),ierr)
725. cALL mpi_isend(vold(1,1),1,MPI_DOUBLE_PRECISION,
726. 1 numtasks-1,4,MPI_COMM_WORLD,req(10),ierr)
727. cALL mpi_isend(uold(1,1),1,MPI_DOUBLE_PRECISION,
728. 1 numtasks-1,5,MPI_COMM_WORLD,req(11),ierr)
729. cALL mpi_isend(pold(1,1),1,MPI_DOUBLE_PRECISION,
730. 1 numtasks-1,6,MPI_COMM_WORLD,req(12),ierr)
731. endif
732. if(taskid.eq.0.or.taskid.eq.numtasks-1)then
733. cALL MPI_WAITALL(12,req,istat,ierr)
734. endif
735. endif
736. C
737. * end big loop
738. end do
739.
740. * close the netCDF file
741. iret = nf_close(ncid)
742. call check_err(iret)
743.
744. CALL MPI_FINALIZE(ierr)
745. STOP
746. end
747. *-------netcdf helper routines----------------------
748. subroutine check_err(iret)
749. integer iret
750. include 'netcdf.inc'
751. if(iret .ne. NF_NOERR) then
752. print *, nf_strerror(iret)
753. stop
754. endif
755. end
756.
757. subroutine netcdf_setup(N1,N2,ncid,p_id,u_id,v_id,istart,icount)
758. * Input args: N1, N2
759. * Output args: ncid, p_id,u_id,v_id,istart,icount)
760. integer N1,N2
761. * declarations for netCDF library
762. include 'netcdf.inc'
763. * error status return
764. integer iret
765. * netCDF id
766. integer ncid
767. * dimension ids
768. integer m_dim
769. integer n_dim
770. integer time_dim
771. * variable ids
772. integer p_id
773. integer u_id
774. integer v_id
775. * rank (number of dimensions) for each variable
776. integer p_rank, u_rank, v_rank
777. parameter (p_rank = 3)
778. parameter (u_rank = 3)
779. parameter (v_rank = 3)
780. * variable shapes
781. integer p_dims(p_rank)
782. integer u_dims(u_rank)
783. integer v_dims(v_rank)
784. integer istart(p_rank)
785. integer icount(p_rank)
786.
787. * enter define mode
788. iret = nf_create('shallowdat.nc', NF_CLOBBER,
789. + ncid)
790. call check_err(iret)
791. * define dimensions
792. iret = nf_def_dim(ncid, 'm', N1, m_dim)
793. call check_err(iret)
794. iret = nf_def_dim(ncid, 'n', N2, n_dim)
795. call check_err(iret)
796. iret = nf_def_dim(ncid, 'time', NF_UNLIMITED, time_dim)
797. call check_err(iret)
798. * define variables
799. p_dims(1) = m_dim
800. p_dims(2) = n_dim
801. p_dims(3) = time_dim
802. iret = nf_def_var(ncid, 'p', NF_DOUBLE, p_rank, p_dims, p_id)
803. call check_err(iret)
804. u_dims(1) = m_dim
805. u_dims(2) = n_dim
806. u_dims(3) = time_dim
807. iret = nf_def_var(ncid, 'u', NF_DOUBLE, u_rank, u_dims, u_id)
808. call check_err(iret)
809. v_dims(1) = m_dim
810. v_dims(2) = n_dim
811. v_dims(3) = time_dim
812. iret = nf_def_var(ncid, 'v', NF_DOUBLE, v_rank, v_dims, v_id)
813. call check_err(iret)
814. * start netCDF write at the first (1,1,1) position of the array
815. istart(1) = 1
816. istart(2) = 1
817. istart(3) = 1
818. * shape of record to be written (one ncycle at a time)
819. icount(1) = N1
820. icount(2) = N2
821. icount(3) = 1
822.
823. * leave define mode
824. iret = nf_enddef(ncid)
825. call check_err(iret)
826.
827. * end of netCDF definitions
828. end subroutine netcdf_setup
829.
830. subroutine my_ncwrite(id,varid,istart,icount,var,N1,N2)
831. * Input args: id, varid,istart,icount,var,N1,N2
832. * Write a whole array out to the netCDF file
833. integer id,varid,iret
834. integer icount(3)
835. integer istart(3)
836. real var (N1,N2)
837. iret = nf_put_vara_double(id,varid,istart,icount,var)
838. call check_err(iret)
839.
840. end subroutine my_ncwrite
If you have questions about this document, please contact SCD Customer Support. You can also reach us by telephone 24 hours a day, seven days a week at 303-497-1278. Additional contact methods: consult1@ucar.edu and during business hours in NCAR Mesa Lab Suite 39. © Copyright 2006. University Corporation for Atmospheric Research (UCAR). All Rights Reserved. |
|
|
|
|